{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9763241396143519, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "accuracy": 16.1133, "active_queue_size": 16384.0, "cl_loss": 152.6867, "doc_norm": 8.4295, "encoder_q-embeddings": 51007.5977, "encoder_q-layer.0": 53569.707, "encoder_q-layer.1": 41899.4609, "encoder_q-layer.10": 135516.8594, "encoder_q-layer.11": 77749.5703, "encoder_q-layer.2": 47946.8906, "encoder_q-layer.3": 54582.1797, "encoder_q-layer.4": 62467.4023, "encoder_q-layer.5": 71678.1875, "encoder_q-layer.6": 95023.6719, "encoder_q-layer.7": 113375.4844, "encoder_q-layer.8": 144191.7656, "encoder_q-layer.9": 117985.0703, "epoch": 0.0, "inbatch_neg_score": 40.2664, "inbatch_pos_score": 48.8438, "learning_rate": 5.000000000000001e-07, "loss": 152.6867, "norm_diff": 0.4408, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 120451.7487, "preclip_grad_norm_avg": 0.0011, "q@queue_neg_score": 40.25, "query_norm": 7.9887, "queue_k_norm": 8.42, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7838, "sent_len_1": 66.8485, "sent_max_len_0": 128.0, "sent_max_len_1": 191.14, "stdk": 0.1804, "stdq": 0.1822, "stdqueue_k": 0.1803, "stdqueue_q": 0.0, "step": 100 }, { "accuracy": 23.4375, "active_queue_size": 16384.0, "cl_loss": 98.4148, "doc_norm": 8.3214, "encoder_q-embeddings": 6186.1929, "encoder_q-layer.0": 5516.9307, "encoder_q-layer.1": 5860.103, "encoder_q-layer.10": 16453.1895, "encoder_q-layer.11": 17405.5723, "encoder_q-layer.2": 6710.937, "encoder_q-layer.3": 7751.21, "encoder_q-layer.4": 8745.4561, "encoder_q-layer.5": 9579.3057, "encoder_q-layer.6": 11831.2695, "encoder_q-layer.7": 12676.0449, "encoder_q-layer.8": 15957.1455, "encoder_q-layer.9": 12641.7002, "epoch": 0.0, "inbatch_neg_score": 36.1859, "inbatch_pos_score": 41.8125, "learning_rate": 1.0000000000000002e-06, "loss": 98.4148, "norm_diff": 1.1728, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15678.1734, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 36.2188, "query_norm": 7.1486, "queue_k_norm": 8.3464, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1228, "sent_len_1": 66.9379, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8713, "stdk": 0.1769, "stdq": 0.1352, "stdqueue_k": 0.178, "stdqueue_q": 0.0, "step": 200 }, { "accuracy": 21.9727, "active_queue_size": 16384.0, "cl_loss": 61.9136, "doc_norm": 8.2003, "encoder_q-embeddings": 3729.0554, "encoder_q-layer.0": 3289.6116, "encoder_q-layer.1": 3431.8345, "encoder_q-layer.10": 9481.2656, "encoder_q-layer.11": 14086.2002, "encoder_q-layer.2": 3918.9822, "encoder_q-layer.3": 4536.834, "encoder_q-layer.4": 5001.6821, "encoder_q-layer.5": 5331.4565, "encoder_q-layer.6": 6225.4551, "encoder_q-layer.7": 6548.1802, "encoder_q-layer.8": 7332.2334, "encoder_q-layer.9": 6760.0791, "epoch": 0.0, "inbatch_neg_score": 33.6667, "inbatch_pos_score": 37.3438, "learning_rate": 1.5e-06, "loss": 61.9136, "norm_diff": 1.3912, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10066.434, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 33.6875, "query_norm": 6.8091, "queue_k_norm": 8.22, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7772, "sent_len_1": 66.5307, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1687, "stdk": 0.1733, "stdq": 0.1107, "stdqueue_k": 0.174, "stdqueue_q": 0.0, "step": 300 }, { "accuracy": 23.1445, "active_queue_size": 16384.0, "cl_loss": 45.1854, "doc_norm": 8.0457, "encoder_q-embeddings": 2858.24, "encoder_q-layer.0": 2590.9626, "encoder_q-layer.1": 3073.8232, "encoder_q-layer.10": 6466.6074, "encoder_q-layer.11": 12099.9307, "encoder_q-layer.2": 3444.9409, "encoder_q-layer.3": 3504.2925, "encoder_q-layer.4": 3945.6741, "encoder_q-layer.5": 4097.5342, "encoder_q-layer.6": 4063.8943, "encoder_q-layer.7": 4379.2822, "encoder_q-layer.8": 5131.9609, "encoder_q-layer.9": 4168.5591, "epoch": 0.0, "inbatch_neg_score": 31.5259, "inbatch_pos_score": 34.3438, "learning_rate": 2.0000000000000003e-06, "loss": 45.1854, "norm_diff": 1.3234, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8146.2371, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 31.4531, "query_norm": 6.7223, "queue_k_norm": 8.0574, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0017, "sent_len_1": 66.9184, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4412, "stdk": 0.1667, "stdq": 0.0987, "stdqueue_k": 0.168, "stdqueue_q": 0.0, "step": 400 }, { "accuracy": 21.7773, "active_queue_size": 16384.0, "cl_loss": 34.5652, "doc_norm": 7.8753, "encoder_q-embeddings": 3824.6829, "encoder_q-layer.0": 3238.5513, "encoder_q-layer.1": 3905.2439, "encoder_q-layer.10": 5740.6982, "encoder_q-layer.11": 8291.0645, "encoder_q-layer.2": 4116.2266, "encoder_q-layer.3": 4481.3774, "encoder_q-layer.4": 4755.6123, "encoder_q-layer.5": 4885.0317, "encoder_q-layer.6": 4665.1968, "encoder_q-layer.7": 4779.8628, "encoder_q-layer.8": 5177.8521, "encoder_q-layer.9": 3587.8091, "epoch": 0.0, "inbatch_neg_score": 28.7854, "inbatch_pos_score": 30.8594, "learning_rate": 2.5e-06, "loss": 34.5652, "norm_diff": 1.1632, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7096.5572, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 28.7188, "query_norm": 6.7122, "queue_k_norm": 7.899, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8715, "sent_len_1": 66.8038, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5425, "stdk": 0.1616, "stdq": 0.0973, "stdqueue_k": 0.1634, "stdqueue_q": 0.0, "step": 500 }, { "accuracy": 25.0977, "active_queue_size": 16384.0, "cl_loss": 28.0766, "doc_norm": 7.7053, "encoder_q-embeddings": 2919.553, "encoder_q-layer.0": 2174.5518, "encoder_q-layer.1": 2527.2869, "encoder_q-layer.10": 4905.1699, "encoder_q-layer.11": 8246.8965, "encoder_q-layer.2": 2868.7869, "encoder_q-layer.3": 3287.3162, "encoder_q-layer.4": 3584.0942, "encoder_q-layer.5": 3656.9592, "encoder_q-layer.6": 4182.854, "encoder_q-layer.7": 4374.104, "encoder_q-layer.8": 4863.1099, "encoder_q-layer.9": 3183.1306, "epoch": 0.01, "inbatch_neg_score": 26.7571, "inbatch_pos_score": 28.7031, "learning_rate": 3e-06, "loss": 28.0766, "norm_diff": 1.2085, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6247.8511, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 26.7188, "query_norm": 6.4967, "queue_k_norm": 7.7237, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0611, "sent_len_1": 66.6908, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3963, "stdk": 0.1564, "stdq": 0.0912, "stdqueue_k": 0.1572, "stdqueue_q": 0.0, "step": 600 }, { "accuracy": 20.3125, "active_queue_size": 16384.0, "cl_loss": 24.4455, "doc_norm": 7.5581, "encoder_q-embeddings": 4678.5566, "encoder_q-layer.0": 3961.03, "encoder_q-layer.1": 4597.9473, "encoder_q-layer.10": 6290.4248, "encoder_q-layer.11": 9845.4521, "encoder_q-layer.2": 5590.4956, "encoder_q-layer.3": 6382.0537, "encoder_q-layer.4": 7086.5435, "encoder_q-layer.5": 7008.1816, "encoder_q-layer.6": 6839.5986, "encoder_q-layer.7": 5888.8345, "encoder_q-layer.8": 5331.6733, "encoder_q-layer.9": 3622.8784, "epoch": 0.01, "inbatch_neg_score": 22.6203, "inbatch_pos_score": 24.0625, "learning_rate": 3.5000000000000004e-06, "loss": 24.4455, "norm_diff": 1.7144, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8988.4622, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 22.6406, "query_norm": 5.8437, "queue_k_norm": 7.5491, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9345, "sent_len_1": 66.8035, "sent_max_len_0": 128.0, "sent_max_len_1": 189.655, "stdk": 0.1527, "stdq": 0.0848, "stdqueue_k": 0.1516, "stdqueue_q": 0.0, "step": 700 }, { "accuracy": 23.3398, "active_queue_size": 16384.0, "cl_loss": 20.7209, "doc_norm": 7.3859, "encoder_q-embeddings": 5386.4146, "encoder_q-layer.0": 4290.5127, "encoder_q-layer.1": 5047.5718, "encoder_q-layer.10": 4962.1499, "encoder_q-layer.11": 7966.4873, "encoder_q-layer.2": 5975.3179, "encoder_q-layer.3": 6707.395, "encoder_q-layer.4": 7883.54, "encoder_q-layer.5": 8294.8926, "encoder_q-layer.6": 7347.8965, "encoder_q-layer.7": 4995.4824, "encoder_q-layer.8": 4064.4773, "encoder_q-layer.9": 2562.7864, "epoch": 0.01, "inbatch_neg_score": 16.8464, "inbatch_pos_score": 18.0938, "learning_rate": 4.000000000000001e-06, "loss": 20.7209, "norm_diff": 2.4878, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8796.3367, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 16.8125, "query_norm": 4.8981, "queue_k_norm": 7.3864, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0355, "sent_len_1": 66.9774, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8975, "stdk": 0.1448, "stdq": 0.079, "stdqueue_k": 0.1456, "stdqueue_q": 0.0, "step": 800 }, { "accuracy": 21.7773, "active_queue_size": 16384.0, "cl_loss": 17.2291, "doc_norm": 7.2554, "encoder_q-embeddings": 3836.0134, "encoder_q-layer.0": 3397.3567, "encoder_q-layer.1": 4210.4028, "encoder_q-layer.10": 3917.2729, "encoder_q-layer.11": 6850.8428, "encoder_q-layer.2": 4756.7373, "encoder_q-layer.3": 5011.4355, "encoder_q-layer.4": 5103.0269, "encoder_q-layer.5": 5417.605, "encoder_q-layer.6": 4845.3281, "encoder_q-layer.7": 4115.0869, "encoder_q-layer.8": 4466.5796, "encoder_q-layer.9": 2171.3955, "epoch": 0.01, "inbatch_neg_score": 12.5544, "inbatch_pos_score": 13.5703, "learning_rate": 4.5e-06, "loss": 17.2291, "norm_diff": 3.2823, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6738.4929, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 12.5547, "query_norm": 3.9731, "queue_k_norm": 7.2469, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9054, "sent_len_1": 66.6723, "sent_max_len_0": 128.0, "sent_max_len_1": 189.71, "stdk": 0.1418, "stdq": 0.0715, "stdqueue_k": 0.1412, "stdqueue_q": 0.0, "step": 900 }, { "accuracy": 24.3164, "active_queue_size": 16384.0, "cl_loss": 14.7158, "doc_norm": 7.0913, "encoder_q-embeddings": 7003.1187, "encoder_q-layer.0": 6053.4297, "encoder_q-layer.1": 7565.7158, "encoder_q-layer.10": 6889.4858, "encoder_q-layer.11": 9275.0742, "encoder_q-layer.2": 9215.9004, "encoder_q-layer.3": 10496.5645, "encoder_q-layer.4": 10570.2217, "encoder_q-layer.5": 12979.5234, "encoder_q-layer.6": 10683.4268, "encoder_q-layer.7": 6766.2407, "encoder_q-layer.8": 5287.9434, "encoder_q-layer.9": 3001.6709, "epoch": 0.01, "inbatch_neg_score": 6.2447, "inbatch_pos_score": 7.0664, "learning_rate": 5e-06, "loss": 14.7158, "norm_diff": 4.1098, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12373.619, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 6.2305, "query_norm": 2.9816, "queue_k_norm": 7.1011, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7538, "sent_len_1": 66.7028, "sent_max_len_0": 128.0, "sent_max_len_1": 190.835, "stdk": 0.1346, "stdq": 0.0659, "stdqueue_k": 0.1356, "stdqueue_q": 0.0, "step": 1000 }, { "accuracy": 22.8516, "active_queue_size": 16384.0, "cl_loss": 13.0161, "doc_norm": 6.9623, "encoder_q-embeddings": 5020.8496, "encoder_q-layer.0": 4367.4409, "encoder_q-layer.1": 5435.6465, "encoder_q-layer.10": 6886.0049, "encoder_q-layer.11": 9586.541, "encoder_q-layer.2": 6104.6611, "encoder_q-layer.3": 6454.103, "encoder_q-layer.4": 6910.0054, "encoder_q-layer.5": 7788.3604, "encoder_q-layer.6": 6524.2666, "encoder_q-layer.7": 4797.209, "encoder_q-layer.8": 4587.3198, "encoder_q-layer.9": 2720.4368, "epoch": 0.01, "inbatch_neg_score": 5.1627, "inbatch_pos_score": 5.9102, "learning_rate": 5.500000000000001e-06, "loss": 13.0161, "norm_diff": 4.4445, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8832.3905, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 5.1523, "query_norm": 2.5179, "queue_k_norm": 6.9724, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9937, "sent_len_1": 66.8561, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8713, "stdk": 0.1295, "stdq": 0.0613, "stdqueue_k": 0.1306, "stdqueue_q": 0.0, "step": 1100 }, { "accuracy": 21.4844, "active_queue_size": 16384.0, "cl_loss": 11.8585, "doc_norm": 6.8405, "encoder_q-embeddings": 6105.896, "encoder_q-layer.0": 5575.6924, "encoder_q-layer.1": 5750.4629, "encoder_q-layer.10": 3034.7285, "encoder_q-layer.11": 5181.7393, "encoder_q-layer.2": 5588.8657, "encoder_q-layer.3": 6181.9927, "encoder_q-layer.4": 6501.4424, "encoder_q-layer.5": 6340.4478, "encoder_q-layer.6": 5119.0654, "encoder_q-layer.7": 3790.7144, "encoder_q-layer.8": 3076.1892, "encoder_q-layer.9": 1429.9585, "epoch": 0.01, "inbatch_neg_score": 4.361, "inbatch_pos_score": 5.0312, "learning_rate": 6e-06, "loss": 11.8585, "norm_diff": 4.5677, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7643.8992, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 4.3516, "query_norm": 2.2727, "queue_k_norm": 6.8585, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9739, "sent_len_1": 66.9489, "sent_max_len_0": 128.0, "sent_max_len_1": 190.79, "stdk": 0.1242, "stdq": 0.059, "stdqueue_k": 0.1257, "stdqueue_q": 0.0, "step": 1200 }, { "accuracy": 22.3633, "active_queue_size": 16384.0, "cl_loss": 11.12, "doc_norm": 6.7301, "encoder_q-embeddings": 5076.1084, "encoder_q-layer.0": 4198.1504, "encoder_q-layer.1": 5093.6304, "encoder_q-layer.10": 4892.8271, "encoder_q-layer.11": 7971.5298, "encoder_q-layer.2": 5878.6816, "encoder_q-layer.3": 6171.4009, "encoder_q-layer.4": 6603.8149, "encoder_q-layer.5": 6605.2729, "encoder_q-layer.6": 5236.2656, "encoder_q-layer.7": 4029.437, "encoder_q-layer.8": 3778.9209, "encoder_q-layer.9": 2266.604, "epoch": 0.01, "inbatch_neg_score": 3.8684, "inbatch_pos_score": 4.5391, "learning_rate": 6.5000000000000004e-06, "loss": 11.12, "norm_diff": 4.58, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7828.1873, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 3.873, "query_norm": 2.1502, "queue_k_norm": 6.7462, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9514, "sent_len_1": 66.4993, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7375, "stdk": 0.1205, "stdq": 0.0578, "stdqueue_k": 0.1208, "stdqueue_q": 0.0, "step": 1300 }, { "accuracy": 22.7539, "active_queue_size": 16384.0, "cl_loss": 10.6478, "doc_norm": 6.6385, "encoder_q-embeddings": 6853.3169, "encoder_q-layer.0": 5723.9565, "encoder_q-layer.1": 6950.7607, "encoder_q-layer.10": 5074.8027, "encoder_q-layer.11": 7628.8955, "encoder_q-layer.2": 7920.2319, "encoder_q-layer.3": 9578.335, "encoder_q-layer.4": 11384.2676, "encoder_q-layer.5": 10430.2451, "encoder_q-layer.6": 9264.7822, "encoder_q-layer.7": 8680.9775, "encoder_q-layer.8": 7842.1167, "encoder_q-layer.9": 2144.6252, "epoch": 0.01, "inbatch_neg_score": 0.4837, "inbatch_pos_score": 1.0859, "learning_rate": 7.000000000000001e-06, "loss": 10.6478, "norm_diff": 4.5605, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11596.4019, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4954, "query_norm": 2.078, "queue_k_norm": 6.6406, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9964, "sent_len_1": 66.6952, "sent_max_len_0": 128.0, "sent_max_len_1": 193.69, "stdk": 0.1167, "stdq": 0.0576, "stdqueue_k": 0.1161, "stdqueue_q": 0.0, "step": 1400 }, { "accuracy": 29.3945, "active_queue_size": 16384.0, "cl_loss": 10.2913, "doc_norm": 6.5185, "encoder_q-embeddings": 2317.3347, "encoder_q-layer.0": 2040.4631, "encoder_q-layer.1": 2488.2002, "encoder_q-layer.10": 3016.5315, "encoder_q-layer.11": 5266.8667, "encoder_q-layer.2": 2849.0801, "encoder_q-layer.3": 2999.0083, "encoder_q-layer.4": 3306.5488, "encoder_q-layer.5": 3838.7483, "encoder_q-layer.6": 3354.4009, "encoder_q-layer.7": 2812.9854, "encoder_q-layer.8": 3181.0479, "encoder_q-layer.9": 1624.0332, "epoch": 0.01, "inbatch_neg_score": 2.8672, "inbatch_pos_score": 3.5391, "learning_rate": 7.5e-06, "loss": 10.2913, "norm_diff": 4.4826, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4384.8854, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 2.8535, "query_norm": 2.036, "queue_k_norm": 6.5362, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1342, "sent_len_1": 66.8171, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4212, "stdk": 0.1108, "stdq": 0.0551, "stdqueue_k": 0.1117, "stdqueue_q": 0.0, "step": 1500 }, { "accuracy": 25.5859, "active_queue_size": 16384.0, "cl_loss": 10.1659, "doc_norm": 6.4187, "encoder_q-embeddings": 5117.6982, "encoder_q-layer.0": 4360.0142, "encoder_q-layer.1": 5421.3486, "encoder_q-layer.10": 3502.4888, "encoder_q-layer.11": 6449.5957, "encoder_q-layer.2": 6348.2896, "encoder_q-layer.3": 7336.668, "encoder_q-layer.4": 9089.9873, "encoder_q-layer.5": 11757.1758, "encoder_q-layer.6": 11675.5254, "encoder_q-layer.7": 11565.0283, "encoder_q-layer.8": 14050.2793, "encoder_q-layer.9": 2745.9832, "epoch": 0.02, "inbatch_neg_score": 1.4739, "inbatch_pos_score": 2.1094, "learning_rate": 8.000000000000001e-06, "loss": 10.1659, "norm_diff": 4.3007, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11956.2675, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4619, "query_norm": 2.1179, "queue_k_norm": 6.4355, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.967, "sent_len_1": 66.7065, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5288, "stdk": 0.1059, "stdq": 0.0571, "stdqueue_k": 0.1074, "stdqueue_q": 0.0, "step": 1600 }, { "accuracy": 26.4648, "active_queue_size": 16384.0, "cl_loss": 9.8474, "doc_norm": 6.2998, "encoder_q-embeddings": 3390.6692, "encoder_q-layer.0": 2550.1501, "encoder_q-layer.1": 3091.7336, "encoder_q-layer.10": 4054.4353, "encoder_q-layer.11": 6081.4326, "encoder_q-layer.2": 3583.7302, "encoder_q-layer.3": 4008.1538, "encoder_q-layer.4": 4194.8525, "encoder_q-layer.5": 4100.749, "encoder_q-layer.6": 3310.9434, "encoder_q-layer.7": 2567.8972, "encoder_q-layer.8": 2661.2268, "encoder_q-layer.9": 2427.8179, "epoch": 0.02, "inbatch_neg_score": 1.2413, "inbatch_pos_score": 1.8281, "learning_rate": 8.500000000000002e-06, "loss": 9.8474, "norm_diff": 4.2423, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5235.0911, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2354, "query_norm": 2.0575, "queue_k_norm": 6.328, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.096, "sent_len_1": 66.8993, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8638, "stdk": 0.102, "stdq": 0.0569, "stdqueue_k": 0.1031, "stdqueue_q": 0.0, "step": 1700 }, { "accuracy": 25.9766, "active_queue_size": 16384.0, "cl_loss": 9.5674, "doc_norm": 6.1876, "encoder_q-embeddings": 12693.8818, "encoder_q-layer.0": 11755.1133, "encoder_q-layer.1": 13685.4785, "encoder_q-layer.10": 3898.5986, "encoder_q-layer.11": 5853.0601, "encoder_q-layer.2": 14593.3398, "encoder_q-layer.3": 15675.1875, "encoder_q-layer.4": 17667.0547, "encoder_q-layer.5": 15614.4443, "encoder_q-layer.6": 12648.0723, "encoder_q-layer.7": 9721.9795, "encoder_q-layer.8": 8022.1592, "encoder_q-layer.9": 2939.8848, "epoch": 0.02, "inbatch_neg_score": 1.8673, "inbatch_pos_score": 2.4785, "learning_rate": 9e-06, "loss": 9.5674, "norm_diff": 4.1094, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17854.8773, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 1.8643, "query_norm": 2.0782, "queue_k_norm": 6.2058, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8858, "sent_len_1": 66.7078, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2962, "stdk": 0.0986, "stdq": 0.0577, "stdqueue_k": 0.0991, "stdqueue_q": 0.0, "step": 1800 }, { "accuracy": 24.1211, "active_queue_size": 16384.0, "cl_loss": 9.4188, "doc_norm": 6.0618, "encoder_q-embeddings": 7496.1006, "encoder_q-layer.0": 6368.9683, "encoder_q-layer.1": 7031.6719, "encoder_q-layer.10": 4900.3135, "encoder_q-layer.11": 5984.6592, "encoder_q-layer.2": 6769.1797, "encoder_q-layer.3": 7260.5469, "encoder_q-layer.4": 7191.2959, "encoder_q-layer.5": 6851.2905, "encoder_q-layer.6": 4693.9624, "encoder_q-layer.7": 3475.1055, "encoder_q-layer.8": 3344.3804, "encoder_q-layer.9": 2331.8652, "epoch": 0.02, "inbatch_neg_score": 0.3877, "inbatch_pos_score": 0.9648, "learning_rate": 9.5e-06, "loss": 9.4188, "norm_diff": 3.8818, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8762.1053, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3818, "query_norm": 2.18, "queue_k_norm": 6.0767, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.038, "sent_len_1": 66.8197, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7525, "stdk": 0.0939, "stdq": 0.0609, "stdqueue_k": 0.0952, "stdqueue_q": 0.0, "step": 1900 }, { "accuracy": 24.3164, "active_queue_size": 16384.0, "cl_loss": 9.2028, "doc_norm": 5.9258, "encoder_q-embeddings": 6337.502, "encoder_q-layer.0": 5136.2886, "encoder_q-layer.1": 6217.7012, "encoder_q-layer.10": 5915.4165, "encoder_q-layer.11": 6690.9316, "encoder_q-layer.2": 7487.5703, "encoder_q-layer.3": 7591.8301, "encoder_q-layer.4": 7863.6704, "encoder_q-layer.5": 7124.5464, "encoder_q-layer.6": 5649.3862, "encoder_q-layer.7": 5210.644, "encoder_q-layer.8": 4757.3521, "encoder_q-layer.9": 3741.1741, "epoch": 0.02, "inbatch_neg_score": 1.2622, "inbatch_pos_score": 1.8281, "learning_rate": 1e-05, "loss": 9.2028, "norm_diff": 3.749, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9048.9027, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.2598, "query_norm": 2.1768, "queue_k_norm": 5.9313, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.757, "sent_len_1": 66.4313, "sent_max_len_0": 128.0, "sent_max_len_1": 188.725, "stdk": 0.092, "stdq": 0.0604, "stdqueue_k": 0.0918, "stdqueue_q": 0.0, "step": 2000 }, { "accuracy": 26.3672, "active_queue_size": 16384.0, "cl_loss": 8.9643, "doc_norm": 5.7567, "encoder_q-embeddings": 12919.2881, "encoder_q-layer.0": 11313.4482, "encoder_q-layer.1": 12379.6309, "encoder_q-layer.10": 2239.6572, "encoder_q-layer.11": 3857.6262, "encoder_q-layer.2": 14157.3076, "encoder_q-layer.3": 14984.0381, "encoder_q-layer.4": 14584.9121, "encoder_q-layer.5": 14634.5342, "encoder_q-layer.6": 9965.7676, "encoder_q-layer.7": 7566.5913, "encoder_q-layer.8": 7879.6206, "encoder_q-layer.9": 2312.843, "epoch": 0.02, "inbatch_neg_score": 1.0038, "inbatch_pos_score": 1.5898, "learning_rate": 1.05e-05, "loss": 8.9643, "norm_diff": 3.5766, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16510.0582, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.999, "query_norm": 2.1801, "queue_k_norm": 5.7695, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0797, "sent_len_1": 66.9304, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2463, "stdk": 0.0873, "stdq": 0.0609, "stdqueue_k": 0.0881, "stdqueue_q": 0.0, "step": 2100 }, { "accuracy": 26.3672, "active_queue_size": 16384.0, "cl_loss": 8.73, "doc_norm": 5.5551, "encoder_q-embeddings": 11415.8613, "encoder_q-layer.0": 9797.6904, "encoder_q-layer.1": 11668.1025, "encoder_q-layer.10": 9421.3262, "encoder_q-layer.11": 10732.5957, "encoder_q-layer.2": 13241.7676, "encoder_q-layer.3": 13175.9482, "encoder_q-layer.4": 13794.2217, "encoder_q-layer.5": 14288.5381, "encoder_q-layer.6": 14483.4561, "encoder_q-layer.7": 15834.4404, "encoder_q-layer.8": 15432.7031, "encoder_q-layer.9": 6712.8267, "epoch": 0.02, "inbatch_neg_score": 0.4619, "inbatch_pos_score": 1.0166, "learning_rate": 1.1000000000000001e-05, "loss": 8.73, "norm_diff": 3.3905, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18184.4984, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4646, "query_norm": 2.1646, "queue_k_norm": 5.597, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7649, "sent_len_1": 66.7646, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2637, "stdk": 0.0842, "stdq": 0.0591, "stdqueue_k": 0.085, "stdqueue_q": 0.0, "step": 2200 }, { "accuracy": 25.293, "active_queue_size": 16384.0, "cl_loss": 8.3553, "doc_norm": 5.35, "encoder_q-embeddings": 5705.1411, "encoder_q-layer.0": 4769.1045, "encoder_q-layer.1": 5296.2285, "encoder_q-layer.10": 11568.3174, "encoder_q-layer.11": 11316.4355, "encoder_q-layer.2": 6202.4102, "encoder_q-layer.3": 6571.5391, "encoder_q-layer.4": 6900.2842, "encoder_q-layer.5": 6537.2485, "encoder_q-layer.6": 6244.1709, "encoder_q-layer.7": 6335.9824, "encoder_q-layer.8": 7365.1191, "encoder_q-layer.9": 7952.9458, "epoch": 0.02, "inbatch_neg_score": 1.2201, "inbatch_pos_score": 1.7646, "learning_rate": 1.1500000000000002e-05, "loss": 8.3553, "norm_diff": 3.1451, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10070.4756, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.2119, "query_norm": 2.2049, "queue_k_norm": 5.392, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8673, "sent_len_1": 67.0374, "sent_max_len_0": 128.0, "sent_max_len_1": 190.26, "stdk": 0.0814, "stdq": 0.062, "stdqueue_k": 0.0821, "stdqueue_q": 0.0, "step": 2300 }, { "accuracy": 25.5859, "active_queue_size": 16384.0, "cl_loss": 7.9879, "doc_norm": 5.1648, "encoder_q-embeddings": 15436.998, "encoder_q-layer.0": 13625.4375, "encoder_q-layer.1": 12128.8613, "encoder_q-layer.10": 24416.2891, "encoder_q-layer.11": 19975.4102, "encoder_q-layer.2": 12006.7705, "encoder_q-layer.3": 11388.1592, "encoder_q-layer.4": 10859.7891, "encoder_q-layer.5": 11582.8447, "encoder_q-layer.6": 11344.1211, "encoder_q-layer.7": 13919.2764, "encoder_q-layer.8": 16824.6543, "encoder_q-layer.9": 18599.9883, "epoch": 0.02, "inbatch_neg_score": 0.2313, "inbatch_pos_score": 0.7441, "learning_rate": 1.2e-05, "loss": 7.9879, "norm_diff": 3.0592, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21269.5337, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2189, "query_norm": 2.1056, "queue_k_norm": 5.1696, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8346, "sent_len_1": 66.725, "sent_max_len_0": 128.0, "sent_max_len_1": 190.605, "stdk": 0.0783, "stdq": 0.0585, "stdqueue_k": 0.0792, "stdqueue_q": 0.0, "step": 2400 }, { "accuracy": 30.3711, "active_queue_size": 16384.0, "cl_loss": 7.7803, "doc_norm": 4.9201, "encoder_q-embeddings": 3455.4053, "encoder_q-layer.0": 2773.4604, "encoder_q-layer.1": 3353.9885, "encoder_q-layer.10": 28194.9023, "encoder_q-layer.11": 22591.1641, "encoder_q-layer.2": 4105.6631, "encoder_q-layer.3": 4680.0664, "encoder_q-layer.4": 6010.9863, "encoder_q-layer.5": 7773.3911, "encoder_q-layer.6": 9756.8174, "encoder_q-layer.7": 12424.1201, "encoder_q-layer.8": 15527.6143, "encoder_q-layer.9": 18891.5625, "epoch": 0.02, "inbatch_neg_score": 1.3595, "inbatch_pos_score": 1.9268, "learning_rate": 1.25e-05, "loss": 7.7803, "norm_diff": 2.7658, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17051.0924, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 1.3516, "query_norm": 2.1544, "queue_k_norm": 4.9519, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0805, "sent_len_1": 66.7136, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9263, "stdk": 0.0764, "stdq": 0.0601, "stdqueue_k": 0.0768, "stdqueue_q": 0.0, "step": 2500 }, { "accuracy": 31.0547, "active_queue_size": 16384.0, "cl_loss": 7.5198, "doc_norm": 4.7222, "encoder_q-embeddings": 3696.8184, "encoder_q-layer.0": 2900.1475, "encoder_q-layer.1": 3421.0701, "encoder_q-layer.10": 30141.0938, "encoder_q-layer.11": 22744.1016, "encoder_q-layer.2": 4143.0332, "encoder_q-layer.3": 4812.7021, "encoder_q-layer.4": 6110.4731, "encoder_q-layer.5": 7914.8164, "encoder_q-layer.6": 9566.9443, "encoder_q-layer.7": 12384.1309, "encoder_q-layer.8": 14977.874, "encoder_q-layer.9": 19830.0273, "epoch": 0.03, "inbatch_neg_score": 0.6084, "inbatch_pos_score": 1.1611, "learning_rate": 1.3000000000000001e-05, "loss": 7.5198, "norm_diff": 2.6502, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17644.0161, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.603, "query_norm": 2.072, "queue_k_norm": 4.7377, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0472, "sent_len_1": 66.7499, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4512, "stdk": 0.0744, "stdq": 0.0587, "stdqueue_k": 0.0747, "stdqueue_q": 0.0, "step": 2600 }, { "accuracy": 30.8594, "active_queue_size": 16384.0, "cl_loss": 7.4157, "doc_norm": 4.5044, "encoder_q-embeddings": 3457.7356, "encoder_q-layer.0": 2833.4492, "encoder_q-layer.1": 3381.8723, "encoder_q-layer.10": 13088.1094, "encoder_q-layer.11": 11289.1211, "encoder_q-layer.2": 3954.1545, "encoder_q-layer.3": 4109.5303, "encoder_q-layer.4": 4382.0474, "encoder_q-layer.5": 4247.3687, "encoder_q-layer.6": 3911.4045, "encoder_q-layer.7": 4476.8921, "encoder_q-layer.8": 6050.8433, "encoder_q-layer.9": 8340.6006, "epoch": 0.03, "inbatch_neg_score": 0.8111, "inbatch_pos_score": 1.3613, "learning_rate": 1.3500000000000001e-05, "loss": 7.4157, "norm_diff": 2.3987, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8398.5192, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8086, "query_norm": 2.1057, "queue_k_norm": 4.5147, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8759, "sent_len_1": 66.7117, "sent_max_len_0": 128.0, "sent_max_len_1": 191.4525, "stdk": 0.0726, "stdq": 0.0604, "stdqueue_k": 0.0726, "stdqueue_q": 0.0, "step": 2700 }, { "accuracy": 30.0781, "active_queue_size": 16384.0, "cl_loss": 7.2024, "doc_norm": 4.3026, "encoder_q-embeddings": 8673.3047, "encoder_q-layer.0": 6639.1504, "encoder_q-layer.1": 8210.8076, "encoder_q-layer.10": 38858.8438, "encoder_q-layer.11": 26500.7461, "encoder_q-layer.2": 9227.0078, "encoder_q-layer.3": 10849.252, "encoder_q-layer.4": 13533.3086, "encoder_q-layer.5": 18044.5996, "encoder_q-layer.6": 20056.5176, "encoder_q-layer.7": 23418.5762, "encoder_q-layer.8": 24949.8535, "encoder_q-layer.9": 29128.8984, "epoch": 0.03, "inbatch_neg_score": 0.5704, "inbatch_pos_score": 1.1172, "learning_rate": 1.4000000000000001e-05, "loss": 7.2024, "norm_diff": 2.1696, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27044.227, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5586, "query_norm": 2.1329, "queue_k_norm": 4.3121, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7775, "sent_len_1": 67.0262, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8275, "stdk": 0.0706, "stdq": 0.0602, "stdqueue_k": 0.071, "stdqueue_q": 0.0, "step": 2800 }, { "accuracy": 28.9062, "active_queue_size": 16384.0, "cl_loss": 7.0893, "doc_norm": 4.0893, "encoder_q-embeddings": 3281.7393, "encoder_q-layer.0": 2514.0898, "encoder_q-layer.1": 3148.23, "encoder_q-layer.10": 29085.6016, "encoder_q-layer.11": 19870.832, "encoder_q-layer.2": 3972.3391, "encoder_q-layer.3": 4397.252, "encoder_q-layer.4": 5673.6533, "encoder_q-layer.5": 7040.9106, "encoder_q-layer.6": 9687.0752, "encoder_q-layer.7": 13598.0859, "encoder_q-layer.8": 16965.3262, "encoder_q-layer.9": 22972.1738, "epoch": 0.03, "inbatch_neg_score": 0.5606, "inbatch_pos_score": 1.0781, "learning_rate": 1.45e-05, "loss": 7.0893, "norm_diff": 2.0064, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17726.3485, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5522, "query_norm": 2.0828, "queue_k_norm": 4.1192, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9662, "sent_len_1": 66.8527, "sent_max_len_0": 128.0, "sent_max_len_1": 191.245, "stdk": 0.0688, "stdq": 0.0598, "stdqueue_k": 0.0694, "stdqueue_q": 0.0, "step": 2900 }, { "accuracy": 30.5664, "active_queue_size": 16384.0, "cl_loss": 6.9011, "doc_norm": 3.8799, "encoder_q-embeddings": 2068.6372, "encoder_q-layer.0": 1822.0653, "encoder_q-layer.1": 1984.5815, "encoder_q-layer.10": 3455.0557, "encoder_q-layer.11": 4377.0542, "encoder_q-layer.2": 2257.0464, "encoder_q-layer.3": 2372.0884, "encoder_q-layer.4": 2447.9189, "encoder_q-layer.5": 2318.8079, "encoder_q-layer.6": 2038.561, "encoder_q-layer.7": 1868.6631, "encoder_q-layer.8": 2329.9836, "encoder_q-layer.9": 2601.5042, "epoch": 0.03, "inbatch_neg_score": 0.7292, "inbatch_pos_score": 1.252, "learning_rate": 1.5e-05, "loss": 6.9011, "norm_diff": 1.786, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3556.0123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7207, "query_norm": 2.0939, "queue_k_norm": 3.9339, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8866, "sent_len_1": 66.6738, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5475, "stdk": 0.068, "stdq": 0.0589, "stdqueue_k": 0.0684, "stdqueue_q": 0.0, "step": 3000 }, { "accuracy": 35.0586, "active_queue_size": 16384.0, "cl_loss": 6.6907, "doc_norm": 3.7704, "encoder_q-embeddings": 2260.564, "encoder_q-layer.0": 1817.0837, "encoder_q-layer.1": 2012.3044, "encoder_q-layer.10": 3071.0916, "encoder_q-layer.11": 4246.3877, "encoder_q-layer.2": 2267.8101, "encoder_q-layer.3": 2246.3926, "encoder_q-layer.4": 2442.2295, "encoder_q-layer.5": 2643.29, "encoder_q-layer.6": 2247.3586, "encoder_q-layer.7": 2094.6328, "encoder_q-layer.8": 2405.6636, "encoder_q-layer.9": 2338.0364, "epoch": 0.03, "inbatch_neg_score": 0.6745, "inbatch_pos_score": 1.2373, "learning_rate": 1.55e-05, "loss": 6.6907, "norm_diff": 1.7345, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3586.7928, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.667, "query_norm": 2.036, "queue_k_norm": 3.7682, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0487, "sent_len_1": 66.9252, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2887, "stdk": 0.0667, "stdq": 0.0575, "stdqueue_k": 0.0671, "stdqueue_q": 0.0, "step": 3100 }, { "accuracy": 32.9102, "active_queue_size": 16384.0, "cl_loss": 6.597, "doc_norm": 3.5943, "encoder_q-embeddings": 3144.855, "encoder_q-layer.0": 2712.8218, "encoder_q-layer.1": 3549.4385, "encoder_q-layer.10": 47150.1172, "encoder_q-layer.11": 30145.8125, "encoder_q-layer.2": 4702.8623, "encoder_q-layer.3": 5742.6206, "encoder_q-layer.4": 7973.0322, "encoder_q-layer.5": 10944.25, "encoder_q-layer.6": 14880.5459, "encoder_q-layer.7": 21088.8008, "encoder_q-layer.8": 26943.6953, "encoder_q-layer.9": 36047.6016, "epoch": 0.03, "inbatch_neg_score": 0.4917, "inbatch_pos_score": 1.0557, "learning_rate": 1.6000000000000003e-05, "loss": 6.597, "norm_diff": 1.5658, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27693.8371, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.4836, "query_norm": 2.0285, "queue_k_norm": 3.6222, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0023, "sent_len_1": 66.74, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5863, "stdk": 0.0656, "stdq": 0.0577, "stdqueue_k": 0.0661, "stdqueue_q": 0.0, "step": 3200 }, { "accuracy": 33.3984, "active_queue_size": 16384.0, "cl_loss": 6.5829, "doc_norm": 3.4835, "encoder_q-embeddings": 2243.1672, "encoder_q-layer.0": 1751.4442, "encoder_q-layer.1": 2030.1901, "encoder_q-layer.10": 18507.3652, "encoder_q-layer.11": 12051.4961, "encoder_q-layer.2": 2588.4795, "encoder_q-layer.3": 2908.519, "encoder_q-layer.4": 3505.5547, "encoder_q-layer.5": 4559.5269, "encoder_q-layer.6": 5131.4033, "encoder_q-layer.7": 6847.791, "encoder_q-layer.8": 9132.5527, "encoder_q-layer.9": 13863.4229, "epoch": 0.03, "inbatch_neg_score": 0.7994, "inbatch_pos_score": 1.3359, "learning_rate": 1.65e-05, "loss": 6.5829, "norm_diff": 1.411, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10611.4136, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7944, "query_norm": 2.0726, "queue_k_norm": 3.4847, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0437, "sent_len_1": 66.9894, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9712, "stdk": 0.0655, "stdq": 0.0578, "stdqueue_k": 0.0652, "stdqueue_q": 0.0, "step": 3300 }, { "accuracy": 27.5391, "active_queue_size": 16384.0, "cl_loss": 6.4904, "doc_norm": 3.3554, "encoder_q-embeddings": 2840.5635, "encoder_q-layer.0": 2248.0698, "encoder_q-layer.1": 2843.1338, "encoder_q-layer.10": 59699.25, "encoder_q-layer.11": 35836.4102, "encoder_q-layer.2": 3724.3623, "encoder_q-layer.3": 4592.377, "encoder_q-layer.4": 6229.6274, "encoder_q-layer.5": 8314.9062, "encoder_q-layer.6": 11343.9004, "encoder_q-layer.7": 17443.4434, "encoder_q-layer.8": 25554.8691, "encoder_q-layer.9": 42752.9922, "epoch": 0.03, "inbatch_neg_score": 0.3627, "inbatch_pos_score": 0.8789, "learning_rate": 1.7000000000000003e-05, "loss": 6.4904, "norm_diff": 1.2817, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31241.8158, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.355, "query_norm": 2.0737, "queue_k_norm": 3.353, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0472, "sent_len_1": 66.6085, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4275, "stdk": 0.0645, "stdq": 0.0582, "stdqueue_k": 0.0646, "stdqueue_q": 0.0, "step": 3400 }, { "accuracy": 36.2305, "active_queue_size": 16384.0, "cl_loss": 6.3886, "doc_norm": 3.2186, "encoder_q-embeddings": 2580.5237, "encoder_q-layer.0": 2150.9041, "encoder_q-layer.1": 2411.2959, "encoder_q-layer.10": 36668.8086, "encoder_q-layer.11": 21914.4414, "encoder_q-layer.2": 3024.4646, "encoder_q-layer.3": 3150.8928, "encoder_q-layer.4": 4268.6475, "encoder_q-layer.5": 5604.1978, "encoder_q-layer.6": 7717.9839, "encoder_q-layer.7": 11863.9609, "encoder_q-layer.8": 16584.2578, "encoder_q-layer.9": 26262.9414, "epoch": 0.03, "inbatch_neg_score": 0.4726, "inbatch_pos_score": 1.0312, "learning_rate": 1.75e-05, "loss": 6.3886, "norm_diff": 1.2159, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19564.7421, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4609, "query_norm": 2.0027, "queue_k_norm": 3.2386, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0539, "sent_len_1": 66.8938, "sent_max_len_0": 128.0, "sent_max_len_1": 188.505, "stdk": 0.0632, "stdq": 0.0555, "stdqueue_k": 0.0637, "stdqueue_q": 0.0, "step": 3500 }, { "accuracy": 33.5938, "active_queue_size": 16384.0, "cl_loss": 6.2282, "doc_norm": 3.1343, "encoder_q-embeddings": 2132.3774, "encoder_q-layer.0": 1764.6981, "encoder_q-layer.1": 2085.8801, "encoder_q-layer.10": 14154.418, "encoder_q-layer.11": 9012.9336, "encoder_q-layer.2": 2599.1843, "encoder_q-layer.3": 2678.5591, "encoder_q-layer.4": 3037.2458, "encoder_q-layer.5": 3775.6316, "encoder_q-layer.6": 4501.959, "encoder_q-layer.7": 5784.7246, "encoder_q-layer.8": 7148.4619, "encoder_q-layer.9": 10115.1875, "epoch": 0.04, "inbatch_neg_score": 0.5256, "inbatch_pos_score": 1.0566, "learning_rate": 1.8e-05, "loss": 6.2282, "norm_diff": 1.1358, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8275.9341, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.52, "query_norm": 1.9985, "queue_k_norm": 3.1307, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9419, "sent_len_1": 66.7182, "sent_max_len_0": 128.0, "sent_max_len_1": 191.4338, "stdk": 0.0629, "stdq": 0.0568, "stdqueue_k": 0.0631, "stdqueue_q": 0.0, "step": 3600 }, { "accuracy": 34.668, "active_queue_size": 16384.0, "cl_loss": 6.0376, "doc_norm": 3.0305, "encoder_q-embeddings": 1485.4427, "encoder_q-layer.0": 1242.4307, "encoder_q-layer.1": 1339.1317, "encoder_q-layer.10": 10410.7363, "encoder_q-layer.11": 7364.2119, "encoder_q-layer.2": 1518.7808, "encoder_q-layer.3": 1618.0499, "encoder_q-layer.4": 1869.7915, "encoder_q-layer.5": 2123.1375, "encoder_q-layer.6": 2919.5051, "encoder_q-layer.7": 4473.189, "encoder_q-layer.8": 6227.3149, "encoder_q-layer.9": 8197.6689, "epoch": 0.04, "inbatch_neg_score": 0.5893, "inbatch_pos_score": 1.1406, "learning_rate": 1.85e-05, "loss": 6.0376, "norm_diff": 1.0669, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6339.8204, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5869, "query_norm": 1.9636, "queue_k_norm": 3.0337, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9507, "sent_len_1": 66.8867, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5275, "stdk": 0.0624, "stdq": 0.0556, "stdqueue_k": 0.0624, "stdqueue_q": 0.0, "step": 3700 }, { "accuracy": 32.3242, "active_queue_size": 16384.0, "cl_loss": 5.9922, "doc_norm": 2.9273, "encoder_q-embeddings": 2208.2341, "encoder_q-layer.0": 1750.5997, "encoder_q-layer.1": 1943.4844, "encoder_q-layer.10": 6463.561, "encoder_q-layer.11": 5075.4531, "encoder_q-layer.2": 2299.377, "encoder_q-layer.3": 2167.1616, "encoder_q-layer.4": 2086.2922, "encoder_q-layer.5": 2002.0435, "encoder_q-layer.6": 1960.4974, "encoder_q-layer.7": 2167.9165, "encoder_q-layer.8": 3067.0, "encoder_q-layer.9": 4594.6797, "epoch": 0.04, "inbatch_neg_score": 0.5818, "inbatch_pos_score": 1.1064, "learning_rate": 1.9e-05, "loss": 5.9922, "norm_diff": 1.002, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4362.9719, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5786, "query_norm": 1.9254, "queue_k_norm": 2.9386, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9592, "sent_len_1": 66.7808, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1975, "stdk": 0.0622, "stdq": 0.0549, "stdqueue_k": 0.0619, "stdqueue_q": 0.0, "step": 3800 }, { "accuracy": 33.5938, "active_queue_size": 16384.0, "cl_loss": 5.7114, "doc_norm": 2.8333, "encoder_q-embeddings": 1736.1433, "encoder_q-layer.0": 1374.8561, "encoder_q-layer.1": 1592.6689, "encoder_q-layer.10": 11937.3896, "encoder_q-layer.11": 8270.9404, "encoder_q-layer.2": 1960.9795, "encoder_q-layer.3": 2147.1021, "encoder_q-layer.4": 2500.9417, "encoder_q-layer.5": 3594.5098, "encoder_q-layer.6": 4240.7881, "encoder_q-layer.7": 5510.0625, "encoder_q-layer.8": 6776.4927, "encoder_q-layer.9": 8980.5752, "epoch": 0.04, "inbatch_neg_score": 0.5003, "inbatch_pos_score": 1.0254, "learning_rate": 1.9500000000000003e-05, "loss": 5.7114, "norm_diff": 0.967, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7373.5008, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4946, "query_norm": 1.8663, "queue_k_norm": 2.8488, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9755, "sent_len_1": 66.6361, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7438, "stdk": 0.0615, "stdq": 0.0531, "stdqueue_k": 0.0614, "stdqueue_q": 0.0, "step": 3900 }, { "accuracy": 34.5703, "active_queue_size": 16384.0, "cl_loss": 5.5284, "doc_norm": 2.7466, "encoder_q-embeddings": 1181.4463, "encoder_q-layer.0": 973.2521, "encoder_q-layer.1": 1057.1333, "encoder_q-layer.10": 2191.6152, "encoder_q-layer.11": 3180.4678, "encoder_q-layer.2": 1242.3732, "encoder_q-layer.3": 1307.5814, "encoder_q-layer.4": 1298.3933, "encoder_q-layer.5": 1265.6052, "encoder_q-layer.6": 1347.4902, "encoder_q-layer.7": 1212.5006, "encoder_q-layer.8": 1421.6805, "encoder_q-layer.9": 1296.719, "epoch": 0.04, "inbatch_neg_score": 0.3562, "inbatch_pos_score": 0.8589, "learning_rate": 2e-05, "loss": 5.5284, "norm_diff": 0.9317, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2207.2403, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3499, "query_norm": 1.8149, "queue_k_norm": 2.7519, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1435, "sent_len_1": 66.6089, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5037, "stdk": 0.0602, "stdq": 0.0514, "stdqueue_k": 0.0607, "stdqueue_q": 0.0, "step": 4000 }, { "accuracy": 35.9375, "active_queue_size": 16384.0, "cl_loss": 5.3576, "doc_norm": 2.6494, "encoder_q-embeddings": 4444.9224, "encoder_q-layer.0": 3516.6235, "encoder_q-layer.1": 3495.1462, "encoder_q-layer.10": 9908.4307, "encoder_q-layer.11": 8489.3652, "encoder_q-layer.2": 3595.5732, "encoder_q-layer.3": 3189.418, "encoder_q-layer.4": 2606.887, "encoder_q-layer.5": 2302.4924, "encoder_q-layer.6": 2318.9956, "encoder_q-layer.7": 3246.269, "encoder_q-layer.8": 4711.1904, "encoder_q-layer.9": 7373.3809, "epoch": 0.04, "inbatch_neg_score": 0.3337, "inbatch_pos_score": 0.8481, "learning_rate": 2.05e-05, "loss": 5.3576, "norm_diff": 0.8883, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7133.1759, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3262, "query_norm": 1.7611, "queue_k_norm": 2.6589, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9338, "sent_len_1": 66.8712, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2725, "stdk": 0.0597, "stdq": 0.0497, "stdqueue_k": 0.0602, "stdqueue_q": 0.0, "step": 4100 }, { "accuracy": 37.1094, "active_queue_size": 16384.0, "cl_loss": 5.2551, "doc_norm": 2.5558, "encoder_q-embeddings": 3487.4387, "encoder_q-layer.0": 2825.7168, "encoder_q-layer.1": 3025.0967, "encoder_q-layer.10": 7373.6807, "encoder_q-layer.11": 7375.3452, "encoder_q-layer.2": 3811.0713, "encoder_q-layer.3": 3825.4785, "encoder_q-layer.4": 4018.6855, "encoder_q-layer.5": 3618.0486, "encoder_q-layer.6": 3317.5989, "encoder_q-layer.7": 3892.5146, "encoder_q-layer.8": 4671.9717, "encoder_q-layer.9": 5972.8599, "epoch": 0.04, "inbatch_neg_score": 0.3256, "inbatch_pos_score": 0.8506, "learning_rate": 2.1e-05, "loss": 5.2551, "norm_diff": 0.7984, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6449.7728, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3188, "query_norm": 1.7574, "queue_k_norm": 2.5641, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8171, "sent_len_1": 66.3805, "sent_max_len_0": 128.0, "sent_max_len_1": 190.36, "stdk": 0.0591, "stdq": 0.0492, "stdqueue_k": 0.0595, "stdqueue_q": 0.0, "step": 4200 }, { "accuracy": 38.6719, "active_queue_size": 16384.0, "cl_loss": 5.097, "doc_norm": 2.4658, "encoder_q-embeddings": 3355.0881, "encoder_q-layer.0": 2720.1055, "encoder_q-layer.1": 2829.0764, "encoder_q-layer.10": 9603.5742, "encoder_q-layer.11": 9965.4502, "encoder_q-layer.2": 3118.2839, "encoder_q-layer.3": 3078.145, "encoder_q-layer.4": 3051.335, "encoder_q-layer.5": 2807.0249, "encoder_q-layer.6": 2775.0549, "encoder_q-layer.7": 3276.5684, "encoder_q-layer.8": 3690.5154, "encoder_q-layer.9": 5789.9683, "epoch": 0.04, "inbatch_neg_score": 0.3683, "inbatch_pos_score": 0.9111, "learning_rate": 2.15e-05, "loss": 5.097, "norm_diff": 0.7691, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6811.2713, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.364, "query_norm": 1.6967, "queue_k_norm": 2.469, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8104, "sent_len_1": 66.5864, "sent_max_len_0": 128.0, "sent_max_len_1": 187.1463, "stdk": 0.0586, "stdq": 0.0477, "stdqueue_k": 0.0587, "stdqueue_q": 0.0, "step": 4300 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 5.0303, "doc_norm": 2.3764, "encoder_q-embeddings": 6556.6504, "encoder_q-layer.0": 5855.1436, "encoder_q-layer.1": 6170.3691, "encoder_q-layer.10": 5702.2886, "encoder_q-layer.11": 8014.5146, "encoder_q-layer.2": 7092.2261, "encoder_q-layer.3": 7035.042, "encoder_q-layer.4": 6688.1768, "encoder_q-layer.5": 5860.8062, "encoder_q-layer.6": 4483.292, "encoder_q-layer.7": 3102.7166, "encoder_q-layer.8": 2950.2827, "encoder_q-layer.9": 3836.657, "epoch": 0.04, "inbatch_neg_score": 0.302, "inbatch_pos_score": 0.8628, "learning_rate": 2.2000000000000003e-05, "loss": 5.0303, "norm_diff": 0.6417, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8785.1679, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2986, "query_norm": 1.7347, "queue_k_norm": 2.3814, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0837, "sent_len_1": 66.7005, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2312, "stdk": 0.058, "stdq": 0.0487, "stdqueue_k": 0.058, "stdqueue_q": 0.0, "step": 4400 }, { "accuracy": 37.8906, "active_queue_size": 16384.0, "cl_loss": 4.9001, "doc_norm": 2.2965, "encoder_q-embeddings": 3147.5359, "encoder_q-layer.0": 2737.2007, "encoder_q-layer.1": 2862.8865, "encoder_q-layer.10": 5668.4595, "encoder_q-layer.11": 6981.2539, "encoder_q-layer.2": 3117.4448, "encoder_q-layer.3": 3161.2021, "encoder_q-layer.4": 3073.3816, "encoder_q-layer.5": 3034.4358, "encoder_q-layer.6": 3033.4958, "encoder_q-layer.7": 3088.0544, "encoder_q-layer.8": 3275.9072, "encoder_q-layer.9": 4041.3496, "epoch": 0.04, "inbatch_neg_score": 0.3819, "inbatch_pos_score": 0.9116, "learning_rate": 2.25e-05, "loss": 4.9001, "norm_diff": 0.5894, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5419.6375, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3784, "query_norm": 1.7072, "queue_k_norm": 2.2959, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9556, "sent_len_1": 66.6556, "sent_max_len_0": 128.0, "sent_max_len_1": 188.135, "stdk": 0.0573, "stdq": 0.0479, "stdqueue_k": 0.0572, "stdqueue_q": 0.0, "step": 4500 }, { "accuracy": 37.9883, "active_queue_size": 16384.0, "cl_loss": 4.85, "doc_norm": 2.2089, "encoder_q-embeddings": 2241.606, "encoder_q-layer.0": 1903.6028, "encoder_q-layer.1": 1942.4916, "encoder_q-layer.10": 7102.4634, "encoder_q-layer.11": 7476.9355, "encoder_q-layer.2": 2427.8164, "encoder_q-layer.3": 2399.8853, "encoder_q-layer.4": 2293.1809, "encoder_q-layer.5": 2204.4631, "encoder_q-layer.6": 2002.8632, "encoder_q-layer.7": 2049.6877, "encoder_q-layer.8": 2889.448, "encoder_q-layer.9": 4982.8184, "epoch": 0.04, "inbatch_neg_score": 0.3638, "inbatch_pos_score": 0.874, "learning_rate": 2.3000000000000003e-05, "loss": 4.85, "norm_diff": 0.5351, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5213.9016, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3577, "query_norm": 1.6738, "queue_k_norm": 2.2199, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9349, "sent_len_1": 66.7083, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6887, "stdk": 0.0564, "stdq": 0.047, "stdqueue_k": 0.0567, "stdqueue_q": 0.0, "step": 4600 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.7714, "doc_norm": 2.1498, "encoder_q-embeddings": 6564.1084, "encoder_q-layer.0": 6634.3052, "encoder_q-layer.1": 6761.9146, "encoder_q-layer.10": 9209.4854, "encoder_q-layer.11": 9669.5732, "encoder_q-layer.2": 6862.7139, "encoder_q-layer.3": 6564.3008, "encoder_q-layer.4": 6077.4072, "encoder_q-layer.5": 6553.1069, "encoder_q-layer.6": 6518.9316, "encoder_q-layer.7": 6995.1274, "encoder_q-layer.8": 8186.9277, "encoder_q-layer.9": 8342.873, "epoch": 0.05, "inbatch_neg_score": 0.3504, "inbatch_pos_score": 0.9033, "learning_rate": 2.35e-05, "loss": 4.7714, "norm_diff": 0.489, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10581.7047, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3491, "query_norm": 1.6608, "queue_k_norm": 2.1476, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0941, "sent_len_1": 66.9886, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6337, "stdk": 0.0559, "stdq": 0.0467, "stdqueue_k": 0.056, "stdqueue_q": 0.0, "step": 4700 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.7112, "doc_norm": 2.0855, "encoder_q-embeddings": 2036.2462, "encoder_q-layer.0": 1646.6766, "encoder_q-layer.1": 1728.7375, "encoder_q-layer.10": 5440.0532, "encoder_q-layer.11": 6308.624, "encoder_q-layer.2": 1917.5059, "encoder_q-layer.3": 2126.0156, "encoder_q-layer.4": 2144.7031, "encoder_q-layer.5": 2089.2493, "encoder_q-layer.6": 1843.7428, "encoder_q-layer.7": 1918.8691, "encoder_q-layer.8": 2671.3538, "encoder_q-layer.9": 4141.521, "epoch": 0.05, "inbatch_neg_score": 0.4167, "inbatch_pos_score": 0.9746, "learning_rate": 2.4e-05, "loss": 4.7112, "norm_diff": 0.3937, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4422.2589, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4131, "query_norm": 1.6918, "queue_k_norm": 2.0877, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9682, "sent_len_1": 66.6893, "sent_max_len_0": 128.0, "sent_max_len_1": 189.665, "stdk": 0.0553, "stdq": 0.0464, "stdqueue_k": 0.0554, "stdqueue_q": 0.0, "step": 4800 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.6952, "doc_norm": 2.0254, "encoder_q-embeddings": 2260.6055, "encoder_q-layer.0": 1747.516, "encoder_q-layer.1": 1915.866, "encoder_q-layer.10": 5871.9648, "encoder_q-layer.11": 6407.9785, "encoder_q-layer.2": 2054.1997, "encoder_q-layer.3": 2084.9241, "encoder_q-layer.4": 2016.7483, "encoder_q-layer.5": 1914.6471, "encoder_q-layer.6": 1975.2855, "encoder_q-layer.7": 2436.4026, "encoder_q-layer.8": 3997.8586, "encoder_q-layer.9": 4868.1802, "epoch": 0.05, "inbatch_neg_score": 0.424, "inbatch_pos_score": 0.9463, "learning_rate": 2.45e-05, "loss": 4.6952, "norm_diff": 0.3533, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4766.1374, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4189, "query_norm": 1.6721, "queue_k_norm": 2.0322, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.1227, "sent_len_1": 66.7986, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5975, "stdk": 0.0548, "stdq": 0.0467, "stdqueue_k": 0.0549, "stdqueue_q": 0.0, "step": 4900 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.6598, "doc_norm": 1.9804, "encoder_q-embeddings": 2474.8286, "encoder_q-layer.0": 2025.771, "encoder_q-layer.1": 2376.1453, "encoder_q-layer.10": 3372.7251, "encoder_q-layer.11": 4901.3452, "encoder_q-layer.2": 2857.5166, "encoder_q-layer.3": 2724.4297, "encoder_q-layer.4": 2537.5715, "encoder_q-layer.5": 2245.5276, "encoder_q-layer.6": 2253.3992, "encoder_q-layer.7": 2428.2729, "encoder_q-layer.8": 2391.8108, "encoder_q-layer.9": 2464.8755, "epoch": 0.05, "inbatch_neg_score": 0.4555, "inbatch_pos_score": 1.0059, "learning_rate": 2.5e-05, "loss": 4.6598, "norm_diff": 0.3069, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4088.4713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4526, "query_norm": 1.6735, "queue_k_norm": 1.9827, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9284, "sent_len_1": 66.7867, "sent_max_len_0": 128.0, "sent_max_len_1": 188.915, "stdk": 0.0541, "stdq": 0.0471, "stdqueue_k": 0.0543, "stdqueue_q": 0.0, "step": 5000 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.5802, "doc_norm": 1.9378, "encoder_q-embeddings": 2976.7021, "encoder_q-layer.0": 2806.7544, "encoder_q-layer.1": 2830.1917, "encoder_q-layer.10": 6243.5229, "encoder_q-layer.11": 7323.9062, "encoder_q-layer.2": 2987.3574, "encoder_q-layer.3": 2995.032, "encoder_q-layer.4": 3117.6211, "encoder_q-layer.5": 3309.5146, "encoder_q-layer.6": 3814.915, "encoder_q-layer.7": 4304.9277, "encoder_q-layer.8": 5453.3848, "encoder_q-layer.9": 5783.5195, "epoch": 0.05, "inbatch_neg_score": 0.4943, "inbatch_pos_score": 1.0176, "learning_rate": 2.5500000000000003e-05, "loss": 4.5802, "norm_diff": 0.306, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6146.6297, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4915, "query_norm": 1.6317, "queue_k_norm": 1.9406, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0704, "sent_len_1": 66.8346, "sent_max_len_0": 128.0, "sent_max_len_1": 186.9988, "stdk": 0.0538, "stdq": 0.0453, "stdqueue_k": 0.0538, "stdqueue_q": 0.0, "step": 5100 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.562, "doc_norm": 1.8988, "encoder_q-embeddings": 1632.1571, "encoder_q-layer.0": 1287.5238, "encoder_q-layer.1": 1409.1388, "encoder_q-layer.10": 2776.8232, "encoder_q-layer.11": 4452.8403, "encoder_q-layer.2": 1621.2954, "encoder_q-layer.3": 1620.3961, "encoder_q-layer.4": 1598.0752, "encoder_q-layer.5": 1551.0533, "encoder_q-layer.6": 1457.5011, "encoder_q-layer.7": 1420.3085, "encoder_q-layer.8": 1681.2561, "encoder_q-layer.9": 2085.948, "epoch": 0.05, "inbatch_neg_score": 0.5335, "inbatch_pos_score": 1.0693, "learning_rate": 2.6000000000000002e-05, "loss": 4.562, "norm_diff": 0.2301, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2954.8142, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5278, "query_norm": 1.6687, "queue_k_norm": 1.9029, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9071, "sent_len_1": 66.5305, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3487, "stdk": 0.0532, "stdq": 0.0471, "stdqueue_k": 0.0533, "stdqueue_q": 0.0, "step": 5200 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.531, "doc_norm": 1.866, "encoder_q-embeddings": 2742.1235, "encoder_q-layer.0": 2085.7517, "encoder_q-layer.1": 2317.1113, "encoder_q-layer.10": 3458.9331, "encoder_q-layer.11": 4643.252, "encoder_q-layer.2": 2724.498, "encoder_q-layer.3": 2795.9426, "encoder_q-layer.4": 2728.2744, "encoder_q-layer.5": 2631.1555, "encoder_q-layer.6": 2314.4221, "encoder_q-layer.7": 2303.2625, "encoder_q-layer.8": 2990.0823, "encoder_q-layer.9": 3108.1934, "epoch": 0.05, "inbatch_neg_score": 0.5583, "inbatch_pos_score": 1.125, "learning_rate": 2.6500000000000004e-05, "loss": 4.531, "norm_diff": 0.1851, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4237.8587, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5571, "query_norm": 1.681, "queue_k_norm": 1.8749, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0979, "sent_len_1": 66.8008, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1575, "stdk": 0.0526, "stdq": 0.0467, "stdqueue_k": 0.053, "stdqueue_q": 0.0, "step": 5300 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.4912, "doc_norm": 1.8411, "encoder_q-embeddings": 1841.1473, "encoder_q-layer.0": 1369.3136, "encoder_q-layer.1": 1536.9348, "encoder_q-layer.10": 5052.1421, "encoder_q-layer.11": 5471.6035, "encoder_q-layer.2": 1813.4529, "encoder_q-layer.3": 1901.3992, "encoder_q-layer.4": 2086.4277, "encoder_q-layer.5": 2441.6709, "encoder_q-layer.6": 3108.3728, "encoder_q-layer.7": 4168.1455, "encoder_q-layer.8": 5556.9971, "encoder_q-layer.9": 5077.3506, "epoch": 0.05, "inbatch_neg_score": 0.5958, "inbatch_pos_score": 1.1475, "learning_rate": 2.7000000000000002e-05, "loss": 4.4912, "norm_diff": 0.1529, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4885.8599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5913, "query_norm": 1.6882, "queue_k_norm": 1.8463, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0059, "sent_len_1": 66.9315, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8725, "stdk": 0.0522, "stdq": 0.0471, "stdqueue_k": 0.0525, "stdqueue_q": 0.0, "step": 5400 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.4481, "doc_norm": 1.8314, "encoder_q-embeddings": 1716.3674, "encoder_q-layer.0": 1342.3389, "encoder_q-layer.1": 1383.212, "encoder_q-layer.10": 2656.1667, "encoder_q-layer.11": 4333.1353, "encoder_q-layer.2": 1659.5106, "encoder_q-layer.3": 1717.2804, "encoder_q-layer.4": 1795.4296, "encoder_q-layer.5": 2052.7449, "encoder_q-layer.6": 2264.1504, "encoder_q-layer.7": 2304.5867, "encoder_q-layer.8": 2506.0586, "encoder_q-layer.9": 1829.9856, "epoch": 0.05, "inbatch_neg_score": 0.5446, "inbatch_pos_score": 1.0859, "learning_rate": 2.7500000000000004e-05, "loss": 4.4481, "norm_diff": 0.1506, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3254.0831, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5415, "query_norm": 1.6809, "queue_k_norm": 1.825, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9985, "sent_len_1": 66.744, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8338, "stdk": 0.0523, "stdq": 0.0466, "stdqueue_k": 0.0522, "stdqueue_q": 0.0, "step": 5500 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.4349, "doc_norm": 1.8098, "encoder_q-embeddings": 3650.1765, "encoder_q-layer.0": 3051.0227, "encoder_q-layer.1": 3041.3528, "encoder_q-layer.10": 3014.8486, "encoder_q-layer.11": 4155.3086, "encoder_q-layer.2": 3269.729, "encoder_q-layer.3": 3092.3315, "encoder_q-layer.4": 2612.7676, "encoder_q-layer.5": 2535.4465, "encoder_q-layer.6": 2623.2439, "encoder_q-layer.7": 2878.6738, "encoder_q-layer.8": 2724.3127, "encoder_q-layer.9": 2090.7109, "epoch": 0.05, "inbatch_neg_score": 0.6292, "inbatch_pos_score": 1.1855, "learning_rate": 2.8000000000000003e-05, "loss": 4.4349, "norm_diff": 0.0833, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4482.265, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6255, "query_norm": 1.7264, "queue_k_norm": 1.8049, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0912, "sent_len_1": 66.9919, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4462, "stdk": 0.0519, "stdq": 0.0477, "stdqueue_k": 0.0518, "stdqueue_q": 0.0, "step": 5600 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.3714, "doc_norm": 1.7925, "encoder_q-embeddings": 2831.5884, "encoder_q-layer.0": 2408.2375, "encoder_q-layer.1": 2394.2698, "encoder_q-layer.10": 1981.9097, "encoder_q-layer.11": 3269.958, "encoder_q-layer.2": 2907.8538, "encoder_q-layer.3": 2828.4126, "encoder_q-layer.4": 2506.3503, "encoder_q-layer.5": 2295.1133, "encoder_q-layer.6": 1729.2343, "encoder_q-layer.7": 1276.2903, "encoder_q-layer.8": 1460.2573, "encoder_q-layer.9": 1390.9647, "epoch": 0.06, "inbatch_neg_score": 0.6208, "inbatch_pos_score": 1.2041, "learning_rate": 2.8499999999999998e-05, "loss": 4.3714, "norm_diff": 0.063, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3521.8281, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6157, "query_norm": 1.7295, "queue_k_norm": 1.7907, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9754, "sent_len_1": 66.8924, "sent_max_len_0": 128.0, "sent_max_len_1": 190.105, "stdk": 0.0516, "stdq": 0.0472, "stdqueue_k": 0.0515, "stdqueue_q": 0.0, "step": 5700 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.3784, "doc_norm": 1.7726, "encoder_q-embeddings": 1545.6986, "encoder_q-layer.0": 1209.6715, "encoder_q-layer.1": 1304.6588, "encoder_q-layer.10": 3422.6655, "encoder_q-layer.11": 5485.7993, "encoder_q-layer.2": 1476.8188, "encoder_q-layer.3": 1566.3169, "encoder_q-layer.4": 1607.6489, "encoder_q-layer.5": 1649.0963, "encoder_q-layer.6": 1789.5554, "encoder_q-layer.7": 2298.0103, "encoder_q-layer.8": 3141.1279, "encoder_q-layer.9": 2866.8477, "epoch": 0.06, "inbatch_neg_score": 0.6422, "inbatch_pos_score": 1.1885, "learning_rate": 2.9e-05, "loss": 4.3784, "norm_diff": 0.0454, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3552.6526, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6382, "query_norm": 1.7271, "queue_k_norm": 1.7789, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8259, "sent_len_1": 66.7822, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7038, "stdk": 0.051, "stdq": 0.0468, "stdqueue_k": 0.0513, "stdqueue_q": 0.0, "step": 5800 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.3466, "doc_norm": 1.7672, "encoder_q-embeddings": 1548.1401, "encoder_q-layer.0": 1223.3447, "encoder_q-layer.1": 1257.8894, "encoder_q-layer.10": 2138.834, "encoder_q-layer.11": 3755.585, "encoder_q-layer.2": 1496.9287, "encoder_q-layer.3": 1468.109, "encoder_q-layer.4": 1476.1311, "encoder_q-layer.5": 1548.4819, "encoder_q-layer.6": 1606.9347, "encoder_q-layer.7": 1576.9185, "encoder_q-layer.8": 2067.189, "encoder_q-layer.9": 1795.5177, "epoch": 0.06, "inbatch_neg_score": 0.5984, "inbatch_pos_score": 1.1543, "learning_rate": 2.95e-05, "loss": 4.3466, "norm_diff": 0.0217, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2727.1422, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5942, "query_norm": 1.7455, "queue_k_norm": 1.7663, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.2334, "sent_len_1": 67.0884, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5075, "stdk": 0.0509, "stdq": 0.0479, "stdqueue_k": 0.051, "stdqueue_q": 0.0, "step": 5900 }, { "accuracy": 38.8672, "active_queue_size": 16384.0, "cl_loss": 4.3359, "doc_norm": 1.7571, "encoder_q-embeddings": 3082.6055, "encoder_q-layer.0": 2822.1558, "encoder_q-layer.1": 2980.2932, "encoder_q-layer.10": 3043.7395, "encoder_q-layer.11": 4254.3994, "encoder_q-layer.2": 3343.2732, "encoder_q-layer.3": 3362.7451, "encoder_q-layer.4": 3065.9329, "encoder_q-layer.5": 3111.5674, "encoder_q-layer.6": 3554.8521, "encoder_q-layer.7": 3700.5881, "encoder_q-layer.8": 3630.3933, "encoder_q-layer.9": 3149.9485, "epoch": 0.06, "inbatch_neg_score": 0.6268, "inbatch_pos_score": 1.1533, "learning_rate": 3e-05, "loss": 4.3359, "norm_diff": 0.0124, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4786.7029, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.623, "query_norm": 1.763, "queue_k_norm": 1.7564, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.086, "sent_len_1": 66.826, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1925, "stdk": 0.0507, "stdq": 0.0481, "stdqueue_k": 0.0509, "stdqueue_q": 0.0, "step": 6000 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 4.2948, "doc_norm": 1.7415, "encoder_q-embeddings": 3046.4209, "encoder_q-layer.0": 2611.6562, "encoder_q-layer.1": 2521.1807, "encoder_q-layer.10": 2185.2114, "encoder_q-layer.11": 3660.9963, "encoder_q-layer.2": 2754.1946, "encoder_q-layer.3": 2720.585, "encoder_q-layer.4": 2971.613, "encoder_q-layer.5": 2835.0684, "encoder_q-layer.6": 2358.311, "encoder_q-layer.7": 2241.8113, "encoder_q-layer.8": 2920.1975, "encoder_q-layer.9": 2208.7305, "epoch": 0.06, "inbatch_neg_score": 0.6275, "inbatch_pos_score": 1.2305, "learning_rate": 3.05e-05, "loss": 4.2948, "norm_diff": 0.0129, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3994.0291, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.623, "query_norm": 1.7286, "queue_k_norm": 1.7454, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.6802, "sent_len_1": 66.6305, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9263, "stdk": 0.0505, "stdq": 0.0471, "stdqueue_k": 0.0506, "stdqueue_q": 0.0, "step": 6100 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 4.2646, "doc_norm": 1.7398, "encoder_q-embeddings": 6157.5576, "encoder_q-layer.0": 5343.7695, "encoder_q-layer.1": 4875.6182, "encoder_q-layer.10": 4356.9248, "encoder_q-layer.11": 7092.4297, "encoder_q-layer.2": 4654.2915, "encoder_q-layer.3": 4447.3369, "encoder_q-layer.4": 4377.6328, "encoder_q-layer.5": 4027.7229, "encoder_q-layer.6": 4059.5732, "encoder_q-layer.7": 3215.4512, "encoder_q-layer.8": 3570.8674, "encoder_q-layer.9": 3956.7844, "epoch": 0.06, "inbatch_neg_score": 0.5919, "inbatch_pos_score": 1.1689, "learning_rate": 3.1e-05, "loss": 4.2646, "norm_diff": 0.0358, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7103.1361, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5894, "query_norm": 1.704, "queue_k_norm": 1.7342, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9869, "sent_len_1": 66.8279, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2637, "stdk": 0.0507, "stdq": 0.0453, "stdqueue_k": 0.0505, "stdqueue_q": 0.0, "step": 6200 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.2653, "doc_norm": 1.7205, "encoder_q-embeddings": 12826.2129, "encoder_q-layer.0": 10555.5566, "encoder_q-layer.1": 11039.3877, "encoder_q-layer.10": 4343.8638, "encoder_q-layer.11": 6690.5459, "encoder_q-layer.2": 12639.8672, "encoder_q-layer.3": 13406.8174, "encoder_q-layer.4": 12872.6543, "encoder_q-layer.5": 11037.1172, "encoder_q-layer.6": 9463.3965, "encoder_q-layer.7": 6736.2422, "encoder_q-layer.8": 4578.3101, "encoder_q-layer.9": 3726.1072, "epoch": 0.06, "inbatch_neg_score": 0.5246, "inbatch_pos_score": 1.0645, "learning_rate": 3.15e-05, "loss": 4.2653, "norm_diff": 0.0195, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15263.6055, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5176, "query_norm": 1.74, "queue_k_norm": 1.7215, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1236, "sent_len_1": 67.0016, "sent_max_len_0": 128.0, "sent_max_len_1": 191.3837, "stdk": 0.05, "stdq": 0.0456, "stdqueue_k": 0.0502, "stdqueue_q": 0.0, "step": 6300 }, { "accuracy": 40.8203, "active_queue_size": 16384.0, "cl_loss": 4.2605, "doc_norm": 1.7059, "encoder_q-embeddings": 4624.5234, "encoder_q-layer.0": 3635.5151, "encoder_q-layer.1": 3700.6951, "encoder_q-layer.10": 5153.3867, "encoder_q-layer.11": 8364.6055, "encoder_q-layer.2": 4237.5078, "encoder_q-layer.3": 3946.3887, "encoder_q-layer.4": 3934.7419, "encoder_q-layer.5": 3859.0947, "encoder_q-layer.6": 3625.865, "encoder_q-layer.7": 2988.3765, "encoder_q-layer.8": 3384.5146, "encoder_q-layer.9": 3572.4299, "epoch": 0.06, "inbatch_neg_score": 0.5231, "inbatch_pos_score": 1.0635, "learning_rate": 3.2000000000000005e-05, "loss": 4.2605, "norm_diff": 0.0683, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6540.4522, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5205, "query_norm": 1.7742, "queue_k_norm": 1.71, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9553, "sent_len_1": 66.7705, "sent_max_len_0": 128.0, "sent_max_len_1": 187.275, "stdk": 0.0498, "stdq": 0.046, "stdqueue_k": 0.0501, "stdqueue_q": 0.0, "step": 6400 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 4.2312, "doc_norm": 1.7001, "encoder_q-embeddings": 5830.876, "encoder_q-layer.0": 5105.1055, "encoder_q-layer.1": 5418.9136, "encoder_q-layer.10": 3831.0039, "encoder_q-layer.11": 6853.3076, "encoder_q-layer.2": 6586.0732, "encoder_q-layer.3": 6353.438, "encoder_q-layer.4": 5763.8188, "encoder_q-layer.5": 5450.5249, "encoder_q-layer.6": 4432.5786, "encoder_q-layer.7": 3076.1313, "encoder_q-layer.8": 3026.9827, "encoder_q-layer.9": 2791.1824, "epoch": 0.06, "inbatch_neg_score": 0.5256, "inbatch_pos_score": 1.1133, "learning_rate": 3.2500000000000004e-05, "loss": 4.2312, "norm_diff": 0.1314, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7816.5019, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5186, "query_norm": 1.8314, "queue_k_norm": 1.7004, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0084, "sent_len_1": 66.6807, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6012, "stdk": 0.0499, "stdq": 0.0464, "stdqueue_k": 0.0499, "stdqueue_q": 0.0, "step": 6500 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.1838, "doc_norm": 1.6938, "encoder_q-embeddings": 10079.9873, "encoder_q-layer.0": 8442.5107, "encoder_q-layer.1": 9077.9365, "encoder_q-layer.10": 5757.5322, "encoder_q-layer.11": 9796.1016, "encoder_q-layer.2": 11250.4102, "encoder_q-layer.3": 11547.7246, "encoder_q-layer.4": 10636.3555, "encoder_q-layer.5": 10227.4463, "encoder_q-layer.6": 11099.1182, "encoder_q-layer.7": 10200.291, "encoder_q-layer.8": 7597.9126, "encoder_q-layer.9": 4664.3647, "epoch": 0.06, "inbatch_neg_score": 0.5374, "inbatch_pos_score": 1.1152, "learning_rate": 3.3e-05, "loss": 4.1838, "norm_diff": 0.1393, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14138.5556, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5322, "query_norm": 1.8331, "queue_k_norm": 1.6924, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7995, "sent_len_1": 66.8571, "sent_max_len_0": 128.0, "sent_max_len_1": 186.4075, "stdk": 0.0499, "stdq": 0.0457, "stdqueue_k": 0.0499, "stdqueue_q": 0.0, "step": 6600 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 4.1983, "doc_norm": 1.6842, "encoder_q-embeddings": 9139.3242, "encoder_q-layer.0": 7671.4126, "encoder_q-layer.1": 7504.9771, "encoder_q-layer.10": 5081.4395, "encoder_q-layer.11": 7049.6768, "encoder_q-layer.2": 8521.3105, "encoder_q-layer.3": 9108.4268, "encoder_q-layer.4": 8384.2627, "encoder_q-layer.5": 8248.627, "encoder_q-layer.6": 6519.0693, "encoder_q-layer.7": 4759.1479, "encoder_q-layer.8": 4488.7783, "encoder_q-layer.9": 3878.7708, "epoch": 0.07, "inbatch_neg_score": 0.5764, "inbatch_pos_score": 1.1436, "learning_rate": 3.35e-05, "loss": 4.1983, "norm_diff": 0.2071, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10786.8664, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5684, "query_norm": 1.8913, "queue_k_norm": 1.6831, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8284, "sent_len_1": 66.7389, "sent_max_len_0": 128.0, "sent_max_len_1": 190.235, "stdk": 0.0497, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 6700 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.1806, "doc_norm": 1.6735, "encoder_q-embeddings": 4335.5176, "encoder_q-layer.0": 3438.823, "encoder_q-layer.1": 3650.8928, "encoder_q-layer.10": 3347.7729, "encoder_q-layer.11": 6336.6694, "encoder_q-layer.2": 4077.6787, "encoder_q-layer.3": 4217.5503, "encoder_q-layer.4": 4024.168, "encoder_q-layer.5": 4269.8677, "encoder_q-layer.6": 4670.251, "encoder_q-layer.7": 4280.6562, "encoder_q-layer.8": 5005.7451, "encoder_q-layer.9": 3321.8132, "epoch": 0.07, "inbatch_neg_score": 0.5793, "inbatch_pos_score": 1.1338, "learning_rate": 3.4000000000000007e-05, "loss": 4.1806, "norm_diff": 0.2444, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6254.1959, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5718, "query_norm": 1.9179, "queue_k_norm": 1.6786, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8582, "sent_len_1": 66.6977, "sent_max_len_0": 128.0, "sent_max_len_1": 191.9338, "stdk": 0.0493, "stdq": 0.0464, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 6800 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.1264, "doc_norm": 1.6718, "encoder_q-embeddings": 6578.7891, "encoder_q-layer.0": 5056.333, "encoder_q-layer.1": 5000.7324, "encoder_q-layer.10": 3529.1582, "encoder_q-layer.11": 6308.2515, "encoder_q-layer.2": 5560.5181, "encoder_q-layer.3": 5883.2344, "encoder_q-layer.4": 5330.3965, "encoder_q-layer.5": 5508.5107, "encoder_q-layer.6": 5454.5415, "encoder_q-layer.7": 3664.938, "encoder_q-layer.8": 3023.4946, "encoder_q-layer.9": 2596.5898, "epoch": 0.07, "inbatch_neg_score": 0.5938, "inbatch_pos_score": 1.1553, "learning_rate": 3.45e-05, "loss": 4.1264, "norm_diff": 0.2423, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7563.4759, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5845, "query_norm": 1.9141, "queue_k_norm": 1.6703, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0216, "sent_len_1": 66.8705, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9812, "stdk": 0.0494, "stdq": 0.0461, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 6900 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 4.1054, "doc_norm": 1.6618, "encoder_q-embeddings": 4363.8872, "encoder_q-layer.0": 3404.1799, "encoder_q-layer.1": 3937.2637, "encoder_q-layer.10": 3475.4028, "encoder_q-layer.11": 6841.4736, "encoder_q-layer.2": 4338.2095, "encoder_q-layer.3": 4292.6089, "encoder_q-layer.4": 4549.0522, "encoder_q-layer.5": 4796.8149, "encoder_q-layer.6": 4553.6426, "encoder_q-layer.7": 3834.6418, "encoder_q-layer.8": 3516.1377, "encoder_q-layer.9": 2525.4731, "epoch": 0.07, "inbatch_neg_score": 0.5613, "inbatch_pos_score": 1.126, "learning_rate": 3.5e-05, "loss": 4.1054, "norm_diff": 0.257, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6392.8717, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5542, "query_norm": 1.9188, "queue_k_norm": 1.6669, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1198, "sent_len_1": 66.9368, "sent_max_len_0": 128.0, "sent_max_len_1": 186.2188, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 7000 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 4.1002, "doc_norm": 1.6576, "encoder_q-embeddings": 14933.0967, "encoder_q-layer.0": 11326.6113, "encoder_q-layer.1": 12658.6562, "encoder_q-layer.10": 4904.3467, "encoder_q-layer.11": 8805.7061, "encoder_q-layer.2": 15708.8232, "encoder_q-layer.3": 14747.7754, "encoder_q-layer.4": 13139.3584, "encoder_q-layer.5": 13481.3076, "encoder_q-layer.6": 12920.2861, "encoder_q-layer.7": 10571.1514, "encoder_q-layer.8": 5541.7769, "encoder_q-layer.9": 3708.8401, "epoch": 0.07, "inbatch_neg_score": 0.5824, "inbatch_pos_score": 1.1895, "learning_rate": 3.55e-05, "loss": 4.1002, "norm_diff": 0.2674, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17945.3169, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5742, "query_norm": 1.925, "queue_k_norm": 1.6608, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7945, "sent_len_1": 66.6045, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5563, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 7100 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.0972, "doc_norm": 1.6479, "encoder_q-embeddings": 5179.75, "encoder_q-layer.0": 4301.9121, "encoder_q-layer.1": 4461.6079, "encoder_q-layer.10": 2882.1272, "encoder_q-layer.11": 5831.6318, "encoder_q-layer.2": 5106.7314, "encoder_q-layer.3": 4771.0249, "encoder_q-layer.4": 4626.7495, "encoder_q-layer.5": 4646.9004, "encoder_q-layer.6": 3827.5779, "encoder_q-layer.7": 3061.1714, "encoder_q-layer.8": 2854.2827, "encoder_q-layer.9": 2403.7659, "epoch": 0.07, "inbatch_neg_score": 0.5384, "inbatch_pos_score": 1.1035, "learning_rate": 3.6e-05, "loss": 4.0972, "norm_diff": 0.2782, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6427.3504, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5293, "query_norm": 1.9261, "queue_k_norm": 1.6573, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9208, "sent_len_1": 66.6823, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4613, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 7200 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.1085, "doc_norm": 1.6481, "encoder_q-embeddings": 4465.02, "encoder_q-layer.0": 3330.5779, "encoder_q-layer.1": 3203.8201, "encoder_q-layer.10": 3303.8386, "encoder_q-layer.11": 6765.4092, "encoder_q-layer.2": 3515.5962, "encoder_q-layer.3": 3619.8489, "encoder_q-layer.4": 3686.459, "encoder_q-layer.5": 3730.6997, "encoder_q-layer.6": 3719.6553, "encoder_q-layer.7": 3849.9509, "encoder_q-layer.8": 3681.0601, "encoder_q-layer.9": 2807.0762, "epoch": 0.07, "inbatch_neg_score": 0.5504, "inbatch_pos_score": 1.1143, "learning_rate": 3.65e-05, "loss": 4.1085, "norm_diff": 0.2983, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5850.5879, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.542, "query_norm": 1.9464, "queue_k_norm": 1.6488, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9557, "sent_len_1": 66.7591, "sent_max_len_0": 128.0, "sent_max_len_1": 189.73, "stdk": 0.049, "stdq": 0.0467, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 7300 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.039, "doc_norm": 1.6434, "encoder_q-embeddings": 3841.0474, "encoder_q-layer.0": 3134.6201, "encoder_q-layer.1": 3510.9939, "encoder_q-layer.10": 3997.1157, "encoder_q-layer.11": 6638.77, "encoder_q-layer.2": 4208.4033, "encoder_q-layer.3": 4737.3984, "encoder_q-layer.4": 4810.1948, "encoder_q-layer.5": 5033.7505, "encoder_q-layer.6": 4704.6885, "encoder_q-layer.7": 4102.0098, "encoder_q-layer.8": 3994.8623, "encoder_q-layer.9": 3879.7871, "epoch": 0.07, "inbatch_neg_score": 0.5464, "inbatch_pos_score": 1.0801, "learning_rate": 3.7e-05, "loss": 4.039, "norm_diff": 0.2831, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6382.1788, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5376, "query_norm": 1.9265, "queue_k_norm": 1.6468, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9347, "sent_len_1": 66.7611, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6025, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 7400 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 4.0199, "doc_norm": 1.6427, "encoder_q-embeddings": 3333.1528, "encoder_q-layer.0": 2427.4639, "encoder_q-layer.1": 2536.9778, "encoder_q-layer.10": 3342.0698, "encoder_q-layer.11": 6263.3555, "encoder_q-layer.2": 2927.8064, "encoder_q-layer.3": 2980.1267, "encoder_q-layer.4": 3164.6946, "encoder_q-layer.5": 3346.6018, "encoder_q-layer.6": 3398.051, "encoder_q-layer.7": 3368.5378, "encoder_q-layer.8": 3954.7695, "encoder_q-layer.9": 3110.9331, "epoch": 0.07, "inbatch_neg_score": 0.5246, "inbatch_pos_score": 1.0938, "learning_rate": 3.7500000000000003e-05, "loss": 4.0199, "norm_diff": 0.2707, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5140.2205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5156, "query_norm": 1.9134, "queue_k_norm": 1.6398, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0926, "sent_len_1": 66.9453, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4125, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 7500 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9981, "doc_norm": 1.6286, "encoder_q-embeddings": 3143.6313, "encoder_q-layer.0": 2298.6775, "encoder_q-layer.1": 2447.5061, "encoder_q-layer.10": 2765.3386, "encoder_q-layer.11": 5407.6099, "encoder_q-layer.2": 2796.2537, "encoder_q-layer.3": 2688.8979, "encoder_q-layer.4": 2810.3853, "encoder_q-layer.5": 2828.9753, "encoder_q-layer.6": 2626.9978, "encoder_q-layer.7": 2299.4333, "encoder_q-layer.8": 2409.6406, "encoder_q-layer.9": 2195.2214, "epoch": 0.07, "inbatch_neg_score": 0.4893, "inbatch_pos_score": 1.0586, "learning_rate": 3.8e-05, "loss": 3.9981, "norm_diff": 0.3189, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4365.0486, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4805, "query_norm": 1.9475, "queue_k_norm": 1.636, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9815, "sent_len_1": 66.8387, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7025, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 7600 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.0429, "doc_norm": 1.6322, "encoder_q-embeddings": 2745.519, "encoder_q-layer.0": 2144.1682, "encoder_q-layer.1": 2261.6624, "encoder_q-layer.10": 3021.8599, "encoder_q-layer.11": 6000.1904, "encoder_q-layer.2": 2489.7769, "encoder_q-layer.3": 2548.1135, "encoder_q-layer.4": 2670.9585, "encoder_q-layer.5": 2780.7493, "encoder_q-layer.6": 2696.4009, "encoder_q-layer.7": 2450.5454, "encoder_q-layer.8": 2494.5493, "encoder_q-layer.9": 2164.4553, "epoch": 0.08, "inbatch_neg_score": 0.4894, "inbatch_pos_score": 1.0605, "learning_rate": 3.85e-05, "loss": 4.0429, "norm_diff": 0.264, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4371.821, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4832, "query_norm": 1.8962, "queue_k_norm": 1.6309, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8225, "sent_len_1": 66.5445, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0137, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 7700 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 4.0201, "doc_norm": 1.6233, "encoder_q-embeddings": 2307.7273, "encoder_q-layer.0": 1677.8064, "encoder_q-layer.1": 1733.4159, "encoder_q-layer.10": 2546.0913, "encoder_q-layer.11": 5302.2407, "encoder_q-layer.2": 1924.5396, "encoder_q-layer.3": 1992.9025, "encoder_q-layer.4": 2115.551, "encoder_q-layer.5": 2027.1743, "encoder_q-layer.6": 2018.0228, "encoder_q-layer.7": 1999.8083, "encoder_q-layer.8": 2326.7268, "encoder_q-layer.9": 2027.1915, "epoch": 0.08, "inbatch_neg_score": 0.4866, "inbatch_pos_score": 1.0938, "learning_rate": 3.9000000000000006e-05, "loss": 4.0201, "norm_diff": 0.2667, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3653.6909, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4788, "query_norm": 1.8899, "queue_k_norm": 1.6241, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1475, "sent_len_1": 66.7962, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4062, "stdk": 0.0487, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7800 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 4.0355, "doc_norm": 1.6185, "encoder_q-embeddings": 3930.7703, "encoder_q-layer.0": 3070.0327, "encoder_q-layer.1": 3082.45, "encoder_q-layer.10": 2552.324, "encoder_q-layer.11": 5134.9961, "encoder_q-layer.2": 3621.0925, "encoder_q-layer.3": 3685.8403, "encoder_q-layer.4": 3536.2622, "encoder_q-layer.5": 3571.3672, "encoder_q-layer.6": 3062.0022, "encoder_q-layer.7": 2531.6292, "encoder_q-layer.8": 2752.7292, "encoder_q-layer.9": 2239.9033, "epoch": 0.08, "inbatch_neg_score": 0.486, "inbatch_pos_score": 1.1016, "learning_rate": 3.9500000000000005e-05, "loss": 4.0355, "norm_diff": 0.2838, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4995.2658, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4758, "query_norm": 1.9023, "queue_k_norm": 1.62, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7465, "sent_len_1": 66.6071, "sent_max_len_0": 128.0, "sent_max_len_1": 187.25, "stdk": 0.0487, "stdq": 0.0465, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7900 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.0528, "doc_norm": 1.6121, "encoder_q-embeddings": 3166.0195, "encoder_q-layer.0": 2272.5168, "encoder_q-layer.1": 2443.8884, "encoder_q-layer.10": 3152.6899, "encoder_q-layer.11": 6116.4502, "encoder_q-layer.2": 2812.2642, "encoder_q-layer.3": 2811.3955, "encoder_q-layer.4": 2997.0549, "encoder_q-layer.5": 3205.772, "encoder_q-layer.6": 3237.2151, "encoder_q-layer.7": 4032.5964, "encoder_q-layer.8": 3576.127, "encoder_q-layer.9": 2727.1023, "epoch": 0.08, "inbatch_neg_score": 0.4831, "inbatch_pos_score": 1.0342, "learning_rate": 4e-05, "loss": 4.0528, "norm_diff": 0.2822, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4974.0772, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4751, "query_norm": 1.8944, "queue_k_norm": 1.6185, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.919, "sent_len_1": 66.4893, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9863, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 8000 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9835, "doc_norm": 1.6101, "encoder_q-embeddings": 5378.2241, "encoder_q-layer.0": 4232.8379, "encoder_q-layer.1": 4464.2007, "encoder_q-layer.10": 2565.0317, "encoder_q-layer.11": 5325.9043, "encoder_q-layer.2": 4730.8809, "encoder_q-layer.3": 4513.9092, "encoder_q-layer.4": 4461.3076, "encoder_q-layer.5": 4624.9517, "encoder_q-layer.6": 4136.0415, "encoder_q-layer.7": 3244.355, "encoder_q-layer.8": 2798.5903, "encoder_q-layer.9": 2190.6545, "epoch": 0.08, "inbatch_neg_score": 0.4856, "inbatch_pos_score": 1.0664, "learning_rate": 4.05e-05, "loss": 3.9835, "norm_diff": 0.3064, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6280.5552, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4763, "query_norm": 1.9166, "queue_k_norm": 1.612, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0903, "sent_len_1": 66.7248, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7438, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 8100 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.964, "doc_norm": 1.6004, "encoder_q-embeddings": 5836.0229, "encoder_q-layer.0": 4536.7139, "encoder_q-layer.1": 4646.1846, "encoder_q-layer.10": 5046.1309, "encoder_q-layer.11": 11108.626, "encoder_q-layer.2": 5165.8174, "encoder_q-layer.3": 5071.2529, "encoder_q-layer.4": 5279.4668, "encoder_q-layer.5": 5407.5322, "encoder_q-layer.6": 4977.0146, "encoder_q-layer.7": 4587.9019, "encoder_q-layer.8": 5112.7275, "encoder_q-layer.9": 4052.0347, "epoch": 0.08, "inbatch_neg_score": 0.4852, "inbatch_pos_score": 1.082, "learning_rate": 4.1e-05, "loss": 3.964, "norm_diff": 0.2734, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8536.7546, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4778, "query_norm": 1.8739, "queue_k_norm": 1.6076, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9377, "sent_len_1": 66.7896, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2675, "stdk": 0.0485, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 8200 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.9701, "doc_norm": 1.604, "encoder_q-embeddings": 11814.498, "encoder_q-layer.0": 9397.1992, "encoder_q-layer.1": 10738.8662, "encoder_q-layer.10": 5488.5713, "encoder_q-layer.11": 12476.7881, "encoder_q-layer.2": 12527.3916, "encoder_q-layer.3": 13987.6816, "encoder_q-layer.4": 13304.8887, "encoder_q-layer.5": 11308.9326, "encoder_q-layer.6": 10630.2988, "encoder_q-layer.7": 8877.582, "encoder_q-layer.8": 7255.1377, "encoder_q-layer.9": 4883.2471, "epoch": 0.08, "inbatch_neg_score": 0.4968, "inbatch_pos_score": 1.0635, "learning_rate": 4.15e-05, "loss": 3.9701, "norm_diff": 0.2241, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16218.581, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4922, "query_norm": 1.8281, "queue_k_norm": 1.6045, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0322, "sent_len_1": 66.6502, "sent_max_len_0": 128.0, "sent_max_len_1": 190.32, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 8300 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.9767, "doc_norm": 1.5939, "encoder_q-embeddings": 8513.5, "encoder_q-layer.0": 6485.2949, "encoder_q-layer.1": 6957.0928, "encoder_q-layer.10": 5007.5591, "encoder_q-layer.11": 11138.7158, "encoder_q-layer.2": 7758.8765, "encoder_q-layer.3": 7822.5059, "encoder_q-layer.4": 8511.5332, "encoder_q-layer.5": 8750.8359, "encoder_q-layer.6": 8830.6621, "encoder_q-layer.7": 7251.2158, "encoder_q-layer.8": 5624.1079, "encoder_q-layer.9": 4044.615, "epoch": 0.08, "inbatch_neg_score": 0.4703, "inbatch_pos_score": 1.0439, "learning_rate": 4.2e-05, "loss": 3.9767, "norm_diff": 0.1648, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11416.8741, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4653, "query_norm": 1.7587, "queue_k_norm": 1.6011, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.709, "sent_len_1": 66.8283, "sent_max_len_0": 128.0, "sent_max_len_1": 190.94, "stdk": 0.0484, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 8400 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 3.9471, "doc_norm": 1.5908, "encoder_q-embeddings": 14496.5781, "encoder_q-layer.0": 10713.9824, "encoder_q-layer.1": 11070.0381, "encoder_q-layer.10": 5808.6099, "encoder_q-layer.11": 11595.4756, "encoder_q-layer.2": 12815.8408, "encoder_q-layer.3": 13781.3408, "encoder_q-layer.4": 14669.5352, "encoder_q-layer.5": 14948.6846, "encoder_q-layer.6": 15221.8594, "encoder_q-layer.7": 13152.2998, "encoder_q-layer.8": 10046.7725, "encoder_q-layer.9": 6164.0352, "epoch": 0.08, "inbatch_neg_score": 0.451, "inbatch_pos_score": 1.0137, "learning_rate": 4.25e-05, "loss": 3.9471, "norm_diff": 0.1809, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18192.4707, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4473, "query_norm": 1.7717, "queue_k_norm": 1.5956, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0091, "sent_len_1": 66.7102, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9175, "stdk": 0.0484, "stdq": 0.047, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 8500 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9375, "doc_norm": 1.5895, "encoder_q-embeddings": 6345.3765, "encoder_q-layer.0": 5102.5527, "encoder_q-layer.1": 5084.8115, "encoder_q-layer.10": 2255.1584, "encoder_q-layer.11": 4605.4111, "encoder_q-layer.2": 4386.0376, "encoder_q-layer.3": 4025.8145, "encoder_q-layer.4": 3812.7598, "encoder_q-layer.5": 4288.7979, "encoder_q-layer.6": 3868.6743, "encoder_q-layer.7": 2960.4236, "encoder_q-layer.8": 2346.3757, "encoder_q-layer.9": 1885.9329, "epoch": 0.08, "inbatch_neg_score": 0.4395, "inbatch_pos_score": 1.0469, "learning_rate": 4.3e-05, "loss": 3.9375, "norm_diff": 0.1742, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6344.1302, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4355, "query_norm": 1.7637, "queue_k_norm": 1.5898, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8119, "sent_len_1": 66.7382, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0662, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 8600 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9281, "doc_norm": 1.5915, "encoder_q-embeddings": 2426.4014, "encoder_q-layer.0": 1754.2914, "encoder_q-layer.1": 1874.1854, "encoder_q-layer.10": 2505.6128, "encoder_q-layer.11": 5364.6553, "encoder_q-layer.2": 2002.113, "encoder_q-layer.3": 2076.9189, "encoder_q-layer.4": 2284.7288, "encoder_q-layer.5": 2254.9426, "encoder_q-layer.6": 2245.4102, "encoder_q-layer.7": 2052.8594, "encoder_q-layer.8": 2324.8662, "encoder_q-layer.9": 2129.377, "epoch": 0.08, "inbatch_neg_score": 0.4545, "inbatch_pos_score": 1.0273, "learning_rate": 4.35e-05, "loss": 3.9281, "norm_diff": 0.1795, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3821.4707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4492, "query_norm": 1.771, "queue_k_norm": 1.585, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.923, "sent_len_1": 66.8144, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7388, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 8700 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8882, "doc_norm": 1.5922, "encoder_q-embeddings": 2237.5703, "encoder_q-layer.0": 1691.1095, "encoder_q-layer.1": 1758.0088, "encoder_q-layer.10": 2526.5715, "encoder_q-layer.11": 5309.3408, "encoder_q-layer.2": 1917.2933, "encoder_q-layer.3": 2107.8115, "encoder_q-layer.4": 2272.5217, "encoder_q-layer.5": 2338.7725, "encoder_q-layer.6": 2417.4688, "encoder_q-layer.7": 2855.7729, "encoder_q-layer.8": 2767.7241, "encoder_q-layer.9": 2192.061, "epoch": 0.09, "inbatch_neg_score": 0.4581, "inbatch_pos_score": 1.041, "learning_rate": 4.4000000000000006e-05, "loss": 3.8882, "norm_diff": 0.2385, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3983.0947, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4524, "query_norm": 1.8307, "queue_k_norm": 1.5813, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9688, "sent_len_1": 66.841, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4737, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 8800 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.9092, "doc_norm": 1.5752, "encoder_q-embeddings": 4108.9956, "encoder_q-layer.0": 3037.6924, "encoder_q-layer.1": 3400.7451, "encoder_q-layer.10": 2490.0752, "encoder_q-layer.11": 5089.5552, "encoder_q-layer.2": 3858.3384, "encoder_q-layer.3": 4104.0342, "encoder_q-layer.4": 4065.3992, "encoder_q-layer.5": 3964.0813, "encoder_q-layer.6": 4026.21, "encoder_q-layer.7": 3029.4194, "encoder_q-layer.8": 2937.0071, "encoder_q-layer.9": 2062.4868, "epoch": 0.09, "inbatch_neg_score": 0.472, "inbatch_pos_score": 1.0684, "learning_rate": 4.4500000000000004e-05, "loss": 3.9092, "norm_diff": 0.2397, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5520.3714, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4661, "query_norm": 1.815, "queue_k_norm": 1.5775, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9027, "sent_len_1": 66.7621, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2038, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 8900 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9331, "doc_norm": 1.5733, "encoder_q-embeddings": 28428.25, "encoder_q-layer.0": 21571.3809, "encoder_q-layer.1": 26044.1523, "encoder_q-layer.10": 2285.6973, "encoder_q-layer.11": 5008.3965, "encoder_q-layer.2": 30118.0449, "encoder_q-layer.3": 33871.4141, "encoder_q-layer.4": 36140.0742, "encoder_q-layer.5": 32741.4531, "encoder_q-layer.6": 34471.875, "encoder_q-layer.7": 21305.8809, "encoder_q-layer.8": 10587.3799, "encoder_q-layer.9": 2321.0054, "epoch": 0.09, "inbatch_neg_score": 0.474, "inbatch_pos_score": 1.0508, "learning_rate": 4.5e-05, "loss": 3.9331, "norm_diff": 0.1872, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 37530.8416, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.4668, "query_norm": 1.7605, "queue_k_norm": 1.5707, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1418, "sent_len_1": 66.7443, "sent_max_len_0": 128.0, "sent_max_len_1": 186.975, "stdk": 0.0483, "stdq": 0.046, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 9000 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9052, "doc_norm": 1.5634, "encoder_q-embeddings": 2329.1304, "encoder_q-layer.0": 1826.3383, "encoder_q-layer.1": 1970.1744, "encoder_q-layer.10": 2476.2373, "encoder_q-layer.11": 5106.1118, "encoder_q-layer.2": 2055.3235, "encoder_q-layer.3": 2157.665, "encoder_q-layer.4": 2132.2021, "encoder_q-layer.5": 2112.8799, "encoder_q-layer.6": 2342.783, "encoder_q-layer.7": 2179.1875, "encoder_q-layer.8": 2350.6882, "encoder_q-layer.9": 1995.0621, "epoch": 0.09, "inbatch_neg_score": 0.4699, "inbatch_pos_score": 1.0566, "learning_rate": 4.55e-05, "loss": 3.9052, "norm_diff": 0.1518, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3631.6899, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4653, "query_norm": 1.7152, "queue_k_norm": 1.5677, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0246, "sent_len_1": 66.6872, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7113, "stdk": 0.0482, "stdq": 0.0463, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 9100 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.9374, "doc_norm": 1.5644, "encoder_q-embeddings": 2091.7727, "encoder_q-layer.0": 1546.6447, "encoder_q-layer.1": 1638.9701, "encoder_q-layer.10": 2110.6799, "encoder_q-layer.11": 4823.5708, "encoder_q-layer.2": 1812.7776, "encoder_q-layer.3": 1949.2878, "encoder_q-layer.4": 1951.0656, "encoder_q-layer.5": 1983.5402, "encoder_q-layer.6": 2177.2021, "encoder_q-layer.7": 2120.3562, "encoder_q-layer.8": 2118.207, "encoder_q-layer.9": 1829.3379, "epoch": 0.09, "inbatch_neg_score": 0.4193, "inbatch_pos_score": 0.9941, "learning_rate": 4.600000000000001e-05, "loss": 3.9374, "norm_diff": 0.0883, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3443.4014, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4138, "query_norm": 1.6527, "queue_k_norm": 1.5632, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8553, "sent_len_1": 66.7163, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4688, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 9200 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.8898, "doc_norm": 1.5653, "encoder_q-embeddings": 2218.9312, "encoder_q-layer.0": 1766.0363, "encoder_q-layer.1": 1895.9043, "encoder_q-layer.10": 2422.158, "encoder_q-layer.11": 5220.5703, "encoder_q-layer.2": 2068.668, "encoder_q-layer.3": 2100.2437, "encoder_q-layer.4": 2077.6531, "encoder_q-layer.5": 2152.5139, "encoder_q-layer.6": 2244.936, "encoder_q-layer.7": 2220.1985, "encoder_q-layer.8": 2362.0061, "encoder_q-layer.9": 1879.469, "epoch": 0.09, "inbatch_neg_score": 0.3858, "inbatch_pos_score": 0.9375, "learning_rate": 4.6500000000000005e-05, "loss": 3.8898, "norm_diff": 0.1514, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3710.735, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3818, "query_norm": 1.7167, "queue_k_norm": 1.5601, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9834, "sent_len_1": 66.9512, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0712, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 9300 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.8729, "doc_norm": 1.5559, "encoder_q-embeddings": 3369.2881, "encoder_q-layer.0": 2620.5684, "encoder_q-layer.1": 2815.5239, "encoder_q-layer.10": 1913.45, "encoder_q-layer.11": 4345.3291, "encoder_q-layer.2": 3147.3723, "encoder_q-layer.3": 3240.8855, "encoder_q-layer.4": 3305.894, "encoder_q-layer.5": 3248.479, "encoder_q-layer.6": 2992.4417, "encoder_q-layer.7": 2381.6123, "encoder_q-layer.8": 2380.469, "encoder_q-layer.9": 1801.8413, "epoch": 0.09, "inbatch_neg_score": 0.3835, "inbatch_pos_score": 0.9595, "learning_rate": 4.7e-05, "loss": 3.8729, "norm_diff": 0.2672, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4405.2575, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3755, "query_norm": 1.8232, "queue_k_norm": 1.555, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1445, "sent_len_1": 66.5738, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9425, "stdk": 0.0484, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 9400 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.8584, "doc_norm": 1.5533, "encoder_q-embeddings": 3717.4285, "encoder_q-layer.0": 2822.4722, "encoder_q-layer.1": 3090.3821, "encoder_q-layer.10": 1998.957, "encoder_q-layer.11": 3932.3835, "encoder_q-layer.2": 3385.2422, "encoder_q-layer.3": 3316.064, "encoder_q-layer.4": 3315.4941, "encoder_q-layer.5": 3212.0479, "encoder_q-layer.6": 3813.3418, "encoder_q-layer.7": 3512.3132, "encoder_q-layer.8": 3182.5552, "encoder_q-layer.9": 1708.0924, "epoch": 0.09, "inbatch_neg_score": 0.4157, "inbatch_pos_score": 1.0381, "learning_rate": 4.75e-05, "loss": 3.8584, "norm_diff": 0.3134, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4863.1222, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4072, "query_norm": 1.8666, "queue_k_norm": 1.5512, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8107, "sent_len_1": 66.7812, "sent_max_len_0": 128.0, "sent_max_len_1": 188.985, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 9500 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.8739, "doc_norm": 1.5477, "encoder_q-embeddings": 1844.7899, "encoder_q-layer.0": 1298.0183, "encoder_q-layer.1": 1377.6073, "encoder_q-layer.10": 2034.6401, "encoder_q-layer.11": 4578.0435, "encoder_q-layer.2": 1566.8417, "encoder_q-layer.3": 1690.0135, "encoder_q-layer.4": 1760.3304, "encoder_q-layer.5": 1844.8112, "encoder_q-layer.6": 1889.5553, "encoder_q-layer.7": 1775.187, "encoder_q-layer.8": 2020.3032, "encoder_q-layer.9": 1794.3826, "epoch": 0.09, "inbatch_neg_score": 0.4416, "inbatch_pos_score": 1.0244, "learning_rate": 4.8e-05, "loss": 3.8739, "norm_diff": 0.2028, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3202.4884, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4368, "query_norm": 1.7504, "queue_k_norm": 1.549, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8528, "sent_len_1": 66.5521, "sent_max_len_0": 128.0, "sent_max_len_1": 187.3638, "stdk": 0.0484, "stdq": 0.0441, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 9600 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.8596, "doc_norm": 1.5431, "encoder_q-embeddings": 3558.7866, "encoder_q-layer.0": 2880.3789, "encoder_q-layer.1": 2854.7271, "encoder_q-layer.10": 1949.1479, "encoder_q-layer.11": 4393.7417, "encoder_q-layer.2": 3158.1055, "encoder_q-layer.3": 3273.113, "encoder_q-layer.4": 3378.5376, "encoder_q-layer.5": 3444.7158, "encoder_q-layer.6": 3171.4875, "encoder_q-layer.7": 2401.4519, "encoder_q-layer.8": 2005.3352, "encoder_q-layer.9": 1695.6982, "epoch": 0.09, "inbatch_neg_score": 0.4255, "inbatch_pos_score": 1.041, "learning_rate": 4.85e-05, "loss": 3.8596, "norm_diff": 0.1356, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4564.656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4211, "query_norm": 1.6787, "queue_k_norm": 1.5457, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0084, "sent_len_1": 66.7332, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7713, "stdk": 0.0483, "stdq": 0.0454, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 9700 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.88, "doc_norm": 1.5472, "encoder_q-embeddings": 2744.8625, "encoder_q-layer.0": 1941.8469, "encoder_q-layer.1": 2199.5332, "encoder_q-layer.10": 2007.4723, "encoder_q-layer.11": 4725.0591, "encoder_q-layer.2": 2307.0527, "encoder_q-layer.3": 2333.4902, "encoder_q-layer.4": 2351.4927, "encoder_q-layer.5": 2357.8501, "encoder_q-layer.6": 2570.4758, "encoder_q-layer.7": 2241.8457, "encoder_q-layer.8": 2334.7244, "encoder_q-layer.9": 1862.7498, "epoch": 0.1, "inbatch_neg_score": 0.4155, "inbatch_pos_score": 1.0078, "learning_rate": 4.9e-05, "loss": 3.88, "norm_diff": 0.1088, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3734.3654, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4097, "query_norm": 1.6561, "queue_k_norm": 1.5418, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0695, "sent_len_1": 66.8636, "sent_max_len_0": 128.0, "sent_max_len_1": 191.8187, "stdk": 0.0485, "stdq": 0.0446, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 9800 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.8568, "doc_norm": 1.5415, "encoder_q-embeddings": 3521.6423, "encoder_q-layer.0": 2799.7722, "encoder_q-layer.1": 2855.229, "encoder_q-layer.10": 2274.8401, "encoder_q-layer.11": 4555.9673, "encoder_q-layer.2": 3607.9863, "encoder_q-layer.3": 3846.7373, "encoder_q-layer.4": 3728.3733, "encoder_q-layer.5": 3387.5396, "encoder_q-layer.6": 3067.1875, "encoder_q-layer.7": 2344.4485, "encoder_q-layer.8": 2202.8059, "encoder_q-layer.9": 1899.2386, "epoch": 0.1, "inbatch_neg_score": 0.371, "inbatch_pos_score": 1.002, "learning_rate": 4.9500000000000004e-05, "loss": 3.8568, "norm_diff": 0.1678, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4858.9896, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3665, "query_norm": 1.7093, "queue_k_norm": 1.5392, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1624, "sent_len_1": 66.8869, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7337, "stdk": 0.0484, "stdq": 0.0464, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 9900 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.8404, "doc_norm": 1.531, "encoder_q-embeddings": 3037.1802, "encoder_q-layer.0": 2177.2124, "encoder_q-layer.1": 2348.6587, "encoder_q-layer.10": 2309.438, "encoder_q-layer.11": 4882.7256, "encoder_q-layer.2": 2721.4749, "encoder_q-layer.3": 2714.4055, "encoder_q-layer.4": 2788.7021, "encoder_q-layer.5": 2721.9194, "encoder_q-layer.6": 2684.6882, "encoder_q-layer.7": 2293.5112, "encoder_q-layer.8": 2341.2776, "encoder_q-layer.9": 2012.359, "epoch": 0.1, "inbatch_neg_score": 0.3737, "inbatch_pos_score": 0.9526, "learning_rate": 5e-05, "loss": 3.8404, "norm_diff": 0.2067, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4082.5754, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3672, "query_norm": 1.7376, "queue_k_norm": 1.5345, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8207, "sent_len_1": 66.6558, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0488, "stdk": 0.0482, "stdq": 0.0451, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 10000 }, { "dev_runtime": 37.3558, "dev_samples_per_second": 1.713, "dev_steps_per_second": 0.027, "epoch": 0.1, "step": 10000, "test_accuracy": 92.6513671875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.42117059230804443, "test_doc_norm": 1.5123836994171143, "test_inbatch_neg_score": 0.7163257598876953, "test_inbatch_pos_score": 1.519754409790039, "test_loss": 0.42117059230804443, "test_loss_align": 2.13350772857666, "test_loss_unif": 3.730835437774658, "test_loss_unif_q@queue": 3.7308356761932373, "test_norm_diff": 0.18010081350803375, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.35869747400283813, "test_query_norm": 1.6924846172332764, "test_queue_k_norm": 1.534543752670288, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.041490890085697174, "test_stdq": 0.03906825929880142, "test_stdqueue_k": 0.04841500148177147, "test_stdqueue_q": 0.0 }, { "dev_runtime": 37.3558, "dev_samples_per_second": 1.713, "dev_steps_per_second": 0.027, "epoch": 0.1, "eval_beir-arguana_ndcg@10": 0.26148, "eval_beir-arguana_recall@10": 0.4495, "eval_beir-arguana_recall@100": 0.76885, "eval_beir-arguana_recall@20": 0.57824, "eval_beir-avg_ndcg@10": 0.1817200833333333, "eval_beir-avg_recall@10": 0.23396208333333335, "eval_beir-avg_recall@100": 0.40604483333333335, "eval_beir-avg_recall@20": 0.28441916666666667, "eval_beir-cqadupstack_ndcg@10": 0.13934083333333333, "eval_beir-cqadupstack_recall@10": 0.19650083333333335, "eval_beir-cqadupstack_recall@100": 0.37933833333333333, "eval_beir-cqadupstack_recall@20": 0.24443166666666669, "eval_beir-fiqa_ndcg@10": 0.09338, "eval_beir-fiqa_recall@10": 0.12618, "eval_beir-fiqa_recall@100": 0.30558, "eval_beir-fiqa_recall@20": 0.16784, "eval_beir-nfcorpus_ndcg@10": 0.16898, "eval_beir-nfcorpus_recall@10": 0.0785, "eval_beir-nfcorpus_recall@100": 0.19207, "eval_beir-nfcorpus_recall@20": 0.10706, "eval_beir-nq_ndcg@10": 0.10015, "eval_beir-nq_recall@10": 0.18342, "eval_beir-nq_recall@100": 0.47806, "eval_beir-nq_recall@20": 0.26511, "eval_beir-quora_ndcg@10": 0.17089, "eval_beir-quora_recall@10": 0.24223, "eval_beir-quora_recall@100": 0.40254, "eval_beir-quora_recall@20": 0.28077, "eval_beir-scidocs_ndcg@10": 0.08637, "eval_beir-scidocs_recall@10": 0.09362, "eval_beir-scidocs_recall@100": 0.2435, "eval_beir-scidocs_recall@20": 0.12987, "eval_beir-scifact_ndcg@10": 0.48329, "eval_beir-scifact_recall@10": 0.62172, "eval_beir-scifact_recall@100": 0.82844, "eval_beir-scifact_recall@20": 0.70294, "eval_beir-trec-covid_ndcg@10": 0.25987, "eval_beir-trec-covid_recall@10": 0.298, "eval_beir-trec-covid_recall@100": 0.2212, "eval_beir-trec-covid_recall@20": 0.287, "eval_beir-webis-touche2020_ndcg@10": 0.05345, "eval_beir-webis-touche2020_recall@10": 0.04995, "eval_beir-webis-touche2020_recall@100": 0.24087, "eval_beir-webis-touche2020_recall@20": 0.08093, "eval_senteval-avg_sts": 0.6678796689318702, "eval_senteval-sickr_spearman": 0.6164997442695914, "eval_senteval-stsb_spearman": 0.7192595935941491, "step": 10000, "test_accuracy": 92.6513671875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.42117059230804443, "test_doc_norm": 1.5123836994171143, "test_inbatch_neg_score": 0.7163257598876953, "test_inbatch_pos_score": 1.519754409790039, "test_loss": 0.42117059230804443, "test_loss_align": 2.13350772857666, "test_loss_unif": 3.730835437774658, "test_loss_unif_q@queue": 3.7308356761932373, "test_norm_diff": 0.18010081350803375, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.35869747400283813, "test_query_norm": 1.6924846172332764, "test_queue_k_norm": 1.534543752670288, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.041490890085697174, "test_stdq": 0.03906825929880142, "test_stdqueue_k": 0.04841500148177147, "test_stdqueue_q": 0.0 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.8527, "doc_norm": 1.5329, "encoder_q-embeddings": 9126.6426, "encoder_q-layer.0": 7649.1934, "encoder_q-layer.1": 7255.1392, "encoder_q-layer.10": 2286.7781, "encoder_q-layer.11": 4760.5239, "encoder_q-layer.2": 9067.2764, "encoder_q-layer.3": 10007.8398, "encoder_q-layer.4": 9854.9131, "encoder_q-layer.5": 9124.4883, "encoder_q-layer.6": 6630.6279, "encoder_q-layer.7": 4149.6177, "encoder_q-layer.8": 2981.0652, "encoder_q-layer.9": 2114.3831, "epoch": 0.1, "inbatch_neg_score": 0.3936, "inbatch_pos_score": 1.002, "learning_rate": 4.994444444444445e-05, "loss": 3.8527, "norm_diff": 0.2115, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10727.1283, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3875, "query_norm": 1.7445, "queue_k_norm": 1.5314, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8094, "sent_len_1": 66.6801, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4663, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 10100 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 3.8198, "doc_norm": 1.5327, "encoder_q-embeddings": 3807.8469, "encoder_q-layer.0": 2810.8528, "encoder_q-layer.1": 3383.6948, "encoder_q-layer.10": 2192.8823, "encoder_q-layer.11": 4729.8022, "encoder_q-layer.2": 4051.3137, "encoder_q-layer.3": 4327.147, "encoder_q-layer.4": 4400.4375, "encoder_q-layer.5": 4281.4028, "encoder_q-layer.6": 3957.0659, "encoder_q-layer.7": 2910.5562, "encoder_q-layer.8": 2732.2275, "encoder_q-layer.9": 2152.5439, "epoch": 0.1, "inbatch_neg_score": 0.3915, "inbatch_pos_score": 0.9648, "learning_rate": 4.9888888888888894e-05, "loss": 3.8198, "norm_diff": 0.1514, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5377.5491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3884, "query_norm": 1.6841, "queue_k_norm": 1.5307, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8705, "sent_len_1": 66.8151, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0325, "stdk": 0.0484, "stdq": 0.0455, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 10200 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.826, "doc_norm": 1.53, "encoder_q-embeddings": 3528.8335, "encoder_q-layer.0": 2714.446, "encoder_q-layer.1": 2971.1436, "encoder_q-layer.10": 1990.5042, "encoder_q-layer.11": 4643.021, "encoder_q-layer.2": 3421.1052, "encoder_q-layer.3": 3610.9348, "encoder_q-layer.4": 3816.533, "encoder_q-layer.5": 3823.1963, "encoder_q-layer.6": 3806.8533, "encoder_q-layer.7": 3070.812, "encoder_q-layer.8": 2433.1572, "encoder_q-layer.9": 1802.348, "epoch": 0.1, "inbatch_neg_score": 0.4029, "inbatch_pos_score": 0.9976, "learning_rate": 4.9833333333333336e-05, "loss": 3.826, "norm_diff": 0.0889, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4903.2727, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3994, "query_norm": 1.6189, "queue_k_norm": 1.5261, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8732, "sent_len_1": 66.6755, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4837, "stdk": 0.0483, "stdq": 0.0454, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 10300 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.8361, "doc_norm": 1.5305, "encoder_q-embeddings": 5092.9858, "encoder_q-layer.0": 4160.6533, "encoder_q-layer.1": 4443.5386, "encoder_q-layer.10": 1863.7574, "encoder_q-layer.11": 4635.3784, "encoder_q-layer.2": 5174.2197, "encoder_q-layer.3": 5051.2676, "encoder_q-layer.4": 5035.7852, "encoder_q-layer.5": 5053.9819, "encoder_q-layer.6": 4500.6914, "encoder_q-layer.7": 3439.0391, "encoder_q-layer.8": 2555.9219, "encoder_q-layer.9": 1660.7036, "epoch": 0.1, "inbatch_neg_score": 0.3886, "inbatch_pos_score": 0.9976, "learning_rate": 4.977777777777778e-05, "loss": 3.8361, "norm_diff": 0.0251, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6541.2822, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3867, "query_norm": 1.5556, "queue_k_norm": 1.5259, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0462, "sent_len_1": 66.7898, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2488, "stdk": 0.0485, "stdq": 0.0442, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 10400 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.8066, "doc_norm": 1.5202, "encoder_q-embeddings": 3183.2275, "encoder_q-layer.0": 2436.6934, "encoder_q-layer.1": 2649.2988, "encoder_q-layer.10": 1984.7385, "encoder_q-layer.11": 4489.6431, "encoder_q-layer.2": 3022.0579, "encoder_q-layer.3": 3347.6313, "encoder_q-layer.4": 3189.8015, "encoder_q-layer.5": 3283.4211, "encoder_q-layer.6": 3129.0393, "encoder_q-layer.7": 2726.1777, "encoder_q-layer.8": 2639.2686, "encoder_q-layer.9": 1914.3292, "epoch": 0.1, "inbatch_neg_score": 0.3808, "inbatch_pos_score": 0.9624, "learning_rate": 4.972222222222223e-05, "loss": 3.8066, "norm_diff": 0.0622, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4417.4018, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3767, "query_norm": 1.5824, "queue_k_norm": 1.5241, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7236, "sent_len_1": 66.5268, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0225, "stdk": 0.0482, "stdq": 0.0444, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 10500 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8153, "doc_norm": 1.5204, "encoder_q-embeddings": 6467.0107, "encoder_q-layer.0": 5363.4141, "encoder_q-layer.1": 5938.9492, "encoder_q-layer.10": 1841.9348, "encoder_q-layer.11": 4281.978, "encoder_q-layer.2": 7152.3696, "encoder_q-layer.3": 7440.4116, "encoder_q-layer.4": 7049.3364, "encoder_q-layer.5": 7199.3096, "encoder_q-layer.6": 7204.8369, "encoder_q-layer.7": 7142.2749, "encoder_q-layer.8": 3946.3896, "encoder_q-layer.9": 1930.1853, "epoch": 0.1, "inbatch_neg_score": 0.3854, "inbatch_pos_score": 0.9453, "learning_rate": 4.966666666666667e-05, "loss": 3.8153, "norm_diff": 0.0713, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8793.4157, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3813, "query_norm": 1.5918, "queue_k_norm": 1.5206, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8097, "sent_len_1": 66.6343, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8688, "stdk": 0.0482, "stdq": 0.0439, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 10600 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.8165, "doc_norm": 1.5187, "encoder_q-embeddings": 2093.6509, "encoder_q-layer.0": 1595.834, "encoder_q-layer.1": 1692.9539, "encoder_q-layer.10": 1823.4008, "encoder_q-layer.11": 4308.9785, "encoder_q-layer.2": 1775.3636, "encoder_q-layer.3": 1826.3744, "encoder_q-layer.4": 1931.1608, "encoder_q-layer.5": 1906.7373, "encoder_q-layer.6": 1905.8811, "encoder_q-layer.7": 2017.8956, "encoder_q-layer.8": 2036.4572, "encoder_q-layer.9": 1652.1947, "epoch": 0.1, "inbatch_neg_score": 0.3979, "inbatch_pos_score": 0.9692, "learning_rate": 4.961111111111111e-05, "loss": 3.8165, "norm_diff": 0.0697, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3247.6487, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3943, "query_norm": 1.5884, "queue_k_norm": 1.5201, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8869, "sent_len_1": 66.6298, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1925, "stdk": 0.0482, "stdq": 0.0435, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 10700 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.8092, "doc_norm": 1.5166, "encoder_q-embeddings": 5026.9312, "encoder_q-layer.0": 3968.1172, "encoder_q-layer.1": 4203.2939, "encoder_q-layer.10": 1832.7747, "encoder_q-layer.11": 4365.3252, "encoder_q-layer.2": 4266.0493, "encoder_q-layer.3": 4388.186, "encoder_q-layer.4": 3903.1602, "encoder_q-layer.5": 3627.5889, "encoder_q-layer.6": 3520.8335, "encoder_q-layer.7": 3197.1594, "encoder_q-layer.8": 2447.0422, "encoder_q-layer.9": 1753.3865, "epoch": 0.11, "inbatch_neg_score": 0.3925, "inbatch_pos_score": 1.0127, "learning_rate": 4.955555555555556e-05, "loss": 3.8092, "norm_diff": 0.1426, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5569.3297, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3894, "query_norm": 1.6592, "queue_k_norm": 1.5182, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0488, "sent_len_1": 66.7179, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9762, "stdk": 0.0481, "stdq": 0.0474, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 10800 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.7832, "doc_norm": 1.5186, "encoder_q-embeddings": 9006.2217, "encoder_q-layer.0": 6940.6367, "encoder_q-layer.1": 7265.8545, "encoder_q-layer.10": 1915.7791, "encoder_q-layer.11": 4700.2114, "encoder_q-layer.2": 9047.6377, "encoder_q-layer.3": 9197.207, "encoder_q-layer.4": 9395.9961, "encoder_q-layer.5": 10576.4092, "encoder_q-layer.6": 10681.1758, "encoder_q-layer.7": 10260.6982, "encoder_q-layer.8": 7573.458, "encoder_q-layer.9": 2245.3857, "epoch": 0.11, "inbatch_neg_score": 0.3853, "inbatch_pos_score": 0.9492, "learning_rate": 4.9500000000000004e-05, "loss": 3.7832, "norm_diff": 0.0536, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12171.1804, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3823, "query_norm": 1.5722, "queue_k_norm": 1.5181, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9138, "sent_len_1": 66.8879, "sent_max_len_0": 128.0, "sent_max_len_1": 187.1163, "stdk": 0.0483, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 10900 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.8115, "doc_norm": 1.519, "encoder_q-embeddings": 4906.2681, "encoder_q-layer.0": 5366.8262, "encoder_q-layer.1": 5699.4824, "encoder_q-layer.10": 1641.1504, "encoder_q-layer.11": 3598.5437, "encoder_q-layer.2": 7036.3223, "encoder_q-layer.3": 5715.5039, "encoder_q-layer.4": 4678.5635, "encoder_q-layer.5": 4143.9629, "encoder_q-layer.6": 3898.6199, "encoder_q-layer.7": 3288.509, "encoder_q-layer.8": 2578.9526, "encoder_q-layer.9": 1565.1617, "epoch": 0.11, "inbatch_neg_score": 0.3909, "inbatch_pos_score": 1.0029, "learning_rate": 4.9444444444444446e-05, "loss": 3.8115, "norm_diff": 0.0253, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7004.1396, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3877, "query_norm": 1.5443, "queue_k_norm": 1.5147, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.6331, "sent_len_1": 66.6173, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8175, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 11000 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.8238, "doc_norm": 1.5179, "encoder_q-embeddings": 11654.9873, "encoder_q-layer.0": 10654.6865, "encoder_q-layer.1": 10746.0928, "encoder_q-layer.10": 2038.4668, "encoder_q-layer.11": 4614.9595, "encoder_q-layer.2": 13012.2959, "encoder_q-layer.3": 12863.9688, "encoder_q-layer.4": 13796.1162, "encoder_q-layer.5": 14591.7295, "encoder_q-layer.6": 11452.4561, "encoder_q-layer.7": 10041.4893, "encoder_q-layer.8": 6701.5894, "encoder_q-layer.9": 2291.3992, "epoch": 0.11, "inbatch_neg_score": 0.3949, "inbatch_pos_score": 0.9849, "learning_rate": 4.938888888888889e-05, "loss": 3.8238, "norm_diff": 0.0198, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15979.7269, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3916, "query_norm": 1.5378, "queue_k_norm": 1.5137, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0409, "sent_len_1": 66.8451, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6475, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11100 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7834, "doc_norm": 1.5192, "encoder_q-embeddings": 1962.4546, "encoder_q-layer.0": 1458.5718, "encoder_q-layer.1": 1617.8356, "encoder_q-layer.10": 848.2279, "encoder_q-layer.11": 1883.2726, "encoder_q-layer.2": 1879.4332, "encoder_q-layer.3": 1823.5004, "encoder_q-layer.4": 1765.0256, "encoder_q-layer.5": 1799.6224, "encoder_q-layer.6": 1716.7664, "encoder_q-layer.7": 1599.3937, "encoder_q-layer.8": 1682.4143, "encoder_q-layer.9": 1159.5663, "epoch": 0.11, "inbatch_neg_score": 0.3977, "inbatch_pos_score": 0.9805, "learning_rate": 4.933333333333334e-05, "loss": 3.7834, "norm_diff": 0.0064, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2491.6134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3948, "query_norm": 1.5173, "queue_k_norm": 1.515, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9445, "sent_len_1": 66.9824, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3325, "stdk": 0.0483, "stdq": 0.0432, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 11200 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.8175, "doc_norm": 1.5084, "encoder_q-embeddings": 598.2411, "encoder_q-layer.0": 452.1927, "encoder_q-layer.1": 482.4727, "encoder_q-layer.10": 419.9093, "encoder_q-layer.11": 1103.7334, "encoder_q-layer.2": 529.6849, "encoder_q-layer.3": 534.6472, "encoder_q-layer.4": 539.7852, "encoder_q-layer.5": 534.9767, "encoder_q-layer.6": 526.1143, "encoder_q-layer.7": 542.257, "encoder_q-layer.8": 555.155, "encoder_q-layer.9": 409.3682, "epoch": 0.11, "inbatch_neg_score": 0.3696, "inbatch_pos_score": 0.9795, "learning_rate": 4.927777777777778e-05, "loss": 3.8175, "norm_diff": 0.0191, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 852.8091, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3689, "query_norm": 1.521, "queue_k_norm": 1.5113, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9675, "sent_len_1": 66.6418, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8162, "stdk": 0.048, "stdq": 0.0441, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11300 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.7944, "doc_norm": 1.5071, "encoder_q-embeddings": 1256.6489, "encoder_q-layer.0": 973.5815, "encoder_q-layer.1": 1042.5499, "encoder_q-layer.10": 436.2685, "encoder_q-layer.11": 1126.8124, "encoder_q-layer.2": 1162.8326, "encoder_q-layer.3": 1221.2096, "encoder_q-layer.4": 1269.3165, "encoder_q-layer.5": 1302.2251, "encoder_q-layer.6": 1189.7611, "encoder_q-layer.7": 1063.6256, "encoder_q-layer.8": 847.2612, "encoder_q-layer.9": 476.1317, "epoch": 0.11, "inbatch_neg_score": 0.3657, "inbatch_pos_score": 0.957, "learning_rate": 4.922222222222222e-05, "loss": 3.7944, "norm_diff": 0.0099, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1590.7854, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3633, "query_norm": 1.4989, "queue_k_norm": 1.5106, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0055, "sent_len_1": 66.8457, "sent_max_len_0": 128.0, "sent_max_len_1": 188.17, "stdk": 0.048, "stdq": 0.0434, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11400 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.7666, "doc_norm": 1.5168, "encoder_q-embeddings": 781.6425, "encoder_q-layer.0": 599.9102, "encoder_q-layer.1": 623.575, "encoder_q-layer.10": 458.3023, "encoder_q-layer.11": 1046.6364, "encoder_q-layer.2": 732.7254, "encoder_q-layer.3": 788.2367, "encoder_q-layer.4": 823.9196, "encoder_q-layer.5": 839.5878, "encoder_q-layer.6": 888.6884, "encoder_q-layer.7": 873.8758, "encoder_q-layer.8": 916.8347, "encoder_q-layer.9": 533.5176, "epoch": 0.11, "inbatch_neg_score": 0.35, "inbatch_pos_score": 0.9912, "learning_rate": 4.9166666666666665e-05, "loss": 3.7666, "norm_diff": 0.0108, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1130.5347, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3491, "query_norm": 1.5172, "queue_k_norm": 1.5094, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7696, "sent_len_1": 66.6455, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8275, "stdk": 0.0484, "stdq": 0.0452, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11500 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7891, "doc_norm": 1.5101, "encoder_q-embeddings": 1715.0293, "encoder_q-layer.0": 1282.1475, "encoder_q-layer.1": 1421.3412, "encoder_q-layer.10": 452.36, "encoder_q-layer.11": 937.1433, "encoder_q-layer.2": 1458.2859, "encoder_q-layer.3": 1580.1786, "encoder_q-layer.4": 1601.8695, "encoder_q-layer.5": 1397.0217, "encoder_q-layer.6": 1232.0605, "encoder_q-layer.7": 1280.9523, "encoder_q-layer.8": 890.4059, "encoder_q-layer.9": 502.9158, "epoch": 0.11, "inbatch_neg_score": 0.3522, "inbatch_pos_score": 0.936, "learning_rate": 4.9111111111111114e-05, "loss": 3.7891, "norm_diff": 0.0313, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1926.2904, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3501, "query_norm": 1.4787, "queue_k_norm": 1.507, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9652, "sent_len_1": 66.8126, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5263, "stdk": 0.0482, "stdq": 0.0443, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11600 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.7857, "doc_norm": 1.511, "encoder_q-embeddings": 1475.4409, "encoder_q-layer.0": 1081.061, "encoder_q-layer.1": 1234.4814, "encoder_q-layer.10": 428.0195, "encoder_q-layer.11": 1025.491, "encoder_q-layer.2": 1261.8141, "encoder_q-layer.3": 1326.8323, "encoder_q-layer.4": 1409.1532, "encoder_q-layer.5": 1509.5879, "encoder_q-layer.6": 1509.7963, "encoder_q-layer.7": 1629.637, "encoder_q-layer.8": 1627.3967, "encoder_q-layer.9": 758.9493, "epoch": 0.11, "inbatch_neg_score": 0.3525, "inbatch_pos_score": 0.9795, "learning_rate": 4.905555555555556e-05, "loss": 3.7857, "norm_diff": 0.0248, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1897.035, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3518, "query_norm": 1.4862, "queue_k_norm": 1.5069, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0807, "sent_len_1": 66.9321, "sent_max_len_0": 128.0, "sent_max_len_1": 189.745, "stdk": 0.0482, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11700 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.755, "doc_norm": 1.4982, "encoder_q-embeddings": 1723.7433, "encoder_q-layer.0": 1755.9176, "encoder_q-layer.1": 1924.1892, "encoder_q-layer.10": 425.4989, "encoder_q-layer.11": 920.5051, "encoder_q-layer.2": 2064.9136, "encoder_q-layer.3": 1225.0648, "encoder_q-layer.4": 1254.9219, "encoder_q-layer.5": 1227.0945, "encoder_q-layer.6": 1204.5237, "encoder_q-layer.7": 1053.7251, "encoder_q-layer.8": 835.418, "encoder_q-layer.9": 532.4515, "epoch": 0.12, "inbatch_neg_score": 0.3383, "inbatch_pos_score": 0.9404, "learning_rate": 4.9e-05, "loss": 3.755, "norm_diff": 0.0049, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2053.2782, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3354, "query_norm": 1.4977, "queue_k_norm": 1.504, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9645, "sent_len_1": 66.996, "sent_max_len_0": 128.0, "sent_max_len_1": 189.59, "stdk": 0.0478, "stdq": 0.0448, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 11800 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7907, "doc_norm": 1.5023, "encoder_q-embeddings": 1300.2075, "encoder_q-layer.0": 989.0618, "encoder_q-layer.1": 1105.1931, "encoder_q-layer.10": 429.1985, "encoder_q-layer.11": 924.9601, "encoder_q-layer.2": 1313.5441, "encoder_q-layer.3": 1345.287, "encoder_q-layer.4": 1423.4465, "encoder_q-layer.5": 1429.0365, "encoder_q-layer.6": 1400.5262, "encoder_q-layer.7": 1089.5223, "encoder_q-layer.8": 667.5069, "encoder_q-layer.9": 420.3381, "epoch": 0.12, "inbatch_neg_score": 0.3202, "inbatch_pos_score": 0.9189, "learning_rate": 4.894444444444445e-05, "loss": 3.7907, "norm_diff": 0.0427, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1681.81, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3193, "query_norm": 1.4596, "queue_k_norm": 1.502, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0614, "sent_len_1": 67.0893, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9888, "stdk": 0.048, "stdq": 0.0437, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 11900 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.7701, "doc_norm": 1.5014, "encoder_q-embeddings": 2925.7332, "encoder_q-layer.0": 2205.3018, "encoder_q-layer.1": 2256.5393, "encoder_q-layer.10": 431.851, "encoder_q-layer.11": 913.819, "encoder_q-layer.2": 2395.676, "encoder_q-layer.3": 2430.8835, "encoder_q-layer.4": 2358.5859, "encoder_q-layer.5": 2470.3872, "encoder_q-layer.6": 2583.0725, "encoder_q-layer.7": 2256.8599, "encoder_q-layer.8": 1329.7, "encoder_q-layer.9": 519.6412, "epoch": 0.12, "inbatch_neg_score": 0.3325, "inbatch_pos_score": 0.9253, "learning_rate": 4.888888888888889e-05, "loss": 3.7701, "norm_diff": 0.034, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3255.1087, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3301, "query_norm": 1.4674, "queue_k_norm": 1.4987, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7794, "sent_len_1": 66.5976, "sent_max_len_0": 128.0, "sent_max_len_1": 186.1725, "stdk": 0.0481, "stdq": 0.044, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 12000 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.7758, "doc_norm": 1.5002, "encoder_q-embeddings": 1659.0184, "encoder_q-layer.0": 1230.9836, "encoder_q-layer.1": 1366.6635, "encoder_q-layer.10": 416.1913, "encoder_q-layer.11": 1232.2761, "encoder_q-layer.2": 1731.7726, "encoder_q-layer.3": 1879.1377, "encoder_q-layer.4": 1819.7449, "encoder_q-layer.5": 2067.6311, "encoder_q-layer.6": 1737.8129, "encoder_q-layer.7": 1241.5262, "encoder_q-layer.8": 833.4688, "encoder_q-layer.9": 476.087, "epoch": 0.12, "inbatch_neg_score": 0.2987, "inbatch_pos_score": 0.8984, "learning_rate": 4.883333333333334e-05, "loss": 3.7758, "norm_diff": 0.0344, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2155.0325, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2986, "query_norm": 1.4658, "queue_k_norm": 1.4955, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.858, "sent_len_1": 66.7496, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2862, "stdk": 0.0481, "stdq": 0.0441, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 12100 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.7278, "doc_norm": 1.4922, "encoder_q-embeddings": 1070.7878, "encoder_q-layer.0": 786.878, "encoder_q-layer.1": 941.4999, "encoder_q-layer.10": 383.8946, "encoder_q-layer.11": 973.4283, "encoder_q-layer.2": 1093.4214, "encoder_q-layer.3": 1164.6896, "encoder_q-layer.4": 1290.7715, "encoder_q-layer.5": 1141.9205, "encoder_q-layer.6": 1323.0315, "encoder_q-layer.7": 1079.6373, "encoder_q-layer.8": 757.5689, "encoder_q-layer.9": 509.3365, "epoch": 0.12, "inbatch_neg_score": 0.287, "inbatch_pos_score": 0.9092, "learning_rate": 4.8777777777777775e-05, "loss": 3.7278, "norm_diff": 0.027, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1488.3412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2864, "query_norm": 1.4656, "queue_k_norm": 1.4925, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1276, "sent_len_1": 67.0253, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7862, "stdk": 0.0479, "stdq": 0.0445, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 12200 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7959, "doc_norm": 1.4882, "encoder_q-embeddings": 3817.4216, "encoder_q-layer.0": 2915.0354, "encoder_q-layer.1": 3526.8811, "encoder_q-layer.10": 428.5457, "encoder_q-layer.11": 1024.1432, "encoder_q-layer.2": 3564.1799, "encoder_q-layer.3": 3328.9626, "encoder_q-layer.4": 3126.2471, "encoder_q-layer.5": 2898.2136, "encoder_q-layer.6": 2388.1985, "encoder_q-layer.7": 1448.3685, "encoder_q-layer.8": 865.1682, "encoder_q-layer.9": 534.4011, "epoch": 0.12, "inbatch_neg_score": 0.2897, "inbatch_pos_score": 0.8979, "learning_rate": 4.8722222222222224e-05, "loss": 3.7959, "norm_diff": 0.024, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4030.6605, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2876, "query_norm": 1.4642, "queue_k_norm": 1.4919, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9609, "sent_len_1": 66.7459, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3688, "stdk": 0.0478, "stdq": 0.0449, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 12300 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.7613, "doc_norm": 1.4923, "encoder_q-embeddings": 3849.2231, "encoder_q-layer.0": 2869.9299, "encoder_q-layer.1": 2915.3867, "encoder_q-layer.10": 403.0017, "encoder_q-layer.11": 927.0325, "encoder_q-layer.2": 3165.5605, "encoder_q-layer.3": 3598.7512, "encoder_q-layer.4": 3609.0828, "encoder_q-layer.5": 3408.1492, "encoder_q-layer.6": 3036.3408, "encoder_q-layer.7": 2408.4207, "encoder_q-layer.8": 1595.8793, "encoder_q-layer.9": 610.8804, "epoch": 0.12, "inbatch_neg_score": 0.2949, "inbatch_pos_score": 0.9106, "learning_rate": 4.866666666666667e-05, "loss": 3.7613, "norm_diff": 0.0214, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4171.6513, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2939, "query_norm": 1.4709, "queue_k_norm": 1.4872, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7347, "sent_len_1": 66.6425, "sent_max_len_0": 128.0, "sent_max_len_1": 188.085, "stdk": 0.048, "stdq": 0.0446, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 12400 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.8, "doc_norm": 1.4868, "encoder_q-embeddings": 1724.8857, "encoder_q-layer.0": 1306.5692, "encoder_q-layer.1": 1340.756, "encoder_q-layer.10": 392.868, "encoder_q-layer.11": 869.6208, "encoder_q-layer.2": 1379.0487, "encoder_q-layer.3": 1369.6888, "encoder_q-layer.4": 1269.3848, "encoder_q-layer.5": 1093.7417, "encoder_q-layer.6": 972.4558, "encoder_q-layer.7": 707.7662, "encoder_q-layer.8": 505.7879, "encoder_q-layer.9": 381.732, "epoch": 0.12, "inbatch_neg_score": 0.2922, "inbatch_pos_score": 0.9326, "learning_rate": 4.8611111111111115e-05, "loss": 3.8, "norm_diff": 0.0151, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1703.9897, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2913, "query_norm": 1.4717, "queue_k_norm": 1.4842, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9319, "sent_len_1": 66.9227, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6012, "stdk": 0.0479, "stdq": 0.0441, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 12500 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.7904, "doc_norm": 1.4799, "encoder_q-embeddings": 2584.7935, "encoder_q-layer.0": 1870.4818, "encoder_q-layer.1": 2363.5754, "encoder_q-layer.10": 404.9762, "encoder_q-layer.11": 999.093, "encoder_q-layer.2": 2561.6116, "encoder_q-layer.3": 2555.0261, "encoder_q-layer.4": 2778.4517, "encoder_q-layer.5": 3003.4722, "encoder_q-layer.6": 3175.4131, "encoder_q-layer.7": 2936.5498, "encoder_q-layer.8": 2223.2451, "encoder_q-layer.9": 842.515, "epoch": 0.12, "inbatch_neg_score": 0.2993, "inbatch_pos_score": 0.9199, "learning_rate": 4.855555555555556e-05, "loss": 3.7904, "norm_diff": 0.042, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3415.5143, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2998, "query_norm": 1.5219, "queue_k_norm": 1.4815, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8902, "sent_len_1": 66.8596, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8913, "stdk": 0.0477, "stdq": 0.045, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 12600 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.8249, "doc_norm": 1.4749, "encoder_q-embeddings": 1280.451, "encoder_q-layer.0": 879.083, "encoder_q-layer.1": 1032.9998, "encoder_q-layer.10": 389.7971, "encoder_q-layer.11": 1020.317, "encoder_q-layer.2": 1019.4603, "encoder_q-layer.3": 1159.4561, "encoder_q-layer.4": 1437.1007, "encoder_q-layer.5": 1294.5591, "encoder_q-layer.6": 1056.8418, "encoder_q-layer.7": 818.9913, "encoder_q-layer.8": 618.0167, "encoder_q-layer.9": 447.2755, "epoch": 0.12, "inbatch_neg_score": 0.3011, "inbatch_pos_score": 0.9014, "learning_rate": 4.85e-05, "loss": 3.8249, "norm_diff": 0.0466, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1498.7808, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3003, "query_norm": 1.5215, "queue_k_norm": 1.4765, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.952, "sent_len_1": 66.7794, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2937, "stdk": 0.0477, "stdq": 0.0446, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 12700 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7746, "doc_norm": 1.467, "encoder_q-embeddings": 971.1915, "encoder_q-layer.0": 716.5073, "encoder_q-layer.1": 793.5526, "encoder_q-layer.10": 426.6133, "encoder_q-layer.11": 965.2045, "encoder_q-layer.2": 822.8741, "encoder_q-layer.3": 874.7257, "encoder_q-layer.4": 858.3135, "encoder_q-layer.5": 913.206, "encoder_q-layer.6": 849.5543, "encoder_q-layer.7": 694.137, "encoder_q-layer.8": 624.3719, "encoder_q-layer.9": 434.1303, "epoch": 0.12, "inbatch_neg_score": 0.3102, "inbatch_pos_score": 0.8989, "learning_rate": 4.844444444444445e-05, "loss": 3.7746, "norm_diff": 0.0391, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1171.5425, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3081, "query_norm": 1.5061, "queue_k_norm": 1.4713, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9294, "sent_len_1": 66.7471, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3088, "stdk": 0.0475, "stdq": 0.0438, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 12800 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.8324, "doc_norm": 1.4648, "encoder_q-embeddings": 8559.5244, "encoder_q-layer.0": 6427.5249, "encoder_q-layer.1": 6577.3276, "encoder_q-layer.10": 423.8161, "encoder_q-layer.11": 1010.9144, "encoder_q-layer.2": 7428.4478, "encoder_q-layer.3": 7648.1519, "encoder_q-layer.4": 7765.6934, "encoder_q-layer.5": 6665.1035, "encoder_q-layer.6": 6173.5576, "encoder_q-layer.7": 5069.1138, "encoder_q-layer.8": 2539.0054, "encoder_q-layer.9": 694.0531, "epoch": 0.13, "inbatch_neg_score": 0.3133, "inbatch_pos_score": 0.937, "learning_rate": 4.838888888888889e-05, "loss": 3.8324, "norm_diff": 0.0501, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9114.6638, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3125, "query_norm": 1.5148, "queue_k_norm": 1.4663, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9291, "sent_len_1": 66.7826, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0025, "stdk": 0.0475, "stdq": 0.045, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 12900 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.8114, "doc_norm": 1.4696, "encoder_q-embeddings": 1792.8325, "encoder_q-layer.0": 1210.3773, "encoder_q-layer.1": 1469.3761, "encoder_q-layer.10": 428.5696, "encoder_q-layer.11": 954.15, "encoder_q-layer.2": 1693.5155, "encoder_q-layer.3": 1796.7406, "encoder_q-layer.4": 2054.5073, "encoder_q-layer.5": 1962.6506, "encoder_q-layer.6": 1618.5724, "encoder_q-layer.7": 1054.6565, "encoder_q-layer.8": 634.4353, "encoder_q-layer.9": 430.726, "epoch": 0.13, "inbatch_neg_score": 0.3195, "inbatch_pos_score": 0.9043, "learning_rate": 4.8333333333333334e-05, "loss": 3.8114, "norm_diff": 0.014, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2128.4061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3188, "query_norm": 1.4813, "queue_k_norm": 1.4637, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.965, "sent_len_1": 66.8035, "sent_max_len_0": 128.0, "sent_max_len_1": 187.4925, "stdk": 0.0478, "stdq": 0.0435, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 13000 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.748, "doc_norm": 1.4594, "encoder_q-embeddings": 3085.5171, "encoder_q-layer.0": 2323.0063, "encoder_q-layer.1": 2660.0283, "encoder_q-layer.10": 407.7632, "encoder_q-layer.11": 1070.3639, "encoder_q-layer.2": 2930.7327, "encoder_q-layer.3": 3196.678, "encoder_q-layer.4": 3611.0891, "encoder_q-layer.5": 3837.4463, "encoder_q-layer.6": 3979.0002, "encoder_q-layer.7": 3357.2705, "encoder_q-layer.8": 1326.553, "encoder_q-layer.9": 606.3932, "epoch": 0.13, "inbatch_neg_score": 0.2741, "inbatch_pos_score": 0.8574, "learning_rate": 4.8277777777777776e-05, "loss": 3.748, "norm_diff": 0.0114, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4116.9472, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2715, "query_norm": 1.4615, "queue_k_norm": 1.4602, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.6497, "sent_len_1": 66.7572, "sent_max_len_0": 128.0, "sent_max_len_1": 189.505, "stdk": 0.0474, "stdq": 0.0442, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 13100 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.7714, "doc_norm": 1.4572, "encoder_q-embeddings": 2867.1382, "encoder_q-layer.0": 2068.6909, "encoder_q-layer.1": 2501.4746, "encoder_q-layer.10": 232.044, "encoder_q-layer.11": 483.0477, "encoder_q-layer.2": 3178.6438, "encoder_q-layer.3": 3242.0679, "encoder_q-layer.4": 3549.3455, "encoder_q-layer.5": 3032.9878, "encoder_q-layer.6": 3557.541, "encoder_q-layer.7": 3228.9175, "encoder_q-layer.8": 1314.3352, "encoder_q-layer.9": 309.3714, "epoch": 0.13, "inbatch_neg_score": 0.273, "inbatch_pos_score": 0.8623, "learning_rate": 4.8222222222222225e-05, "loss": 3.7714, "norm_diff": 0.0131, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3941.7886, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2734, "query_norm": 1.4491, "queue_k_norm": 1.4578, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8796, "sent_len_1": 66.7485, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7063, "stdk": 0.0474, "stdq": 0.0435, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 13200 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.7491, "doc_norm": 1.4534, "encoder_q-embeddings": 1620.7775, "encoder_q-layer.0": 1115.4755, "encoder_q-layer.1": 1232.1075, "encoder_q-layer.10": 200.7894, "encoder_q-layer.11": 523.566, "encoder_q-layer.2": 1512.2428, "encoder_q-layer.3": 1536.4409, "encoder_q-layer.4": 1456.811, "encoder_q-layer.5": 1227.5809, "encoder_q-layer.6": 954.4739, "encoder_q-layer.7": 685.7849, "encoder_q-layer.8": 420.0695, "encoder_q-layer.9": 211.886, "epoch": 0.13, "inbatch_neg_score": 0.2858, "inbatch_pos_score": 0.896, "learning_rate": 4.8166666666666674e-05, "loss": 3.7491, "norm_diff": 0.0236, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1677.6978, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2834, "query_norm": 1.4767, "queue_k_norm": 1.4568, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.1258, "sent_len_1": 66.8954, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1262, "stdk": 0.0473, "stdq": 0.044, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 13300 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.7654, "doc_norm": 1.4548, "encoder_q-embeddings": 1104.2333, "encoder_q-layer.0": 788.7063, "encoder_q-layer.1": 906.4, "encoder_q-layer.10": 196.6455, "encoder_q-layer.11": 461.0955, "encoder_q-layer.2": 1114.1243, "encoder_q-layer.3": 1090.0879, "encoder_q-layer.4": 1069.2125, "encoder_q-layer.5": 1000.2854, "encoder_q-layer.6": 967.7065, "encoder_q-layer.7": 811.5062, "encoder_q-layer.8": 439.9344, "encoder_q-layer.9": 203.2442, "epoch": 0.13, "inbatch_neg_score": 0.2884, "inbatch_pos_score": 0.9023, "learning_rate": 4.811111111111111e-05, "loss": 3.7654, "norm_diff": 0.0586, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1276.8469, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2866, "query_norm": 1.5134, "queue_k_norm": 1.4531, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9198, "sent_len_1": 66.6096, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7912, "stdk": 0.0475, "stdq": 0.0446, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 13400 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.7721, "doc_norm": 1.4565, "encoder_q-embeddings": 798.809, "encoder_q-layer.0": 572.8371, "encoder_q-layer.1": 686.9156, "encoder_q-layer.10": 179.0298, "encoder_q-layer.11": 525.9814, "encoder_q-layer.2": 792.1835, "encoder_q-layer.3": 789.966, "encoder_q-layer.4": 694.6115, "encoder_q-layer.5": 715.4411, "encoder_q-layer.6": 715.8323, "encoder_q-layer.7": 539.1501, "encoder_q-layer.8": 336.9373, "encoder_q-layer.9": 185.6522, "epoch": 0.13, "inbatch_neg_score": 0.3018, "inbatch_pos_score": 0.9111, "learning_rate": 4.805555555555556e-05, "loss": 3.7721, "norm_diff": 0.0391, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 922.3438, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3013, "query_norm": 1.4956, "queue_k_norm": 1.4506, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8971, "sent_len_1": 66.6873, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0513, "stdk": 0.0476, "stdq": 0.044, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 13500 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8003, "doc_norm": 1.4398, "encoder_q-embeddings": 435.5981, "encoder_q-layer.0": 310.6275, "encoder_q-layer.1": 356.2498, "encoder_q-layer.10": 193.2817, "encoder_q-layer.11": 506.2979, "encoder_q-layer.2": 396.5273, "encoder_q-layer.3": 359.9361, "encoder_q-layer.4": 344.3542, "encoder_q-layer.5": 324.8602, "encoder_q-layer.6": 380.1005, "encoder_q-layer.7": 346.1803, "encoder_q-layer.8": 269.0069, "encoder_q-layer.9": 182.7588, "epoch": 0.13, "inbatch_neg_score": 0.317, "inbatch_pos_score": 0.9028, "learning_rate": 4.8e-05, "loss": 3.8003, "norm_diff": 0.1011, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 532.6078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3145, "query_norm": 1.541, "queue_k_norm": 1.4474, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8766, "sent_len_1": 66.718, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6037, "stdk": 0.0471, "stdq": 0.044, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 13600 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.8109, "doc_norm": 1.4457, "encoder_q-embeddings": 4997.5889, "encoder_q-layer.0": 3809.2131, "encoder_q-layer.1": 3694.7424, "encoder_q-layer.10": 231.4, "encoder_q-layer.11": 570.2198, "encoder_q-layer.2": 3817.0847, "encoder_q-layer.3": 3713.9468, "encoder_q-layer.4": 3590.8774, "encoder_q-layer.5": 3446.105, "encoder_q-layer.6": 3089.9011, "encoder_q-layer.7": 1863.7864, "encoder_q-layer.8": 900.6384, "encoder_q-layer.9": 282.9417, "epoch": 0.13, "inbatch_neg_score": 0.2943, "inbatch_pos_score": 0.8892, "learning_rate": 4.794444444444445e-05, "loss": 3.8109, "norm_diff": 0.0409, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4744.878, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2913, "query_norm": 1.4865, "queue_k_norm": 1.4435, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.1424, "sent_len_1": 66.7509, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3837, "stdk": 0.0474, "stdq": 0.0433, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 13700 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.8204, "doc_norm": 1.4422, "encoder_q-embeddings": 1263.9434, "encoder_q-layer.0": 889.8225, "encoder_q-layer.1": 952.1967, "encoder_q-layer.10": 197.67, "encoder_q-layer.11": 552.4765, "encoder_q-layer.2": 999.1175, "encoder_q-layer.3": 975.7026, "encoder_q-layer.4": 959.0341, "encoder_q-layer.5": 939.6488, "encoder_q-layer.6": 912.5176, "encoder_q-layer.7": 714.6954, "encoder_q-layer.8": 470.8221, "encoder_q-layer.9": 205.3938, "epoch": 0.13, "inbatch_neg_score": 0.294, "inbatch_pos_score": 0.8989, "learning_rate": 4.7888888888888886e-05, "loss": 3.8204, "norm_diff": 0.1043, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1287.1149, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2917, "query_norm": 1.5465, "queue_k_norm": 1.4396, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0157, "sent_len_1": 66.9074, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0863, "stdk": 0.0473, "stdq": 0.0454, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13800 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.8002, "doc_norm": 1.4369, "encoder_q-embeddings": 2190.3188, "encoder_q-layer.0": 1641.8998, "encoder_q-layer.1": 1760.7063, "encoder_q-layer.10": 193.0977, "encoder_q-layer.11": 498.0404, "encoder_q-layer.2": 2018.7343, "encoder_q-layer.3": 2104.27, "encoder_q-layer.4": 2344.3547, "encoder_q-layer.5": 2310.738, "encoder_q-layer.6": 2475.2764, "encoder_q-layer.7": 2305.7166, "encoder_q-layer.8": 1235.2456, "encoder_q-layer.9": 285.6683, "epoch": 0.14, "inbatch_neg_score": 0.2862, "inbatch_pos_score": 0.8965, "learning_rate": 4.7833333333333335e-05, "loss": 3.8002, "norm_diff": 0.0424, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2726.532, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2847, "query_norm": 1.4792, "queue_k_norm": 1.4375, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0018, "sent_len_1": 66.8779, "sent_max_len_0": 128.0, "sent_max_len_1": 191.9875, "stdk": 0.0472, "stdq": 0.0442, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13900 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.8111, "doc_norm": 1.4357, "encoder_q-embeddings": 2751.6443, "encoder_q-layer.0": 1939.4822, "encoder_q-layer.1": 2168.0828, "encoder_q-layer.10": 212.5211, "encoder_q-layer.11": 434.0795, "encoder_q-layer.2": 2391.2361, "encoder_q-layer.3": 2669.8669, "encoder_q-layer.4": 2925.283, "encoder_q-layer.5": 2680.0696, "encoder_q-layer.6": 2443.1284, "encoder_q-layer.7": 2063.3191, "encoder_q-layer.8": 1215.4247, "encoder_q-layer.9": 333.4556, "epoch": 0.14, "inbatch_neg_score": 0.2619, "inbatch_pos_score": 0.8413, "learning_rate": 4.7777777777777784e-05, "loss": 3.8111, "norm_diff": 0.0258, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3191.7227, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.26, "query_norm": 1.4615, "queue_k_norm": 1.434, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9663, "sent_len_1": 66.6392, "sent_max_len_0": 128.0, "sent_max_len_1": 191.68, "stdk": 0.0473, "stdq": 0.0445, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 14000 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.7773, "doc_norm": 1.4322, "encoder_q-embeddings": 795.2767, "encoder_q-layer.0": 591.1854, "encoder_q-layer.1": 604.8469, "encoder_q-layer.10": 215.9927, "encoder_q-layer.11": 547.3765, "encoder_q-layer.2": 659.8074, "encoder_q-layer.3": 714.7322, "encoder_q-layer.4": 803.6387, "encoder_q-layer.5": 732.9355, "encoder_q-layer.6": 712.5629, "encoder_q-layer.7": 465.6397, "encoder_q-layer.8": 304.2286, "encoder_q-layer.9": 197.7587, "epoch": 0.14, "inbatch_neg_score": 0.2428, "inbatch_pos_score": 0.8315, "learning_rate": 4.7722222222222226e-05, "loss": 3.7773, "norm_diff": 0.0584, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 896.1205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2416, "query_norm": 1.4906, "queue_k_norm": 1.4317, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8507, "sent_len_1": 66.7548, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1637, "stdk": 0.0472, "stdq": 0.0442, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 14100 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.7557, "doc_norm": 1.4255, "encoder_q-embeddings": 422.2756, "encoder_q-layer.0": 314.3701, "encoder_q-layer.1": 328.9502, "encoder_q-layer.10": 179.3673, "encoder_q-layer.11": 400.5014, "encoder_q-layer.2": 365.5537, "encoder_q-layer.3": 368.4449, "encoder_q-layer.4": 349.9974, "encoder_q-layer.5": 335.298, "encoder_q-layer.6": 350.9282, "encoder_q-layer.7": 359.1332, "encoder_q-layer.8": 317.0159, "encoder_q-layer.9": 209.487, "epoch": 0.14, "inbatch_neg_score": 0.2332, "inbatch_pos_score": 0.856, "learning_rate": 4.766666666666667e-05, "loss": 3.7557, "norm_diff": 0.09, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 502.082, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2324, "query_norm": 1.5155, "queue_k_norm": 1.4282, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9606, "sent_len_1": 66.7424, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6012, "stdk": 0.0471, "stdq": 0.0451, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14200 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.7827, "doc_norm": 1.4333, "encoder_q-embeddings": 1015.936, "encoder_q-layer.0": 819.5609, "encoder_q-layer.1": 875.043, "encoder_q-layer.10": 192.5464, "encoder_q-layer.11": 451.9021, "encoder_q-layer.2": 917.2497, "encoder_q-layer.3": 931.4559, "encoder_q-layer.4": 936.2823, "encoder_q-layer.5": 955.656, "encoder_q-layer.6": 835.4227, "encoder_q-layer.7": 768.1489, "encoder_q-layer.8": 466.4324, "encoder_q-layer.9": 219.0986, "epoch": 0.14, "inbatch_neg_score": 0.2526, "inbatch_pos_score": 0.8779, "learning_rate": 4.761111111111111e-05, "loss": 3.7827, "norm_diff": 0.0999, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1163.9857, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2524, "query_norm": 1.5333, "queue_k_norm": 1.4245, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8515, "sent_len_1": 66.6147, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5462, "stdk": 0.0475, "stdq": 0.0453, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14300 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.7477, "doc_norm": 1.4198, "encoder_q-embeddings": 1108.3652, "encoder_q-layer.0": 817.4711, "encoder_q-layer.1": 815.2624, "encoder_q-layer.10": 176.7507, "encoder_q-layer.11": 458.7482, "encoder_q-layer.2": 925.5131, "encoder_q-layer.3": 934.9206, "encoder_q-layer.4": 880.9213, "encoder_q-layer.5": 829.9568, "encoder_q-layer.6": 808.3671, "encoder_q-layer.7": 690.8621, "encoder_q-layer.8": 722.7192, "encoder_q-layer.9": 392.4698, "epoch": 0.14, "inbatch_neg_score": 0.2635, "inbatch_pos_score": 0.8721, "learning_rate": 4.755555555555556e-05, "loss": 3.7477, "norm_diff": 0.0947, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1179.7032, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.262, "query_norm": 1.5145, "queue_k_norm": 1.4226, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8742, "sent_len_1": 66.7042, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2475, "stdk": 0.047, "stdq": 0.0443, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14400 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7719, "doc_norm": 1.415, "encoder_q-embeddings": 1131.4501, "encoder_q-layer.0": 827.1096, "encoder_q-layer.1": 883.7488, "encoder_q-layer.10": 236.7186, "encoder_q-layer.11": 555.2093, "encoder_q-layer.2": 1012.5291, "encoder_q-layer.3": 1152.9346, "encoder_q-layer.4": 1318.772, "encoder_q-layer.5": 1390.8135, "encoder_q-layer.6": 1136.7885, "encoder_q-layer.7": 818.8971, "encoder_q-layer.8": 475.4359, "encoder_q-layer.9": 254.5848, "epoch": 0.14, "inbatch_neg_score": 0.2687, "inbatch_pos_score": 0.8296, "learning_rate": 4.75e-05, "loss": 3.7719, "norm_diff": 0.1031, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1386.6726, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2656, "query_norm": 1.5181, "queue_k_norm": 1.4232, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9217, "sent_len_1": 66.7756, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4875, "stdk": 0.0468, "stdq": 0.044, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14500 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7644, "doc_norm": 1.4217, "encoder_q-embeddings": 1059.2546, "encoder_q-layer.0": 769.2678, "encoder_q-layer.1": 867.0495, "encoder_q-layer.10": 205.9626, "encoder_q-layer.11": 473.9135, "encoder_q-layer.2": 971.1918, "encoder_q-layer.3": 991.6965, "encoder_q-layer.4": 901.6828, "encoder_q-layer.5": 933.2266, "encoder_q-layer.6": 803.4598, "encoder_q-layer.7": 564.1382, "encoder_q-layer.8": 310.6254, "encoder_q-layer.9": 188.0536, "epoch": 0.14, "inbatch_neg_score": 0.2676, "inbatch_pos_score": 0.8599, "learning_rate": 4.7444444444444445e-05, "loss": 3.7644, "norm_diff": 0.1157, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1142.2735, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2642, "query_norm": 1.5374, "queue_k_norm": 1.4186, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9403, "sent_len_1": 66.7049, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2175, "stdk": 0.0471, "stdq": 0.0454, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 14600 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.7581, "doc_norm": 1.4146, "encoder_q-embeddings": 1587.9374, "encoder_q-layer.0": 1263.6409, "encoder_q-layer.1": 1207.2095, "encoder_q-layer.10": 250.047, "encoder_q-layer.11": 474.3844, "encoder_q-layer.2": 1465.3113, "encoder_q-layer.3": 1567.7484, "encoder_q-layer.4": 1477.5537, "encoder_q-layer.5": 1462.697, "encoder_q-layer.6": 1572.3326, "encoder_q-layer.7": 1710.2887, "encoder_q-layer.8": 1715.2892, "encoder_q-layer.9": 1002.7847, "epoch": 0.14, "inbatch_neg_score": 0.2614, "inbatch_pos_score": 0.8506, "learning_rate": 4.7388888888888894e-05, "loss": 3.7581, "norm_diff": 0.1213, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2058.9713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2598, "query_norm": 1.5358, "queue_k_norm": 1.4181, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0879, "sent_len_1": 66.7892, "sent_max_len_0": 128.0, "sent_max_len_1": 189.97, "stdk": 0.0469, "stdq": 0.0458, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 14700 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.784, "doc_norm": 1.4183, "encoder_q-embeddings": 212.5462, "encoder_q-layer.0": 139.8297, "encoder_q-layer.1": 153.1564, "encoder_q-layer.10": 183.8877, "encoder_q-layer.11": 407.072, "encoder_q-layer.2": 173.479, "encoder_q-layer.3": 171.0556, "encoder_q-layer.4": 180.8121, "encoder_q-layer.5": 182.1551, "encoder_q-layer.6": 187.7803, "encoder_q-layer.7": 200.1177, "encoder_q-layer.8": 222.0048, "encoder_q-layer.9": 178.9089, "epoch": 0.14, "inbatch_neg_score": 0.2548, "inbatch_pos_score": 0.8696, "learning_rate": 4.7333333333333336e-05, "loss": 3.784, "norm_diff": 0.0925, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 312.1071, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2532, "query_norm": 1.5108, "queue_k_norm": 1.418, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.5792, "sent_len_1": 66.7561, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0575, "stdk": 0.0471, "stdq": 0.0448, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14800 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.7415, "doc_norm": 1.4188, "encoder_q-embeddings": 358.8188, "encoder_q-layer.0": 247.8521, "encoder_q-layer.1": 302.4949, "encoder_q-layer.10": 193.947, "encoder_q-layer.11": 453.4926, "encoder_q-layer.2": 342.6641, "encoder_q-layer.3": 378.2597, "encoder_q-layer.4": 444.5037, "encoder_q-layer.5": 458.0376, "encoder_q-layer.6": 562.1864, "encoder_q-layer.7": 511.6816, "encoder_q-layer.8": 283.0871, "encoder_q-layer.9": 190.5224, "epoch": 0.15, "inbatch_neg_score": 0.2645, "inbatch_pos_score": 0.8608, "learning_rate": 4.727777777777778e-05, "loss": 3.7415, "norm_diff": 0.106, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 555.2857, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2644, "query_norm": 1.5249, "queue_k_norm": 1.4184, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0314, "sent_len_1": 67.0511, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5838, "stdk": 0.0472, "stdq": 0.0445, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14900 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.7283, "doc_norm": 1.4155, "encoder_q-embeddings": 1741.8225, "encoder_q-layer.0": 1236.5345, "encoder_q-layer.1": 1311.1008, "encoder_q-layer.10": 212.9071, "encoder_q-layer.11": 490.235, "encoder_q-layer.2": 1496.5485, "encoder_q-layer.3": 1504.3628, "encoder_q-layer.4": 1467.3281, "encoder_q-layer.5": 1293.5248, "encoder_q-layer.6": 1198.1888, "encoder_q-layer.7": 1080.5836, "encoder_q-layer.8": 976.5828, "encoder_q-layer.9": 636.894, "epoch": 0.15, "inbatch_neg_score": 0.266, "inbatch_pos_score": 0.8477, "learning_rate": 4.722222222222222e-05, "loss": 3.7283, "norm_diff": 0.0973, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1824.6911, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2654, "query_norm": 1.5127, "queue_k_norm": 1.4151, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9089, "sent_len_1": 66.7752, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4263, "stdk": 0.0471, "stdq": 0.0439, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15000 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.7332, "doc_norm": 1.4147, "encoder_q-embeddings": 533.086, "encoder_q-layer.0": 416.5028, "encoder_q-layer.1": 458.2967, "encoder_q-layer.10": 180.9997, "encoder_q-layer.11": 390.2127, "encoder_q-layer.2": 495.113, "encoder_q-layer.3": 470.0929, "encoder_q-layer.4": 486.375, "encoder_q-layer.5": 519.9919, "encoder_q-layer.6": 523.8776, "encoder_q-layer.7": 489.1165, "encoder_q-layer.8": 297.7428, "encoder_q-layer.9": 181.5132, "epoch": 0.15, "inbatch_neg_score": 0.2623, "inbatch_pos_score": 0.8945, "learning_rate": 4.716666666666667e-05, "loss": 3.7332, "norm_diff": 0.1143, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 651.2777, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2607, "query_norm": 1.529, "queue_k_norm": 1.4136, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0549, "sent_len_1": 66.9405, "sent_max_len_0": 128.0, "sent_max_len_1": 191.255, "stdk": 0.0471, "stdq": 0.0457, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15100 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.735, "doc_norm": 1.4135, "encoder_q-embeddings": 3169.7559, "encoder_q-layer.0": 2342.1697, "encoder_q-layer.1": 2507.304, "encoder_q-layer.10": 411.5363, "encoder_q-layer.11": 932.9662, "encoder_q-layer.2": 2915.4971, "encoder_q-layer.3": 3091.9917, "encoder_q-layer.4": 3362.2898, "encoder_q-layer.5": 3245.4194, "encoder_q-layer.6": 2635.144, "encoder_q-layer.7": 2008.2775, "encoder_q-layer.8": 1164.9126, "encoder_q-layer.9": 527.6869, "epoch": 0.15, "inbatch_neg_score": 0.2383, "inbatch_pos_score": 0.8418, "learning_rate": 4.711111111111111e-05, "loss": 3.735, "norm_diff": 0.1085, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3639.1394, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2388, "query_norm": 1.522, "queue_k_norm": 1.4145, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.834, "sent_len_1": 66.7538, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0825, "stdk": 0.0471, "stdq": 0.0455, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 15200 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.7476, "doc_norm": 1.4103, "encoder_q-embeddings": 10891.6543, "encoder_q-layer.0": 7714.4053, "encoder_q-layer.1": 7944.7563, "encoder_q-layer.10": 355.1263, "encoder_q-layer.11": 913.6475, "encoder_q-layer.2": 9287.6875, "encoder_q-layer.3": 10692.2109, "encoder_q-layer.4": 10407.0566, "encoder_q-layer.5": 9735.8125, "encoder_q-layer.6": 7649.7695, "encoder_q-layer.7": 4857.9585, "encoder_q-layer.8": 1943.9766, "encoder_q-layer.9": 499.6726, "epoch": 0.15, "inbatch_neg_score": 0.27, "inbatch_pos_score": 0.8667, "learning_rate": 4.7055555555555555e-05, "loss": 3.7476, "norm_diff": 0.099, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11471.9891, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.269, "query_norm": 1.5093, "queue_k_norm": 1.4114, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.167, "sent_len_1": 66.7633, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6025, "stdk": 0.047, "stdq": 0.0447, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15300 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7253, "doc_norm": 1.4123, "encoder_q-embeddings": 2200.8816, "encoder_q-layer.0": 1562.4109, "encoder_q-layer.1": 1727.1785, "encoder_q-layer.10": 388.364, "encoder_q-layer.11": 861.1769, "encoder_q-layer.2": 2250.5054, "encoder_q-layer.3": 2424.1499, "encoder_q-layer.4": 2412.5911, "encoder_q-layer.5": 2101.5083, "encoder_q-layer.6": 2077.0281, "encoder_q-layer.7": 1956.6439, "encoder_q-layer.8": 1129.1847, "encoder_q-layer.9": 459.9446, "epoch": 0.15, "inbatch_neg_score": 0.2279, "inbatch_pos_score": 0.8188, "learning_rate": 4.7e-05, "loss": 3.7253, "norm_diff": 0.0759, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2669.7441, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2271, "query_norm": 1.4883, "queue_k_norm": 1.4125, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0852, "sent_len_1": 66.7339, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0462, "stdk": 0.0471, "stdq": 0.0451, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 15400 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.7093, "doc_norm": 1.4176, "encoder_q-embeddings": 2254.3416, "encoder_q-layer.0": 1579.5554, "encoder_q-layer.1": 2081.0908, "encoder_q-layer.10": 416.1562, "encoder_q-layer.11": 848.3127, "encoder_q-layer.2": 2480.3091, "encoder_q-layer.3": 2741.7891, "encoder_q-layer.4": 2943.6089, "encoder_q-layer.5": 2622.7346, "encoder_q-layer.6": 2055.252, "encoder_q-layer.7": 1248.4773, "encoder_q-layer.8": 512.4978, "encoder_q-layer.9": 341.7864, "epoch": 0.15, "inbatch_neg_score": 0.2304, "inbatch_pos_score": 0.8481, "learning_rate": 4.6944444444444446e-05, "loss": 3.7093, "norm_diff": 0.0643, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2832.4658, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2286, "query_norm": 1.4819, "queue_k_norm": 1.4101, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8778, "sent_len_1": 66.7929, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5625, "stdk": 0.0473, "stdq": 0.0446, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15500 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.7382, "doc_norm": 1.4055, "encoder_q-embeddings": 912.9368, "encoder_q-layer.0": 678.1561, "encoder_q-layer.1": 745.2568, "encoder_q-layer.10": 376.242, "encoder_q-layer.11": 928.5197, "encoder_q-layer.2": 847.4238, "encoder_q-layer.3": 875.5267, "encoder_q-layer.4": 819.2725, "encoder_q-layer.5": 846.9387, "encoder_q-layer.6": 775.3784, "encoder_q-layer.7": 669.6934, "encoder_q-layer.8": 451.0255, "encoder_q-layer.9": 356.7258, "epoch": 0.15, "inbatch_neg_score": 0.2417, "inbatch_pos_score": 0.8091, "learning_rate": 4.6888888888888895e-05, "loss": 3.7382, "norm_diff": 0.041, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1104.0046, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2394, "query_norm": 1.4465, "queue_k_norm": 1.4089, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.2431, "sent_len_1": 66.8668, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0137, "stdk": 0.0469, "stdq": 0.0437, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15600 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.7558, "doc_norm": 1.4024, "encoder_q-embeddings": 1048.4872, "encoder_q-layer.0": 714.452, "encoder_q-layer.1": 709.6117, "encoder_q-layer.10": 457.4622, "encoder_q-layer.11": 970.8304, "encoder_q-layer.2": 738.3916, "encoder_q-layer.3": 731.2146, "encoder_q-layer.4": 739.3929, "encoder_q-layer.5": 792.5068, "encoder_q-layer.6": 760.2808, "encoder_q-layer.7": 748.2123, "encoder_q-layer.8": 673.7017, "encoder_q-layer.9": 455.4038, "epoch": 0.15, "inbatch_neg_score": 0.2412, "inbatch_pos_score": 0.9175, "learning_rate": 4.683333333333334e-05, "loss": 3.7558, "norm_diff": 0.1402, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1126.2983, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2382, "query_norm": 1.5426, "queue_k_norm": 1.4078, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.082, "sent_len_1": 66.8991, "sent_max_len_0": 128.0, "sent_max_len_1": 188.13, "stdk": 0.0468, "stdq": 0.0478, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15700 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.7408, "doc_norm": 1.4088, "encoder_q-embeddings": 2626.095, "encoder_q-layer.0": 1833.373, "encoder_q-layer.1": 1935.2697, "encoder_q-layer.10": 339.0569, "encoder_q-layer.11": 773.4079, "encoder_q-layer.2": 2663.7603, "encoder_q-layer.3": 2213.3801, "encoder_q-layer.4": 1721.7786, "encoder_q-layer.5": 1557.9772, "encoder_q-layer.6": 1431.899, "encoder_q-layer.7": 1333.0714, "encoder_q-layer.8": 911.9082, "encoder_q-layer.9": 380.6698, "epoch": 0.15, "inbatch_neg_score": 0.235, "inbatch_pos_score": 0.832, "learning_rate": 4.677777777777778e-05, "loss": 3.7408, "norm_diff": 0.0685, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2582.0204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2343, "query_norm": 1.4773, "queue_k_norm": 1.4067, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.988, "sent_len_1": 66.5382, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7688, "stdk": 0.0471, "stdq": 0.0451, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15800 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.7039, "doc_norm": 1.4063, "encoder_q-embeddings": 1252.1451, "encoder_q-layer.0": 939.847, "encoder_q-layer.1": 1008.0081, "encoder_q-layer.10": 371.2855, "encoder_q-layer.11": 863.2305, "encoder_q-layer.2": 1048.7152, "encoder_q-layer.3": 1110.6632, "encoder_q-layer.4": 1054.385, "encoder_q-layer.5": 1034.4124, "encoder_q-layer.6": 869.2734, "encoder_q-layer.7": 730.7847, "encoder_q-layer.8": 441.3317, "encoder_q-layer.9": 333.2159, "epoch": 0.16, "inbatch_neg_score": 0.2404, "inbatch_pos_score": 0.8677, "learning_rate": 4.672222222222222e-05, "loss": 3.7039, "norm_diff": 0.0688, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1364.4632, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2402, "query_norm": 1.4751, "queue_k_norm": 1.4073, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0708, "sent_len_1": 66.9053, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0488, "stdk": 0.0471, "stdq": 0.0458, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 15900 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.7178, "doc_norm": 1.4135, "encoder_q-embeddings": 28072.6504, "encoder_q-layer.0": 20548.6719, "encoder_q-layer.1": 18052.1699, "encoder_q-layer.10": 241.4423, "encoder_q-layer.11": 453.7159, "encoder_q-layer.2": 22249.3223, "encoder_q-layer.3": 20311.4668, "encoder_q-layer.4": 17888.834, "encoder_q-layer.5": 16491.0742, "encoder_q-layer.6": 16818.793, "encoder_q-layer.7": 10798.9414, "encoder_q-layer.8": 5461.9761, "encoder_q-layer.9": 936.6177, "epoch": 0.16, "inbatch_neg_score": 0.2608, "inbatch_pos_score": 0.8848, "learning_rate": 4.666666666666667e-05, "loss": 3.7178, "norm_diff": 0.0596, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25686.5623, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2607, "query_norm": 1.4731, "queue_k_norm": 1.4067, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9276, "sent_len_1": 66.9262, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6763, "stdk": 0.0474, "stdq": 0.0451, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 16000 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7148, "doc_norm": 1.413, "encoder_q-embeddings": 1182.7577, "encoder_q-layer.0": 827.0007, "encoder_q-layer.1": 902.8774, "encoder_q-layer.10": 212.3741, "encoder_q-layer.11": 468.0152, "encoder_q-layer.2": 993.398, "encoder_q-layer.3": 1034.8196, "encoder_q-layer.4": 1120.3062, "encoder_q-layer.5": 1138.1552, "encoder_q-layer.6": 1368.4474, "encoder_q-layer.7": 1787.4846, "encoder_q-layer.8": 2005.4099, "encoder_q-layer.9": 950.6409, "epoch": 0.16, "inbatch_neg_score": 0.2665, "inbatch_pos_score": 0.8657, "learning_rate": 4.6611111111111114e-05, "loss": 3.7148, "norm_diff": 0.0761, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1793.4853, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.4891, "queue_k_norm": 1.4063, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0663, "sent_len_1": 66.8392, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5312, "stdk": 0.0473, "stdq": 0.0459, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 16100 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.7036, "doc_norm": 1.4067, "encoder_q-embeddings": 701.0217, "encoder_q-layer.0": 475.1675, "encoder_q-layer.1": 538.0632, "encoder_q-layer.10": 201.0547, "encoder_q-layer.11": 490.5031, "encoder_q-layer.2": 613.6672, "encoder_q-layer.3": 648.1406, "encoder_q-layer.4": 702.6463, "encoder_q-layer.5": 584.5582, "encoder_q-layer.6": 504.18, "encoder_q-layer.7": 305.4601, "encoder_q-layer.8": 231.5768, "encoder_q-layer.9": 186.2904, "epoch": 0.16, "inbatch_neg_score": 0.2641, "inbatch_pos_score": 0.8535, "learning_rate": 4.6555555555555556e-05, "loss": 3.7036, "norm_diff": 0.0497, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 797.4695, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2637, "query_norm": 1.4564, "queue_k_norm": 1.4051, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0441, "sent_len_1": 66.8308, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1962, "stdk": 0.0471, "stdq": 0.044, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16200 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7043, "doc_norm": 1.4074, "encoder_q-embeddings": 841.4246, "encoder_q-layer.0": 624.8605, "encoder_q-layer.1": 643.5723, "encoder_q-layer.10": 191.2644, "encoder_q-layer.11": 438.1519, "encoder_q-layer.2": 758.8859, "encoder_q-layer.3": 785.2599, "encoder_q-layer.4": 899.8054, "encoder_q-layer.5": 806.9026, "encoder_q-layer.6": 715.1705, "encoder_q-layer.7": 586.3497, "encoder_q-layer.8": 306.8842, "encoder_q-layer.9": 191.9863, "epoch": 0.16, "inbatch_neg_score": 0.2563, "inbatch_pos_score": 0.8491, "learning_rate": 4.6500000000000005e-05, "loss": 3.7043, "norm_diff": 0.0599, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 960.9096, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2534, "query_norm": 1.4673, "queue_k_norm": 1.4032, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9168, "sent_len_1": 66.7472, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2612, "stdk": 0.0471, "stdq": 0.045, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16300 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.6831, "doc_norm": 1.402, "encoder_q-embeddings": 2075.948, "encoder_q-layer.0": 1503.925, "encoder_q-layer.1": 1534.533, "encoder_q-layer.10": 238.4414, "encoder_q-layer.11": 520.8948, "encoder_q-layer.2": 1842.5541, "encoder_q-layer.3": 1896.3843, "encoder_q-layer.4": 1931.1184, "encoder_q-layer.5": 2091.8738, "encoder_q-layer.6": 2217.9817, "encoder_q-layer.7": 1444.4355, "encoder_q-layer.8": 411.2575, "encoder_q-layer.9": 218.521, "epoch": 0.16, "inbatch_neg_score": 0.2614, "inbatch_pos_score": 0.8589, "learning_rate": 4.644444444444445e-05, "loss": 3.6831, "norm_diff": 0.0495, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2387.6488, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2603, "query_norm": 1.4515, "queue_k_norm": 1.404, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.2199, "sent_len_1": 66.9548, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4062, "stdk": 0.047, "stdq": 0.0437, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16400 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.679, "doc_norm": 1.4014, "encoder_q-embeddings": 582.403, "encoder_q-layer.0": 422.6291, "encoder_q-layer.1": 488.9368, "encoder_q-layer.10": 208.8659, "encoder_q-layer.11": 439.2017, "encoder_q-layer.2": 603.7133, "encoder_q-layer.3": 657.0931, "encoder_q-layer.4": 560.5911, "encoder_q-layer.5": 489.5139, "encoder_q-layer.6": 493.6404, "encoder_q-layer.7": 410.53, "encoder_q-layer.8": 246.5434, "encoder_q-layer.9": 186.7983, "epoch": 0.16, "inbatch_neg_score": 0.2554, "inbatch_pos_score": 0.8633, "learning_rate": 4.638888888888889e-05, "loss": 3.679, "norm_diff": 0.0873, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 704.0172, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2532, "query_norm": 1.4886, "queue_k_norm": 1.4034, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0985, "sent_len_1": 66.8092, "sent_max_len_0": 128.0, "sent_max_len_1": 192.1138, "stdk": 0.0469, "stdq": 0.0456, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16500 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.7149, "doc_norm": 1.4024, "encoder_q-embeddings": 320.9492, "encoder_q-layer.0": 237.4339, "encoder_q-layer.1": 265.9968, "encoder_q-layer.10": 188.2703, "encoder_q-layer.11": 422.2701, "encoder_q-layer.2": 276.2502, "encoder_q-layer.3": 280.3205, "encoder_q-layer.4": 279.9183, "encoder_q-layer.5": 274.705, "encoder_q-layer.6": 292.8484, "encoder_q-layer.7": 236.5598, "encoder_q-layer.8": 207.2865, "encoder_q-layer.9": 171.8089, "epoch": 0.16, "inbatch_neg_score": 0.2435, "inbatch_pos_score": 0.8706, "learning_rate": 4.633333333333333e-05, "loss": 3.7149, "norm_diff": 0.0897, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 399.6498, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2433, "query_norm": 1.4921, "queue_k_norm": 1.4037, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7783, "sent_len_1": 66.9321, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8862, "stdk": 0.047, "stdq": 0.0461, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16600 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.6983, "doc_norm": 1.3997, "encoder_q-embeddings": 744.143, "encoder_q-layer.0": 504.9815, "encoder_q-layer.1": 594.9958, "encoder_q-layer.10": 215.2236, "encoder_q-layer.11": 452.3077, "encoder_q-layer.2": 700.6279, "encoder_q-layer.3": 791.2682, "encoder_q-layer.4": 919.1357, "encoder_q-layer.5": 870.3819, "encoder_q-layer.6": 956.6893, "encoder_q-layer.7": 544.0125, "encoder_q-layer.8": 246.8699, "encoder_q-layer.9": 183.5899, "epoch": 0.16, "inbatch_neg_score": 0.2615, "inbatch_pos_score": 0.8892, "learning_rate": 4.627777777777778e-05, "loss": 3.6983, "norm_diff": 0.1141, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 959.787, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2593, "query_norm": 1.5138, "queue_k_norm": 1.4039, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9366, "sent_len_1": 66.9835, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2837, "stdk": 0.0469, "stdq": 0.0462, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 16700 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.6969, "doc_norm": 1.4047, "encoder_q-embeddings": 329.9292, "encoder_q-layer.0": 223.3442, "encoder_q-layer.1": 244.1288, "encoder_q-layer.10": 182.2275, "encoder_q-layer.11": 413.9463, "encoder_q-layer.2": 275.2245, "encoder_q-layer.3": 279.997, "encoder_q-layer.4": 280.0677, "encoder_q-layer.5": 282.5807, "encoder_q-layer.6": 317.522, "encoder_q-layer.7": 336.8553, "encoder_q-layer.8": 359.5425, "encoder_q-layer.9": 230.6185, "epoch": 0.16, "inbatch_neg_score": 0.2553, "inbatch_pos_score": 0.8848, "learning_rate": 4.6222222222222224e-05, "loss": 3.6969, "norm_diff": 0.0875, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 440.8046, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2534, "query_norm": 1.4922, "queue_k_norm": 1.4049, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9766, "sent_len_1": 66.7595, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9762, "stdk": 0.0471, "stdq": 0.046, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 16800 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.6832, "doc_norm": 1.4027, "encoder_q-embeddings": 1294.7759, "encoder_q-layer.0": 930.2153, "encoder_q-layer.1": 1025.7069, "encoder_q-layer.10": 170.0993, "encoder_q-layer.11": 408.0142, "encoder_q-layer.2": 1196.2966, "encoder_q-layer.3": 1222.6422, "encoder_q-layer.4": 1281.0061, "encoder_q-layer.5": 1227.757, "encoder_q-layer.6": 1029.2234, "encoder_q-layer.7": 731.5608, "encoder_q-layer.8": 289.2668, "encoder_q-layer.9": 172.4314, "epoch": 0.16, "inbatch_neg_score": 0.2556, "inbatch_pos_score": 0.8711, "learning_rate": 4.6166666666666666e-05, "loss": 3.6832, "norm_diff": 0.0725, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1411.4619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2542, "query_norm": 1.4751, "queue_k_norm": 1.4039, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0245, "sent_len_1": 66.6111, "sent_max_len_0": 128.0, "sent_max_len_1": 187.3725, "stdk": 0.047, "stdq": 0.0448, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 16900 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.6509, "doc_norm": 1.3949, "encoder_q-embeddings": 5786.2402, "encoder_q-layer.0": 4663.4385, "encoder_q-layer.1": 4832.209, "encoder_q-layer.10": 176.3794, "encoder_q-layer.11": 403.2226, "encoder_q-layer.2": 5899.3018, "encoder_q-layer.3": 6673.5156, "encoder_q-layer.4": 6754.8599, "encoder_q-layer.5": 7430.8662, "encoder_q-layer.6": 6523.9233, "encoder_q-layer.7": 4032.9512, "encoder_q-layer.8": 1683.1449, "encoder_q-layer.9": 401.7845, "epoch": 0.17, "inbatch_neg_score": 0.2635, "inbatch_pos_score": 0.8481, "learning_rate": 4.6111111111111115e-05, "loss": 3.6509, "norm_diff": 0.0681, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7397.5867, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2617, "query_norm": 1.463, "queue_k_norm": 1.4053, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9539, "sent_len_1": 66.8168, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2575, "stdk": 0.0468, "stdq": 0.0446, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 17000 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.6853, "doc_norm": 1.4035, "encoder_q-embeddings": 515.8788, "encoder_q-layer.0": 372.5059, "encoder_q-layer.1": 440.0763, "encoder_q-layer.10": 210.877, "encoder_q-layer.11": 465.8947, "encoder_q-layer.2": 539.7372, "encoder_q-layer.3": 528.1481, "encoder_q-layer.4": 589.9855, "encoder_q-layer.5": 526.9517, "encoder_q-layer.6": 468.4024, "encoder_q-layer.7": 377.8751, "encoder_q-layer.8": 288.2725, "encoder_q-layer.9": 199.2092, "epoch": 0.17, "inbatch_neg_score": 0.2627, "inbatch_pos_score": 0.9204, "learning_rate": 4.605555555555556e-05, "loss": 3.6853, "norm_diff": 0.1178, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 654.1217, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.261, "query_norm": 1.5213, "queue_k_norm": 1.4045, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0057, "sent_len_1": 66.8655, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9437, "stdk": 0.0471, "stdq": 0.0471, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 17100 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7034, "doc_norm": 1.4094, "encoder_q-embeddings": 1657.6027, "encoder_q-layer.0": 1157.9812, "encoder_q-layer.1": 1236.9668, "encoder_q-layer.10": 186.4518, "encoder_q-layer.11": 484.6173, "encoder_q-layer.2": 1402.939, "encoder_q-layer.3": 1532.244, "encoder_q-layer.4": 1647.7589, "encoder_q-layer.5": 1693.3566, "encoder_q-layer.6": 2025.5414, "encoder_q-layer.7": 2038.4303, "encoder_q-layer.8": 1210.8477, "encoder_q-layer.9": 294.932, "epoch": 0.17, "inbatch_neg_score": 0.2641, "inbatch_pos_score": 0.8682, "learning_rate": 4.600000000000001e-05, "loss": 3.7034, "norm_diff": 0.0782, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2165.1222, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2646, "query_norm": 1.4876, "queue_k_norm": 1.4022, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9778, "sent_len_1": 66.9516, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9512, "stdk": 0.0473, "stdq": 0.0462, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 17200 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.752, "doc_norm": 1.405, "encoder_q-embeddings": 637.668, "encoder_q-layer.0": 453.5364, "encoder_q-layer.1": 501.9983, "encoder_q-layer.10": 197.6379, "encoder_q-layer.11": 495.2677, "encoder_q-layer.2": 567.5832, "encoder_q-layer.3": 584.4075, "encoder_q-layer.4": 661.741, "encoder_q-layer.5": 584.7655, "encoder_q-layer.6": 616.3918, "encoder_q-layer.7": 578.8821, "encoder_q-layer.8": 400.3969, "encoder_q-layer.9": 193.3036, "epoch": 0.17, "inbatch_neg_score": 0.2869, "inbatch_pos_score": 0.8867, "learning_rate": 4.594444444444444e-05, "loss": 3.752, "norm_diff": 0.065, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 793.9456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2832, "query_norm": 1.4699, "queue_k_norm": 1.4042, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8245, "sent_len_1": 66.7684, "sent_max_len_0": 128.0, "sent_max_len_1": 189.63, "stdk": 0.047, "stdq": 0.045, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 17300 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.7117, "doc_norm": 1.4052, "encoder_q-embeddings": 1126.4989, "encoder_q-layer.0": 753.838, "encoder_q-layer.1": 811.7047, "encoder_q-layer.10": 214.9194, "encoder_q-layer.11": 499.5439, "encoder_q-layer.2": 969.0172, "encoder_q-layer.3": 992.5029, "encoder_q-layer.4": 1000.4146, "encoder_q-layer.5": 985.7787, "encoder_q-layer.6": 1015.6859, "encoder_q-layer.7": 827.111, "encoder_q-layer.8": 469.3026, "encoder_q-layer.9": 239.1533, "epoch": 0.17, "inbatch_neg_score": 0.2834, "inbatch_pos_score": 0.8818, "learning_rate": 4.588888888888889e-05, "loss": 3.7117, "norm_diff": 0.0649, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1235.039, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.283, "query_norm": 1.4702, "queue_k_norm": 1.4065, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8101, "sent_len_1": 66.6826, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5737, "stdk": 0.047, "stdq": 0.0446, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 17400 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.7036, "doc_norm": 1.4022, "encoder_q-embeddings": 5209.1621, "encoder_q-layer.0": 3171.3943, "encoder_q-layer.1": 3032.1838, "encoder_q-layer.10": 164.1274, "encoder_q-layer.11": 390.6615, "encoder_q-layer.2": 2679.125, "encoder_q-layer.3": 2691.0713, "encoder_q-layer.4": 2803.6394, "encoder_q-layer.5": 2158.6279, "encoder_q-layer.6": 1745.0059, "encoder_q-layer.7": 1623.4635, "encoder_q-layer.8": 875.9292, "encoder_q-layer.9": 265.5596, "epoch": 0.17, "inbatch_neg_score": 0.2937, "inbatch_pos_score": 0.916, "learning_rate": 4.5833333333333334e-05, "loss": 3.7036, "norm_diff": 0.0916, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4086.8895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.293, "query_norm": 1.4938, "queue_k_norm": 1.4087, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.109, "sent_len_1": 66.9925, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8288, "stdk": 0.0469, "stdq": 0.0451, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 17500 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.6947, "doc_norm": 1.4005, "encoder_q-embeddings": 3387.6172, "encoder_q-layer.0": 2316.6272, "encoder_q-layer.1": 2653.1011, "encoder_q-layer.10": 198.856, "encoder_q-layer.11": 465.8602, "encoder_q-layer.2": 2715.9873, "encoder_q-layer.3": 3163.7808, "encoder_q-layer.4": 3028.0928, "encoder_q-layer.5": 2317.7349, "encoder_q-layer.6": 2260.6221, "encoder_q-layer.7": 2149.5535, "encoder_q-layer.8": 1303.3611, "encoder_q-layer.9": 319.3143, "epoch": 0.17, "inbatch_neg_score": 0.2955, "inbatch_pos_score": 0.8564, "learning_rate": 4.577777777777778e-05, "loss": 3.6947, "norm_diff": 0.0464, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3517.4546, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.4469, "queue_k_norm": 1.4085, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1006, "sent_len_1": 66.6582, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2063, "stdk": 0.0468, "stdq": 0.0436, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 17600 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.697, "doc_norm": 1.4085, "encoder_q-embeddings": 1357.1682, "encoder_q-layer.0": 969.774, "encoder_q-layer.1": 1099.7329, "encoder_q-layer.10": 198.0203, "encoder_q-layer.11": 438.6694, "encoder_q-layer.2": 1081.1906, "encoder_q-layer.3": 1139.3794, "encoder_q-layer.4": 1117.7994, "encoder_q-layer.5": 924.501, "encoder_q-layer.6": 900.6042, "encoder_q-layer.7": 780.9453, "encoder_q-layer.8": 433.614, "encoder_q-layer.9": 220.0501, "epoch": 0.17, "inbatch_neg_score": 0.2955, "inbatch_pos_score": 0.8926, "learning_rate": 4.572222222222222e-05, "loss": 3.697, "norm_diff": 0.0599, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1374.4682, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.4684, "queue_k_norm": 1.4098, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8575, "sent_len_1": 67.0099, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8775, "stdk": 0.0471, "stdq": 0.0441, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 17700 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.7108, "doc_norm": 1.4144, "encoder_q-embeddings": 1674.6956, "encoder_q-layer.0": 1015.9118, "encoder_q-layer.1": 1043.0096, "encoder_q-layer.10": 192.2271, "encoder_q-layer.11": 453.877, "encoder_q-layer.2": 1032.1792, "encoder_q-layer.3": 1141.4756, "encoder_q-layer.4": 1174.6505, "encoder_q-layer.5": 1158.7028, "encoder_q-layer.6": 1137.4181, "encoder_q-layer.7": 1058.8611, "encoder_q-layer.8": 630.6962, "encoder_q-layer.9": 216.6463, "epoch": 0.17, "inbatch_neg_score": 0.2987, "inbatch_pos_score": 0.9248, "learning_rate": 4.566666666666667e-05, "loss": 3.7108, "norm_diff": 0.0922, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1581.0842, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2971, "query_norm": 1.5066, "queue_k_norm": 1.4085, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0613, "sent_len_1": 66.8495, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9238, "stdk": 0.0473, "stdq": 0.0463, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 17800 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7068, "doc_norm": 1.4138, "encoder_q-embeddings": 851.5773, "encoder_q-layer.0": 631.8976, "encoder_q-layer.1": 663.7853, "encoder_q-layer.10": 203.5052, "encoder_q-layer.11": 491.9795, "encoder_q-layer.2": 679.9135, "encoder_q-layer.3": 715.6171, "encoder_q-layer.4": 693.3563, "encoder_q-layer.5": 699.3101, "encoder_q-layer.6": 723.6272, "encoder_q-layer.7": 605.5711, "encoder_q-layer.8": 342.183, "encoder_q-layer.9": 191.9534, "epoch": 0.17, "inbatch_neg_score": 0.2918, "inbatch_pos_score": 0.8789, "learning_rate": 4.561111111111112e-05, "loss": 3.7068, "norm_diff": 0.0478, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 928.5365, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2908, "query_norm": 1.4616, "queue_k_norm": 1.4102, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9925, "sent_len_1": 66.7714, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7925, "stdk": 0.0472, "stdq": 0.0445, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 17900 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.7132, "doc_norm": 1.4115, "encoder_q-embeddings": 9034.6309, "encoder_q-layer.0": 6595.9736, "encoder_q-layer.1": 6569.293, "encoder_q-layer.10": 473.9172, "encoder_q-layer.11": 1081.7544, "encoder_q-layer.2": 6733.7554, "encoder_q-layer.3": 6520.188, "encoder_q-layer.4": 6005.0889, "encoder_q-layer.5": 6219.5884, "encoder_q-layer.6": 5745.5698, "encoder_q-layer.7": 4334.1035, "encoder_q-layer.8": 1921.8358, "encoder_q-layer.9": 555.2422, "epoch": 0.18, "inbatch_neg_score": 0.2812, "inbatch_pos_score": 0.8823, "learning_rate": 4.555555555555556e-05, "loss": 3.7132, "norm_diff": 0.0491, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8680.7333, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2817, "query_norm": 1.4606, "queue_k_norm": 1.4129, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7364, "sent_len_1": 66.6119, "sent_max_len_0": 128.0, "sent_max_len_1": 190.81, "stdk": 0.0471, "stdq": 0.0454, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 18000 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7152, "doc_norm": 1.4127, "encoder_q-embeddings": 1066.0984, "encoder_q-layer.0": 783.1487, "encoder_q-layer.1": 882.4213, "encoder_q-layer.10": 393.5575, "encoder_q-layer.11": 1004.3089, "encoder_q-layer.2": 936.4866, "encoder_q-layer.3": 966.8971, "encoder_q-layer.4": 927.2573, "encoder_q-layer.5": 849.6568, "encoder_q-layer.6": 858.0427, "encoder_q-layer.7": 858.9401, "encoder_q-layer.8": 623.6135, "encoder_q-layer.9": 395.9897, "epoch": 0.18, "inbatch_neg_score": 0.298, "inbatch_pos_score": 0.8926, "learning_rate": 4.55e-05, "loss": 3.7152, "norm_diff": 0.0328, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1279.0283, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2981, "query_norm": 1.4454, "queue_k_norm": 1.4131, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0911, "sent_len_1": 66.5922, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4462, "stdk": 0.0471, "stdq": 0.0447, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 18100 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.7171, "doc_norm": 1.4175, "encoder_q-embeddings": 7370.5942, "encoder_q-layer.0": 5560.5107, "encoder_q-layer.1": 6291.6392, "encoder_q-layer.10": 411.3118, "encoder_q-layer.11": 951.916, "encoder_q-layer.2": 6831.312, "encoder_q-layer.3": 7607.2876, "encoder_q-layer.4": 8384.6963, "encoder_q-layer.5": 9515.5469, "encoder_q-layer.6": 7224.2617, "encoder_q-layer.7": 6729.625, "encoder_q-layer.8": 2577.2354, "encoder_q-layer.9": 617.1458, "epoch": 0.18, "inbatch_neg_score": 0.2889, "inbatch_pos_score": 0.896, "learning_rate": 4.5444444444444444e-05, "loss": 3.7171, "norm_diff": 0.0562, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9314.2602, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2888, "query_norm": 1.4737, "queue_k_norm": 1.4169, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8967, "sent_len_1": 66.613, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0725, "stdk": 0.0473, "stdq": 0.0461, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 18200 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.6935, "doc_norm": 1.4166, "encoder_q-embeddings": 1582.0549, "encoder_q-layer.0": 1121.9978, "encoder_q-layer.1": 1213.204, "encoder_q-layer.10": 371.2173, "encoder_q-layer.11": 935.5358, "encoder_q-layer.2": 1388.0028, "encoder_q-layer.3": 1480.9048, "encoder_q-layer.4": 1492.691, "encoder_q-layer.5": 1296.7861, "encoder_q-layer.6": 1408.8463, "encoder_q-layer.7": 1195.0363, "encoder_q-layer.8": 800.1848, "encoder_q-layer.9": 383.4941, "epoch": 0.18, "inbatch_neg_score": 0.2828, "inbatch_pos_score": 0.8662, "learning_rate": 4.538888888888889e-05, "loss": 3.6935, "norm_diff": 0.0108, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1828.6239, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2827, "query_norm": 1.4195, "queue_k_norm": 1.4173, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0201, "sent_len_1": 66.9928, "sent_max_len_0": 128.0, "sent_max_len_1": 186.4275, "stdk": 0.0472, "stdq": 0.044, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 18300 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.698, "doc_norm": 1.4161, "encoder_q-embeddings": 1917.1768, "encoder_q-layer.0": 1400.553, "encoder_q-layer.1": 1473.6007, "encoder_q-layer.10": 342.4746, "encoder_q-layer.11": 876.2931, "encoder_q-layer.2": 1807.5647, "encoder_q-layer.3": 1839.1068, "encoder_q-layer.4": 1703.7872, "encoder_q-layer.5": 1806.1366, "encoder_q-layer.6": 1556.5087, "encoder_q-layer.7": 1121.4736, "encoder_q-layer.8": 537.0094, "encoder_q-layer.9": 337.6828, "epoch": 0.18, "inbatch_neg_score": 0.2775, "inbatch_pos_score": 0.9263, "learning_rate": 4.5333333333333335e-05, "loss": 3.698, "norm_diff": 0.0287, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2120.0878, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2769, "query_norm": 1.4442, "queue_k_norm": 1.4181, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8453, "sent_len_1": 66.6626, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8475, "stdk": 0.0472, "stdq": 0.045, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 18400 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.6867, "doc_norm": 1.42, "encoder_q-embeddings": 850.2382, "encoder_q-layer.0": 606.9716, "encoder_q-layer.1": 657.2075, "encoder_q-layer.10": 351.2126, "encoder_q-layer.11": 923.2324, "encoder_q-layer.2": 726.4685, "encoder_q-layer.3": 855.2262, "encoder_q-layer.4": 773.5623, "encoder_q-layer.5": 742.8327, "encoder_q-layer.6": 608.9172, "encoder_q-layer.7": 540.7654, "encoder_q-layer.8": 474.1552, "encoder_q-layer.9": 346.6003, "epoch": 0.18, "inbatch_neg_score": 0.2651, "inbatch_pos_score": 0.8491, "learning_rate": 4.527777777777778e-05, "loss": 3.6867, "norm_diff": 0.0099, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1024.4933, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.4225, "queue_k_norm": 1.4169, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0281, "sent_len_1": 66.9267, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3487, "stdk": 0.0474, "stdq": 0.0444, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 18500 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.7181, "doc_norm": 1.4191, "encoder_q-embeddings": 3529.3538, "encoder_q-layer.0": 2613.1453, "encoder_q-layer.1": 3096.8091, "encoder_q-layer.10": 344.2018, "encoder_q-layer.11": 831.6951, "encoder_q-layer.2": 3935.0105, "encoder_q-layer.3": 3979.6201, "encoder_q-layer.4": 3903.5308, "encoder_q-layer.5": 4278.9902, "encoder_q-layer.6": 3819.6604, "encoder_q-layer.7": 2883.311, "encoder_q-layer.8": 1395.9659, "encoder_q-layer.9": 422.3774, "epoch": 0.18, "inbatch_neg_score": 0.2712, "inbatch_pos_score": 0.9214, "learning_rate": 4.522222222222223e-05, "loss": 3.7181, "norm_diff": 0.0598, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4474.0669, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2708, "query_norm": 1.4789, "queue_k_norm": 1.417, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7645, "sent_len_1": 66.5222, "sent_max_len_0": 128.0, "sent_max_len_1": 187.915, "stdk": 0.0474, "stdq": 0.0463, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 18600 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.6604, "doc_norm": 1.4072, "encoder_q-embeddings": 1180.1023, "encoder_q-layer.0": 837.0622, "encoder_q-layer.1": 1045.6249, "encoder_q-layer.10": 367.8062, "encoder_q-layer.11": 868.9481, "encoder_q-layer.2": 1185.8783, "encoder_q-layer.3": 1153.547, "encoder_q-layer.4": 1017.0213, "encoder_q-layer.5": 1059.5062, "encoder_q-layer.6": 954.7859, "encoder_q-layer.7": 849.492, "encoder_q-layer.8": 494.3997, "encoder_q-layer.9": 349.8059, "epoch": 0.18, "inbatch_neg_score": 0.2661, "inbatch_pos_score": 0.877, "learning_rate": 4.516666666666667e-05, "loss": 3.6604, "norm_diff": 0.0339, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1363.2353, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2649, "query_norm": 1.441, "queue_k_norm": 1.4181, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.002, "sent_len_1": 66.9397, "sent_max_len_0": 128.0, "sent_max_len_1": 188.87, "stdk": 0.0469, "stdq": 0.045, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 18700 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7105, "doc_norm": 1.4191, "encoder_q-embeddings": 537.825, "encoder_q-layer.0": 432.1359, "encoder_q-layer.1": 422.2435, "encoder_q-layer.10": 368.3806, "encoder_q-layer.11": 841.0267, "encoder_q-layer.2": 447.5409, "encoder_q-layer.3": 456.2989, "encoder_q-layer.4": 495.9431, "encoder_q-layer.5": 451.4252, "encoder_q-layer.6": 539.4727, "encoder_q-layer.7": 644.567, "encoder_q-layer.8": 692.5669, "encoder_q-layer.9": 494.3228, "epoch": 0.18, "inbatch_neg_score": 0.2647, "inbatch_pos_score": 0.874, "learning_rate": 4.511111111111112e-05, "loss": 3.7105, "norm_diff": 0.0216, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 820.0407, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2646, "query_norm": 1.4375, "queue_k_norm": 1.4157, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7787, "sent_len_1": 66.8917, "sent_max_len_0": 128.0, "sent_max_len_1": 189.435, "stdk": 0.0475, "stdq": 0.0453, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 18800 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.7065, "doc_norm": 1.4165, "encoder_q-embeddings": 2604.0552, "encoder_q-layer.0": 1809.871, "encoder_q-layer.1": 2005.1831, "encoder_q-layer.10": 351.7495, "encoder_q-layer.11": 883.3115, "encoder_q-layer.2": 2369.7439, "encoder_q-layer.3": 2702.147, "encoder_q-layer.4": 2743.5701, "encoder_q-layer.5": 2926.5417, "encoder_q-layer.6": 3060.3096, "encoder_q-layer.7": 2069.6462, "encoder_q-layer.8": 1377.5507, "encoder_q-layer.9": 740.8237, "epoch": 0.18, "inbatch_neg_score": 0.2689, "inbatch_pos_score": 0.875, "learning_rate": 4.5055555555555554e-05, "loss": 3.7065, "norm_diff": 0.0092, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3181.6385, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2683, "query_norm": 1.4195, "queue_k_norm": 1.4169, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7128, "sent_len_1": 66.7373, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5425, "stdk": 0.0474, "stdq": 0.0443, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 18900 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.6766, "doc_norm": 1.4117, "encoder_q-embeddings": 841.6739, "encoder_q-layer.0": 578.217, "encoder_q-layer.1": 655.1666, "encoder_q-layer.10": 389.3698, "encoder_q-layer.11": 927.1432, "encoder_q-layer.2": 741.6503, "encoder_q-layer.3": 777.184, "encoder_q-layer.4": 744.1923, "encoder_q-layer.5": 723.9521, "encoder_q-layer.6": 626.8442, "encoder_q-layer.7": 586.6871, "encoder_q-layer.8": 520.6122, "encoder_q-layer.9": 389.704, "epoch": 0.19, "inbatch_neg_score": 0.2665, "inbatch_pos_score": 0.8564, "learning_rate": 4.5e-05, "loss": 3.6766, "norm_diff": 0.0508, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1017.9742, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.4625, "queue_k_norm": 1.4152, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0856, "sent_len_1": 66.6779, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7413, "stdk": 0.0472, "stdq": 0.0458, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 19000 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.6841, "doc_norm": 1.411, "encoder_q-embeddings": 1435.6006, "encoder_q-layer.0": 919.4254, "encoder_q-layer.1": 1148.8915, "encoder_q-layer.10": 390.1514, "encoder_q-layer.11": 999.1635, "encoder_q-layer.2": 1414.1001, "encoder_q-layer.3": 1389.9095, "encoder_q-layer.4": 1471.1411, "encoder_q-layer.5": 1565.064, "encoder_q-layer.6": 1256.8174, "encoder_q-layer.7": 1105.4943, "encoder_q-layer.8": 887.4532, "encoder_q-layer.9": 459.4852, "epoch": 0.19, "inbatch_neg_score": 0.2558, "inbatch_pos_score": 0.853, "learning_rate": 4.4944444444444445e-05, "loss": 3.6841, "norm_diff": 0.051, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1746.1664, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2561, "query_norm": 1.462, "queue_k_norm": 1.414, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9936, "sent_len_1": 66.9517, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3913, "stdk": 0.0473, "stdq": 0.0455, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 19100 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.7105, "doc_norm": 1.4096, "encoder_q-embeddings": 906.2639, "encoder_q-layer.0": 638.5667, "encoder_q-layer.1": 683.2079, "encoder_q-layer.10": 394.6326, "encoder_q-layer.11": 1039.0537, "encoder_q-layer.2": 722.9813, "encoder_q-layer.3": 780.7156, "encoder_q-layer.4": 862.5294, "encoder_q-layer.5": 831.6731, "encoder_q-layer.6": 808.9592, "encoder_q-layer.7": 643.3441, "encoder_q-layer.8": 634.231, "encoder_q-layer.9": 435.3466, "epoch": 0.19, "inbatch_neg_score": 0.2636, "inbatch_pos_score": 0.8525, "learning_rate": 4.4888888888888894e-05, "loss": 3.7105, "norm_diff": 0.0196, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1128.605, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2639, "query_norm": 1.4292, "queue_k_norm": 1.4124, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0158, "sent_len_1": 66.5321, "sent_max_len_0": 128.0, "sent_max_len_1": 189.98, "stdk": 0.0472, "stdq": 0.0442, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 19200 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.6779, "doc_norm": 1.4111, "encoder_q-embeddings": 4355.2681, "encoder_q-layer.0": 3277.9062, "encoder_q-layer.1": 3889.6436, "encoder_q-layer.10": 343.6989, "encoder_q-layer.11": 910.6744, "encoder_q-layer.2": 3658.7234, "encoder_q-layer.3": 4069.844, "encoder_q-layer.4": 4905.4619, "encoder_q-layer.5": 4545.8511, "encoder_q-layer.6": 4165.7329, "encoder_q-layer.7": 3857.793, "encoder_q-layer.8": 2449.2114, "encoder_q-layer.9": 556.5851, "epoch": 0.19, "inbatch_neg_score": 0.2692, "inbatch_pos_score": 0.8984, "learning_rate": 4.483333333333333e-05, "loss": 3.6779, "norm_diff": 0.0415, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5447.5908, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2686, "query_norm": 1.4526, "queue_k_norm": 1.4109, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0261, "sent_len_1": 66.9331, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8562, "stdk": 0.0473, "stdq": 0.0452, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 19300 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.6694, "doc_norm": 1.4096, "encoder_q-embeddings": 1714.5846, "encoder_q-layer.0": 1322.3607, "encoder_q-layer.1": 1218.5131, "encoder_q-layer.10": 389.3027, "encoder_q-layer.11": 932.1772, "encoder_q-layer.2": 1497.8259, "encoder_q-layer.3": 1390.214, "encoder_q-layer.4": 1577.8846, "encoder_q-layer.5": 1358.1139, "encoder_q-layer.6": 1219.8013, "encoder_q-layer.7": 1103.5815, "encoder_q-layer.8": 735.0831, "encoder_q-layer.9": 367.3231, "epoch": 0.19, "inbatch_neg_score": 0.2455, "inbatch_pos_score": 0.8247, "learning_rate": 4.477777777777778e-05, "loss": 3.6694, "norm_diff": 0.0398, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1879.7838, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2454, "query_norm": 1.4494, "queue_k_norm": 1.4112, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9522, "sent_len_1": 66.6016, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4512, "stdk": 0.0473, "stdq": 0.0458, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 19400 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.6645, "doc_norm": 1.4099, "encoder_q-embeddings": 768.418, "encoder_q-layer.0": 514.7711, "encoder_q-layer.1": 544.7568, "encoder_q-layer.10": 394.1882, "encoder_q-layer.11": 1011.0898, "encoder_q-layer.2": 592.8517, "encoder_q-layer.3": 625.7061, "encoder_q-layer.4": 649.1873, "encoder_q-layer.5": 644.9188, "encoder_q-layer.6": 678.933, "encoder_q-layer.7": 695.6519, "encoder_q-layer.8": 631.4683, "encoder_q-layer.9": 403.2744, "epoch": 0.19, "inbatch_neg_score": 0.2563, "inbatch_pos_score": 0.8901, "learning_rate": 4.472222222222223e-05, "loss": 3.6645, "norm_diff": 0.0653, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 992.9612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2566, "query_norm": 1.4752, "queue_k_norm": 1.4123, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9196, "sent_len_1": 66.6443, "sent_max_len_0": 128.0, "sent_max_len_1": 189.475, "stdk": 0.0473, "stdq": 0.0462, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 19500 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.7062, "doc_norm": 1.4122, "encoder_q-embeddings": 405.7465, "encoder_q-layer.0": 257.8206, "encoder_q-layer.1": 266.2433, "encoder_q-layer.10": 354.1282, "encoder_q-layer.11": 879.9171, "encoder_q-layer.2": 313.5178, "encoder_q-layer.3": 328.8215, "encoder_q-layer.4": 347.3798, "encoder_q-layer.5": 357.4089, "encoder_q-layer.6": 391.5436, "encoder_q-layer.7": 418.9671, "encoder_q-layer.8": 413.9175, "encoder_q-layer.9": 351.6747, "epoch": 0.19, "inbatch_neg_score": 0.265, "inbatch_pos_score": 0.9229, "learning_rate": 4.466666666666667e-05, "loss": 3.7062, "norm_diff": 0.0528, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 640.892, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2656, "query_norm": 1.465, "queue_k_norm": 1.4107, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1508, "sent_len_1": 66.9686, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3925, "stdk": 0.0475, "stdq": 0.046, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 19600 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.672, "doc_norm": 1.4143, "encoder_q-embeddings": 1640.1128, "encoder_q-layer.0": 1259.7136, "encoder_q-layer.1": 1392.3971, "encoder_q-layer.10": 367.5541, "encoder_q-layer.11": 909.3582, "encoder_q-layer.2": 1427.1775, "encoder_q-layer.3": 1351.9202, "encoder_q-layer.4": 1372.3774, "encoder_q-layer.5": 1088.1914, "encoder_q-layer.6": 1106.3669, "encoder_q-layer.7": 849.8629, "encoder_q-layer.8": 523.7574, "encoder_q-layer.9": 356.1806, "epoch": 0.19, "inbatch_neg_score": 0.2545, "inbatch_pos_score": 0.8906, "learning_rate": 4.461111111111111e-05, "loss": 3.672, "norm_diff": 0.0388, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1747.0146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2529, "query_norm": 1.4531, "queue_k_norm": 1.4107, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8396, "sent_len_1": 66.7518, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7075, "stdk": 0.0475, "stdq": 0.046, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 19700 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.6734, "doc_norm": 1.4108, "encoder_q-embeddings": 2004.9823, "encoder_q-layer.0": 1528.8389, "encoder_q-layer.1": 1695.5734, "encoder_q-layer.10": 398.7404, "encoder_q-layer.11": 1009.4551, "encoder_q-layer.2": 2071.167, "encoder_q-layer.3": 2283.2966, "encoder_q-layer.4": 2285.8606, "encoder_q-layer.5": 2390.7617, "encoder_q-layer.6": 2379.7996, "encoder_q-layer.7": 1900.4156, "encoder_q-layer.8": 1438.2664, "encoder_q-layer.9": 730.7394, "epoch": 0.19, "inbatch_neg_score": 0.2595, "inbatch_pos_score": 0.8311, "learning_rate": 4.4555555555555555e-05, "loss": 3.6734, "norm_diff": 0.0141, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2766.6239, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2598, "query_norm": 1.4242, "queue_k_norm": 1.4086, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.019, "sent_len_1": 66.9759, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7075, "stdk": 0.0474, "stdq": 0.0451, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 19800 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.6805, "doc_norm": 1.4064, "encoder_q-embeddings": 1884.6013, "encoder_q-layer.0": 1273.4453, "encoder_q-layer.1": 1402.7159, "encoder_q-layer.10": 358.041, "encoder_q-layer.11": 919.1593, "encoder_q-layer.2": 1530.1965, "encoder_q-layer.3": 1641.0219, "encoder_q-layer.4": 1648.6068, "encoder_q-layer.5": 1517.2518, "encoder_q-layer.6": 1763.938, "encoder_q-layer.7": 1259.98, "encoder_q-layer.8": 569.788, "encoder_q-layer.9": 336.459, "epoch": 0.19, "inbatch_neg_score": 0.2498, "inbatch_pos_score": 0.8667, "learning_rate": 4.4500000000000004e-05, "loss": 3.6805, "norm_diff": 0.0206, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2016.006, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2507, "query_norm": 1.427, "queue_k_norm": 1.4095, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0469, "sent_len_1": 66.6552, "sent_max_len_0": 128.0, "sent_max_len_1": 188.68, "stdk": 0.0473, "stdq": 0.0454, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 19900 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.6897, "doc_norm": 1.4125, "encoder_q-embeddings": 1515.3854, "encoder_q-layer.0": 973.5408, "encoder_q-layer.1": 1053.8535, "encoder_q-layer.10": 712.5607, "encoder_q-layer.11": 1931.8512, "encoder_q-layer.2": 1214.9441, "encoder_q-layer.3": 1237.6625, "encoder_q-layer.4": 1373.9677, "encoder_q-layer.5": 1391.6843, "encoder_q-layer.6": 1477.0901, "encoder_q-layer.7": 1419.6503, "encoder_q-layer.8": 1195.9203, "encoder_q-layer.9": 710.5776, "epoch": 0.2, "inbatch_neg_score": 0.2443, "inbatch_pos_score": 0.8462, "learning_rate": 4.4444444444444447e-05, "loss": 3.6897, "norm_diff": 0.012, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1984.8954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2441, "query_norm": 1.4102, "queue_k_norm": 1.4122, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9698, "sent_len_1": 66.7501, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9412, "stdk": 0.0475, "stdq": 0.045, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 20000 }, { "dev_runtime": 37.8767, "dev_samples_per_second": 1.69, "dev_steps_per_second": 0.026, "epoch": 0.2, "step": 20000, "test_accuracy": 93.4814453125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.38453954458236694, "test_doc_norm": 1.4031305313110352, "test_inbatch_neg_score": 0.5904818773269653, "test_inbatch_pos_score": 1.4654431343078613, "test_loss": 0.38453954458236694, "test_loss_align": 1.1792842149734497, "test_loss_unif": 3.8744869232177734, "test_loss_unif_q@queue": 3.8744866847991943, "test_norm_diff": 0.051639288663864136, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2303803563117981, "test_query_norm": 1.4547698497772217, "test_queue_k_norm": 1.4126696586608887, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04144138842821121, "test_stdq": 0.04164225608110428, "test_stdqueue_k": 0.04765129089355469, "test_stdqueue_q": 0.0 }, { "dev_runtime": 37.8767, "dev_samples_per_second": 1.69, "dev_steps_per_second": 0.026, "epoch": 0.2, "eval_beir-arguana_ndcg@10": 0.32258, "eval_beir-arguana_recall@10": 0.55263, "eval_beir-arguana_recall@100": 0.85846, "eval_beir-arguana_recall@20": 0.69132, "eval_beir-avg_ndcg@10": 0.31629966666666665, "eval_beir-avg_recall@10": 0.37913633333333335, "eval_beir-avg_recall@100": 0.5573134166666667, "eval_beir-avg_recall@20": 0.43658425, "eval_beir-cqadupstack_ndcg@10": 0.21258666666666662, "eval_beir-cqadupstack_recall@10": 0.29197333333333336, "eval_beir-cqadupstack_recall@100": 0.5054841666666667, "eval_beir-cqadupstack_recall@20": 0.3496225, "eval_beir-fiqa_ndcg@10": 0.15151, "eval_beir-fiqa_recall@10": 0.19531, "eval_beir-fiqa_recall@100": 0.42902, "eval_beir-fiqa_recall@20": 0.24829, "eval_beir-nfcorpus_ndcg@10": 0.23706, "eval_beir-nfcorpus_recall@10": 0.11762, "eval_beir-nfcorpus_recall@100": 0.23963, "eval_beir-nfcorpus_recall@20": 0.14578, "eval_beir-nq_ndcg@10": 0.20267, "eval_beir-nq_recall@10": 0.34299, "eval_beir-nq_recall@100": 0.67017, "eval_beir-nq_recall@20": 0.45184, "eval_beir-quora_ndcg@10": 0.76882, "eval_beir-quora_recall@10": 0.87736, "eval_beir-quora_recall@100": 0.97153, "eval_beir-quora_recall@20": 0.9176, "eval_beir-scidocs_ndcg@10": 0.11634, "eval_beir-scidocs_recall@10": 0.12388, "eval_beir-scidocs_recall@100": 0.29848, "eval_beir-scidocs_recall@20": 0.17203, "eval_beir-scifact_ndcg@10": 0.5606, "eval_beir-scifact_recall@10": 0.71783, "eval_beir-scifact_recall@100": 0.86589, "eval_beir-scifact_recall@20": 0.76867, "eval_beir-trec-covid_ndcg@10": 0.42215, "eval_beir-trec-covid_recall@10": 0.448, "eval_beir-trec-covid_recall@100": 0.3212, "eval_beir-trec-covid_recall@20": 0.431, "eval_beir-webis-touche2020_ndcg@10": 0.16868, "eval_beir-webis-touche2020_recall@10": 0.12377, "eval_beir-webis-touche2020_recall@100": 0.41327, "eval_beir-webis-touche2020_recall@20": 0.18969, "eval_senteval-avg_sts": 0.7037160105314686, "eval_senteval-sickr_spearman": 0.6874844287739638, "eval_senteval-stsb_spearman": 0.7199475922889734, "step": 20000, "test_accuracy": 93.4814453125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.38453954458236694, "test_doc_norm": 1.4031305313110352, "test_inbatch_neg_score": 0.5904818773269653, "test_inbatch_pos_score": 1.4654431343078613, "test_loss": 0.38453954458236694, "test_loss_align": 1.1792842149734497, "test_loss_unif": 3.8744869232177734, "test_loss_unif_q@queue": 3.8744866847991943, "test_norm_diff": 0.051639288663864136, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2303803563117981, "test_query_norm": 1.4547698497772217, "test_queue_k_norm": 1.4126696586608887, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04144138842821121, "test_stdq": 0.04164225608110428, "test_stdqueue_k": 0.04765129089355469, "test_stdqueue_q": 0.0 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.6744, "doc_norm": 1.4068, "encoder_q-embeddings": 1238.1823, "encoder_q-layer.0": 925.5707, "encoder_q-layer.1": 949.5054, "encoder_q-layer.10": 732.6279, "encoder_q-layer.11": 1768.1814, "encoder_q-layer.2": 1155.6656, "encoder_q-layer.3": 1195.7097, "encoder_q-layer.4": 1233.547, "encoder_q-layer.5": 1157.1794, "encoder_q-layer.6": 903.8389, "encoder_q-layer.7": 916.5795, "encoder_q-layer.8": 787.5237, "encoder_q-layer.9": 641.6774, "epoch": 0.2, "inbatch_neg_score": 0.2212, "inbatch_pos_score": 0.833, "learning_rate": 4.438888888888889e-05, "loss": 3.6744, "norm_diff": 0.0091, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1655.3701, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2202, "query_norm": 1.4073, "queue_k_norm": 1.4094, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7549, "sent_len_1": 66.6159, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1687, "stdk": 0.0474, "stdq": 0.0452, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 20100 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.6623, "doc_norm": 1.409, "encoder_q-embeddings": 1439.5933, "encoder_q-layer.0": 929.0284, "encoder_q-layer.1": 996.3851, "encoder_q-layer.10": 730.9801, "encoder_q-layer.11": 1850.4895, "encoder_q-layer.2": 1296.5708, "encoder_q-layer.3": 1459.0968, "encoder_q-layer.4": 1264.9927, "encoder_q-layer.5": 1038.6907, "encoder_q-layer.6": 935.4612, "encoder_q-layer.7": 875.5612, "encoder_q-layer.8": 812.1811, "encoder_q-layer.9": 663.8297, "epoch": 0.2, "inbatch_neg_score": 0.2161, "inbatch_pos_score": 0.7925, "learning_rate": 4.433333333333334e-05, "loss": 3.6623, "norm_diff": 0.0302, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1762.4213, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2156, "query_norm": 1.3788, "queue_k_norm": 1.4098, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9586, "sent_len_1": 66.8587, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6725, "stdk": 0.0475, "stdq": 0.044, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 20200 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.6457, "doc_norm": 1.4042, "encoder_q-embeddings": 3970.1138, "encoder_q-layer.0": 2643.5166, "encoder_q-layer.1": 3236.8169, "encoder_q-layer.10": 724.2651, "encoder_q-layer.11": 1924.8302, "encoder_q-layer.2": 3779.6345, "encoder_q-layer.3": 3338.1926, "encoder_q-layer.4": 2633.03, "encoder_q-layer.5": 2155.969, "encoder_q-layer.6": 2113.4915, "encoder_q-layer.7": 1804.7622, "encoder_q-layer.8": 1245.2444, "encoder_q-layer.9": 670.4775, "epoch": 0.2, "inbatch_neg_score": 0.2329, "inbatch_pos_score": 0.8203, "learning_rate": 4.427777777777778e-05, "loss": 3.6457, "norm_diff": 0.0145, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3940.9865, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2329, "query_norm": 1.4032, "queue_k_norm": 1.4064, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0194, "sent_len_1": 66.7255, "sent_max_len_0": 128.0, "sent_max_len_1": 189.07, "stdk": 0.0474, "stdq": 0.0447, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 20300 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.6531, "doc_norm": 1.4052, "encoder_q-embeddings": 4181.7427, "encoder_q-layer.0": 3084.0969, "encoder_q-layer.1": 3130.3386, "encoder_q-layer.10": 753.1086, "encoder_q-layer.11": 1938.9761, "encoder_q-layer.2": 3454.6311, "encoder_q-layer.3": 3467.418, "encoder_q-layer.4": 3281.8911, "encoder_q-layer.5": 3250.7256, "encoder_q-layer.6": 3268.5459, "encoder_q-layer.7": 2430.9236, "encoder_q-layer.8": 1431.8525, "encoder_q-layer.9": 755.4531, "epoch": 0.2, "inbatch_neg_score": 0.2249, "inbatch_pos_score": 0.8477, "learning_rate": 4.422222222222222e-05, "loss": 3.6531, "norm_diff": 0.02, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4415.1079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2236, "query_norm": 1.3857, "queue_k_norm": 1.4066, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9556, "sent_len_1": 66.855, "sent_max_len_0": 128.0, "sent_max_len_1": 189.25, "stdk": 0.0474, "stdq": 0.0445, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 20400 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.6482, "doc_norm": 1.4114, "encoder_q-embeddings": 16276.501, "encoder_q-layer.0": 10585.6572, "encoder_q-layer.1": 11991.8564, "encoder_q-layer.10": 679.3613, "encoder_q-layer.11": 1942.0946, "encoder_q-layer.2": 14036.0742, "encoder_q-layer.3": 12844.5117, "encoder_q-layer.4": 11718.5664, "encoder_q-layer.5": 11564.8691, "encoder_q-layer.6": 9798.3516, "encoder_q-layer.7": 6338.7764, "encoder_q-layer.8": 2268.8647, "encoder_q-layer.9": 777.4878, "epoch": 0.2, "inbatch_neg_score": 0.2219, "inbatch_pos_score": 0.8105, "learning_rate": 4.4166666666666665e-05, "loss": 3.6482, "norm_diff": 0.0207, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15443.6663, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2211, "query_norm": 1.3907, "queue_k_norm": 1.4066, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9714, "sent_len_1": 66.7399, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0575, "stdk": 0.0477, "stdq": 0.0448, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 20500 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.6503, "doc_norm": 1.4086, "encoder_q-embeddings": 871.1415, "encoder_q-layer.0": 625.0912, "encoder_q-layer.1": 672.2936, "encoder_q-layer.10": 714.1212, "encoder_q-layer.11": 1742.2745, "encoder_q-layer.2": 704.2947, "encoder_q-layer.3": 736.4985, "encoder_q-layer.4": 760.8802, "encoder_q-layer.5": 757.4772, "encoder_q-layer.6": 741.2864, "encoder_q-layer.7": 717.8003, "encoder_q-layer.8": 764.9335, "encoder_q-layer.9": 633.3941, "epoch": 0.2, "inbatch_neg_score": 0.2176, "inbatch_pos_score": 0.8765, "learning_rate": 4.4111111111111114e-05, "loss": 3.6503, "norm_diff": 0.0101, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1303.0323, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.218, "query_norm": 1.4108, "queue_k_norm": 1.4055, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9286, "sent_len_1": 66.7078, "sent_max_len_0": 128.0, "sent_max_len_1": 186.53, "stdk": 0.0476, "stdq": 0.0458, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 20600 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.6541, "doc_norm": 1.4052, "encoder_q-embeddings": 2042.7272, "encoder_q-layer.0": 1346.1987, "encoder_q-layer.1": 1533.4705, "encoder_q-layer.10": 824.86, "encoder_q-layer.11": 1810.4769, "encoder_q-layer.2": 1749.7375, "encoder_q-layer.3": 1957.0193, "encoder_q-layer.4": 2044.6406, "encoder_q-layer.5": 2293.2798, "encoder_q-layer.6": 2289.8018, "encoder_q-layer.7": 1950.1404, "encoder_q-layer.8": 1499.7225, "encoder_q-layer.9": 719.4613, "epoch": 0.2, "inbatch_neg_score": 0.2127, "inbatch_pos_score": 0.8423, "learning_rate": 4.4055555555555557e-05, "loss": 3.6541, "norm_diff": 0.0311, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2612.9304, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2124, "query_norm": 1.3741, "queue_k_norm": 1.4066, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0558, "sent_len_1": 66.6157, "sent_max_len_0": 128.0, "sent_max_len_1": 192.2587, "stdk": 0.0475, "stdq": 0.0445, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 20700 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.6291, "doc_norm": 1.404, "encoder_q-embeddings": 1006.7381, "encoder_q-layer.0": 713.5001, "encoder_q-layer.1": 830.8566, "encoder_q-layer.10": 683.2622, "encoder_q-layer.11": 1700.9886, "encoder_q-layer.2": 1034.4989, "encoder_q-layer.3": 1172.0814, "encoder_q-layer.4": 1445.7532, "encoder_q-layer.5": 1414.3739, "encoder_q-layer.6": 1437.4263, "encoder_q-layer.7": 1283.6648, "encoder_q-layer.8": 899.0558, "encoder_q-layer.9": 658.41, "epoch": 0.2, "inbatch_neg_score": 0.2134, "inbatch_pos_score": 0.8481, "learning_rate": 4.4000000000000006e-05, "loss": 3.6291, "norm_diff": 0.0082, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1715.4243, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.213, "query_norm": 1.4087, "queue_k_norm": 1.4054, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0729, "sent_len_1": 67.0935, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1662, "stdk": 0.0475, "stdq": 0.0457, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 20800 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.6512, "doc_norm": 1.3966, "encoder_q-embeddings": 1974.3002, "encoder_q-layer.0": 1432.636, "encoder_q-layer.1": 1569.4801, "encoder_q-layer.10": 692.991, "encoder_q-layer.11": 1889.0093, "encoder_q-layer.2": 1758.0071, "encoder_q-layer.3": 2012.0502, "encoder_q-layer.4": 2043.1879, "encoder_q-layer.5": 1716.4247, "encoder_q-layer.6": 1480.1908, "encoder_q-layer.7": 1156.9004, "encoder_q-layer.8": 876.6447, "encoder_q-layer.9": 665.3245, "epoch": 0.2, "inbatch_neg_score": 0.2342, "inbatch_pos_score": 0.8184, "learning_rate": 4.394444444444445e-05, "loss": 3.6512, "norm_diff": 0.0087, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2392.7868, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2323, "query_norm": 1.3986, "queue_k_norm": 1.4043, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9641, "sent_len_1": 66.7494, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8975, "stdk": 0.0472, "stdq": 0.0444, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 20900 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.6157, "doc_norm": 1.4101, "encoder_q-embeddings": 5810.2817, "encoder_q-layer.0": 4015.791, "encoder_q-layer.1": 4422.1763, "encoder_q-layer.10": 670.6324, "encoder_q-layer.11": 1809.5466, "encoder_q-layer.2": 4801.3589, "encoder_q-layer.3": 5022.9961, "encoder_q-layer.4": 5381.0029, "encoder_q-layer.5": 5293.3213, "encoder_q-layer.6": 4826.5776, "encoder_q-layer.7": 3938.0117, "encoder_q-layer.8": 2190.3606, "encoder_q-layer.9": 801.5458, "epoch": 0.21, "inbatch_neg_score": 0.2404, "inbatch_pos_score": 0.8662, "learning_rate": 4.388888888888889e-05, "loss": 3.6157, "norm_diff": 0.0098, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6286.9261, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2394, "query_norm": 1.4196, "queue_k_norm": 1.4043, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0225, "sent_len_1": 66.9417, "sent_max_len_0": 128.0, "sent_max_len_1": 191.3125, "stdk": 0.0477, "stdq": 0.0453, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 21000 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.656, "doc_norm": 1.4026, "encoder_q-embeddings": 5646.7222, "encoder_q-layer.0": 4256.7993, "encoder_q-layer.1": 4694.647, "encoder_q-layer.10": 734.9106, "encoder_q-layer.11": 1840.3044, "encoder_q-layer.2": 5579.1372, "encoder_q-layer.3": 5157.7061, "encoder_q-layer.4": 4995.8496, "encoder_q-layer.5": 4540.4292, "encoder_q-layer.6": 4041.5437, "encoder_q-layer.7": 3336.3159, "encoder_q-layer.8": 1565.9414, "encoder_q-layer.9": 748.8441, "epoch": 0.21, "inbatch_neg_score": 0.2333, "inbatch_pos_score": 0.8481, "learning_rate": 4.383333333333334e-05, "loss": 3.656, "norm_diff": 0.0206, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6086.5939, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2319, "query_norm": 1.4208, "queue_k_norm": 1.4045, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9441, "sent_len_1": 66.7035, "sent_max_len_0": 128.0, "sent_max_len_1": 189.95, "stdk": 0.0474, "stdq": 0.0455, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 21100 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.6412, "doc_norm": 1.4017, "encoder_q-embeddings": 3397.7815, "encoder_q-layer.0": 2265.3176, "encoder_q-layer.1": 2412.0835, "encoder_q-layer.10": 682.3354, "encoder_q-layer.11": 1834.3969, "encoder_q-layer.2": 2439.4292, "encoder_q-layer.3": 2590.4866, "encoder_q-layer.4": 2695.3855, "encoder_q-layer.5": 2467.2727, "encoder_q-layer.6": 2469.6465, "encoder_q-layer.7": 2196.1777, "encoder_q-layer.8": 1317.3826, "encoder_q-layer.9": 707.8461, "epoch": 0.21, "inbatch_neg_score": 0.2357, "inbatch_pos_score": 0.8682, "learning_rate": 4.377777777777778e-05, "loss": 3.6412, "norm_diff": 0.0127, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3498.2947, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2351, "query_norm": 1.4102, "queue_k_norm": 1.4039, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8642, "sent_len_1": 66.8108, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2463, "stdk": 0.0474, "stdq": 0.0451, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 21200 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.6352, "doc_norm": 1.4067, "encoder_q-embeddings": 1585.1516, "encoder_q-layer.0": 1246.2286, "encoder_q-layer.1": 1371.1304, "encoder_q-layer.10": 693.8217, "encoder_q-layer.11": 1801.6388, "encoder_q-layer.2": 1323.1305, "encoder_q-layer.3": 1247.5039, "encoder_q-layer.4": 1215.8278, "encoder_q-layer.5": 1159.7771, "encoder_q-layer.6": 1224.9232, "encoder_q-layer.7": 1083.1996, "encoder_q-layer.8": 871.6111, "encoder_q-layer.9": 681.1541, "epoch": 0.21, "inbatch_neg_score": 0.2299, "inbatch_pos_score": 0.8818, "learning_rate": 4.3722222222222224e-05, "loss": 3.6352, "norm_diff": 0.0376, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1897.4008, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2316, "query_norm": 1.4443, "queue_k_norm": 1.4035, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0378, "sent_len_1": 66.6547, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4487, "stdk": 0.0476, "stdq": 0.0462, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 21300 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.6103, "doc_norm": 1.4075, "encoder_q-embeddings": 2062.8748, "encoder_q-layer.0": 1451.9348, "encoder_q-layer.1": 1659.4902, "encoder_q-layer.10": 728.5291, "encoder_q-layer.11": 1870.8025, "encoder_q-layer.2": 1936.7646, "encoder_q-layer.3": 1959.7657, "encoder_q-layer.4": 2234.4285, "encoder_q-layer.5": 2147.4204, "encoder_q-layer.6": 1967.3094, "encoder_q-layer.7": 1845.4354, "encoder_q-layer.8": 1254.1143, "encoder_q-layer.9": 728.0189, "epoch": 0.21, "inbatch_neg_score": 0.246, "inbatch_pos_score": 0.8613, "learning_rate": 4.3666666666666666e-05, "loss": 3.6103, "norm_diff": 0.0335, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2639.4306, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2455, "query_norm": 1.441, "queue_k_norm": 1.4039, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9402, "sent_len_1": 66.9216, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6125, "stdk": 0.0476, "stdq": 0.0461, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 21400 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.6427, "doc_norm": 1.4075, "encoder_q-embeddings": 1065.8875, "encoder_q-layer.0": 688.2351, "encoder_q-layer.1": 746.5103, "encoder_q-layer.10": 653.9229, "encoder_q-layer.11": 1761.7258, "encoder_q-layer.2": 763.0791, "encoder_q-layer.3": 759.4487, "encoder_q-layer.4": 769.9341, "encoder_q-layer.5": 751.7296, "encoder_q-layer.6": 731.0483, "encoder_q-layer.7": 771.0158, "encoder_q-layer.8": 748.4575, "encoder_q-layer.9": 641.0685, "epoch": 0.21, "inbatch_neg_score": 0.2496, "inbatch_pos_score": 0.8638, "learning_rate": 4.3611111111111116e-05, "loss": 3.6427, "norm_diff": 0.0159, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1400.7476, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.249, "query_norm": 1.4234, "queue_k_norm": 1.405, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9799, "sent_len_1": 66.6345, "sent_max_len_0": 128.0, "sent_max_len_1": 189.57, "stdk": 0.0476, "stdq": 0.0455, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 21500 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.6355, "doc_norm": 1.4073, "encoder_q-embeddings": 2269.9583, "encoder_q-layer.0": 1546.8578, "encoder_q-layer.1": 1677.6477, "encoder_q-layer.10": 725.0486, "encoder_q-layer.11": 2000.2567, "encoder_q-layer.2": 1898.5203, "encoder_q-layer.3": 2024.0823, "encoder_q-layer.4": 1989.0905, "encoder_q-layer.5": 1732.9651, "encoder_q-layer.6": 1795.0535, "encoder_q-layer.7": 1702.813, "encoder_q-layer.8": 1158.6821, "encoder_q-layer.9": 704.8473, "epoch": 0.21, "inbatch_neg_score": 0.2494, "inbatch_pos_score": 0.8682, "learning_rate": 4.355555555555556e-05, "loss": 3.6355, "norm_diff": 0.0231, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2592.9102, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2498, "query_norm": 1.4304, "queue_k_norm": 1.405, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7478, "sent_len_1": 66.685, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2912, "stdk": 0.0476, "stdq": 0.0457, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 21600 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.6368, "doc_norm": 1.4099, "encoder_q-embeddings": 2372.5952, "encoder_q-layer.0": 1738.2679, "encoder_q-layer.1": 1783.7593, "encoder_q-layer.10": 721.6495, "encoder_q-layer.11": 1936.2815, "encoder_q-layer.2": 2052.4722, "encoder_q-layer.3": 2236.1294, "encoder_q-layer.4": 2327.7346, "encoder_q-layer.5": 2883.3545, "encoder_q-layer.6": 2531.7109, "encoder_q-layer.7": 2248.4209, "encoder_q-layer.8": 1980.8938, "encoder_q-layer.9": 1234.1089, "epoch": 0.21, "inbatch_neg_score": 0.2501, "inbatch_pos_score": 0.8701, "learning_rate": 4.35e-05, "loss": 3.6368, "norm_diff": 0.0284, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3092.4001, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2499, "query_norm": 1.4383, "queue_k_norm": 1.4058, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9446, "sent_len_1": 66.8822, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1962, "stdk": 0.0477, "stdq": 0.0458, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 21700 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.6365, "doc_norm": 1.4105, "encoder_q-embeddings": 1175.5316, "encoder_q-layer.0": 806.7217, "encoder_q-layer.1": 881.6093, "encoder_q-layer.10": 728.8169, "encoder_q-layer.11": 1954.3879, "encoder_q-layer.2": 983.6409, "encoder_q-layer.3": 1095.2211, "encoder_q-layer.4": 1220.3379, "encoder_q-layer.5": 1112.9543, "encoder_q-layer.6": 1138.5565, "encoder_q-layer.7": 1181.7294, "encoder_q-layer.8": 957.9765, "encoder_q-layer.9": 673.194, "epoch": 0.21, "inbatch_neg_score": 0.2456, "inbatch_pos_score": 0.8623, "learning_rate": 4.344444444444445e-05, "loss": 3.6365, "norm_diff": 0.021, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1711.0514, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2471, "query_norm": 1.4315, "queue_k_norm": 1.406, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9044, "sent_len_1": 66.9669, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2212, "stdk": 0.0477, "stdq": 0.0456, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 21800 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.6067, "doc_norm": 1.4021, "encoder_q-embeddings": 892.0833, "encoder_q-layer.0": 614.1625, "encoder_q-layer.1": 672.5717, "encoder_q-layer.10": 740.2905, "encoder_q-layer.11": 1902.2957, "encoder_q-layer.2": 692.403, "encoder_q-layer.3": 711.4027, "encoder_q-layer.4": 703.5872, "encoder_q-layer.5": 666.347, "encoder_q-layer.6": 728.6129, "encoder_q-layer.7": 790.4483, "encoder_q-layer.8": 836.1508, "encoder_q-layer.9": 675.3912, "epoch": 0.21, "inbatch_neg_score": 0.2405, "inbatch_pos_score": 0.8467, "learning_rate": 4.338888888888889e-05, "loss": 3.6067, "norm_diff": 0.0373, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1377.3941, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2404, "query_norm": 1.4394, "queue_k_norm": 1.407, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8823, "sent_len_1": 66.8593, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4688, "stdk": 0.0474, "stdq": 0.0458, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 21900 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.6101, "doc_norm": 1.4152, "encoder_q-embeddings": 2503.8142, "encoder_q-layer.0": 1998.1382, "encoder_q-layer.1": 2137.5024, "encoder_q-layer.10": 1318.1263, "encoder_q-layer.11": 3457.5308, "encoder_q-layer.2": 2175.4136, "encoder_q-layer.3": 2040.3018, "encoder_q-layer.4": 2039.0209, "encoder_q-layer.5": 1890.4874, "encoder_q-layer.6": 1991.7964, "encoder_q-layer.7": 1963.7295, "encoder_q-layer.8": 1730.4539, "encoder_q-layer.9": 1318.6471, "epoch": 0.21, "inbatch_neg_score": 0.2459, "inbatch_pos_score": 0.8872, "learning_rate": 4.3333333333333334e-05, "loss": 3.6101, "norm_diff": 0.0177, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3240.5889, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2458, "query_norm": 1.4268, "queue_k_norm": 1.4079, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1557, "sent_len_1": 67.2133, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3688, "stdk": 0.0479, "stdq": 0.0452, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 22000 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.6051, "doc_norm": 1.4069, "encoder_q-embeddings": 3706.0234, "encoder_q-layer.0": 2532.8706, "encoder_q-layer.1": 2861.2012, "encoder_q-layer.10": 1579.2969, "encoder_q-layer.11": 4130.9136, "encoder_q-layer.2": 2835.8088, "encoder_q-layer.3": 3017.5691, "encoder_q-layer.4": 3304.2693, "encoder_q-layer.5": 3140.1267, "encoder_q-layer.6": 3269.9512, "encoder_q-layer.7": 3058.936, "encoder_q-layer.8": 2337.7449, "encoder_q-layer.9": 1430.2625, "epoch": 0.22, "inbatch_neg_score": 0.2445, "inbatch_pos_score": 0.8354, "learning_rate": 4.3277777777777776e-05, "loss": 3.6051, "norm_diff": 0.0064, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4508.6883, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2434, "query_norm": 1.402, "queue_k_norm": 1.4082, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0459, "sent_len_1": 66.7114, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3787, "stdk": 0.0475, "stdq": 0.0444, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 22100 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5998, "doc_norm": 1.4075, "encoder_q-embeddings": 3496.5762, "encoder_q-layer.0": 2316.728, "encoder_q-layer.1": 2661.2573, "encoder_q-layer.10": 1424.775, "encoder_q-layer.11": 3567.1377, "encoder_q-layer.2": 2636.9011, "encoder_q-layer.3": 2617.9202, "encoder_q-layer.4": 2501.2041, "encoder_q-layer.5": 2559.8503, "encoder_q-layer.6": 2073.1841, "encoder_q-layer.7": 2142.2771, "encoder_q-layer.8": 1627.7817, "encoder_q-layer.9": 1267.4159, "epoch": 0.22, "inbatch_neg_score": 0.2448, "inbatch_pos_score": 0.8735, "learning_rate": 4.3222222222222226e-05, "loss": 3.5998, "norm_diff": 0.0246, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3840.7987, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2439, "query_norm": 1.4316, "queue_k_norm": 1.4081, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9578, "sent_len_1": 66.8299, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8613, "stdk": 0.0475, "stdq": 0.0457, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 22200 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.6219, "doc_norm": 1.4092, "encoder_q-embeddings": 13431.8135, "encoder_q-layer.0": 9404.4385, "encoder_q-layer.1": 11312.5293, "encoder_q-layer.10": 1336.5458, "encoder_q-layer.11": 3306.5383, "encoder_q-layer.2": 11131.9424, "encoder_q-layer.3": 12300.3291, "encoder_q-layer.4": 10938.8848, "encoder_q-layer.5": 10187.208, "encoder_q-layer.6": 10392.1484, "encoder_q-layer.7": 8817.3623, "encoder_q-layer.8": 4345.5991, "encoder_q-layer.9": 1631.3113, "epoch": 0.22, "inbatch_neg_score": 0.2318, "inbatch_pos_score": 0.8608, "learning_rate": 4.316666666666667e-05, "loss": 3.6219, "norm_diff": 0.0111, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14531.1955, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2308, "query_norm": 1.4161, "queue_k_norm": 1.4076, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0311, "sent_len_1": 66.7417, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5525, "stdk": 0.0476, "stdq": 0.0458, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 22300 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.6391, "doc_norm": 1.4131, "encoder_q-embeddings": 4512.4009, "encoder_q-layer.0": 3167.5908, "encoder_q-layer.1": 3646.2915, "encoder_q-layer.10": 1375.1631, "encoder_q-layer.11": 3260.7505, "encoder_q-layer.2": 4019.3982, "encoder_q-layer.3": 4284.8374, "encoder_q-layer.4": 4318.9541, "encoder_q-layer.5": 4306.3828, "encoder_q-layer.6": 4780.7871, "encoder_q-layer.7": 4829.311, "encoder_q-layer.8": 2718.4395, "encoder_q-layer.9": 1455.2354, "epoch": 0.22, "inbatch_neg_score": 0.2024, "inbatch_pos_score": 0.8315, "learning_rate": 4.311111111111111e-05, "loss": 3.6391, "norm_diff": 0.0269, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5658.6522, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2029, "query_norm": 1.3862, "queue_k_norm": 1.4084, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9493, "sent_len_1": 66.7765, "sent_max_len_0": 128.0, "sent_max_len_1": 188.735, "stdk": 0.0478, "stdq": 0.0456, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 22400 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.624, "doc_norm": 1.4048, "encoder_q-embeddings": 3292.4585, "encoder_q-layer.0": 2327.605, "encoder_q-layer.1": 2542.7039, "encoder_q-layer.10": 1378.4287, "encoder_q-layer.11": 3434.1226, "encoder_q-layer.2": 2738.8411, "encoder_q-layer.3": 2811.7319, "encoder_q-layer.4": 2820.4282, "encoder_q-layer.5": 2735.2168, "encoder_q-layer.6": 2652.7061, "encoder_q-layer.7": 2550.0569, "encoder_q-layer.8": 2021.9401, "encoder_q-layer.9": 1355.5892, "epoch": 0.22, "inbatch_neg_score": 0.2081, "inbatch_pos_score": 0.7983, "learning_rate": 4.305555555555556e-05, "loss": 3.624, "norm_diff": 0.0128, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3924.3975, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.207, "query_norm": 1.392, "queue_k_norm": 1.4078, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8468, "sent_len_1": 66.5989, "sent_max_len_0": 128.0, "sent_max_len_1": 189.725, "stdk": 0.0475, "stdq": 0.0452, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 22500 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.6243, "doc_norm": 1.4056, "encoder_q-embeddings": 1801.0687, "encoder_q-layer.0": 1231.5109, "encoder_q-layer.1": 1377.1641, "encoder_q-layer.10": 1331.8094, "encoder_q-layer.11": 3080.2461, "encoder_q-layer.2": 1466.7032, "encoder_q-layer.3": 1597.0383, "encoder_q-layer.4": 1783.7549, "encoder_q-layer.5": 1912.4165, "encoder_q-layer.6": 2223.6235, "encoder_q-layer.7": 2199.4973, "encoder_q-layer.8": 1922.5112, "encoder_q-layer.9": 1390.3849, "epoch": 0.22, "inbatch_neg_score": 0.2007, "inbatch_pos_score": 0.8916, "learning_rate": 4.3e-05, "loss": 3.6243, "norm_diff": 0.0262, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2770.7319, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1996, "query_norm": 1.4318, "queue_k_norm": 1.4075, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.6868, "sent_len_1": 66.4877, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8113, "stdk": 0.0476, "stdq": 0.0467, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 22600 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.5978, "doc_norm": 1.4101, "encoder_q-embeddings": 2018.0083, "encoder_q-layer.0": 1432.0581, "encoder_q-layer.1": 1557.7783, "encoder_q-layer.10": 1268.5103, "encoder_q-layer.11": 3108.3628, "encoder_q-layer.2": 1681.326, "encoder_q-layer.3": 1780.8315, "encoder_q-layer.4": 1977.7126, "encoder_q-layer.5": 2017.4597, "encoder_q-layer.6": 2057.0273, "encoder_q-layer.7": 1892.4855, "encoder_q-layer.8": 1681.3557, "encoder_q-layer.9": 1314.4568, "epoch": 0.22, "inbatch_neg_score": 0.2175, "inbatch_pos_score": 0.8467, "learning_rate": 4.294444444444445e-05, "loss": 3.5978, "norm_diff": 0.015, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2841.8286, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2169, "query_norm": 1.403, "queue_k_norm": 1.408, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8527, "sent_len_1": 66.9319, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4375, "stdk": 0.0478, "stdq": 0.0453, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 22700 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.6238, "doc_norm": 1.4009, "encoder_q-embeddings": 1844.6071, "encoder_q-layer.0": 1198.5017, "encoder_q-layer.1": 1314.2782, "encoder_q-layer.10": 1481.4857, "encoder_q-layer.11": 3119.3772, "encoder_q-layer.2": 1571.9701, "encoder_q-layer.3": 1825.545, "encoder_q-layer.4": 1963.4264, "encoder_q-layer.5": 1970.0953, "encoder_q-layer.6": 2174.064, "encoder_q-layer.7": 2512.6648, "encoder_q-layer.8": 2711.3445, "encoder_q-layer.9": 1902.424, "epoch": 0.22, "inbatch_neg_score": 0.1926, "inbatch_pos_score": 0.8345, "learning_rate": 4.2888888888888886e-05, "loss": 3.6238, "norm_diff": 0.0145, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2960.4967, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1937, "query_norm": 1.4146, "queue_k_norm": 1.4077, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1964, "sent_len_1": 66.9495, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4437, "stdk": 0.0475, "stdq": 0.0459, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 22800 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.5799, "doc_norm": 1.4042, "encoder_q-embeddings": 2163.4851, "encoder_q-layer.0": 1412.8202, "encoder_q-layer.1": 1488.662, "encoder_q-layer.10": 1298.1719, "encoder_q-layer.11": 3003.1184, "encoder_q-layer.2": 1647.1156, "encoder_q-layer.3": 1711.807, "encoder_q-layer.4": 1814.46, "encoder_q-layer.5": 1762.7004, "encoder_q-layer.6": 1758.7183, "encoder_q-layer.7": 1697.3467, "encoder_q-layer.8": 1655.4149, "encoder_q-layer.9": 1330.0714, "epoch": 0.22, "inbatch_neg_score": 0.2253, "inbatch_pos_score": 0.8638, "learning_rate": 4.2833333333333335e-05, "loss": 3.5799, "norm_diff": 0.0118, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2746.1556, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2263, "query_norm": 1.4147, "queue_k_norm": 1.4079, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9606, "sent_len_1": 66.8906, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9613, "stdk": 0.0476, "stdq": 0.0453, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 22900 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.5935, "doc_norm": 1.4049, "encoder_q-embeddings": 3588.0007, "encoder_q-layer.0": 2472.6519, "encoder_q-layer.1": 2917.5098, "encoder_q-layer.10": 1236.9369, "encoder_q-layer.11": 3083.9822, "encoder_q-layer.2": 3453.0034, "encoder_q-layer.3": 3422.2983, "encoder_q-layer.4": 3741.229, "encoder_q-layer.5": 3745.3049, "encoder_q-layer.6": 3604.8086, "encoder_q-layer.7": 3647.1389, "encoder_q-layer.8": 2187.8438, "encoder_q-layer.9": 1391.3909, "epoch": 0.22, "inbatch_neg_score": 0.2323, "inbatch_pos_score": 0.8306, "learning_rate": 4.277777777777778e-05, "loss": 3.5935, "norm_diff": 0.0113, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4695.7255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2311, "query_norm": 1.3943, "queue_k_norm": 1.4053, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0192, "sent_len_1": 66.9587, "sent_max_len_0": 128.0, "sent_max_len_1": 187.3413, "stdk": 0.0476, "stdq": 0.0447, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 23000 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.617, "doc_norm": 1.4062, "encoder_q-embeddings": 2197.012, "encoder_q-layer.0": 1520.8915, "encoder_q-layer.1": 1608.5641, "encoder_q-layer.10": 1254.6937, "encoder_q-layer.11": 3132.0479, "encoder_q-layer.2": 1853.3009, "encoder_q-layer.3": 1947.9946, "encoder_q-layer.4": 2009.8712, "encoder_q-layer.5": 1903.3256, "encoder_q-layer.6": 1923.4484, "encoder_q-layer.7": 1844.0907, "encoder_q-layer.8": 1763.6982, "encoder_q-layer.9": 1238.5104, "epoch": 0.23, "inbatch_neg_score": 0.2278, "inbatch_pos_score": 0.833, "learning_rate": 4.272222222222223e-05, "loss": 3.617, "norm_diff": 0.0385, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2919.8752, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2266, "query_norm": 1.3676, "queue_k_norm": 1.4078, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8986, "sent_len_1": 66.6515, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0087, "stdk": 0.0476, "stdq": 0.044, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23100 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.5956, "doc_norm": 1.4121, "encoder_q-embeddings": 4292.5698, "encoder_q-layer.0": 3177.3362, "encoder_q-layer.1": 3719.3362, "encoder_q-layer.10": 1426.2882, "encoder_q-layer.11": 3540.4192, "encoder_q-layer.2": 3746.6934, "encoder_q-layer.3": 4066.0945, "encoder_q-layer.4": 4192.5449, "encoder_q-layer.5": 3953.0811, "encoder_q-layer.6": 3746.8706, "encoder_q-layer.7": 3528.5908, "encoder_q-layer.8": 2575.1982, "encoder_q-layer.9": 1451.1039, "epoch": 0.23, "inbatch_neg_score": 0.2337, "inbatch_pos_score": 0.8501, "learning_rate": 4.266666666666667e-05, "loss": 3.5956, "norm_diff": 0.0212, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5249.3057, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2338, "query_norm": 1.391, "queue_k_norm": 1.4093, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8447, "sent_len_1": 66.7732, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2025, "stdk": 0.0479, "stdq": 0.0448, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 23200 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.574, "doc_norm": 1.3986, "encoder_q-embeddings": 2588.0908, "encoder_q-layer.0": 1838.6442, "encoder_q-layer.1": 1906.0819, "encoder_q-layer.10": 1294.6283, "encoder_q-layer.11": 3004.9729, "encoder_q-layer.2": 2200.6113, "encoder_q-layer.3": 2349.104, "encoder_q-layer.4": 2683.8206, "encoder_q-layer.5": 2586.3337, "encoder_q-layer.6": 2391.667, "encoder_q-layer.7": 2396.2837, "encoder_q-layer.8": 1961.424, "encoder_q-layer.9": 1391.3147, "epoch": 0.23, "inbatch_neg_score": 0.2287, "inbatch_pos_score": 0.8594, "learning_rate": 4.261111111111111e-05, "loss": 3.574, "norm_diff": 0.0243, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3357.456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2284, "query_norm": 1.4223, "queue_k_norm": 1.4078, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9872, "sent_len_1": 66.7806, "sent_max_len_0": 128.0, "sent_max_len_1": 189.58, "stdk": 0.0474, "stdq": 0.046, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23300 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.6014, "doc_norm": 1.4033, "encoder_q-embeddings": 3225.189, "encoder_q-layer.0": 2353.8491, "encoder_q-layer.1": 2622.6938, "encoder_q-layer.10": 1323.7844, "encoder_q-layer.11": 3080.4714, "encoder_q-layer.2": 3041.4473, "encoder_q-layer.3": 3037.9509, "encoder_q-layer.4": 3332.1294, "encoder_q-layer.5": 3574.7417, "encoder_q-layer.6": 3202.762, "encoder_q-layer.7": 3208.0903, "encoder_q-layer.8": 2330.0215, "encoder_q-layer.9": 1405.6007, "epoch": 0.23, "inbatch_neg_score": 0.2262, "inbatch_pos_score": 0.7998, "learning_rate": 4.255555555555556e-05, "loss": 3.6014, "norm_diff": 0.026, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4205.5623, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.225, "query_norm": 1.3773, "queue_k_norm": 1.4061, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1183, "sent_len_1": 66.8537, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3113, "stdk": 0.0475, "stdq": 0.0442, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23400 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.5789, "doc_norm": 1.4135, "encoder_q-embeddings": 2982.509, "encoder_q-layer.0": 1970.8741, "encoder_q-layer.1": 2270.9568, "encoder_q-layer.10": 1333.4247, "encoder_q-layer.11": 2947.635, "encoder_q-layer.2": 2582.9915, "encoder_q-layer.3": 2717.551, "encoder_q-layer.4": 2472.6238, "encoder_q-layer.5": 2529.8774, "encoder_q-layer.6": 2836.8904, "encoder_q-layer.7": 2287.7085, "encoder_q-layer.8": 1776.8845, "encoder_q-layer.9": 1352.8851, "epoch": 0.23, "inbatch_neg_score": 0.2302, "inbatch_pos_score": 0.8584, "learning_rate": 4.25e-05, "loss": 3.5789, "norm_diff": 0.0187, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3564.6937, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2307, "query_norm": 1.3948, "queue_k_norm": 1.4092, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9249, "sent_len_1": 66.7481, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8237, "stdk": 0.0479, "stdq": 0.0447, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 23500 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.5691, "doc_norm": 1.406, "encoder_q-embeddings": 2322.4875, "encoder_q-layer.0": 1586.5276, "encoder_q-layer.1": 1815.7781, "encoder_q-layer.10": 1397.8749, "encoder_q-layer.11": 2892.0986, "encoder_q-layer.2": 2174.4324, "encoder_q-layer.3": 2473.2898, "encoder_q-layer.4": 2439.3601, "encoder_q-layer.5": 2562.6155, "encoder_q-layer.6": 2922.8613, "encoder_q-layer.7": 3025.9265, "encoder_q-layer.8": 2345.8772, "encoder_q-layer.9": 1385.1841, "epoch": 0.23, "inbatch_neg_score": 0.2242, "inbatch_pos_score": 0.8057, "learning_rate": 4.2444444444444445e-05, "loss": 3.5691, "norm_diff": 0.0124, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3425.8516, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2249, "query_norm": 1.3945, "queue_k_norm": 1.4073, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0805, "sent_len_1": 66.7718, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4025, "stdk": 0.0477, "stdq": 0.0445, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23600 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.5872, "doc_norm": 1.4068, "encoder_q-embeddings": 1530.6113, "encoder_q-layer.0": 997.1699, "encoder_q-layer.1": 1145.0341, "encoder_q-layer.10": 1497.0886, "encoder_q-layer.11": 2880.1621, "encoder_q-layer.2": 1351.1687, "encoder_q-layer.3": 1434.4486, "encoder_q-layer.4": 1521.6628, "encoder_q-layer.5": 1361.0128, "encoder_q-layer.6": 1434.2611, "encoder_q-layer.7": 1475.564, "encoder_q-layer.8": 1534.5137, "encoder_q-layer.9": 1284.0227, "epoch": 0.23, "inbatch_neg_score": 0.2294, "inbatch_pos_score": 0.8701, "learning_rate": 4.238888888888889e-05, "loss": 3.5872, "norm_diff": 0.0246, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2330.3359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2297, "query_norm": 1.4315, "queue_k_norm": 1.4073, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9906, "sent_len_1": 66.7462, "sent_max_len_0": 128.0, "sent_max_len_1": 189.28, "stdk": 0.0477, "stdq": 0.0455, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23700 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.5718, "doc_norm": 1.4085, "encoder_q-embeddings": 1305.0663, "encoder_q-layer.0": 926.1746, "encoder_q-layer.1": 959.4102, "encoder_q-layer.10": 1394.382, "encoder_q-layer.11": 2909.5505, "encoder_q-layer.2": 1084.5039, "encoder_q-layer.3": 1142.0828, "encoder_q-layer.4": 1216.4233, "encoder_q-layer.5": 1162.5623, "encoder_q-layer.6": 1271.6853, "encoder_q-layer.7": 1386.3175, "encoder_q-layer.8": 1512.6034, "encoder_q-layer.9": 1367.5359, "epoch": 0.23, "inbatch_neg_score": 0.2302, "inbatch_pos_score": 0.8779, "learning_rate": 4.233333333333334e-05, "loss": 3.5718, "norm_diff": 0.038, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2117.3673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2292, "query_norm": 1.4465, "queue_k_norm": 1.4077, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7617, "sent_len_1": 66.773, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2425, "stdk": 0.0478, "stdq": 0.046, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 23800 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.5719, "doc_norm": 1.4078, "encoder_q-embeddings": 2297.7986, "encoder_q-layer.0": 1484.4125, "encoder_q-layer.1": 1737.5536, "encoder_q-layer.10": 1234.7692, "encoder_q-layer.11": 2938.458, "encoder_q-layer.2": 1996.7877, "encoder_q-layer.3": 2198.085, "encoder_q-layer.4": 2520.0894, "encoder_q-layer.5": 2688.6724, "encoder_q-layer.6": 2615.0977, "encoder_q-layer.7": 2601.2397, "encoder_q-layer.8": 2557.99, "encoder_q-layer.9": 1725.6864, "epoch": 0.23, "inbatch_neg_score": 0.2295, "inbatch_pos_score": 0.8613, "learning_rate": 4.227777777777778e-05, "loss": 3.5719, "norm_diff": 0.0078, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3378.1722, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2286, "query_norm": 1.4066, "queue_k_norm": 1.4079, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.667, "sent_len_1": 66.6614, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4837, "stdk": 0.0478, "stdq": 0.045, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 23900 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.5713, "doc_norm": 1.4032, "encoder_q-embeddings": 3459.79, "encoder_q-layer.0": 2206.8093, "encoder_q-layer.1": 2320.2864, "encoder_q-layer.10": 2518.967, "encoder_q-layer.11": 6143.6133, "encoder_q-layer.2": 2527.5444, "encoder_q-layer.3": 2636.3894, "encoder_q-layer.4": 2908.0276, "encoder_q-layer.5": 2858.7522, "encoder_q-layer.6": 3202.9019, "encoder_q-layer.7": 3232.6675, "encoder_q-layer.8": 3353.6975, "encoder_q-layer.9": 2585.4253, "epoch": 0.23, "inbatch_neg_score": 0.224, "inbatch_pos_score": 0.8574, "learning_rate": 4.222222222222222e-05, "loss": 3.5713, "norm_diff": 0.0111, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4963.7615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.222, "query_norm": 1.4104, "queue_k_norm": 1.4093, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9125, "sent_len_1": 66.886, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2688, "stdk": 0.0476, "stdq": 0.0453, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 24000 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.547, "doc_norm": 1.4109, "encoder_q-embeddings": 10265.2842, "encoder_q-layer.0": 6931.9639, "encoder_q-layer.1": 7056.2603, "encoder_q-layer.10": 2582.1152, "encoder_q-layer.11": 5954.2788, "encoder_q-layer.2": 8804.9834, "encoder_q-layer.3": 8914.7148, "encoder_q-layer.4": 10310.4248, "encoder_q-layer.5": 9830.8418, "encoder_q-layer.6": 10915.6221, "encoder_q-layer.7": 9890.5537, "encoder_q-layer.8": 6952.0151, "encoder_q-layer.9": 3098.6875, "epoch": 0.24, "inbatch_neg_score": 0.2213, "inbatch_pos_score": 0.8408, "learning_rate": 4.216666666666667e-05, "loss": 3.547, "norm_diff": 0.0195, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12456.7658, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2203, "query_norm": 1.3922, "queue_k_norm": 1.4092, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0569, "sent_len_1": 66.9633, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2575, "stdk": 0.0479, "stdq": 0.0447, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 24100 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.566, "doc_norm": 1.4085, "encoder_q-embeddings": 8513.6787, "encoder_q-layer.0": 6656.6738, "encoder_q-layer.1": 6399.8848, "encoder_q-layer.10": 1623.7386, "encoder_q-layer.11": 3324.2686, "encoder_q-layer.2": 6104.1343, "encoder_q-layer.3": 6016.7144, "encoder_q-layer.4": 5937.2998, "encoder_q-layer.5": 5727.7954, "encoder_q-layer.6": 4062.3701, "encoder_q-layer.7": 2896.2278, "encoder_q-layer.8": 2190.9924, "encoder_q-layer.9": 1576.1761, "epoch": 0.24, "inbatch_neg_score": 0.206, "inbatch_pos_score": 0.8247, "learning_rate": 4.211111111111111e-05, "loss": 3.566, "norm_diff": 0.014, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7941.0816, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2064, "query_norm": 1.4034, "queue_k_norm": 1.4092, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0568, "sent_len_1": 66.8877, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0775, "stdk": 0.0478, "stdq": 0.0458, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 24200 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.5659, "doc_norm": 1.4125, "encoder_q-embeddings": 1705.1709, "encoder_q-layer.0": 1154.1046, "encoder_q-layer.1": 1259.8827, "encoder_q-layer.10": 1307.4722, "encoder_q-layer.11": 2806.2351, "encoder_q-layer.2": 1404.4512, "encoder_q-layer.3": 1311.0679, "encoder_q-layer.4": 1433.3123, "encoder_q-layer.5": 1523.8708, "encoder_q-layer.6": 1675.7119, "encoder_q-layer.7": 1644.9497, "encoder_q-layer.8": 1620.5249, "encoder_q-layer.9": 1287.3176, "epoch": 0.24, "inbatch_neg_score": 0.1884, "inbatch_pos_score": 0.8359, "learning_rate": 4.205555555555556e-05, "loss": 3.5659, "norm_diff": 0.0229, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2361.0204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1882, "query_norm": 1.3896, "queue_k_norm": 1.4077, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0042, "sent_len_1": 66.7976, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1925, "stdk": 0.048, "stdq": 0.0457, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 24300 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.5891, "doc_norm": 1.4082, "encoder_q-embeddings": 1310.2006, "encoder_q-layer.0": 900.2599, "encoder_q-layer.1": 944.9661, "encoder_q-layer.10": 1288.2823, "encoder_q-layer.11": 2969.2017, "encoder_q-layer.2": 1041.3353, "encoder_q-layer.3": 1060.3147, "encoder_q-layer.4": 1124.5089, "encoder_q-layer.5": 1156.1047, "encoder_q-layer.6": 1305.1735, "encoder_q-layer.7": 1443.01, "encoder_q-layer.8": 1549.918, "encoder_q-layer.9": 1233.9718, "epoch": 0.24, "inbatch_neg_score": 0.1854, "inbatch_pos_score": 0.7827, "learning_rate": 4.2e-05, "loss": 3.5891, "norm_diff": 0.0581, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2167.5711, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1857, "query_norm": 1.3501, "queue_k_norm": 1.4071, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9818, "sent_len_1": 66.6916, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6262, "stdk": 0.0479, "stdq": 0.0445, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 24400 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.5632, "doc_norm": 1.4044, "encoder_q-embeddings": 1926.8082, "encoder_q-layer.0": 1424.1288, "encoder_q-layer.1": 1579.7301, "encoder_q-layer.10": 1373.3003, "encoder_q-layer.11": 2889.9219, "encoder_q-layer.2": 1683.0625, "encoder_q-layer.3": 1703.1108, "encoder_q-layer.4": 1688.6277, "encoder_q-layer.5": 1806.5778, "encoder_q-layer.6": 1921.9536, "encoder_q-layer.7": 1802.0238, "encoder_q-layer.8": 1764.7095, "encoder_q-layer.9": 1354.8308, "epoch": 0.24, "inbatch_neg_score": 0.1963, "inbatch_pos_score": 0.8472, "learning_rate": 4.194444444444445e-05, "loss": 3.5632, "norm_diff": 0.0234, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2670.3085, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1963, "query_norm": 1.381, "queue_k_norm": 1.4059, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0131, "sent_len_1": 66.8902, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7413, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 24500 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.5831, "doc_norm": 1.4047, "encoder_q-embeddings": 1922.0703, "encoder_q-layer.0": 1349.8608, "encoder_q-layer.1": 1329.2041, "encoder_q-layer.10": 1269.2135, "encoder_q-layer.11": 2883.9678, "encoder_q-layer.2": 1611.4182, "encoder_q-layer.3": 1458.2748, "encoder_q-layer.4": 1543.2598, "encoder_q-layer.5": 1542.1842, "encoder_q-layer.6": 1807.6936, "encoder_q-layer.7": 1563.0177, "encoder_q-layer.8": 1488.6674, "encoder_q-layer.9": 1249.9965, "epoch": 0.24, "inbatch_neg_score": 0.1957, "inbatch_pos_score": 0.7939, "learning_rate": 4.188888888888889e-05, "loss": 3.5831, "norm_diff": 0.0546, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2539.4154, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1957, "query_norm": 1.3501, "queue_k_norm": 1.4051, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8895, "sent_len_1": 66.351, "sent_max_len_0": 128.0, "sent_max_len_1": 189.19, "stdk": 0.0478, "stdq": 0.044, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 24600 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.5733, "doc_norm": 1.405, "encoder_q-embeddings": 934.355, "encoder_q-layer.0": 572.7925, "encoder_q-layer.1": 603.4168, "encoder_q-layer.10": 622.5825, "encoder_q-layer.11": 1333.3092, "encoder_q-layer.2": 655.3574, "encoder_q-layer.3": 708.8694, "encoder_q-layer.4": 684.6823, "encoder_q-layer.5": 682.7202, "encoder_q-layer.6": 741.801, "encoder_q-layer.7": 788.8773, "encoder_q-layer.8": 828.6475, "encoder_q-layer.9": 638.4784, "epoch": 0.24, "inbatch_neg_score": 0.1788, "inbatch_pos_score": 0.8184, "learning_rate": 4.183333333333334e-05, "loss": 3.5733, "norm_diff": 0.027, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1171.0291, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1792, "query_norm": 1.378, "queue_k_norm": 1.4063, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8187, "sent_len_1": 66.7158, "sent_max_len_0": 128.0, "sent_max_len_1": 191.71, "stdk": 0.0479, "stdq": 0.0456, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 24700 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.5459, "doc_norm": 1.4034, "encoder_q-embeddings": 1033.2865, "encoder_q-layer.0": 667.8786, "encoder_q-layer.1": 755.6133, "encoder_q-layer.10": 689.7005, "encoder_q-layer.11": 1436.0685, "encoder_q-layer.2": 870.5583, "encoder_q-layer.3": 900.4341, "encoder_q-layer.4": 961.5785, "encoder_q-layer.5": 995.0579, "encoder_q-layer.6": 973.025, "encoder_q-layer.7": 1071.7722, "encoder_q-layer.8": 859.6398, "encoder_q-layer.9": 630.2768, "epoch": 0.24, "inbatch_neg_score": 0.1761, "inbatch_pos_score": 0.7832, "learning_rate": 4.177777777777778e-05, "loss": 3.5459, "norm_diff": 0.0504, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1386.0328, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1771, "query_norm": 1.353, "queue_k_norm": 1.4018, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0274, "sent_len_1": 66.858, "sent_max_len_0": 128.0, "sent_max_len_1": 190.75, "stdk": 0.0479, "stdq": 0.0447, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 24800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.5496, "doc_norm": 1.4007, "encoder_q-embeddings": 882.5779, "encoder_q-layer.0": 597.355, "encoder_q-layer.1": 694.0412, "encoder_q-layer.10": 590.1336, "encoder_q-layer.11": 1362.5961, "encoder_q-layer.2": 772.8394, "encoder_q-layer.3": 822.7858, "encoder_q-layer.4": 971.8295, "encoder_q-layer.5": 926.5442, "encoder_q-layer.6": 1037.9303, "encoder_q-layer.7": 1089.0382, "encoder_q-layer.8": 1127.6836, "encoder_q-layer.9": 787.0945, "epoch": 0.24, "inbatch_neg_score": 0.1808, "inbatch_pos_score": 0.8472, "learning_rate": 4.172222222222222e-05, "loss": 3.5496, "norm_diff": 0.0044, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1360.8163, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.181, "query_norm": 1.3989, "queue_k_norm": 1.4018, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9363, "sent_len_1": 66.7553, "sent_max_len_0": 128.0, "sent_max_len_1": 186.3713, "stdk": 0.0478, "stdq": 0.0463, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 24900 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.5697, "doc_norm": 1.3977, "encoder_q-embeddings": 816.1497, "encoder_q-layer.0": 561.1924, "encoder_q-layer.1": 620.3623, "encoder_q-layer.10": 621.9829, "encoder_q-layer.11": 1432.0073, "encoder_q-layer.2": 713.9332, "encoder_q-layer.3": 691.9762, "encoder_q-layer.4": 697.6194, "encoder_q-layer.5": 711.0004, "encoder_q-layer.6": 681.537, "encoder_q-layer.7": 680.4737, "encoder_q-layer.8": 699.9769, "encoder_q-layer.9": 622.3752, "epoch": 0.24, "inbatch_neg_score": 0.1753, "inbatch_pos_score": 0.8062, "learning_rate": 4.166666666666667e-05, "loss": 3.5697, "norm_diff": 0.0316, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1156.4828, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1748, "query_norm": 1.3661, "queue_k_norm": 1.4016, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.28, "sent_len_1": 66.9843, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1625, "stdk": 0.0477, "stdq": 0.045, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 25000 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.5112, "doc_norm": 1.3986, "encoder_q-embeddings": 6851.2241, "encoder_q-layer.0": 4937.1279, "encoder_q-layer.1": 5628.4907, "encoder_q-layer.10": 632.2959, "encoder_q-layer.11": 1429.4181, "encoder_q-layer.2": 6969.8579, "encoder_q-layer.3": 7158.3276, "encoder_q-layer.4": 8189.5679, "encoder_q-layer.5": 8145.7334, "encoder_q-layer.6": 8363.2236, "encoder_q-layer.7": 9160.4727, "encoder_q-layer.8": 4204.3154, "encoder_q-layer.9": 1010.9896, "epoch": 0.25, "inbatch_neg_score": 0.1741, "inbatch_pos_score": 0.8037, "learning_rate": 4.1611111111111114e-05, "loss": 3.5112, "norm_diff": 0.0273, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9431.4961, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1735, "query_norm": 1.3713, "queue_k_norm": 1.4025, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8112, "sent_len_1": 66.8377, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9938, "stdk": 0.0478, "stdq": 0.0453, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 25100 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.5497, "doc_norm": 1.4029, "encoder_q-embeddings": 788.2953, "encoder_q-layer.0": 525.5163, "encoder_q-layer.1": 597.5987, "encoder_q-layer.10": 579.0491, "encoder_q-layer.11": 1321.7043, "encoder_q-layer.2": 630.2535, "encoder_q-layer.3": 649.7164, "encoder_q-layer.4": 649.5215, "encoder_q-layer.5": 603.5771, "encoder_q-layer.6": 657.5274, "encoder_q-layer.7": 720.9599, "encoder_q-layer.8": 746.6126, "encoder_q-layer.9": 631.1224, "epoch": 0.25, "inbatch_neg_score": 0.1782, "inbatch_pos_score": 0.8364, "learning_rate": 4.155555555555556e-05, "loss": 3.5497, "norm_diff": 0.007, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1089.1405, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1774, "query_norm": 1.3991, "queue_k_norm": 1.3997, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.013, "sent_len_1": 66.7933, "sent_max_len_0": 128.0, "sent_max_len_1": 190.225, "stdk": 0.048, "stdq": 0.046, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 25200 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.5143, "doc_norm": 1.3998, "encoder_q-embeddings": 1690.8929, "encoder_q-layer.0": 1224.5664, "encoder_q-layer.1": 1393.4215, "encoder_q-layer.10": 655.324, "encoder_q-layer.11": 1431.0282, "encoder_q-layer.2": 1676.6263, "encoder_q-layer.3": 1694.9559, "encoder_q-layer.4": 1643.9779, "encoder_q-layer.5": 1625.0046, "encoder_q-layer.6": 1617.3436, "encoder_q-layer.7": 1331.283, "encoder_q-layer.8": 903.6763, "encoder_q-layer.9": 647.5179, "epoch": 0.25, "inbatch_neg_score": 0.1669, "inbatch_pos_score": 0.7578, "learning_rate": 4.15e-05, "loss": 3.5143, "norm_diff": 0.0331, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2098.1811, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1671, "query_norm": 1.3667, "queue_k_norm": 1.3989, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9039, "sent_len_1": 66.9086, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7937, "stdk": 0.0479, "stdq": 0.0449, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 25300 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.5273, "doc_norm": 1.3944, "encoder_q-embeddings": 653.9332, "encoder_q-layer.0": 441.3697, "encoder_q-layer.1": 484.5655, "encoder_q-layer.10": 667.628, "encoder_q-layer.11": 1353.0951, "encoder_q-layer.2": 571.1337, "encoder_q-layer.3": 636.2188, "encoder_q-layer.4": 754.7802, "encoder_q-layer.5": 709.5286, "encoder_q-layer.6": 682.3445, "encoder_q-layer.7": 725.6863, "encoder_q-layer.8": 773.0938, "encoder_q-layer.9": 686.7794, "epoch": 0.25, "inbatch_neg_score": 0.1608, "inbatch_pos_score": 0.7554, "learning_rate": 4.144444444444445e-05, "loss": 3.5273, "norm_diff": 0.0377, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1087.5457, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1599, "query_norm": 1.3568, "queue_k_norm": 1.3998, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9387, "sent_len_1": 66.8254, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7775, "stdk": 0.0477, "stdq": 0.0447, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 25400 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.5518, "doc_norm": 1.3994, "encoder_q-embeddings": 607.7343, "encoder_q-layer.0": 399.4394, "encoder_q-layer.1": 422.9692, "encoder_q-layer.10": 594.6741, "encoder_q-layer.11": 1324.2738, "encoder_q-layer.2": 459.2203, "encoder_q-layer.3": 462.6089, "encoder_q-layer.4": 519.4867, "encoder_q-layer.5": 520.1624, "encoder_q-layer.6": 578.7879, "encoder_q-layer.7": 624.2927, "encoder_q-layer.8": 705.2097, "encoder_q-layer.9": 600.8412, "epoch": 0.25, "inbatch_neg_score": 0.164, "inbatch_pos_score": 0.812, "learning_rate": 4.138888888888889e-05, "loss": 3.5518, "norm_diff": 0.0234, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 967.6744, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1642, "query_norm": 1.3761, "queue_k_norm": 1.3968, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9204, "sent_len_1": 66.5848, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5275, "stdk": 0.0479, "stdq": 0.0454, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 25500 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.5581, "doc_norm": 1.3931, "encoder_q-embeddings": 1234.7136, "encoder_q-layer.0": 887.1579, "encoder_q-layer.1": 1085.578, "encoder_q-layer.10": 611.2844, "encoder_q-layer.11": 1422.4746, "encoder_q-layer.2": 1228.5321, "encoder_q-layer.3": 1385.6758, "encoder_q-layer.4": 1379.2295, "encoder_q-layer.5": 1512.0835, "encoder_q-layer.6": 1446.1877, "encoder_q-layer.7": 1173.692, "encoder_q-layer.8": 884.6814, "encoder_q-layer.9": 667.8317, "epoch": 0.25, "inbatch_neg_score": 0.151, "inbatch_pos_score": 0.79, "learning_rate": 4.133333333333333e-05, "loss": 3.5581, "norm_diff": 0.0315, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1734.5822, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1517, "query_norm": 1.3616, "queue_k_norm": 1.3965, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9664, "sent_len_1": 66.6213, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8063, "stdk": 0.0478, "stdq": 0.0452, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 25600 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.5375, "doc_norm": 1.3983, "encoder_q-embeddings": 671.6667, "encoder_q-layer.0": 464.782, "encoder_q-layer.1": 487.2064, "encoder_q-layer.10": 674.0069, "encoder_q-layer.11": 1403.1613, "encoder_q-layer.2": 541.2934, "encoder_q-layer.3": 544.9177, "encoder_q-layer.4": 608.1513, "encoder_q-layer.5": 600.2668, "encoder_q-layer.6": 699.0875, "encoder_q-layer.7": 746.1172, "encoder_q-layer.8": 803.3029, "encoder_q-layer.9": 656.0577, "epoch": 0.25, "inbatch_neg_score": 0.1585, "inbatch_pos_score": 0.7832, "learning_rate": 4.127777777777778e-05, "loss": 3.5375, "norm_diff": 0.0574, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1078.8687, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1589, "query_norm": 1.3409, "queue_k_norm": 1.393, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9656, "sent_len_1": 66.5649, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 25700 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5403, "doc_norm": 1.3972, "encoder_q-embeddings": 908.3321, "encoder_q-layer.0": 682.1144, "encoder_q-layer.1": 686.8648, "encoder_q-layer.10": 648.6836, "encoder_q-layer.11": 1399.6937, "encoder_q-layer.2": 758.0508, "encoder_q-layer.3": 749.5003, "encoder_q-layer.4": 738.66, "encoder_q-layer.5": 680.2478, "encoder_q-layer.6": 728.2859, "encoder_q-layer.7": 751.4062, "encoder_q-layer.8": 798.1392, "encoder_q-layer.9": 681.0893, "epoch": 0.25, "inbatch_neg_score": 0.1629, "inbatch_pos_score": 0.7783, "learning_rate": 4.1222222222222224e-05, "loss": 3.5403, "norm_diff": 0.0497, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1225.7888, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1625, "query_norm": 1.3475, "queue_k_norm": 1.393, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9617, "sent_len_1": 66.7989, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6037, "stdk": 0.048, "stdq": 0.0448, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 25800 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.5307, "doc_norm": 1.3923, "encoder_q-embeddings": 1150.6343, "encoder_q-layer.0": 750.9628, "encoder_q-layer.1": 877.5085, "encoder_q-layer.10": 636.1991, "encoder_q-layer.11": 1359.3192, "encoder_q-layer.2": 964.3879, "encoder_q-layer.3": 1047.7778, "encoder_q-layer.4": 1176.2489, "encoder_q-layer.5": 1202.72, "encoder_q-layer.6": 1317.64, "encoder_q-layer.7": 1151.1555, "encoder_q-layer.8": 890.9929, "encoder_q-layer.9": 693.3546, "epoch": 0.25, "inbatch_neg_score": 0.1617, "inbatch_pos_score": 0.8242, "learning_rate": 4.116666666666667e-05, "loss": 3.5307, "norm_diff": 0.0312, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1564.3005, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1606, "query_norm": 1.3611, "queue_k_norm": 1.3925, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9629, "sent_len_1": 66.7961, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9787, "stdk": 0.0478, "stdq": 0.0452, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 25900 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.5221, "doc_norm": 1.3913, "encoder_q-embeddings": 834.9116, "encoder_q-layer.0": 574.2308, "encoder_q-layer.1": 631.3254, "encoder_q-layer.10": 608.8398, "encoder_q-layer.11": 1285.1926, "encoder_q-layer.2": 687.5097, "encoder_q-layer.3": 701.9085, "encoder_q-layer.4": 754.2084, "encoder_q-layer.5": 735.2666, "encoder_q-layer.6": 820.6118, "encoder_q-layer.7": 791.1491, "encoder_q-layer.8": 807.9592, "encoder_q-layer.9": 656.1193, "epoch": 0.25, "inbatch_neg_score": 0.1579, "inbatch_pos_score": 0.8101, "learning_rate": 4.111111111111111e-05, "loss": 3.5221, "norm_diff": 0.0284, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1171.2132, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1572, "query_norm": 1.3629, "queue_k_norm": 1.3921, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9771, "sent_len_1": 66.8276, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4863, "stdk": 0.0478, "stdq": 0.0455, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 26000 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.5376, "doc_norm": 1.3916, "encoder_q-embeddings": 1018.6212, "encoder_q-layer.0": 676.7032, "encoder_q-layer.1": 713.3555, "encoder_q-layer.10": 689.7332, "encoder_q-layer.11": 1308.3473, "encoder_q-layer.2": 770.7841, "encoder_q-layer.3": 837.4434, "encoder_q-layer.4": 894.8958, "encoder_q-layer.5": 836.6804, "encoder_q-layer.6": 934.0964, "encoder_q-layer.7": 911.0344, "encoder_q-layer.8": 1053.7948, "encoder_q-layer.9": 751.9384, "epoch": 0.25, "inbatch_neg_score": 0.1622, "inbatch_pos_score": 0.8071, "learning_rate": 4.105555555555556e-05, "loss": 3.5376, "norm_diff": 0.0396, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1340.5905, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1626, "query_norm": 1.352, "queue_k_norm": 1.3918, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9682, "sent_len_1": 66.6759, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1538, "stdk": 0.0479, "stdq": 0.0452, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 26100 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.5361, "doc_norm": 1.3917, "encoder_q-embeddings": 1201.308, "encoder_q-layer.0": 809.2301, "encoder_q-layer.1": 784.938, "encoder_q-layer.10": 608.929, "encoder_q-layer.11": 1336.6559, "encoder_q-layer.2": 842.9503, "encoder_q-layer.3": 867.3347, "encoder_q-layer.4": 880.0499, "encoder_q-layer.5": 861.022, "encoder_q-layer.6": 987.8536, "encoder_q-layer.7": 1099.7781, "encoder_q-layer.8": 1221.1567, "encoder_q-layer.9": 942.696, "epoch": 0.26, "inbatch_neg_score": 0.1623, "inbatch_pos_score": 0.7759, "learning_rate": 4.1e-05, "loss": 3.5361, "norm_diff": 0.0416, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1489.5752, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1615, "query_norm": 1.3501, "queue_k_norm": 1.3912, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.6091, "sent_len_1": 66.5303, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8325, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 26200 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.5181, "doc_norm": 1.3955, "encoder_q-embeddings": 822.0681, "encoder_q-layer.0": 520.69, "encoder_q-layer.1": 568.8849, "encoder_q-layer.10": 657.7006, "encoder_q-layer.11": 1342.9023, "encoder_q-layer.2": 658.5743, "encoder_q-layer.3": 730.9957, "encoder_q-layer.4": 874.441, "encoder_q-layer.5": 812.2322, "encoder_q-layer.6": 869.7023, "encoder_q-layer.7": 951.3959, "encoder_q-layer.8": 990.9709, "encoder_q-layer.9": 762.0366, "epoch": 0.26, "inbatch_neg_score": 0.1554, "inbatch_pos_score": 0.7988, "learning_rate": 4.094444444444445e-05, "loss": 3.5181, "norm_diff": 0.0378, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1227.6262, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1558, "query_norm": 1.3577, "queue_k_norm": 1.3918, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9338, "sent_len_1": 66.7686, "sent_max_len_0": 128.0, "sent_max_len_1": 190.175, "stdk": 0.0481, "stdq": 0.0451, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 26300 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.5474, "doc_norm": 1.3925, "encoder_q-embeddings": 434.8495, "encoder_q-layer.0": 275.7167, "encoder_q-layer.1": 291.1348, "encoder_q-layer.10": 299.6149, "encoder_q-layer.11": 671.0661, "encoder_q-layer.2": 341.0104, "encoder_q-layer.3": 346.9167, "encoder_q-layer.4": 369.6541, "encoder_q-layer.5": 394.1975, "encoder_q-layer.6": 431.6562, "encoder_q-layer.7": 443.7527, "encoder_q-layer.8": 409.6902, "encoder_q-layer.9": 323.8975, "epoch": 0.26, "inbatch_neg_score": 0.1695, "inbatch_pos_score": 0.7949, "learning_rate": 4.088888888888889e-05, "loss": 3.5474, "norm_diff": 0.0316, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 593.6864, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1689, "query_norm": 1.3609, "queue_k_norm": 1.3906, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0958, "sent_len_1": 66.8328, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7438, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 26400 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.5248, "doc_norm": 1.3958, "encoder_q-embeddings": 731.1332, "encoder_q-layer.0": 528.668, "encoder_q-layer.1": 526.669, "encoder_q-layer.10": 365.2702, "encoder_q-layer.11": 734.7773, "encoder_q-layer.2": 653.6866, "encoder_q-layer.3": 650.3055, "encoder_q-layer.4": 704.3323, "encoder_q-layer.5": 864.8784, "encoder_q-layer.6": 759.0584, "encoder_q-layer.7": 774.3522, "encoder_q-layer.8": 663.5508, "encoder_q-layer.9": 372.2983, "epoch": 0.26, "inbatch_neg_score": 0.1708, "inbatch_pos_score": 0.7656, "learning_rate": 4.0833333333333334e-05, "loss": 3.5248, "norm_diff": 0.0657, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 978.9363, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1697, "query_norm": 1.3301, "queue_k_norm": 1.3918, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.972, "sent_len_1": 66.67, "sent_max_len_0": 128.0, "sent_max_len_1": 192.3562, "stdk": 0.0481, "stdq": 0.044, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 26500 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.5085, "doc_norm": 1.3974, "encoder_q-embeddings": 2590.9241, "encoder_q-layer.0": 1648.0104, "encoder_q-layer.1": 2022.7517, "encoder_q-layer.10": 318.1642, "encoder_q-layer.11": 650.1288, "encoder_q-layer.2": 2254.6875, "encoder_q-layer.3": 2482.2295, "encoder_q-layer.4": 2882.1702, "encoder_q-layer.5": 2908.5691, "encoder_q-layer.6": 2847.249, "encoder_q-layer.7": 2696.9863, "encoder_q-layer.8": 1828.0314, "encoder_q-layer.9": 494.6945, "epoch": 0.26, "inbatch_neg_score": 0.1702, "inbatch_pos_score": 0.8232, "learning_rate": 4.0777777777777783e-05, "loss": 3.5085, "norm_diff": 0.0285, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3278.4848, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1703, "query_norm": 1.3688, "queue_k_norm": 1.3902, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.926, "sent_len_1": 66.7146, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9025, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 26600 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.4935, "doc_norm": 1.392, "encoder_q-embeddings": 294.8237, "encoder_q-layer.0": 189.7005, "encoder_q-layer.1": 200.5098, "encoder_q-layer.10": 294.4138, "encoder_q-layer.11": 637.7573, "encoder_q-layer.2": 222.725, "encoder_q-layer.3": 235.5561, "encoder_q-layer.4": 249.1794, "encoder_q-layer.5": 255.2377, "encoder_q-layer.6": 287.7271, "encoder_q-layer.7": 313.5806, "encoder_q-layer.8": 353.6754, "encoder_q-layer.9": 301.3709, "epoch": 0.26, "inbatch_neg_score": 0.1744, "inbatch_pos_score": 0.793, "learning_rate": 4.0722222222222226e-05, "loss": 3.4935, "norm_diff": 0.0458, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 471.8889, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1749, "query_norm": 1.3462, "queue_k_norm": 1.3904, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8871, "sent_len_1": 66.6826, "sent_max_len_0": 128.0, "sent_max_len_1": 189.375, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 26700 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.5497, "doc_norm": 1.3913, "encoder_q-embeddings": 346.0812, "encoder_q-layer.0": 230.4144, "encoder_q-layer.1": 247.5842, "encoder_q-layer.10": 304.1868, "encoder_q-layer.11": 661.2893, "encoder_q-layer.2": 280.2791, "encoder_q-layer.3": 289.0876, "encoder_q-layer.4": 314.1222, "encoder_q-layer.5": 316.5557, "encoder_q-layer.6": 357.868, "encoder_q-layer.7": 369.4095, "encoder_q-layer.8": 389.0192, "encoder_q-layer.9": 327.0127, "epoch": 0.26, "inbatch_neg_score": 0.1704, "inbatch_pos_score": 0.7646, "learning_rate": 4.066666666666667e-05, "loss": 3.5497, "norm_diff": 0.0419, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 526.4968, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1693, "query_norm": 1.3494, "queue_k_norm": 1.3919, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1841, "sent_len_1": 66.3981, "sent_max_len_0": 128.0, "sent_max_len_1": 186.99, "stdk": 0.0479, "stdq": 0.0448, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 26800 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.4855, "doc_norm": 1.395, "encoder_q-embeddings": 415.6185, "encoder_q-layer.0": 262.8745, "encoder_q-layer.1": 287.9299, "encoder_q-layer.10": 323.6875, "encoder_q-layer.11": 714.2651, "encoder_q-layer.2": 340.694, "encoder_q-layer.3": 377.3113, "encoder_q-layer.4": 388.7909, "encoder_q-layer.5": 366.0081, "encoder_q-layer.6": 406.4124, "encoder_q-layer.7": 453.693, "encoder_q-layer.8": 509.3709, "encoder_q-layer.9": 395.7092, "epoch": 0.26, "inbatch_neg_score": 0.1764, "inbatch_pos_score": 0.7886, "learning_rate": 4.061111111111111e-05, "loss": 3.4855, "norm_diff": 0.027, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 620.4213, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1763, "query_norm": 1.368, "queue_k_norm": 1.392, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9759, "sent_len_1": 66.8957, "sent_max_len_0": 128.0, "sent_max_len_1": 188.78, "stdk": 0.0481, "stdq": 0.0451, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 26900 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.5075, "doc_norm": 1.3987, "encoder_q-embeddings": 422.9335, "encoder_q-layer.0": 283.9524, "encoder_q-layer.1": 294.6614, "encoder_q-layer.10": 344.2137, "encoder_q-layer.11": 693.5967, "encoder_q-layer.2": 334.8974, "encoder_q-layer.3": 353.0682, "encoder_q-layer.4": 389.657, "encoder_q-layer.5": 374.6926, "encoder_q-layer.6": 418.3292, "encoder_q-layer.7": 438.0147, "encoder_q-layer.8": 440.9618, "encoder_q-layer.9": 358.6366, "epoch": 0.26, "inbatch_neg_score": 0.1757, "inbatch_pos_score": 0.8105, "learning_rate": 4.055555555555556e-05, "loss": 3.5075, "norm_diff": 0.0257, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 600.4656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.176, "query_norm": 1.373, "queue_k_norm": 1.3922, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8242, "sent_len_1": 66.7009, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3975, "stdk": 0.0482, "stdq": 0.0453, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 27000 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4817, "doc_norm": 1.3936, "encoder_q-embeddings": 342.4071, "encoder_q-layer.0": 228.2847, "encoder_q-layer.1": 253.4028, "encoder_q-layer.10": 316.7568, "encoder_q-layer.11": 676.728, "encoder_q-layer.2": 276.8078, "encoder_q-layer.3": 304.1387, "encoder_q-layer.4": 331.607, "encoder_q-layer.5": 346.9325, "encoder_q-layer.6": 370.0833, "encoder_q-layer.7": 391.8976, "encoder_q-layer.8": 420.2132, "encoder_q-layer.9": 332.3729, "epoch": 0.26, "inbatch_neg_score": 0.1788, "inbatch_pos_score": 0.8315, "learning_rate": 4.05e-05, "loss": 3.4817, "norm_diff": 0.0108, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 541.6928, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1799, "query_norm": 1.4009, "queue_k_norm": 1.3924, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0406, "sent_len_1": 66.815, "sent_max_len_0": 128.0, "sent_max_len_1": 188.79, "stdk": 0.048, "stdq": 0.0463, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 27100 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.5016, "doc_norm": 1.3923, "encoder_q-embeddings": 302.4758, "encoder_q-layer.0": 189.554, "encoder_q-layer.1": 206.2597, "encoder_q-layer.10": 305.8005, "encoder_q-layer.11": 644.6493, "encoder_q-layer.2": 232.7827, "encoder_q-layer.3": 249.6786, "encoder_q-layer.4": 258.4534, "encoder_q-layer.5": 261.7056, "encoder_q-layer.6": 298.1148, "encoder_q-layer.7": 328.8524, "encoder_q-layer.8": 356.2351, "encoder_q-layer.9": 312.4963, "epoch": 0.27, "inbatch_neg_score": 0.1703, "inbatch_pos_score": 0.835, "learning_rate": 4.0444444444444444e-05, "loss": 3.5016, "norm_diff": 0.0096, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 479.4482, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1714, "query_norm": 1.3949, "queue_k_norm": 1.3915, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1094, "sent_len_1": 66.8766, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8625, "stdk": 0.048, "stdq": 0.0462, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 27200 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.4781, "doc_norm": 1.3904, "encoder_q-embeddings": 375.3645, "encoder_q-layer.0": 250.8079, "encoder_q-layer.1": 277.5989, "encoder_q-layer.10": 309.9203, "encoder_q-layer.11": 673.5418, "encoder_q-layer.2": 321.6376, "encoder_q-layer.3": 336.8546, "encoder_q-layer.4": 360.3401, "encoder_q-layer.5": 375.1649, "encoder_q-layer.6": 407.0685, "encoder_q-layer.7": 485.9859, "encoder_q-layer.8": 492.8849, "encoder_q-layer.9": 376.9204, "epoch": 0.27, "inbatch_neg_score": 0.1685, "inbatch_pos_score": 0.7993, "learning_rate": 4.038888888888889e-05, "loss": 3.4781, "norm_diff": 0.0222, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 593.4435, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1687, "query_norm": 1.3709, "queue_k_norm": 1.3914, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9587, "sent_len_1": 66.9557, "sent_max_len_0": 128.0, "sent_max_len_1": 187.435, "stdk": 0.0479, "stdq": 0.0452, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 27300 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.4856, "doc_norm": 1.4018, "encoder_q-embeddings": 353.5304, "encoder_q-layer.0": 222.2991, "encoder_q-layer.1": 236.6762, "encoder_q-layer.10": 296.3898, "encoder_q-layer.11": 617.7743, "encoder_q-layer.2": 290.8299, "encoder_q-layer.3": 318.4363, "encoder_q-layer.4": 350.7294, "encoder_q-layer.5": 317.9522, "encoder_q-layer.6": 313.2959, "encoder_q-layer.7": 325.9909, "encoder_q-layer.8": 347.3604, "encoder_q-layer.9": 291.961, "epoch": 0.27, "inbatch_neg_score": 0.1659, "inbatch_pos_score": 0.8545, "learning_rate": 4.0333333333333336e-05, "loss": 3.4856, "norm_diff": 0.0166, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 511.8589, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1659, "query_norm": 1.3852, "queue_k_norm": 1.3906, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8918, "sent_len_1": 67.0359, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4038, "stdk": 0.0483, "stdq": 0.046, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 27400 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.465, "doc_norm": 1.3867, "encoder_q-embeddings": 601.6387, "encoder_q-layer.0": 402.3464, "encoder_q-layer.1": 462.7727, "encoder_q-layer.10": 333.6154, "encoder_q-layer.11": 673.7175, "encoder_q-layer.2": 541.8199, "encoder_q-layer.3": 632.8795, "encoder_q-layer.4": 666.3254, "encoder_q-layer.5": 629.3623, "encoder_q-layer.6": 626.1603, "encoder_q-layer.7": 617.8134, "encoder_q-layer.8": 605.3142, "encoder_q-layer.9": 419.7522, "epoch": 0.27, "inbatch_neg_score": 0.1666, "inbatch_pos_score": 0.8027, "learning_rate": 4.027777777777778e-05, "loss": 3.465, "norm_diff": 0.0151, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 833.7951, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1659, "query_norm": 1.3752, "queue_k_norm": 1.3914, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8513, "sent_len_1": 66.747, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8275, "stdk": 0.0477, "stdq": 0.0453, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 27500 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.4982, "doc_norm": 1.3872, "encoder_q-embeddings": 346.261, "encoder_q-layer.0": 230.2973, "encoder_q-layer.1": 243.5777, "encoder_q-layer.10": 317.7522, "encoder_q-layer.11": 650.7831, "encoder_q-layer.2": 279.2197, "encoder_q-layer.3": 289.3611, "encoder_q-layer.4": 307.4388, "encoder_q-layer.5": 325.4483, "encoder_q-layer.6": 379.2369, "encoder_q-layer.7": 389.2622, "encoder_q-layer.8": 405.872, "encoder_q-layer.9": 331.5341, "epoch": 0.27, "inbatch_neg_score": 0.1668, "inbatch_pos_score": 0.7686, "learning_rate": 4.022222222222222e-05, "loss": 3.4982, "norm_diff": 0.0137, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 530.6624, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.166, "query_norm": 1.378, "queue_k_norm": 1.3911, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.2192, "sent_len_1": 66.7863, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9663, "stdk": 0.0478, "stdq": 0.0452, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 27600 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.4639, "doc_norm": 1.3907, "encoder_q-embeddings": 298.5613, "encoder_q-layer.0": 192.212, "encoder_q-layer.1": 202.6815, "encoder_q-layer.10": 306.3604, "encoder_q-layer.11": 683.1004, "encoder_q-layer.2": 225.2671, "encoder_q-layer.3": 231.7056, "encoder_q-layer.4": 250.2779, "encoder_q-layer.5": 260.3818, "encoder_q-layer.6": 290.6517, "encoder_q-layer.7": 328.9333, "encoder_q-layer.8": 384.2188, "encoder_q-layer.9": 327.0975, "epoch": 0.27, "inbatch_neg_score": 0.1712, "inbatch_pos_score": 0.8047, "learning_rate": 4.016666666666667e-05, "loss": 3.4639, "norm_diff": 0.0068, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 487.6429, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1707, "query_norm": 1.3863, "queue_k_norm": 1.3902, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0202, "sent_len_1": 66.9073, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1312, "stdk": 0.0479, "stdq": 0.0455, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 27700 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.5119, "doc_norm": 1.3952, "encoder_q-embeddings": 374.5482, "encoder_q-layer.0": 240.9914, "encoder_q-layer.1": 265.3622, "encoder_q-layer.10": 315.3367, "encoder_q-layer.11": 668.0703, "encoder_q-layer.2": 282.7263, "encoder_q-layer.3": 278.5398, "encoder_q-layer.4": 278.1819, "encoder_q-layer.5": 283.6913, "encoder_q-layer.6": 305.9671, "encoder_q-layer.7": 324.252, "encoder_q-layer.8": 359.6732, "encoder_q-layer.9": 319.6258, "epoch": 0.27, "inbatch_neg_score": 0.1746, "inbatch_pos_score": 0.8281, "learning_rate": 4.011111111111111e-05, "loss": 3.5119, "norm_diff": 0.0216, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 520.9467, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1749, "query_norm": 1.3736, "queue_k_norm": 1.3894, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9542, "sent_len_1": 66.8313, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6525, "stdk": 0.0481, "stdq": 0.0449, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 27800 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.4944, "doc_norm": 1.3905, "encoder_q-embeddings": 346.4968, "encoder_q-layer.0": 217.0985, "encoder_q-layer.1": 239.3578, "encoder_q-layer.10": 302.6161, "encoder_q-layer.11": 666.8978, "encoder_q-layer.2": 262.0471, "encoder_q-layer.3": 262.0487, "encoder_q-layer.4": 263.7651, "encoder_q-layer.5": 274.7829, "encoder_q-layer.6": 306.566, "encoder_q-layer.7": 359.2784, "encoder_q-layer.8": 376.7516, "encoder_q-layer.9": 311.2013, "epoch": 0.27, "inbatch_neg_score": 0.184, "inbatch_pos_score": 0.8296, "learning_rate": 4.0055555555555554e-05, "loss": 3.4944, "norm_diff": 0.0084, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 516.9232, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1833, "query_norm": 1.3841, "queue_k_norm": 1.3922, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0764, "sent_len_1": 66.92, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8887, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 27900 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.5004, "doc_norm": 1.3894, "encoder_q-embeddings": 343.2004, "encoder_q-layer.0": 221.8574, "encoder_q-layer.1": 252.1581, "encoder_q-layer.10": 296.596, "encoder_q-layer.11": 649.8074, "encoder_q-layer.2": 299.2414, "encoder_q-layer.3": 322.8567, "encoder_q-layer.4": 318.3813, "encoder_q-layer.5": 334.9, "encoder_q-layer.6": 344.2604, "encoder_q-layer.7": 360.5254, "encoder_q-layer.8": 355.1637, "encoder_q-layer.9": 310.8172, "epoch": 0.27, "inbatch_neg_score": 0.1905, "inbatch_pos_score": 0.8203, "learning_rate": 4e-05, "loss": 3.5004, "norm_diff": 0.0126, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 522.2304, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1907, "query_norm": 1.3959, "queue_k_norm": 1.3891, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1867, "sent_len_1": 66.7977, "sent_max_len_0": 128.0, "sent_max_len_1": 189.055, "stdk": 0.0479, "stdq": 0.0451, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 28000 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4727, "doc_norm": 1.3981, "encoder_q-embeddings": 420.0651, "encoder_q-layer.0": 285.7755, "encoder_q-layer.1": 299.4875, "encoder_q-layer.10": 313.2478, "encoder_q-layer.11": 677.5815, "encoder_q-layer.2": 335.0786, "encoder_q-layer.3": 348.6338, "encoder_q-layer.4": 357.3847, "encoder_q-layer.5": 346.0302, "encoder_q-layer.6": 401.2935, "encoder_q-layer.7": 374.0186, "encoder_q-layer.8": 387.6236, "encoder_q-layer.9": 312.1217, "epoch": 0.27, "inbatch_neg_score": 0.1982, "inbatch_pos_score": 0.8403, "learning_rate": 3.9944444444444446e-05, "loss": 3.4727, "norm_diff": 0.0183, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 583.6885, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1968, "query_norm": 1.4163, "queue_k_norm": 1.3918, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9572, "sent_len_1": 67.1188, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1138, "stdk": 0.0481, "stdq": 0.0458, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 28100 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4711, "doc_norm": 1.3929, "encoder_q-embeddings": 347.2731, "encoder_q-layer.0": 221.7388, "encoder_q-layer.1": 233.8527, "encoder_q-layer.10": 317.85, "encoder_q-layer.11": 730.4453, "encoder_q-layer.2": 266.7227, "encoder_q-layer.3": 297.9767, "encoder_q-layer.4": 315.1057, "encoder_q-layer.5": 311.1672, "encoder_q-layer.6": 359.6851, "encoder_q-layer.7": 392.1165, "encoder_q-layer.8": 416.3395, "encoder_q-layer.9": 334.5261, "epoch": 0.28, "inbatch_neg_score": 0.197, "inbatch_pos_score": 0.8213, "learning_rate": 3.9888888888888895e-05, "loss": 3.4711, "norm_diff": 0.0076, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 554.1314, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1968, "query_norm": 1.3947, "queue_k_norm": 1.3934, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9461, "sent_len_1": 66.7811, "sent_max_len_0": 128.0, "sent_max_len_1": 192.3338, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 28200 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.4927, "doc_norm": 1.3934, "encoder_q-embeddings": 857.4952, "encoder_q-layer.0": 595.813, "encoder_q-layer.1": 712.618, "encoder_q-layer.10": 317.4207, "encoder_q-layer.11": 679.4676, "encoder_q-layer.2": 819.0377, "encoder_q-layer.3": 842.0222, "encoder_q-layer.4": 905.3379, "encoder_q-layer.5": 840.8854, "encoder_q-layer.6": 867.9388, "encoder_q-layer.7": 803.2452, "encoder_q-layer.8": 699.1875, "encoder_q-layer.9": 407.1569, "epoch": 0.28, "inbatch_neg_score": 0.1925, "inbatch_pos_score": 0.8345, "learning_rate": 3.983333333333333e-05, "loss": 3.4927, "norm_diff": 0.0118, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1102.284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1925, "query_norm": 1.398, "queue_k_norm": 1.3921, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1289, "sent_len_1": 66.8193, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2763, "stdk": 0.0479, "stdq": 0.0458, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 28300 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.4847, "doc_norm": 1.3902, "encoder_q-embeddings": 3135.0586, "encoder_q-layer.0": 2246.8845, "encoder_q-layer.1": 2670.4297, "encoder_q-layer.10": 713.8407, "encoder_q-layer.11": 1416.9149, "encoder_q-layer.2": 3422.9207, "encoder_q-layer.3": 3409.24, "encoder_q-layer.4": 3381.0413, "encoder_q-layer.5": 3364.0869, "encoder_q-layer.6": 3218.1396, "encoder_q-layer.7": 3735.9138, "encoder_q-layer.8": 2933.6494, "encoder_q-layer.9": 1365.4336, "epoch": 0.28, "inbatch_neg_score": 0.1797, "inbatch_pos_score": 0.8198, "learning_rate": 3.977777777777778e-05, "loss": 3.4847, "norm_diff": 0.0174, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4235.2498, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1797, "query_norm": 1.3825, "queue_k_norm": 1.3946, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1203, "sent_len_1": 66.9696, "sent_max_len_0": 128.0, "sent_max_len_1": 187.1763, "stdk": 0.0478, "stdq": 0.0456, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 28400 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.4563, "doc_norm": 1.3898, "encoder_q-embeddings": 598.5494, "encoder_q-layer.0": 383.8035, "encoder_q-layer.1": 419.5444, "encoder_q-layer.10": 596.1348, "encoder_q-layer.11": 1339.6506, "encoder_q-layer.2": 478.8961, "encoder_q-layer.3": 493.0173, "encoder_q-layer.4": 540.1423, "encoder_q-layer.5": 527.1987, "encoder_q-layer.6": 573.7214, "encoder_q-layer.7": 645.0833, "encoder_q-layer.8": 707.3516, "encoder_q-layer.9": 628.204, "epoch": 0.28, "inbatch_neg_score": 0.1698, "inbatch_pos_score": 0.811, "learning_rate": 3.972222222222222e-05, "loss": 3.4563, "norm_diff": 0.0262, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 977.867, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1696, "query_norm": 1.3673, "queue_k_norm": 1.3928, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8099, "sent_len_1": 66.6719, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6275, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 28500 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.481, "doc_norm": 1.3925, "encoder_q-embeddings": 784.5367, "encoder_q-layer.0": 529.4267, "encoder_q-layer.1": 598.5735, "encoder_q-layer.10": 599.4656, "encoder_q-layer.11": 1400.8477, "encoder_q-layer.2": 677.0276, "encoder_q-layer.3": 708.3558, "encoder_q-layer.4": 744.191, "encoder_q-layer.5": 733.0649, "encoder_q-layer.6": 774.0969, "encoder_q-layer.7": 884.2452, "encoder_q-layer.8": 983.4915, "encoder_q-layer.9": 750.6884, "epoch": 0.28, "inbatch_neg_score": 0.1678, "inbatch_pos_score": 0.7861, "learning_rate": 3.966666666666667e-05, "loss": 3.481, "norm_diff": 0.0265, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1211.8104, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1682, "query_norm": 1.3681, "queue_k_norm": 1.3947, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8048, "sent_len_1": 66.754, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8438, "stdk": 0.0479, "stdq": 0.0456, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 28600 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.4683, "doc_norm": 1.3824, "encoder_q-embeddings": 731.1481, "encoder_q-layer.0": 487.1625, "encoder_q-layer.1": 530.7601, "encoder_q-layer.10": 598.067, "encoder_q-layer.11": 1432.0481, "encoder_q-layer.2": 610.6943, "encoder_q-layer.3": 665.2195, "encoder_q-layer.4": 779.403, "encoder_q-layer.5": 765.603, "encoder_q-layer.6": 897.1092, "encoder_q-layer.7": 886.6963, "encoder_q-layer.8": 808.048, "encoder_q-layer.9": 662.9845, "epoch": 0.28, "inbatch_neg_score": 0.1629, "inbatch_pos_score": 0.8052, "learning_rate": 3.961111111111111e-05, "loss": 3.4683, "norm_diff": 0.0177, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1177.5492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1631, "query_norm": 1.3647, "queue_k_norm": 1.3941, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9124, "sent_len_1": 66.7057, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6488, "stdk": 0.0475, "stdq": 0.0454, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 28700 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.468, "doc_norm": 1.3916, "encoder_q-embeddings": 794.1321, "encoder_q-layer.0": 561.8221, "encoder_q-layer.1": 612.3479, "encoder_q-layer.10": 572.9445, "encoder_q-layer.11": 1308.3022, "encoder_q-layer.2": 679.9037, "encoder_q-layer.3": 695.8387, "encoder_q-layer.4": 730.5717, "encoder_q-layer.5": 748.0062, "encoder_q-layer.6": 785.1478, "encoder_q-layer.7": 883.6098, "encoder_q-layer.8": 843.8471, "encoder_q-layer.9": 605.913, "epoch": 0.28, "inbatch_neg_score": 0.1551, "inbatch_pos_score": 0.8203, "learning_rate": 3.9555555555555556e-05, "loss": 3.468, "norm_diff": 0.0389, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1158.7019, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1548, "query_norm": 1.3527, "queue_k_norm": 1.3937, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9285, "sent_len_1": 66.7019, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7237, "stdk": 0.0479, "stdq": 0.0452, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 28800 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.489, "doc_norm": 1.3882, "encoder_q-embeddings": 663.7396, "encoder_q-layer.0": 446.0032, "encoder_q-layer.1": 486.0212, "encoder_q-layer.10": 618.6823, "encoder_q-layer.11": 1298.6927, "encoder_q-layer.2": 545.3929, "encoder_q-layer.3": 595.8863, "encoder_q-layer.4": 615.9988, "encoder_q-layer.5": 649.7404, "encoder_q-layer.6": 731.676, "encoder_q-layer.7": 858.8658, "encoder_q-layer.8": 782.895, "encoder_q-layer.9": 632.9567, "epoch": 0.28, "inbatch_neg_score": 0.1519, "inbatch_pos_score": 0.7959, "learning_rate": 3.9500000000000005e-05, "loss": 3.489, "norm_diff": 0.0355, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1062.4589, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1517, "query_norm": 1.3527, "queue_k_norm": 1.3916, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0138, "sent_len_1": 66.5179, "sent_max_len_0": 128.0, "sent_max_len_1": 186.4175, "stdk": 0.0478, "stdq": 0.0453, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 28900 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.433, "doc_norm": 1.3906, "encoder_q-embeddings": 593.0281, "encoder_q-layer.0": 368.2375, "encoder_q-layer.1": 376.9623, "encoder_q-layer.10": 644.426, "encoder_q-layer.11": 1375.1624, "encoder_q-layer.2": 400.4505, "encoder_q-layer.3": 426.1071, "encoder_q-layer.4": 455.6331, "encoder_q-layer.5": 464.2518, "encoder_q-layer.6": 539.9892, "encoder_q-layer.7": 643.7612, "encoder_q-layer.8": 709.8179, "encoder_q-layer.9": 610.7318, "epoch": 0.28, "inbatch_neg_score": 0.1431, "inbatch_pos_score": 0.7734, "learning_rate": 3.944444444444445e-05, "loss": 3.433, "norm_diff": 0.0658, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 957.2409, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1429, "query_norm": 1.3248, "queue_k_norm": 1.3912, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7859, "sent_len_1": 66.7305, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8512, "stdk": 0.0479, "stdq": 0.0445, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 29000 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.4714, "doc_norm": 1.3887, "encoder_q-embeddings": 1627.2789, "encoder_q-layer.0": 1162.1538, "encoder_q-layer.1": 1252.764, "encoder_q-layer.10": 581.7797, "encoder_q-layer.11": 1317.1597, "encoder_q-layer.2": 1412.2751, "encoder_q-layer.3": 1608.6775, "encoder_q-layer.4": 1751.0814, "encoder_q-layer.5": 1972.1448, "encoder_q-layer.6": 2176.2786, "encoder_q-layer.7": 2580.6807, "encoder_q-layer.8": 1565.8423, "encoder_q-layer.9": 640.9696, "epoch": 0.28, "inbatch_neg_score": 0.1501, "inbatch_pos_score": 0.7598, "learning_rate": 3.938888888888889e-05, "loss": 3.4714, "norm_diff": 0.0553, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2427.2139, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1499, "query_norm": 1.3334, "queue_k_norm": 1.3896, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7663, "sent_len_1": 66.8876, "sent_max_len_0": 128.0, "sent_max_len_1": 190.58, "stdk": 0.0479, "stdq": 0.0445, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 29100 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.4799, "doc_norm": 1.3943, "encoder_q-embeddings": 751.0612, "encoder_q-layer.0": 530.5705, "encoder_q-layer.1": 568.816, "encoder_q-layer.10": 626.2019, "encoder_q-layer.11": 1446.153, "encoder_q-layer.2": 651.2855, "encoder_q-layer.3": 638.7267, "encoder_q-layer.4": 649.0042, "encoder_q-layer.5": 651.2932, "encoder_q-layer.6": 689.9622, "encoder_q-layer.7": 732.9716, "encoder_q-layer.8": 733.8787, "encoder_q-layer.9": 602.8826, "epoch": 0.29, "inbatch_neg_score": 0.1485, "inbatch_pos_score": 0.7666, "learning_rate": 3.933333333333333e-05, "loss": 3.4799, "norm_diff": 0.057, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1140.2813, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1487, "query_norm": 1.3373, "queue_k_norm": 1.3912, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1043, "sent_len_1": 66.9059, "sent_max_len_0": 128.0, "sent_max_len_1": 190.145, "stdk": 0.0481, "stdq": 0.0446, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 29200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4361, "doc_norm": 1.3888, "encoder_q-embeddings": 726.0633, "encoder_q-layer.0": 485.118, "encoder_q-layer.1": 511.9567, "encoder_q-layer.10": 655.6023, "encoder_q-layer.11": 1334.6447, "encoder_q-layer.2": 541.5114, "encoder_q-layer.3": 601.9948, "encoder_q-layer.4": 616.5939, "encoder_q-layer.5": 631.6752, "encoder_q-layer.6": 700.851, "encoder_q-layer.7": 793.55, "encoder_q-layer.8": 806.941, "encoder_q-layer.9": 642.6925, "epoch": 0.29, "inbatch_neg_score": 0.1445, "inbatch_pos_score": 0.7871, "learning_rate": 3.927777777777778e-05, "loss": 3.4361, "norm_diff": 0.0191, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1082.0113, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1444, "query_norm": 1.3696, "queue_k_norm": 1.3898, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9393, "sent_len_1": 66.939, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7237, "stdk": 0.048, "stdq": 0.0459, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 29300 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4679, "doc_norm": 1.3838, "encoder_q-embeddings": 655.6889, "encoder_q-layer.0": 426.4973, "encoder_q-layer.1": 471.1734, "encoder_q-layer.10": 629.3854, "encoder_q-layer.11": 1416.5835, "encoder_q-layer.2": 549.6725, "encoder_q-layer.3": 588.0253, "encoder_q-layer.4": 615.4957, "encoder_q-layer.5": 671.6792, "encoder_q-layer.6": 746.4229, "encoder_q-layer.7": 806.4343, "encoder_q-layer.8": 896.5707, "encoder_q-layer.9": 677.0565, "epoch": 0.29, "inbatch_neg_score": 0.1338, "inbatch_pos_score": 0.7559, "learning_rate": 3.922222222222223e-05, "loss": 3.4679, "norm_diff": 0.0589, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1110.5559, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1338, "query_norm": 1.3249, "queue_k_norm": 1.3881, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0486, "sent_len_1": 66.599, "sent_max_len_0": 128.0, "sent_max_len_1": 190.66, "stdk": 0.0478, "stdq": 0.0445, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 29400 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4478, "doc_norm": 1.3855, "encoder_q-embeddings": 1402.8934, "encoder_q-layer.0": 984.7115, "encoder_q-layer.1": 1171.4819, "encoder_q-layer.10": 579.3382, "encoder_q-layer.11": 1273.7118, "encoder_q-layer.2": 1389.9948, "encoder_q-layer.3": 1529.9806, "encoder_q-layer.4": 1624.176, "encoder_q-layer.5": 1571.2291, "encoder_q-layer.6": 1624.928, "encoder_q-layer.7": 1605.8124, "encoder_q-layer.8": 1354.942, "encoder_q-layer.9": 810.9839, "epoch": 0.29, "inbatch_neg_score": 0.1232, "inbatch_pos_score": 0.7441, "learning_rate": 3.9166666666666665e-05, "loss": 3.4478, "norm_diff": 0.0546, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1983.6659, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.123, "query_norm": 1.3309, "queue_k_norm": 1.386, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8987, "sent_len_1": 66.5796, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6312, "stdk": 0.0479, "stdq": 0.0449, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 29500 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.4611, "doc_norm": 1.389, "encoder_q-embeddings": 1400.8192, "encoder_q-layer.0": 925.5343, "encoder_q-layer.1": 987.3005, "encoder_q-layer.10": 640.984, "encoder_q-layer.11": 1326.5619, "encoder_q-layer.2": 1079.6589, "encoder_q-layer.3": 1164.7699, "encoder_q-layer.4": 1259.8539, "encoder_q-layer.5": 1133.5531, "encoder_q-layer.6": 1290.8114, "encoder_q-layer.7": 1047.1473, "encoder_q-layer.8": 967.5212, "encoder_q-layer.9": 701.904, "epoch": 0.29, "inbatch_neg_score": 0.1234, "inbatch_pos_score": 0.8291, "learning_rate": 3.9111111111111115e-05, "loss": 3.4611, "norm_diff": 0.0145, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1656.8074, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1227, "query_norm": 1.3954, "queue_k_norm": 1.3873, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0617, "sent_len_1": 67.123, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0012, "stdk": 0.0481, "stdq": 0.0472, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 29600 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.4514, "doc_norm": 1.3807, "encoder_q-embeddings": 285.1942, "encoder_q-layer.0": 185.1839, "encoder_q-layer.1": 195.8015, "encoder_q-layer.10": 305.9779, "encoder_q-layer.11": 656.3795, "encoder_q-layer.2": 221.0107, "encoder_q-layer.3": 226.7665, "encoder_q-layer.4": 245.8741, "encoder_q-layer.5": 239.0941, "encoder_q-layer.6": 266.4803, "encoder_q-layer.7": 313.1242, "encoder_q-layer.8": 345.7126, "encoder_q-layer.9": 309.1768, "epoch": 0.29, "inbatch_neg_score": 0.1178, "inbatch_pos_score": 0.7793, "learning_rate": 3.905555555555556e-05, "loss": 3.4514, "norm_diff": 0.0188, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 463.5834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1172, "query_norm": 1.3623, "queue_k_norm": 1.3885, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0198, "sent_len_1": 66.9426, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1987, "stdk": 0.0478, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 29700 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.4614, "doc_norm": 1.3843, "encoder_q-embeddings": 442.89, "encoder_q-layer.0": 291.0706, "encoder_q-layer.1": 311.4517, "encoder_q-layer.10": 330.1254, "encoder_q-layer.11": 672.4741, "encoder_q-layer.2": 373.0616, "encoder_q-layer.3": 392.0553, "encoder_q-layer.4": 449.9597, "encoder_q-layer.5": 474.0485, "encoder_q-layer.6": 499.3018, "encoder_q-layer.7": 441.2113, "encoder_q-layer.8": 417.8773, "encoder_q-layer.9": 331.8708, "epoch": 0.29, "inbatch_neg_score": 0.1191, "inbatch_pos_score": 0.7588, "learning_rate": 3.9000000000000006e-05, "loss": 3.4614, "norm_diff": 0.0651, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 634.4898, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.119, "query_norm": 1.3192, "queue_k_norm": 1.3857, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0186, "sent_len_1": 67.0511, "sent_max_len_0": 128.0, "sent_max_len_1": 190.395, "stdk": 0.048, "stdq": 0.044, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 29800 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.4574, "doc_norm": 1.3805, "encoder_q-embeddings": 317.4329, "encoder_q-layer.0": 191.2176, "encoder_q-layer.1": 201.3423, "encoder_q-layer.10": 320.4917, "encoder_q-layer.11": 652.6097, "encoder_q-layer.2": 219.5969, "encoder_q-layer.3": 233.0954, "encoder_q-layer.4": 255.7206, "encoder_q-layer.5": 261.2845, "encoder_q-layer.6": 311.1333, "encoder_q-layer.7": 355.8026, "encoder_q-layer.8": 381.3768, "encoder_q-layer.9": 317.1201, "epoch": 0.29, "inbatch_neg_score": 0.111, "inbatch_pos_score": 0.729, "learning_rate": 3.894444444444444e-05, "loss": 3.4574, "norm_diff": 0.0198, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 492.603, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1116, "query_norm": 1.3607, "queue_k_norm": 1.3848, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8698, "sent_len_1": 66.9204, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6937, "stdk": 0.0479, "stdq": 0.0454, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 29900 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.4547, "doc_norm": 1.3883, "encoder_q-embeddings": 605.2714, "encoder_q-layer.0": 402.2841, "encoder_q-layer.1": 467.2162, "encoder_q-layer.10": 288.9337, "encoder_q-layer.11": 626.6219, "encoder_q-layer.2": 604.9283, "encoder_q-layer.3": 554.6036, "encoder_q-layer.4": 570.2275, "encoder_q-layer.5": 541.5269, "encoder_q-layer.6": 586.2702, "encoder_q-layer.7": 531.697, "encoder_q-layer.8": 573.3337, "encoder_q-layer.9": 356.4286, "epoch": 0.29, "inbatch_neg_score": 0.1085, "inbatch_pos_score": 0.7568, "learning_rate": 3.888888888888889e-05, "loss": 3.4547, "norm_diff": 0.0118, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 798.7838, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1072, "query_norm": 1.3764, "queue_k_norm": 1.3822, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7597, "sent_len_1": 66.7155, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3025, "stdk": 0.0482, "stdq": 0.0456, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 30000 }, { "dev_runtime": 38.1229, "dev_samples_per_second": 1.679, "dev_steps_per_second": 0.026, "epoch": 0.29, "step": 30000, "test_accuracy": 94.0185546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3498751223087311, "test_doc_norm": 1.380068063735962, "test_inbatch_neg_score": 0.46064990758895874, "test_inbatch_pos_score": 1.4007467031478882, "test_loss": 0.3498751223087311, "test_loss_align": 1.2858573198318481, "test_loss_unif": 3.963012218475342, "test_loss_unif_q@queue": 3.963012218475342, "test_norm_diff": 0.08707223832607269, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.08252152800559998, "test_query_norm": 1.4671404361724854, "test_queue_k_norm": 1.3822989463806152, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04247192665934563, "test_stdq": 0.04322770610451698, "test_stdqueue_k": 0.04808451980352402, "test_stdqueue_q": 0.0 }, { "dev_runtime": 38.1229, "dev_samples_per_second": 1.679, "dev_steps_per_second": 0.026, "epoch": 0.29, "eval_beir-arguana_ndcg@10": 0.37221, "eval_beir-arguana_recall@10": 0.62518, "eval_beir-arguana_recall@100": 0.90967, "eval_beir-arguana_recall@20": 0.75818, "eval_beir-avg_ndcg@10": 0.3373613333333333, "eval_beir-avg_recall@10": 0.402616, "eval_beir-avg_recall@100": 0.578604, "eval_beir-avg_recall@20": 0.4599897499999999, "eval_beir-cqadupstack_ndcg@10": 0.2407933333333334, "eval_beir-cqadupstack_recall@10": 0.32663000000000003, "eval_beir-cqadupstack_recall@100": 0.55051, "eval_beir-cqadupstack_recall@20": 0.39045749999999996, "eval_beir-fiqa_ndcg@10": 0.16827, "eval_beir-fiqa_recall@10": 0.21612, "eval_beir-fiqa_recall@100": 0.45689, "eval_beir-fiqa_recall@20": 0.28102, "eval_beir-nfcorpus_ndcg@10": 0.2608, "eval_beir-nfcorpus_recall@10": 0.12924, "eval_beir-nfcorpus_recall@100": 0.25164, "eval_beir-nfcorpus_recall@20": 0.15714, "eval_beir-nq_ndcg@10": 0.2449, "eval_beir-nq_recall@10": 0.4052, "eval_beir-nq_recall@100": 0.75135, "eval_beir-nq_recall@20": 0.51972, "eval_beir-quora_ndcg@10": 0.791, "eval_beir-quora_recall@10": 0.89303, "eval_beir-quora_recall@100": 0.97892, "eval_beir-quora_recall@20": 0.93155, "eval_beir-scidocs_ndcg@10": 0.12623, "eval_beir-scidocs_recall@10": 0.13348, "eval_beir-scidocs_recall@100": 0.32103, "eval_beir-scidocs_recall@20": 0.18533, "eval_beir-scifact_ndcg@10": 0.59499, "eval_beir-scifact_recall@10": 0.73833, "eval_beir-scifact_recall@100": 0.89189, "eval_beir-scifact_recall@20": 0.78833, "eval_beir-trec-covid_ndcg@10": 0.40669, "eval_beir-trec-covid_recall@10": 0.424, "eval_beir-trec-covid_recall@100": 0.3042, "eval_beir-trec-covid_recall@20": 0.39, "eval_beir-webis-touche2020_ndcg@10": 0.16773, "eval_beir-webis-touche2020_recall@10": 0.13495, "eval_beir-webis-touche2020_recall@100": 0.36994, "eval_beir-webis-touche2020_recall@20": 0.19817, "eval_senteval-avg_sts": 0.7341376697602899, "eval_senteval-sickr_spearman": 0.7004668032127727, "eval_senteval-stsb_spearman": 0.7678085363078071, "step": 30000, "test_accuracy": 94.0185546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3498751223087311, "test_doc_norm": 1.380068063735962, "test_inbatch_neg_score": 0.46064990758895874, "test_inbatch_pos_score": 1.4007467031478882, "test_loss": 0.3498751223087311, "test_loss_align": 1.2858573198318481, "test_loss_unif": 3.963012218475342, "test_loss_unif_q@queue": 3.963012218475342, "test_norm_diff": 0.08707223832607269, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.08252152800559998, "test_query_norm": 1.4671404361724854, "test_queue_k_norm": 1.3822989463806152, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04247192665934563, "test_stdq": 0.04322770610451698, "test_stdqueue_k": 0.04808451980352402, "test_stdqueue_q": 0.0 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.454, "doc_norm": 1.3866, "encoder_q-embeddings": 280.5482, "encoder_q-layer.0": 178.5174, "encoder_q-layer.1": 194.6327, "encoder_q-layer.10": 374.2943, "encoder_q-layer.11": 668.9407, "encoder_q-layer.2": 218.0716, "encoder_q-layer.3": 231.2326, "encoder_q-layer.4": 245.5285, "encoder_q-layer.5": 254.2705, "encoder_q-layer.6": 303.0859, "encoder_q-layer.7": 342.4812, "encoder_q-layer.8": 381.1265, "encoder_q-layer.9": 351.576, "epoch": 0.29, "inbatch_neg_score": 0.0937, "inbatch_pos_score": 0.7788, "learning_rate": 3.883333333333333e-05, "loss": 3.454, "norm_diff": 0.0499, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 482.87, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0936, "query_norm": 1.4364, "queue_k_norm": 1.3843, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0609, "sent_len_1": 66.9788, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2587, "stdk": 0.0482, "stdq": 0.0473, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 30100 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.4633, "doc_norm": 1.3817, "encoder_q-embeddings": 368.3238, "encoder_q-layer.0": 249.5048, "encoder_q-layer.1": 272.9344, "encoder_q-layer.10": 334.1044, "encoder_q-layer.11": 654.7311, "encoder_q-layer.2": 311.6733, "encoder_q-layer.3": 336.2488, "encoder_q-layer.4": 361.3743, "encoder_q-layer.5": 370.3396, "encoder_q-layer.6": 394.8345, "encoder_q-layer.7": 388.946, "encoder_q-layer.8": 422.5164, "encoder_q-layer.9": 341.915, "epoch": 0.29, "inbatch_neg_score": 0.1006, "inbatch_pos_score": 0.7207, "learning_rate": 3.877777777777778e-05, "loss": 3.4633, "norm_diff": 0.006, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 560.2296, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1001, "query_norm": 1.3803, "queue_k_norm": 1.3807, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.6236, "sent_len_1": 66.7555, "sent_max_len_0": 128.0, "sent_max_len_1": 191.4625, "stdk": 0.0481, "stdq": 0.0455, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 30200 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.4371, "doc_norm": 1.3832, "encoder_q-embeddings": 341.3515, "encoder_q-layer.0": 233.5754, "encoder_q-layer.1": 245.5852, "encoder_q-layer.10": 288.7607, "encoder_q-layer.11": 613.0705, "encoder_q-layer.2": 278.2746, "encoder_q-layer.3": 289.2029, "encoder_q-layer.4": 329.1838, "encoder_q-layer.5": 336.2146, "encoder_q-layer.6": 388.6993, "encoder_q-layer.7": 400.1411, "encoder_q-layer.8": 398.3086, "encoder_q-layer.9": 301.0706, "epoch": 0.3, "inbatch_neg_score": 0.1121, "inbatch_pos_score": 0.791, "learning_rate": 3.8722222222222225e-05, "loss": 3.4371, "norm_diff": 0.0076, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 526.5611, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1111, "query_norm": 1.3885, "queue_k_norm": 1.3807, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9858, "sent_len_1": 66.7527, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1337, "stdk": 0.0482, "stdq": 0.0461, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 30300 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4571, "doc_norm": 1.3788, "encoder_q-embeddings": 474.1507, "encoder_q-layer.0": 343.4873, "encoder_q-layer.1": 385.6761, "encoder_q-layer.10": 303.8468, "encoder_q-layer.11": 672.322, "encoder_q-layer.2": 472.5108, "encoder_q-layer.3": 508.7935, "encoder_q-layer.4": 565.8071, "encoder_q-layer.5": 611.9758, "encoder_q-layer.6": 634.4288, "encoder_q-layer.7": 569.3544, "encoder_q-layer.8": 475.5955, "encoder_q-layer.9": 337.5597, "epoch": 0.3, "inbatch_neg_score": 0.1108, "inbatch_pos_score": 0.7661, "learning_rate": 3.866666666666667e-05, "loss": 3.4571, "norm_diff": 0.0087, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 731.7751, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1103, "query_norm": 1.3743, "queue_k_norm": 1.3788, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9136, "sent_len_1": 66.8388, "sent_max_len_0": 128.0, "sent_max_len_1": 190.795, "stdk": 0.048, "stdq": 0.0459, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 30400 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.433, "doc_norm": 1.371, "encoder_q-embeddings": 296.9214, "encoder_q-layer.0": 192.3117, "encoder_q-layer.1": 213.067, "encoder_q-layer.10": 277.5073, "encoder_q-layer.11": 630.6231, "encoder_q-layer.2": 232.1652, "encoder_q-layer.3": 253.053, "encoder_q-layer.4": 284.0829, "encoder_q-layer.5": 282.1805, "encoder_q-layer.6": 313.1195, "encoder_q-layer.7": 369.0649, "encoder_q-layer.8": 399.6806, "encoder_q-layer.9": 322.2369, "epoch": 0.3, "inbatch_neg_score": 0.1137, "inbatch_pos_score": 0.7627, "learning_rate": 3.8611111111111116e-05, "loss": 3.433, "norm_diff": 0.0234, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 491.4719, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1135, "query_norm": 1.3476, "queue_k_norm": 1.3798, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0483, "sent_len_1": 66.9817, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9075, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 30500 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.4395, "doc_norm": 1.3794, "encoder_q-embeddings": 278.8021, "encoder_q-layer.0": 181.3715, "encoder_q-layer.1": 193.0242, "encoder_q-layer.10": 293.9362, "encoder_q-layer.11": 667.9576, "encoder_q-layer.2": 212.4348, "encoder_q-layer.3": 227.884, "encoder_q-layer.4": 233.6788, "encoder_q-layer.5": 249.8927, "encoder_q-layer.6": 278.4362, "encoder_q-layer.7": 315.1787, "encoder_q-layer.8": 361.655, "encoder_q-layer.9": 314.8848, "epoch": 0.3, "inbatch_neg_score": 0.1203, "inbatch_pos_score": 0.772, "learning_rate": 3.855555555555556e-05, "loss": 3.4395, "norm_diff": 0.0329, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 471.6485, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1202, "query_norm": 1.3465, "queue_k_norm": 1.3814, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8753, "sent_len_1": 66.7127, "sent_max_len_0": 128.0, "sent_max_len_1": 192.475, "stdk": 0.0481, "stdq": 0.0455, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 30600 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.4249, "doc_norm": 1.3824, "encoder_q-embeddings": 366.9658, "encoder_q-layer.0": 244.4004, "encoder_q-layer.1": 260.4187, "encoder_q-layer.10": 301.1602, "encoder_q-layer.11": 620.4222, "encoder_q-layer.2": 294.6967, "encoder_q-layer.3": 296.3588, "encoder_q-layer.4": 316.1601, "encoder_q-layer.5": 315.5422, "encoder_q-layer.6": 350.979, "encoder_q-layer.7": 362.6467, "encoder_q-layer.8": 410.5288, "encoder_q-layer.9": 325.3217, "epoch": 0.3, "inbatch_neg_score": 0.1153, "inbatch_pos_score": 0.7446, "learning_rate": 3.85e-05, "loss": 3.4249, "norm_diff": 0.0546, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 529.2165, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1146, "query_norm": 1.3279, "queue_k_norm": 1.3798, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9558, "sent_len_1": 66.9005, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2163, "stdk": 0.0482, "stdq": 0.0449, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 30700 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.4597, "doc_norm": 1.3776, "encoder_q-embeddings": 320.7915, "encoder_q-layer.0": 203.5895, "encoder_q-layer.1": 226.0952, "encoder_q-layer.10": 286.535, "encoder_q-layer.11": 619.6965, "encoder_q-layer.2": 250.7513, "encoder_q-layer.3": 261.0804, "encoder_q-layer.4": 275.5554, "encoder_q-layer.5": 273.5303, "encoder_q-layer.6": 313.9961, "encoder_q-layer.7": 345.7357, "encoder_q-layer.8": 390.0846, "encoder_q-layer.9": 310.047, "epoch": 0.3, "inbatch_neg_score": 0.1116, "inbatch_pos_score": 0.7461, "learning_rate": 3.844444444444444e-05, "loss": 3.4597, "norm_diff": 0.0601, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 493.7567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.113, "query_norm": 1.3175, "queue_k_norm": 1.3779, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7002, "sent_len_1": 66.6415, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1863, "stdk": 0.0481, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 30800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.4338, "doc_norm": 1.3863, "encoder_q-embeddings": 272.305, "encoder_q-layer.0": 171.3681, "encoder_q-layer.1": 174.0996, "encoder_q-layer.10": 300.1933, "encoder_q-layer.11": 636.0707, "encoder_q-layer.2": 197.1947, "encoder_q-layer.3": 203.5813, "encoder_q-layer.4": 223.611, "encoder_q-layer.5": 230.891, "encoder_q-layer.6": 262.9292, "encoder_q-layer.7": 305.1797, "encoder_q-layer.8": 345.8303, "encoder_q-layer.9": 312.8163, "epoch": 0.3, "inbatch_neg_score": 0.1151, "inbatch_pos_score": 0.7905, "learning_rate": 3.838888888888889e-05, "loss": 3.4338, "norm_diff": 0.0229, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 450.5826, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1149, "query_norm": 1.3661, "queue_k_norm": 1.3771, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0196, "sent_len_1": 66.5207, "sent_max_len_0": 128.0, "sent_max_len_1": 189.16, "stdk": 0.0484, "stdq": 0.0464, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 30900 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.4217, "doc_norm": 1.3789, "encoder_q-embeddings": 289.8737, "encoder_q-layer.0": 184.4675, "encoder_q-layer.1": 194.3885, "encoder_q-layer.10": 393.9234, "encoder_q-layer.11": 721.4958, "encoder_q-layer.2": 214.0194, "encoder_q-layer.3": 221.8728, "encoder_q-layer.4": 239.303, "encoder_q-layer.5": 242.6071, "encoder_q-layer.6": 288.9218, "encoder_q-layer.7": 341.1543, "encoder_q-layer.8": 420.8401, "encoder_q-layer.9": 401.449, "epoch": 0.3, "inbatch_neg_score": 0.1179, "inbatch_pos_score": 0.7251, "learning_rate": 3.8333333333333334e-05, "loss": 3.4217, "norm_diff": 0.0295, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 504.1768, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1176, "query_norm": 1.3493, "queue_k_norm": 1.3762, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9068, "sent_len_1": 66.6287, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3325, "stdk": 0.0481, "stdq": 0.0458, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 31000 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.424, "doc_norm": 1.3722, "encoder_q-embeddings": 349.7294, "encoder_q-layer.0": 225.4463, "encoder_q-layer.1": 241.314, "encoder_q-layer.10": 294.541, "encoder_q-layer.11": 658.2355, "encoder_q-layer.2": 265.9107, "encoder_q-layer.3": 288.6017, "encoder_q-layer.4": 320.9901, "encoder_q-layer.5": 323.0116, "encoder_q-layer.6": 340.0059, "encoder_q-layer.7": 379.2144, "encoder_q-layer.8": 386.954, "encoder_q-layer.9": 304.0169, "epoch": 0.3, "inbatch_neg_score": 0.1181, "inbatch_pos_score": 0.7368, "learning_rate": 3.827777777777778e-05, "loss": 3.424, "norm_diff": 0.041, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 522.7045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1189, "query_norm": 1.3311, "queue_k_norm": 1.3758, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.6905, "sent_len_1": 66.8089, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6725, "stdk": 0.0479, "stdq": 0.0451, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 31100 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.4188, "doc_norm": 1.3803, "encoder_q-embeddings": 301.32, "encoder_q-layer.0": 200.8216, "encoder_q-layer.1": 207.226, "encoder_q-layer.10": 339.6295, "encoder_q-layer.11": 680.9681, "encoder_q-layer.2": 235.76, "encoder_q-layer.3": 255.7731, "encoder_q-layer.4": 272.9095, "encoder_q-layer.5": 274.7479, "encoder_q-layer.6": 304.4538, "encoder_q-layer.7": 340.4616, "encoder_q-layer.8": 395.2029, "encoder_q-layer.9": 353.8735, "epoch": 0.3, "inbatch_neg_score": 0.126, "inbatch_pos_score": 0.7603, "learning_rate": 3.8222222222222226e-05, "loss": 3.4188, "norm_diff": 0.0574, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 496.2139, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.125, "query_norm": 1.3229, "queue_k_norm": 1.3761, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9287, "sent_len_1": 66.7848, "sent_max_len_0": 128.0, "sent_max_len_1": 191.4187, "stdk": 0.0482, "stdq": 0.0445, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 31200 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.4404, "doc_norm": 1.3811, "encoder_q-embeddings": 855.4653, "encoder_q-layer.0": 584.805, "encoder_q-layer.1": 696.1826, "encoder_q-layer.10": 327.0185, "encoder_q-layer.11": 696.358, "encoder_q-layer.2": 755.5555, "encoder_q-layer.3": 870.929, "encoder_q-layer.4": 917.5845, "encoder_q-layer.5": 968.691, "encoder_q-layer.6": 997.0345, "encoder_q-layer.7": 1005.5869, "encoder_q-layer.8": 899.2949, "encoder_q-layer.9": 446.4858, "epoch": 0.31, "inbatch_neg_score": 0.1296, "inbatch_pos_score": 0.7285, "learning_rate": 3.816666666666667e-05, "loss": 3.4404, "norm_diff": 0.0411, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1166.6489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1284, "query_norm": 1.3399, "queue_k_norm": 1.3764, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.1473, "sent_len_1": 66.7842, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1475, "stdk": 0.0482, "stdq": 0.0451, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 31300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.4255, "doc_norm": 1.3705, "encoder_q-embeddings": 334.2563, "encoder_q-layer.0": 229.487, "encoder_q-layer.1": 249.5126, "encoder_q-layer.10": 286.0422, "encoder_q-layer.11": 627.2976, "encoder_q-layer.2": 282.0812, "encoder_q-layer.3": 308.979, "encoder_q-layer.4": 323.1936, "encoder_q-layer.5": 316.5303, "encoder_q-layer.6": 310.8006, "encoder_q-layer.7": 322.1045, "encoder_q-layer.8": 376.8978, "encoder_q-layer.9": 295.084, "epoch": 0.31, "inbatch_neg_score": 0.1292, "inbatch_pos_score": 0.7832, "learning_rate": 3.811111111111112e-05, "loss": 3.4255, "norm_diff": 0.0135, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 504.6346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1278, "query_norm": 1.3582, "queue_k_norm": 1.3786, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9701, "sent_len_1": 66.8726, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4425, "stdk": 0.0478, "stdq": 0.0457, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 31400 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.4274, "doc_norm": 1.3755, "encoder_q-embeddings": 299.8251, "encoder_q-layer.0": 187.3497, "encoder_q-layer.1": 191.9148, "encoder_q-layer.10": 296.9844, "encoder_q-layer.11": 614.5362, "encoder_q-layer.2": 218.1567, "encoder_q-layer.3": 231.1966, "encoder_q-layer.4": 247.8283, "encoder_q-layer.5": 254.4945, "encoder_q-layer.6": 288.0428, "encoder_q-layer.7": 307.4443, "encoder_q-layer.8": 342.6272, "encoder_q-layer.9": 290.8919, "epoch": 0.31, "inbatch_neg_score": 0.1316, "inbatch_pos_score": 0.7671, "learning_rate": 3.805555555555555e-05, "loss": 3.4274, "norm_diff": 0.0421, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 463.6692, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1315, "query_norm": 1.3333, "queue_k_norm": 1.3789, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0824, "sent_len_1": 67.1225, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6188, "stdk": 0.048, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 31500 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.4064, "doc_norm": 1.3767, "encoder_q-embeddings": 502.3304, "encoder_q-layer.0": 313.8199, "encoder_q-layer.1": 324.5991, "encoder_q-layer.10": 309.5508, "encoder_q-layer.11": 677.4636, "encoder_q-layer.2": 372.6641, "encoder_q-layer.3": 426.833, "encoder_q-layer.4": 462.7313, "encoder_q-layer.5": 485.2301, "encoder_q-layer.6": 512.4251, "encoder_q-layer.7": 561.4656, "encoder_q-layer.8": 501.5811, "encoder_q-layer.9": 331.5261, "epoch": 0.31, "inbatch_neg_score": 0.1344, "inbatch_pos_score": 0.7939, "learning_rate": 3.8e-05, "loss": 3.4064, "norm_diff": 0.0182, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 676.3165, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1343, "query_norm": 1.3602, "queue_k_norm": 1.3797, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8437, "sent_len_1": 66.7214, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2887, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 31600 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.4312, "doc_norm": 1.3846, "encoder_q-embeddings": 697.2238, "encoder_q-layer.0": 451.4753, "encoder_q-layer.1": 471.8613, "encoder_q-layer.10": 600.5973, "encoder_q-layer.11": 1204.605, "encoder_q-layer.2": 564.4144, "encoder_q-layer.3": 606.4318, "encoder_q-layer.4": 700.127, "encoder_q-layer.5": 724.9402, "encoder_q-layer.6": 804.2742, "encoder_q-layer.7": 875.3527, "encoder_q-layer.8": 943.1266, "encoder_q-layer.9": 757.0526, "epoch": 0.31, "inbatch_neg_score": 0.1345, "inbatch_pos_score": 0.7769, "learning_rate": 3.7944444444444444e-05, "loss": 3.4312, "norm_diff": 0.0413, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1115.4323, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.135, "query_norm": 1.3434, "queue_k_norm": 1.3768, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7887, "sent_len_1": 66.7368, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8237, "stdk": 0.0483, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 31700 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.4042, "doc_norm": 1.3751, "encoder_q-embeddings": 641.7238, "encoder_q-layer.0": 425.4843, "encoder_q-layer.1": 460.3211, "encoder_q-layer.10": 609.5402, "encoder_q-layer.11": 1383.5685, "encoder_q-layer.2": 507.6791, "encoder_q-layer.3": 565.1183, "encoder_q-layer.4": 611.7704, "encoder_q-layer.5": 616.5628, "encoder_q-layer.6": 708.8625, "encoder_q-layer.7": 743.7507, "encoder_q-layer.8": 818.7391, "encoder_q-layer.9": 683.9813, "epoch": 0.31, "inbatch_neg_score": 0.1406, "inbatch_pos_score": 0.8027, "learning_rate": 3.7888888888888894e-05, "loss": 3.4042, "norm_diff": 0.01, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1055.8361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1411, "query_norm": 1.3763, "queue_k_norm": 1.3778, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.876, "sent_len_1": 66.6571, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0062, "stdk": 0.0479, "stdq": 0.046, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 31800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.4346, "doc_norm": 1.374, "encoder_q-embeddings": 536.9716, "encoder_q-layer.0": 355.7304, "encoder_q-layer.1": 371.3701, "encoder_q-layer.10": 579.4551, "encoder_q-layer.11": 1261.6665, "encoder_q-layer.2": 415.0086, "encoder_q-layer.3": 438.7644, "encoder_q-layer.4": 464.7516, "encoder_q-layer.5": 484.9456, "encoder_q-layer.6": 561.824, "encoder_q-layer.7": 619.3617, "encoder_q-layer.8": 709.6611, "encoder_q-layer.9": 596.8021, "epoch": 0.31, "inbatch_neg_score": 0.1454, "inbatch_pos_score": 0.7832, "learning_rate": 3.7833333333333336e-05, "loss": 3.4346, "norm_diff": 0.0208, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 902.8982, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1443, "query_norm": 1.3532, "queue_k_norm": 1.3818, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9206, "sent_len_1": 66.8313, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5012, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 31900 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.4246, "doc_norm": 1.3774, "encoder_q-embeddings": 995.9264, "encoder_q-layer.0": 622.9302, "encoder_q-layer.1": 673.9819, "encoder_q-layer.10": 675.8527, "encoder_q-layer.11": 1338.1176, "encoder_q-layer.2": 789.348, "encoder_q-layer.3": 887.179, "encoder_q-layer.4": 985.7158, "encoder_q-layer.5": 1081.3486, "encoder_q-layer.6": 1290.0127, "encoder_q-layer.7": 1856.6559, "encoder_q-layer.8": 2312.2422, "encoder_q-layer.9": 1499.7362, "epoch": 0.31, "inbatch_neg_score": 0.1435, "inbatch_pos_score": 0.7603, "learning_rate": 3.777777777777778e-05, "loss": 3.4246, "norm_diff": 0.0147, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1889.2727, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1444, "query_norm": 1.3627, "queue_k_norm": 1.3787, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7285, "sent_len_1": 66.7858, "sent_max_len_0": 128.0, "sent_max_len_1": 187.2375, "stdk": 0.048, "stdq": 0.0453, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 32000 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.4246, "doc_norm": 1.3754, "encoder_q-embeddings": 538.1599, "encoder_q-layer.0": 344.7924, "encoder_q-layer.1": 364.9943, "encoder_q-layer.10": 623.8983, "encoder_q-layer.11": 1267.0154, "encoder_q-layer.2": 403.6741, "encoder_q-layer.3": 422.6486, "encoder_q-layer.4": 452.7309, "encoder_q-layer.5": 446.3288, "encoder_q-layer.6": 506.3129, "encoder_q-layer.7": 614.0193, "encoder_q-layer.8": 703.0637, "encoder_q-layer.9": 640.0541, "epoch": 0.31, "inbatch_neg_score": 0.1469, "inbatch_pos_score": 0.8374, "learning_rate": 3.772222222222223e-05, "loss": 3.4246, "norm_diff": 0.0193, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 885.3088, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1478, "query_norm": 1.3924, "queue_k_norm": 1.3811, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9603, "sent_len_1": 66.7738, "sent_max_len_0": 128.0, "sent_max_len_1": 187.59, "stdk": 0.0479, "stdq": 0.0463, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 32100 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.3798, "doc_norm": 1.3848, "encoder_q-embeddings": 781.974, "encoder_q-layer.0": 509.3951, "encoder_q-layer.1": 579.3201, "encoder_q-layer.10": 582.0287, "encoder_q-layer.11": 1336.6405, "encoder_q-layer.2": 643.1027, "encoder_q-layer.3": 706.2607, "encoder_q-layer.4": 697.3978, "encoder_q-layer.5": 769.8038, "encoder_q-layer.6": 889.5179, "encoder_q-layer.7": 915.5873, "encoder_q-layer.8": 946.3196, "encoder_q-layer.9": 633.7072, "epoch": 0.31, "inbatch_neg_score": 0.1503, "inbatch_pos_score": 0.75, "learning_rate": 3.766666666666667e-05, "loss": 3.3798, "norm_diff": 0.0403, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1201.5055, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1487, "query_norm": 1.3445, "queue_k_norm": 1.3824, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9209, "sent_len_1": 67.0209, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6562, "stdk": 0.0482, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 32200 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.4026, "doc_norm": 1.3838, "encoder_q-embeddings": 1045.9771, "encoder_q-layer.0": 715.9929, "encoder_q-layer.1": 860.4742, "encoder_q-layer.10": 580.7891, "encoder_q-layer.11": 1304.5306, "encoder_q-layer.2": 1056.8632, "encoder_q-layer.3": 1207.7693, "encoder_q-layer.4": 1300.9337, "encoder_q-layer.5": 1431.6687, "encoder_q-layer.6": 1316.4879, "encoder_q-layer.7": 1222.7627, "encoder_q-layer.8": 835.1311, "encoder_q-layer.9": 626.16, "epoch": 0.32, "inbatch_neg_score": 0.1518, "inbatch_pos_score": 0.8047, "learning_rate": 3.761111111111111e-05, "loss": 3.4026, "norm_diff": 0.0149, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1577.8123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1508, "query_norm": 1.3754, "queue_k_norm": 1.3821, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.6932, "sent_len_1": 66.7401, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1712, "stdk": 0.0482, "stdq": 0.0456, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 32300 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.3901, "doc_norm": 1.3888, "encoder_q-embeddings": 858.9704, "encoder_q-layer.0": 579.0088, "encoder_q-layer.1": 634.9401, "encoder_q-layer.10": 578.2537, "encoder_q-layer.11": 1265.2333, "encoder_q-layer.2": 731.7856, "encoder_q-layer.3": 849.2943, "encoder_q-layer.4": 884.3734, "encoder_q-layer.5": 878.6885, "encoder_q-layer.6": 1017.0172, "encoder_q-layer.7": 964.452, "encoder_q-layer.8": 891.7581, "encoder_q-layer.9": 656.6426, "epoch": 0.32, "inbatch_neg_score": 0.1558, "inbatch_pos_score": 0.7954, "learning_rate": 3.7555555555555554e-05, "loss": 3.3901, "norm_diff": 0.0103, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1247.8641, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1558, "query_norm": 1.3828, "queue_k_norm": 1.3829, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8021, "sent_len_1": 66.6921, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0037, "stdk": 0.0483, "stdq": 0.0453, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 32400 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.4174, "doc_norm": 1.384, "encoder_q-embeddings": 565.011, "encoder_q-layer.0": 366.8632, "encoder_q-layer.1": 394.1293, "encoder_q-layer.10": 553.558, "encoder_q-layer.11": 1219.5487, "encoder_q-layer.2": 452.4711, "encoder_q-layer.3": 464.6905, "encoder_q-layer.4": 489.0354, "encoder_q-layer.5": 506.0476, "encoder_q-layer.6": 551.5798, "encoder_q-layer.7": 605.963, "encoder_q-layer.8": 682.475, "encoder_q-layer.9": 583.0072, "epoch": 0.32, "inbatch_neg_score": 0.1678, "inbatch_pos_score": 0.8218, "learning_rate": 3.7500000000000003e-05, "loss": 3.4174, "norm_diff": 0.0168, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 899.5454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1677, "query_norm": 1.3988, "queue_k_norm": 1.3844, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9416, "sent_len_1": 66.7314, "sent_max_len_0": 128.0, "sent_max_len_1": 189.475, "stdk": 0.0481, "stdq": 0.0455, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 32500 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.383, "doc_norm": 1.3932, "encoder_q-embeddings": 1228.4341, "encoder_q-layer.0": 831.584, "encoder_q-layer.1": 818.6694, "encoder_q-layer.10": 574.4443, "encoder_q-layer.11": 1265.1036, "encoder_q-layer.2": 961.3421, "encoder_q-layer.3": 994.0351, "encoder_q-layer.4": 1023.3383, "encoder_q-layer.5": 934.7885, "encoder_q-layer.6": 1017.669, "encoder_q-layer.7": 1017.4345, "encoder_q-layer.8": 912.4229, "encoder_q-layer.9": 588.221, "epoch": 0.32, "inbatch_neg_score": 0.1826, "inbatch_pos_score": 0.8374, "learning_rate": 3.7444444444444446e-05, "loss": 3.383, "norm_diff": 0.0238, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1462.1262, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1819, "query_norm": 1.4165, "queue_k_norm": 1.3872, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0163, "sent_len_1": 66.9566, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3587, "stdk": 0.0484, "stdq": 0.0453, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 32600 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.4147, "doc_norm": 1.3867, "encoder_q-embeddings": 629.8568, "encoder_q-layer.0": 401.3384, "encoder_q-layer.1": 437.7686, "encoder_q-layer.10": 669.0426, "encoder_q-layer.11": 1382.1389, "encoder_q-layer.2": 491.6976, "encoder_q-layer.3": 495.2484, "encoder_q-layer.4": 510.4324, "encoder_q-layer.5": 530.4768, "encoder_q-layer.6": 620.1228, "encoder_q-layer.7": 673.8422, "encoder_q-layer.8": 796.6137, "encoder_q-layer.9": 652.9197, "epoch": 0.32, "inbatch_neg_score": 0.1898, "inbatch_pos_score": 0.8145, "learning_rate": 3.738888888888889e-05, "loss": 3.4147, "norm_diff": 0.0312, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 995.748, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1882, "query_norm": 1.4179, "queue_k_norm": 1.3875, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7862, "sent_len_1": 66.907, "sent_max_len_0": 127.9938, "sent_max_len_1": 190.8887, "stdk": 0.0481, "stdq": 0.0454, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 32700 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3845, "doc_norm": 1.3872, "encoder_q-embeddings": 1094.9276, "encoder_q-layer.0": 811.7403, "encoder_q-layer.1": 983.8295, "encoder_q-layer.10": 610.3517, "encoder_q-layer.11": 1298.7789, "encoder_q-layer.2": 1190.8508, "encoder_q-layer.3": 1091.5809, "encoder_q-layer.4": 1107.7832, "encoder_q-layer.5": 1004.8062, "encoder_q-layer.6": 1085.4924, "encoder_q-layer.7": 1025.4967, "encoder_q-layer.8": 888.8201, "encoder_q-layer.9": 657.7963, "epoch": 0.32, "inbatch_neg_score": 0.2013, "inbatch_pos_score": 0.8462, "learning_rate": 3.733333333333334e-05, "loss": 3.3845, "norm_diff": 0.0491, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1528.0442, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2004, "query_norm": 1.4363, "queue_k_norm": 1.3903, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7262, "sent_len_1": 66.6586, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8525, "stdk": 0.0481, "stdq": 0.0457, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 32800 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.4436, "doc_norm": 1.3905, "encoder_q-embeddings": 824.6462, "encoder_q-layer.0": 571.1013, "encoder_q-layer.1": 627.9854, "encoder_q-layer.10": 603.557, "encoder_q-layer.11": 1296.4722, "encoder_q-layer.2": 739.9105, "encoder_q-layer.3": 830.4653, "encoder_q-layer.4": 810.952, "encoder_q-layer.5": 770.6436, "encoder_q-layer.6": 845.1725, "encoder_q-layer.7": 742.8347, "encoder_q-layer.8": 721.063, "encoder_q-layer.9": 616.3286, "epoch": 0.32, "inbatch_neg_score": 0.2056, "inbatch_pos_score": 0.832, "learning_rate": 3.727777777777778e-05, "loss": 3.4436, "norm_diff": 0.0175, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1173.4032, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2041, "query_norm": 1.406, "queue_k_norm": 1.3912, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9396, "sent_len_1": 66.4789, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6375, "stdk": 0.0481, "stdq": 0.0449, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 32900 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.4076, "doc_norm": 1.3872, "encoder_q-embeddings": 1339.8234, "encoder_q-layer.0": 930.6437, "encoder_q-layer.1": 964.3276, "encoder_q-layer.10": 663.6592, "encoder_q-layer.11": 1372.3993, "encoder_q-layer.2": 1114.6703, "encoder_q-layer.3": 1273.1814, "encoder_q-layer.4": 1411.0192, "encoder_q-layer.5": 1454.6106, "encoder_q-layer.6": 1895.8619, "encoder_q-layer.7": 2132.7385, "encoder_q-layer.8": 3090.1213, "encoder_q-layer.9": 1960.2994, "epoch": 0.32, "inbatch_neg_score": 0.1973, "inbatch_pos_score": 0.8574, "learning_rate": 3.722222222222222e-05, "loss": 3.4076, "norm_diff": 0.0239, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2522.7582, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1973, "query_norm": 1.4111, "queue_k_norm": 1.3919, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9517, "sent_len_1": 66.5999, "sent_max_len_0": 128.0, "sent_max_len_1": 188.465, "stdk": 0.048, "stdq": 0.0459, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 33000 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.3998, "doc_norm": 1.3898, "encoder_q-embeddings": 1680.1329, "encoder_q-layer.0": 1136.8539, "encoder_q-layer.1": 1149.072, "encoder_q-layer.10": 588.304, "encoder_q-layer.11": 1349.6948, "encoder_q-layer.2": 1255.5181, "encoder_q-layer.3": 1265.749, "encoder_q-layer.4": 1305.041, "encoder_q-layer.5": 1422.4856, "encoder_q-layer.6": 1430.2257, "encoder_q-layer.7": 1395.866, "encoder_q-layer.8": 1109.7488, "encoder_q-layer.9": 684.7908, "epoch": 0.32, "inbatch_neg_score": 0.1993, "inbatch_pos_score": 0.8188, "learning_rate": 3.7166666666666664e-05, "loss": 3.3998, "norm_diff": 0.0067, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1912.7182, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1987, "query_norm": 1.391, "queue_k_norm": 1.394, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7963, "sent_len_1": 66.921, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0888, "stdk": 0.048, "stdq": 0.0454, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 33100 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.4186, "doc_norm": 1.3959, "encoder_q-embeddings": 2693.9114, "encoder_q-layer.0": 2053.9436, "encoder_q-layer.1": 2604.7986, "encoder_q-layer.10": 666.9109, "encoder_q-layer.11": 1258.5303, "encoder_q-layer.2": 3317.668, "encoder_q-layer.3": 3238.4756, "encoder_q-layer.4": 3115.3601, "encoder_q-layer.5": 2785.9653, "encoder_q-layer.6": 2264.1565, "encoder_q-layer.7": 1893.5862, "encoder_q-layer.8": 1781.5763, "encoder_q-layer.9": 1126.2532, "epoch": 0.32, "inbatch_neg_score": 0.1885, "inbatch_pos_score": 0.8203, "learning_rate": 3.7111111111111113e-05, "loss": 3.4186, "norm_diff": 0.0471, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3691.5725, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1895, "query_norm": 1.3489, "queue_k_norm": 1.3962, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7748, "sent_len_1": 66.7442, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5938, "stdk": 0.0482, "stdq": 0.0444, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 33200 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.4166, "doc_norm": 1.3963, "encoder_q-embeddings": 545.7358, "encoder_q-layer.0": 337.8994, "encoder_q-layer.1": 356.7563, "encoder_q-layer.10": 634.9484, "encoder_q-layer.11": 1346.0634, "encoder_q-layer.2": 384.1034, "encoder_q-layer.3": 411.9646, "encoder_q-layer.4": 435.1906, "encoder_q-layer.5": 446.2536, "encoder_q-layer.6": 509.8382, "encoder_q-layer.7": 590.6722, "encoder_q-layer.8": 709.6586, "encoder_q-layer.9": 631.4519, "epoch": 0.33, "inbatch_neg_score": 0.1775, "inbatch_pos_score": 0.7988, "learning_rate": 3.705555555555556e-05, "loss": 3.4166, "norm_diff": 0.0387, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 912.8118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1782, "query_norm": 1.3576, "queue_k_norm": 1.3964, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8148, "sent_len_1": 66.697, "sent_max_len_0": 128.0, "sent_max_len_1": 189.445, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 33300 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.4213, "doc_norm": 1.4027, "encoder_q-embeddings": 1639.1564, "encoder_q-layer.0": 1075.3419, "encoder_q-layer.1": 1113.1862, "encoder_q-layer.10": 619.6888, "encoder_q-layer.11": 1239.8955, "encoder_q-layer.2": 1220.4054, "encoder_q-layer.3": 1145.183, "encoder_q-layer.4": 1257.3303, "encoder_q-layer.5": 1184.8674, "encoder_q-layer.6": 1190.0043, "encoder_q-layer.7": 1232.2415, "encoder_q-layer.8": 1012.4897, "encoder_q-layer.9": 634.3809, "epoch": 0.33, "inbatch_neg_score": 0.1697, "inbatch_pos_score": 0.8477, "learning_rate": 3.7e-05, "loss": 3.4213, "norm_diff": 0.0416, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1776.3753, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1697, "query_norm": 1.3611, "queue_k_norm": 1.3988, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9616, "sent_len_1": 66.6542, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7988, "stdk": 0.0484, "stdq": 0.0456, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 33400 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4268, "doc_norm": 1.396, "encoder_q-embeddings": 922.4612, "encoder_q-layer.0": 620.6849, "encoder_q-layer.1": 700.2497, "encoder_q-layer.10": 621.5677, "encoder_q-layer.11": 1315.9706, "encoder_q-layer.2": 857.8074, "encoder_q-layer.3": 929.5607, "encoder_q-layer.4": 1019.0856, "encoder_q-layer.5": 1040.3019, "encoder_q-layer.6": 1114.0416, "encoder_q-layer.7": 1188.6166, "encoder_q-layer.8": 936.7408, "encoder_q-layer.9": 637.7393, "epoch": 0.33, "inbatch_neg_score": 0.1644, "inbatch_pos_score": 0.7783, "learning_rate": 3.694444444444445e-05, "loss": 3.4268, "norm_diff": 0.0759, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1381.5233, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1638, "query_norm": 1.3202, "queue_k_norm": 1.3973, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.912, "sent_len_1": 66.695, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7075, "stdk": 0.0482, "stdq": 0.0442, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 33500 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3982, "doc_norm": 1.4032, "encoder_q-embeddings": 560.9611, "encoder_q-layer.0": 353.8989, "encoder_q-layer.1": 367.1816, "encoder_q-layer.10": 653.4078, "encoder_q-layer.11": 1396.3828, "encoder_q-layer.2": 412.0691, "encoder_q-layer.3": 434.0828, "encoder_q-layer.4": 463.1119, "encoder_q-layer.5": 466.8572, "encoder_q-layer.6": 523.8105, "encoder_q-layer.7": 615.972, "encoder_q-layer.8": 725.4834, "encoder_q-layer.9": 666.1911, "epoch": 0.33, "inbatch_neg_score": 0.1521, "inbatch_pos_score": 0.8213, "learning_rate": 3.688888888888889e-05, "loss": 3.3982, "norm_diff": 0.0371, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 942.2325, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1528, "query_norm": 1.3661, "queue_k_norm": 1.3978, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9515, "sent_len_1": 66.6962, "sent_max_len_0": 128.0, "sent_max_len_1": 189.31, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 33600 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.4079, "doc_norm": 1.3859, "encoder_q-embeddings": 594.8828, "encoder_q-layer.0": 377.6842, "encoder_q-layer.1": 389.572, "encoder_q-layer.10": 666.8322, "encoder_q-layer.11": 1377.2577, "encoder_q-layer.2": 438.1504, "encoder_q-layer.3": 459.1713, "encoder_q-layer.4": 475.8571, "encoder_q-layer.5": 478.6035, "encoder_q-layer.6": 559.501, "encoder_q-layer.7": 631.9219, "encoder_q-layer.8": 713.808, "encoder_q-layer.9": 641.9203, "epoch": 0.33, "inbatch_neg_score": 0.1435, "inbatch_pos_score": 0.7466, "learning_rate": 3.683333333333334e-05, "loss": 3.4079, "norm_diff": 0.0506, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 974.6063, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1427, "query_norm": 1.3353, "queue_k_norm": 1.3958, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7652, "sent_len_1": 66.6305, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6037, "stdk": 0.0478, "stdq": 0.0452, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 33700 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 3.3793, "doc_norm": 1.4023, "encoder_q-embeddings": 648.1539, "encoder_q-layer.0": 428.6269, "encoder_q-layer.1": 478.5431, "encoder_q-layer.10": 570.8197, "encoder_q-layer.11": 1235.8611, "encoder_q-layer.2": 564.1092, "encoder_q-layer.3": 608.7717, "encoder_q-layer.4": 584.014, "encoder_q-layer.5": 585.4926, "encoder_q-layer.6": 658.8486, "encoder_q-layer.7": 677.7982, "encoder_q-layer.8": 745.0098, "encoder_q-layer.9": 655.1516, "epoch": 0.33, "inbatch_neg_score": 0.1405, "inbatch_pos_score": 0.876, "learning_rate": 3.677777777777778e-05, "loss": 3.3793, "norm_diff": 0.0151, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 991.7152, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1407, "query_norm": 1.3933, "queue_k_norm": 1.3928, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8129, "sent_len_1": 66.6719, "sent_max_len_0": 128.0, "sent_max_len_1": 188.17, "stdk": 0.0485, "stdq": 0.0472, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 33800 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.4057, "doc_norm": 1.3945, "encoder_q-embeddings": 556.5262, "encoder_q-layer.0": 359.4926, "encoder_q-layer.1": 391.9268, "encoder_q-layer.10": 563.6735, "encoder_q-layer.11": 1265.3895, "encoder_q-layer.2": 435.2762, "encoder_q-layer.3": 467.4, "encoder_q-layer.4": 499.6545, "encoder_q-layer.5": 504.4809, "encoder_q-layer.6": 530.743, "encoder_q-layer.7": 604.5628, "encoder_q-layer.8": 693.5175, "encoder_q-layer.9": 590.9453, "epoch": 0.33, "inbatch_neg_score": 0.1403, "inbatch_pos_score": 0.7935, "learning_rate": 3.672222222222222e-05, "loss": 3.4057, "norm_diff": 0.0509, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 921.1366, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1406, "query_norm": 1.3436, "queue_k_norm": 1.3925, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8119, "sent_len_1": 66.5999, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6912, "stdk": 0.0482, "stdq": 0.0453, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 33900 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3831, "doc_norm": 1.3969, "encoder_q-embeddings": 909.8882, "encoder_q-layer.0": 623.6838, "encoder_q-layer.1": 624.4474, "encoder_q-layer.10": 560.4666, "encoder_q-layer.11": 1260.9718, "encoder_q-layer.2": 684.6583, "encoder_q-layer.3": 683.2859, "encoder_q-layer.4": 716.6005, "encoder_q-layer.5": 670.4549, "encoder_q-layer.6": 728.9794, "encoder_q-layer.7": 731.7294, "encoder_q-layer.8": 764.7271, "encoder_q-layer.9": 610.1914, "epoch": 0.33, "inbatch_neg_score": 0.136, "inbatch_pos_score": 0.8027, "learning_rate": 3.6666666666666666e-05, "loss": 3.3831, "norm_diff": 0.0413, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1144.9833, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1362, "query_norm": 1.3556, "queue_k_norm": 1.3932, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.763, "sent_len_1": 66.7077, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9913, "stdk": 0.0483, "stdq": 0.0457, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 34000 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.4059, "doc_norm": 1.3892, "encoder_q-embeddings": 1146.7867, "encoder_q-layer.0": 802.8483, "encoder_q-layer.1": 813.4438, "encoder_q-layer.10": 579.0887, "encoder_q-layer.11": 1226.0988, "encoder_q-layer.2": 999.5176, "encoder_q-layer.3": 971.2878, "encoder_q-layer.4": 1010.9708, "encoder_q-layer.5": 1082.434, "encoder_q-layer.6": 1049.0164, "encoder_q-layer.7": 836.7924, "encoder_q-layer.8": 803.6633, "encoder_q-layer.9": 667.7416, "epoch": 0.33, "inbatch_neg_score": 0.1373, "inbatch_pos_score": 0.8135, "learning_rate": 3.6611111111111115e-05, "loss": 3.4059, "norm_diff": 0.0289, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1405.9494, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.137, "query_norm": 1.3604, "queue_k_norm": 1.393, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8403, "sent_len_1": 66.5894, "sent_max_len_0": 128.0, "sent_max_len_1": 188.94, "stdk": 0.0481, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 34100 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.4018, "doc_norm": 1.3942, "encoder_q-embeddings": 1550.0554, "encoder_q-layer.0": 1168.3311, "encoder_q-layer.1": 1157.5745, "encoder_q-layer.10": 650.4166, "encoder_q-layer.11": 1296.9515, "encoder_q-layer.2": 1452.1879, "encoder_q-layer.3": 1500.3707, "encoder_q-layer.4": 1556.8416, "encoder_q-layer.5": 1691.9148, "encoder_q-layer.6": 1888.8674, "encoder_q-layer.7": 1725.902, "encoder_q-layer.8": 1732.42, "encoder_q-layer.9": 1044.017, "epoch": 0.33, "inbatch_neg_score": 0.1369, "inbatch_pos_score": 0.8066, "learning_rate": 3.655555555555556e-05, "loss": 3.4018, "norm_diff": 0.0544, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2201.3351, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1375, "query_norm": 1.3398, "queue_k_norm": 1.3923, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8875, "sent_len_1": 66.5821, "sent_max_len_0": 128.0, "sent_max_len_1": 186.7475, "stdk": 0.0483, "stdq": 0.0451, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 34200 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.4014, "doc_norm": 1.393, "encoder_q-embeddings": 2536.0413, "encoder_q-layer.0": 1681.2228, "encoder_q-layer.1": 1611.538, "encoder_q-layer.10": 619.4867, "encoder_q-layer.11": 1356.873, "encoder_q-layer.2": 2037.8395, "encoder_q-layer.3": 1892.2881, "encoder_q-layer.4": 2179.3889, "encoder_q-layer.5": 2108.5686, "encoder_q-layer.6": 2290.4651, "encoder_q-layer.7": 2256.0938, "encoder_q-layer.8": 2065.5803, "encoder_q-layer.9": 1057.7574, "epoch": 0.33, "inbatch_neg_score": 0.1439, "inbatch_pos_score": 0.7397, "learning_rate": 3.65e-05, "loss": 3.4014, "norm_diff": 0.0655, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2950.471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1444, "query_norm": 1.3275, "queue_k_norm": 1.3926, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0109, "sent_len_1": 66.7444, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6113, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 34300 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3791, "doc_norm": 1.3968, "encoder_q-embeddings": 2436.189, "encoder_q-layer.0": 1703.7983, "encoder_q-layer.1": 1867.972, "encoder_q-layer.10": 614.3889, "encoder_q-layer.11": 1322.6165, "encoder_q-layer.2": 2254.2964, "encoder_q-layer.3": 2225.7063, "encoder_q-layer.4": 2346.5093, "encoder_q-layer.5": 2576.4668, "encoder_q-layer.6": 2475.2341, "encoder_q-layer.7": 2324.4451, "encoder_q-layer.8": 1819.9364, "encoder_q-layer.9": 764.8059, "epoch": 0.34, "inbatch_neg_score": 0.1451, "inbatch_pos_score": 0.7852, "learning_rate": 3.644444444444445e-05, "loss": 3.3791, "norm_diff": 0.039, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3027.0497, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1443, "query_norm": 1.3578, "queue_k_norm": 1.393, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0235, "sent_len_1": 66.8909, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2325, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 34400 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.4129, "doc_norm": 1.3873, "encoder_q-embeddings": 720.245, "encoder_q-layer.0": 457.8578, "encoder_q-layer.1": 466.8432, "encoder_q-layer.10": 634.5436, "encoder_q-layer.11": 1351.1941, "encoder_q-layer.2": 492.4396, "encoder_q-layer.3": 521.1402, "encoder_q-layer.4": 639.189, "encoder_q-layer.5": 614.4017, "encoder_q-layer.6": 659.8909, "encoder_q-layer.7": 749.6517, "encoder_q-layer.8": 755.2548, "encoder_q-layer.9": 634.2066, "epoch": 0.34, "inbatch_neg_score": 0.1414, "inbatch_pos_score": 0.7705, "learning_rate": 3.638888888888889e-05, "loss": 3.4129, "norm_diff": 0.0239, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1051.9361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.142, "query_norm": 1.3634, "queue_k_norm": 1.3906, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8546, "sent_len_1": 66.7978, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8837, "stdk": 0.0481, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 34500 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.3727, "doc_norm": 1.3939, "encoder_q-embeddings": 1355.166, "encoder_q-layer.0": 898.5949, "encoder_q-layer.1": 1024.7487, "encoder_q-layer.10": 642.9348, "encoder_q-layer.11": 1312.17, "encoder_q-layer.2": 1191.6672, "encoder_q-layer.3": 1318.4622, "encoder_q-layer.4": 1320.6699, "encoder_q-layer.5": 1198.7212, "encoder_q-layer.6": 1216.9335, "encoder_q-layer.7": 1328.1207, "encoder_q-layer.8": 1080.4872, "encoder_q-layer.9": 680.5975, "epoch": 0.34, "inbatch_neg_score": 0.1439, "inbatch_pos_score": 0.8052, "learning_rate": 3.633333333333333e-05, "loss": 3.3727, "norm_diff": 0.0259, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1720.2252, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1432, "query_norm": 1.3704, "queue_k_norm": 1.3933, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0843, "sent_len_1": 66.6471, "sent_max_len_0": 128.0, "sent_max_len_1": 190.83, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 34600 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3597, "doc_norm": 1.3924, "encoder_q-embeddings": 1777.011, "encoder_q-layer.0": 1393.0509, "encoder_q-layer.1": 1425.931, "encoder_q-layer.10": 574.5323, "encoder_q-layer.11": 1289.3749, "encoder_q-layer.2": 1448.4294, "encoder_q-layer.3": 1179.7618, "encoder_q-layer.4": 1138.3582, "encoder_q-layer.5": 975.7834, "encoder_q-layer.6": 1043.9279, "encoder_q-layer.7": 1109.8102, "encoder_q-layer.8": 765.6638, "encoder_q-layer.9": 595.572, "epoch": 0.34, "inbatch_neg_score": 0.1419, "inbatch_pos_score": 0.7881, "learning_rate": 3.6277777777777776e-05, "loss": 3.3597, "norm_diff": 0.0192, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1845.761, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1418, "query_norm": 1.3732, "queue_k_norm": 1.391, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1332, "sent_len_1": 66.7381, "sent_max_len_0": 128.0, "sent_max_len_1": 188.265, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 34700 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.4114, "doc_norm": 1.3881, "encoder_q-embeddings": 692.049, "encoder_q-layer.0": 445.1454, "encoder_q-layer.1": 489.2068, "encoder_q-layer.10": 595.0972, "encoder_q-layer.11": 1295.5333, "encoder_q-layer.2": 555.8281, "encoder_q-layer.3": 580.0585, "encoder_q-layer.4": 631.0938, "encoder_q-layer.5": 601.1688, "encoder_q-layer.6": 670.2349, "encoder_q-layer.7": 713.8792, "encoder_q-layer.8": 721.2831, "encoder_q-layer.9": 601.1348, "epoch": 0.34, "inbatch_neg_score": 0.1362, "inbatch_pos_score": 0.7739, "learning_rate": 3.6222222222222225e-05, "loss": 3.4114, "norm_diff": 0.0305, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1032.4639, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1365, "query_norm": 1.3577, "queue_k_norm": 1.3921, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7111, "sent_len_1": 66.9413, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7775, "stdk": 0.0482, "stdq": 0.0451, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 34800 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.3528, "doc_norm": 1.3887, "encoder_q-embeddings": 1217.3873, "encoder_q-layer.0": 823.319, "encoder_q-layer.1": 897.4232, "encoder_q-layer.10": 592.9701, "encoder_q-layer.11": 1307.7307, "encoder_q-layer.2": 990.0146, "encoder_q-layer.3": 1038.5233, "encoder_q-layer.4": 1195.994, "encoder_q-layer.5": 1300.6631, "encoder_q-layer.6": 1237.2614, "encoder_q-layer.7": 1249.8456, "encoder_q-layer.8": 1065.754, "encoder_q-layer.9": 675.2884, "epoch": 0.34, "inbatch_neg_score": 0.1372, "inbatch_pos_score": 0.7754, "learning_rate": 3.6166666666666674e-05, "loss": 3.3528, "norm_diff": 0.0258, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1609.9695, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1375, "query_norm": 1.3629, "queue_k_norm": 1.3913, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9903, "sent_len_1": 66.834, "sent_max_len_0": 128.0, "sent_max_len_1": 189.865, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 34900 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.3933, "doc_norm": 1.3952, "encoder_q-embeddings": 620.4772, "encoder_q-layer.0": 417.0194, "encoder_q-layer.1": 441.6933, "encoder_q-layer.10": 650.9056, "encoder_q-layer.11": 1314.1658, "encoder_q-layer.2": 496.8109, "encoder_q-layer.3": 529.6899, "encoder_q-layer.4": 638.7408, "encoder_q-layer.5": 615.7935, "encoder_q-layer.6": 666.3781, "encoder_q-layer.7": 715.643, "encoder_q-layer.8": 739.5762, "encoder_q-layer.9": 633.0247, "epoch": 0.34, "inbatch_neg_score": 0.1374, "inbatch_pos_score": 0.7661, "learning_rate": 3.611111111111111e-05, "loss": 3.3933, "norm_diff": 0.0233, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1013.5027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1372, "query_norm": 1.3719, "queue_k_norm": 1.3897, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8516, "sent_len_1": 66.6141, "sent_max_len_0": 128.0, "sent_max_len_1": 187.345, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 35000 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3623, "doc_norm": 1.3949, "encoder_q-embeddings": 989.9117, "encoder_q-layer.0": 653.278, "encoder_q-layer.1": 773.6516, "encoder_q-layer.10": 567.9959, "encoder_q-layer.11": 1262.2476, "encoder_q-layer.2": 903.7637, "encoder_q-layer.3": 1058.3367, "encoder_q-layer.4": 1172.9805, "encoder_q-layer.5": 1081.7161, "encoder_q-layer.6": 1044.8942, "encoder_q-layer.7": 996.5469, "encoder_q-layer.8": 813.5123, "encoder_q-layer.9": 627.5322, "epoch": 0.34, "inbatch_neg_score": 0.1382, "inbatch_pos_score": 0.8208, "learning_rate": 3.605555555555556e-05, "loss": 3.3623, "norm_diff": 0.0179, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1404.9201, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1384, "query_norm": 1.3788, "queue_k_norm": 1.3892, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.002, "sent_len_1": 66.7369, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8988, "stdk": 0.0485, "stdq": 0.0458, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 35100 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3932, "doc_norm": 1.3881, "encoder_q-embeddings": 637.3997, "encoder_q-layer.0": 408.1852, "encoder_q-layer.1": 438.2856, "encoder_q-layer.10": 585.7705, "encoder_q-layer.11": 1301.5605, "encoder_q-layer.2": 496.0449, "encoder_q-layer.3": 513.157, "encoder_q-layer.4": 566.3279, "encoder_q-layer.5": 616.3674, "encoder_q-layer.6": 667.5393, "encoder_q-layer.7": 688.881, "encoder_q-layer.8": 728.4999, "encoder_q-layer.9": 643.5395, "epoch": 0.34, "inbatch_neg_score": 0.1367, "inbatch_pos_score": 0.8071, "learning_rate": 3.6e-05, "loss": 3.3932, "norm_diff": 0.0107, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 995.5181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1372, "query_norm": 1.3929, "queue_k_norm": 1.3892, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9773, "sent_len_1": 66.858, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2675, "stdk": 0.0482, "stdq": 0.0464, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 35200 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3811, "doc_norm": 1.3929, "encoder_q-embeddings": 598.6394, "encoder_q-layer.0": 384.9061, "encoder_q-layer.1": 417.3241, "encoder_q-layer.10": 644.1778, "encoder_q-layer.11": 1329.4602, "encoder_q-layer.2": 467.5759, "encoder_q-layer.3": 490.3026, "encoder_q-layer.4": 525.1432, "encoder_q-layer.5": 555.5823, "encoder_q-layer.6": 628.8047, "encoder_q-layer.7": 747.0192, "encoder_q-layer.8": 786.6054, "encoder_q-layer.9": 661.5988, "epoch": 0.34, "inbatch_neg_score": 0.1457, "inbatch_pos_score": 0.7954, "learning_rate": 3.594444444444445e-05, "loss": 3.3811, "norm_diff": 0.0125, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 991.4518, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1445, "query_norm": 1.3999, "queue_k_norm": 1.3904, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9441, "sent_len_1": 66.7577, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6125, "stdk": 0.0484, "stdq": 0.0466, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 35300 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3714, "doc_norm": 1.3946, "encoder_q-embeddings": 611.9994, "encoder_q-layer.0": 403.8955, "encoder_q-layer.1": 435.6125, "encoder_q-layer.10": 581.7361, "encoder_q-layer.11": 1306.1599, "encoder_q-layer.2": 485.0737, "encoder_q-layer.3": 507.5988, "encoder_q-layer.4": 532.6588, "encoder_q-layer.5": 542.8952, "encoder_q-layer.6": 559.9435, "encoder_q-layer.7": 620.6491, "encoder_q-layer.8": 677.7469, "encoder_q-layer.9": 581.038, "epoch": 0.35, "inbatch_neg_score": 0.1441, "inbatch_pos_score": 0.7754, "learning_rate": 3.5888888888888886e-05, "loss": 3.3714, "norm_diff": 0.0399, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 967.682, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1434, "query_norm": 1.3546, "queue_k_norm": 1.3911, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.2641, "sent_len_1": 67.2515, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7188, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 35400 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3929, "doc_norm": 1.39, "encoder_q-embeddings": 878.7101, "encoder_q-layer.0": 554.7265, "encoder_q-layer.1": 653.4938, "encoder_q-layer.10": 673.9153, "encoder_q-layer.11": 1317.3329, "encoder_q-layer.2": 727.6923, "encoder_q-layer.3": 801.3364, "encoder_q-layer.4": 825.9936, "encoder_q-layer.5": 863.0383, "encoder_q-layer.6": 890.3412, "encoder_q-layer.7": 902.1573, "encoder_q-layer.8": 800.3546, "encoder_q-layer.9": 665.1306, "epoch": 0.35, "inbatch_neg_score": 0.1435, "inbatch_pos_score": 0.7974, "learning_rate": 3.5833333333333335e-05, "loss": 3.3929, "norm_diff": 0.0147, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1222.9381, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1431, "query_norm": 1.3754, "queue_k_norm": 1.3908, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8831, "sent_len_1": 66.8347, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7763, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 35500 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.4049, "doc_norm": 1.387, "encoder_q-embeddings": 848.1967, "encoder_q-layer.0": 604.9983, "encoder_q-layer.1": 671.1117, "encoder_q-layer.10": 750.4475, "encoder_q-layer.11": 1448.7141, "encoder_q-layer.2": 800.2543, "encoder_q-layer.3": 771.0775, "encoder_q-layer.4": 804.3008, "encoder_q-layer.5": 849.6263, "encoder_q-layer.6": 942.2329, "encoder_q-layer.7": 912.6399, "encoder_q-layer.8": 893.541, "encoder_q-layer.9": 724.5931, "epoch": 0.35, "inbatch_neg_score": 0.1463, "inbatch_pos_score": 0.7959, "learning_rate": 3.577777777777778e-05, "loss": 3.4049, "norm_diff": 0.0089, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1267.5334, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1464, "query_norm": 1.3832, "queue_k_norm": 1.3906, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9172, "sent_len_1": 66.6619, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9475, "stdk": 0.0482, "stdq": 0.0464, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 35600 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3967, "doc_norm": 1.3873, "encoder_q-embeddings": 1472.4036, "encoder_q-layer.0": 966.7648, "encoder_q-layer.1": 1080.423, "encoder_q-layer.10": 1246.0884, "encoder_q-layer.11": 2621.3645, "encoder_q-layer.2": 1271.0217, "encoder_q-layer.3": 1383.9537, "encoder_q-layer.4": 1540.5616, "encoder_q-layer.5": 1629.6245, "encoder_q-layer.6": 1676.8323, "encoder_q-layer.7": 1732.4371, "encoder_q-layer.8": 1485.1727, "encoder_q-layer.9": 1291.5747, "epoch": 0.35, "inbatch_neg_score": 0.1493, "inbatch_pos_score": 0.8081, "learning_rate": 3.5722222222222226e-05, "loss": 3.3967, "norm_diff": 0.0339, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2266.2558, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1498, "query_norm": 1.3534, "queue_k_norm": 1.3902, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0012, "sent_len_1": 66.5431, "sent_max_len_0": 128.0, "sent_max_len_1": 189.255, "stdk": 0.0482, "stdq": 0.0454, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 35700 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3892, "doc_norm": 1.3901, "encoder_q-embeddings": 1299.1021, "encoder_q-layer.0": 841.4662, "encoder_q-layer.1": 913.3298, "encoder_q-layer.10": 1322.8895, "encoder_q-layer.11": 2754.7769, "encoder_q-layer.2": 1040.2814, "encoder_q-layer.3": 1098.7607, "encoder_q-layer.4": 1111.1327, "encoder_q-layer.5": 1228.3777, "encoder_q-layer.6": 1363.3425, "encoder_q-layer.7": 1520.9169, "encoder_q-layer.8": 1556.208, "encoder_q-layer.9": 1287.0082, "epoch": 0.35, "inbatch_neg_score": 0.1527, "inbatch_pos_score": 0.811, "learning_rate": 3.566666666666667e-05, "loss": 3.3892, "norm_diff": 0.0419, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2088.2683, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1528, "query_norm": 1.3482, "queue_k_norm": 1.391, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7739, "sent_len_1": 66.8155, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7738, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 35800 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3727, "doc_norm": 1.3884, "encoder_q-embeddings": 1217.8625, "encoder_q-layer.0": 778.105, "encoder_q-layer.1": 829.3466, "encoder_q-layer.10": 1183.3497, "encoder_q-layer.11": 2744.6084, "encoder_q-layer.2": 914.1365, "encoder_q-layer.3": 951.3224, "encoder_q-layer.4": 1011.2867, "encoder_q-layer.5": 1056.6832, "encoder_q-layer.6": 1170.2551, "encoder_q-layer.7": 1351.106, "encoder_q-layer.8": 1494.1653, "encoder_q-layer.9": 1223.3109, "epoch": 0.35, "inbatch_neg_score": 0.1625, "inbatch_pos_score": 0.8203, "learning_rate": 3.561111111111111e-05, "loss": 3.3727, "norm_diff": 0.03, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1974.9801, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1626, "query_norm": 1.3585, "queue_k_norm": 1.3916, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.6128, "sent_len_1": 66.6062, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2237, "stdk": 0.0482, "stdq": 0.0453, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 35900 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3973, "doc_norm": 1.3856, "encoder_q-embeddings": 1193.5861, "encoder_q-layer.0": 780.3847, "encoder_q-layer.1": 832.9117, "encoder_q-layer.10": 1163.0331, "encoder_q-layer.11": 2532.3857, "encoder_q-layer.2": 932.3046, "encoder_q-layer.3": 940.0674, "encoder_q-layer.4": 978.4357, "encoder_q-layer.5": 1004.1297, "encoder_q-layer.6": 1126.5623, "encoder_q-layer.7": 1196.5848, "encoder_q-layer.8": 1403.5001, "encoder_q-layer.9": 1233.7643, "epoch": 0.35, "inbatch_neg_score": 0.1541, "inbatch_pos_score": 0.791, "learning_rate": 3.555555555555556e-05, "loss": 3.3973, "norm_diff": 0.0351, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1850.21, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1545, "query_norm": 1.3505, "queue_k_norm": 1.3926, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1639, "sent_len_1": 67.0406, "sent_max_len_0": 128.0, "sent_max_len_1": 189.575, "stdk": 0.0481, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 36000 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.351, "doc_norm": 1.3833, "encoder_q-embeddings": 1395.9885, "encoder_q-layer.0": 940.4857, "encoder_q-layer.1": 1081.5204, "encoder_q-layer.10": 1150.4878, "encoder_q-layer.11": 2544.6316, "encoder_q-layer.2": 1234.134, "encoder_q-layer.3": 1321.9469, "encoder_q-layer.4": 1342.7587, "encoder_q-layer.5": 1274.4357, "encoder_q-layer.6": 1412.9255, "encoder_q-layer.7": 1564.0786, "encoder_q-layer.8": 1538.6431, "encoder_q-layer.9": 1226.4604, "epoch": 0.35, "inbatch_neg_score": 0.1593, "inbatch_pos_score": 0.8535, "learning_rate": 3.55e-05, "loss": 3.351, "norm_diff": 0.01, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2151.5906, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1602, "query_norm": 1.3778, "queue_k_norm": 1.3907, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.617, "sent_len_1": 66.6937, "sent_max_len_0": 128.0, "sent_max_len_1": 187.5325, "stdk": 0.048, "stdq": 0.046, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 36100 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3625, "doc_norm": 1.392, "encoder_q-embeddings": 2235.291, "encoder_q-layer.0": 1533.2528, "encoder_q-layer.1": 1742.4832, "encoder_q-layer.10": 1228.7523, "encoder_q-layer.11": 2671.5713, "encoder_q-layer.2": 2082.3635, "encoder_q-layer.3": 2289.5659, "encoder_q-layer.4": 2266.2512, "encoder_q-layer.5": 2018.6365, "encoder_q-layer.6": 1876.689, "encoder_q-layer.7": 1448.6658, "encoder_q-layer.8": 1437.2837, "encoder_q-layer.9": 1215.7932, "epoch": 0.35, "inbatch_neg_score": 0.1666, "inbatch_pos_score": 0.8096, "learning_rate": 3.5444444444444445e-05, "loss": 3.3625, "norm_diff": 0.0276, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2842.0169, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1682, "query_norm": 1.3643, "queue_k_norm": 1.3918, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8821, "sent_len_1": 66.7437, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6225, "stdk": 0.0483, "stdq": 0.0455, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 36200 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3727, "doc_norm": 1.3968, "encoder_q-embeddings": 1421.1467, "encoder_q-layer.0": 1027.3617, "encoder_q-layer.1": 1151.2808, "encoder_q-layer.10": 1191.5364, "encoder_q-layer.11": 2540.3323, "encoder_q-layer.2": 1309.4911, "encoder_q-layer.3": 1383.5955, "encoder_q-layer.4": 1459.4536, "encoder_q-layer.5": 1602.1801, "encoder_q-layer.6": 1563.5477, "encoder_q-layer.7": 1394.5592, "encoder_q-layer.8": 1439.9156, "encoder_q-layer.9": 1188.9688, "epoch": 0.35, "inbatch_neg_score": 0.1588, "inbatch_pos_score": 0.7969, "learning_rate": 3.538888888888889e-05, "loss": 3.3727, "norm_diff": 0.0482, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2180.5406, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1595, "query_norm": 1.3486, "queue_k_norm": 1.3924, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1546, "sent_len_1": 66.679, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8325, "stdk": 0.0485, "stdq": 0.0453, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 36300 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.3769, "doc_norm": 1.394, "encoder_q-embeddings": 5620.0376, "encoder_q-layer.0": 4099.4043, "encoder_q-layer.1": 5281.9531, "encoder_q-layer.10": 1284.9463, "encoder_q-layer.11": 2384.7742, "encoder_q-layer.2": 6747.7456, "encoder_q-layer.3": 7265.6392, "encoder_q-layer.4": 7309.7495, "encoder_q-layer.5": 7476.3276, "encoder_q-layer.6": 7609.4556, "encoder_q-layer.7": 6979.6953, "encoder_q-layer.8": 5373.082, "encoder_q-layer.9": 3001.9534, "epoch": 0.36, "inbatch_neg_score": 0.157, "inbatch_pos_score": 0.8296, "learning_rate": 3.5333333333333336e-05, "loss": 3.3769, "norm_diff": 0.0309, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8755.6984, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1566, "query_norm": 1.3631, "queue_k_norm": 1.3919, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9887, "sent_len_1": 66.5346, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5037, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 36400 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.3593, "doc_norm": 1.3977, "encoder_q-embeddings": 1278.7057, "encoder_q-layer.0": 853.7161, "encoder_q-layer.1": 937.1256, "encoder_q-layer.10": 1211.6221, "encoder_q-layer.11": 2547.2578, "encoder_q-layer.2": 1094.8828, "encoder_q-layer.3": 1194.8397, "encoder_q-layer.4": 1275.5389, "encoder_q-layer.5": 1269.1602, "encoder_q-layer.6": 1293.538, "encoder_q-layer.7": 1402.788, "encoder_q-layer.8": 1601.3129, "encoder_q-layer.9": 1280.1283, "epoch": 0.36, "inbatch_neg_score": 0.1553, "inbatch_pos_score": 0.835, "learning_rate": 3.527777777777778e-05, "loss": 3.3593, "norm_diff": 0.0365, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2053.0315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1566, "query_norm": 1.3612, "queue_k_norm": 1.3941, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8932, "sent_len_1": 66.7556, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7663, "stdk": 0.0485, "stdq": 0.0458, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 36500 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.356, "doc_norm": 1.3997, "encoder_q-embeddings": 1554.4653, "encoder_q-layer.0": 1075.8995, "encoder_q-layer.1": 1152.5884, "encoder_q-layer.10": 1132.1792, "encoder_q-layer.11": 2549.5208, "encoder_q-layer.2": 1302.8918, "encoder_q-layer.3": 1445.0848, "encoder_q-layer.4": 1447.0691, "encoder_q-layer.5": 1552.8007, "encoder_q-layer.6": 1585.234, "encoder_q-layer.7": 1723.1438, "encoder_q-layer.8": 1481.9688, "encoder_q-layer.9": 1250.4227, "epoch": 0.36, "inbatch_neg_score": 0.1561, "inbatch_pos_score": 0.8257, "learning_rate": 3.522222222222222e-05, "loss": 3.356, "norm_diff": 0.0316, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2256.1298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1564, "query_norm": 1.3681, "queue_k_norm": 1.3945, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0163, "sent_len_1": 67.033, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0987, "stdk": 0.0485, "stdq": 0.046, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 36600 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3728, "doc_norm": 1.3946, "encoder_q-embeddings": 1143.729, "encoder_q-layer.0": 732.0656, "encoder_q-layer.1": 755.6237, "encoder_q-layer.10": 1205.4191, "encoder_q-layer.11": 2484.937, "encoder_q-layer.2": 878.4327, "encoder_q-layer.3": 893.7133, "encoder_q-layer.4": 934.9391, "encoder_q-layer.5": 975.4617, "encoder_q-layer.6": 1088.2228, "encoder_q-layer.7": 1218.7886, "encoder_q-layer.8": 1350.819, "encoder_q-layer.9": 1224.9618, "epoch": 0.36, "inbatch_neg_score": 0.1577, "inbatch_pos_score": 0.8125, "learning_rate": 3.516666666666667e-05, "loss": 3.3728, "norm_diff": 0.0476, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1824.5662, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1578, "query_norm": 1.3471, "queue_k_norm": 1.3938, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8197, "sent_len_1": 66.5797, "sent_max_len_0": 128.0, "sent_max_len_1": 188.92, "stdk": 0.0483, "stdq": 0.0454, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 36700 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3743, "doc_norm": 1.3821, "encoder_q-embeddings": 1468.1963, "encoder_q-layer.0": 957.1272, "encoder_q-layer.1": 1049.1016, "encoder_q-layer.10": 1140.1708, "encoder_q-layer.11": 2666.9216, "encoder_q-layer.2": 1184.3757, "encoder_q-layer.3": 1255.2621, "encoder_q-layer.4": 1276.0186, "encoder_q-layer.5": 1253.7938, "encoder_q-layer.6": 1260.2168, "encoder_q-layer.7": 1327.3105, "encoder_q-layer.8": 1424.1796, "encoder_q-layer.9": 1205.4591, "epoch": 0.36, "inbatch_neg_score": 0.1603, "inbatch_pos_score": 0.7788, "learning_rate": 3.511111111111111e-05, "loss": 3.3743, "norm_diff": 0.0538, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2085.873, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1602, "query_norm": 1.3282, "queue_k_norm": 1.3942, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8823, "sent_len_1": 66.9211, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4412, "stdk": 0.0479, "stdq": 0.0445, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 36800 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.3731, "doc_norm": 1.394, "encoder_q-embeddings": 1407.6649, "encoder_q-layer.0": 916.9797, "encoder_q-layer.1": 975.7659, "encoder_q-layer.10": 1377.6333, "encoder_q-layer.11": 2716.3037, "encoder_q-layer.2": 1080.391, "encoder_q-layer.3": 1109.2002, "encoder_q-layer.4": 1199.3307, "encoder_q-layer.5": 1240.8937, "encoder_q-layer.6": 1348.7174, "encoder_q-layer.7": 1525.8695, "encoder_q-layer.8": 1685.5297, "encoder_q-layer.9": 1347.9808, "epoch": 0.36, "inbatch_neg_score": 0.1644, "inbatch_pos_score": 0.7812, "learning_rate": 3.505555555555556e-05, "loss": 3.3731, "norm_diff": 0.0373, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2161.6592, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1643, "query_norm": 1.3567, "queue_k_norm": 1.3955, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0322, "sent_len_1": 67.0993, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0925, "stdk": 0.0483, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 36900 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.3524, "doc_norm": 1.3934, "encoder_q-embeddings": 2701.4717, "encoder_q-layer.0": 2026.978, "encoder_q-layer.1": 2145.5005, "encoder_q-layer.10": 1223.9055, "encoder_q-layer.11": 2582.2344, "encoder_q-layer.2": 2528.4521, "encoder_q-layer.3": 2544.1775, "encoder_q-layer.4": 2643.926, "encoder_q-layer.5": 2719.364, "encoder_q-layer.6": 2531.0222, "encoder_q-layer.7": 2559.0232, "encoder_q-layer.8": 2588.9238, "encoder_q-layer.9": 1581.9308, "epoch": 0.36, "inbatch_neg_score": 0.167, "inbatch_pos_score": 0.8184, "learning_rate": 3.5e-05, "loss": 3.3524, "norm_diff": 0.0154, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3544.3004, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1671, "query_norm": 1.3817, "queue_k_norm": 1.3956, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1377, "sent_len_1": 67.2378, "sent_max_len_0": 128.0, "sent_max_len_1": 190.815, "stdk": 0.0483, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 37000 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.3451, "doc_norm": 1.4032, "encoder_q-embeddings": 3514.8645, "encoder_q-layer.0": 2318.7422, "encoder_q-layer.1": 2701.3425, "encoder_q-layer.10": 1136.5917, "encoder_q-layer.11": 2453.5234, "encoder_q-layer.2": 3137.2991, "encoder_q-layer.3": 3360.821, "encoder_q-layer.4": 3638.2588, "encoder_q-layer.5": 3825.6003, "encoder_q-layer.6": 4037.2273, "encoder_q-layer.7": 4039.6006, "encoder_q-layer.8": 3414.8413, "encoder_q-layer.9": 2059.2605, "epoch": 0.36, "inbatch_neg_score": 0.1654, "inbatch_pos_score": 0.8374, "learning_rate": 3.4944444444444446e-05, "loss": 3.3451, "norm_diff": 0.0498, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4748.6587, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.166, "query_norm": 1.3535, "queue_k_norm": 1.3972, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7709, "sent_len_1": 66.983, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6425, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 37100 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.3496, "doc_norm": 1.4008, "encoder_q-embeddings": 1625.7239, "encoder_q-layer.0": 1120.3136, "encoder_q-layer.1": 1208.7566, "encoder_q-layer.10": 1173.677, "encoder_q-layer.11": 2647.6619, "encoder_q-layer.2": 1465.4745, "encoder_q-layer.3": 1476.996, "encoder_q-layer.4": 1479.8625, "encoder_q-layer.5": 1402.4735, "encoder_q-layer.6": 1482.486, "encoder_q-layer.7": 1620.1455, "encoder_q-layer.8": 1592.7828, "encoder_q-layer.9": 1269.2473, "epoch": 0.36, "inbatch_neg_score": 0.1683, "inbatch_pos_score": 0.8218, "learning_rate": 3.4888888888888895e-05, "loss": 3.3496, "norm_diff": 0.0477, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2322.5447, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1683, "query_norm": 1.3531, "queue_k_norm": 1.3979, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9114, "sent_len_1": 66.679, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1163, "stdk": 0.0485, "stdq": 0.0452, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 37200 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.4096, "doc_norm": 1.4041, "encoder_q-embeddings": 1643.0905, "encoder_q-layer.0": 1040.9153, "encoder_q-layer.1": 1084.1627, "encoder_q-layer.10": 1164.4596, "encoder_q-layer.11": 2599.5857, "encoder_q-layer.2": 1188.5066, "encoder_q-layer.3": 1247.7837, "encoder_q-layer.4": 1285.0558, "encoder_q-layer.5": 1284.6721, "encoder_q-layer.6": 1352.7365, "encoder_q-layer.7": 1574.8917, "encoder_q-layer.8": 1691.1647, "encoder_q-layer.9": 1351.5009, "epoch": 0.36, "inbatch_neg_score": 0.1677, "inbatch_pos_score": 0.7964, "learning_rate": 3.483333333333334e-05, "loss": 3.4096, "norm_diff": 0.0661, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2235.4097, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1681, "query_norm": 1.338, "queue_k_norm": 1.3961, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0961, "sent_len_1": 66.9105, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3025, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 37300 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.3694, "doc_norm": 1.3934, "encoder_q-embeddings": 1356.2687, "encoder_q-layer.0": 884.5145, "encoder_q-layer.1": 990.5148, "encoder_q-layer.10": 1270.7151, "encoder_q-layer.11": 2691.9866, "encoder_q-layer.2": 1142.3348, "encoder_q-layer.3": 1143.1927, "encoder_q-layer.4": 1268.4967, "encoder_q-layer.5": 1397.0415, "encoder_q-layer.6": 1505.5405, "encoder_q-layer.7": 1592.048, "encoder_q-layer.8": 1840.1895, "encoder_q-layer.9": 1443.9324, "epoch": 0.37, "inbatch_neg_score": 0.1662, "inbatch_pos_score": 0.8071, "learning_rate": 3.477777777777778e-05, "loss": 3.3694, "norm_diff": 0.0401, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2222.0372, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1652, "query_norm": 1.3534, "queue_k_norm": 1.3976, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9361, "sent_len_1": 66.7934, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3375, "stdk": 0.0482, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 37400 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.3672, "doc_norm": 1.4034, "encoder_q-embeddings": 11509.5742, "encoder_q-layer.0": 8447.6963, "encoder_q-layer.1": 8263.6562, "encoder_q-layer.10": 2032.012, "encoder_q-layer.11": 2571.0786, "encoder_q-layer.2": 9239.083, "encoder_q-layer.3": 10938.3672, "encoder_q-layer.4": 12707.1621, "encoder_q-layer.5": 14960.25, "encoder_q-layer.6": 20196.3125, "encoder_q-layer.7": 29935.0645, "encoder_q-layer.8": Infinity, "encoder_q-layer.9": 17496.3809, "epoch": 0.37, "inbatch_neg_score": 0.1696, "inbatch_pos_score": 0.835, "learning_rate": 3.472222222222222e-05, "loss": 3.3672, "norm_diff": 0.0171, "norm_loss": 0.0, "postclip_grad_norm": NaN, "preclip_grad_norm": Infinity, "preclip_grad_norm_avg": Infinity, "q@queue_neg_score": 0.1697, "query_norm": 1.3932, "queue_k_norm": 1.3976, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0232, "sent_len_1": 66.8528, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7525, "stdk": 0.0486, "stdq": 0.0467, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 37500 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 3.3577, "doc_norm": 1.4035, "encoder_q-embeddings": 669.0736, "encoder_q-layer.0": 447.5952, "encoder_q-layer.1": 487.2352, "encoder_q-layer.10": 550.2194, "encoder_q-layer.11": 1221.766, "encoder_q-layer.2": 563.0723, "encoder_q-layer.3": 599.6226, "encoder_q-layer.4": 644.653, "encoder_q-layer.5": 631.6823, "encoder_q-layer.6": 649.6618, "encoder_q-layer.7": 719.478, "encoder_q-layer.8": 773.7578, "encoder_q-layer.9": 596.5146, "epoch": 0.37, "inbatch_neg_score": 0.176, "inbatch_pos_score": 0.8794, "learning_rate": 3.466666666666667e-05, "loss": 3.3577, "norm_diff": 0.0248, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1022.669, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1758, "query_norm": 1.3787, "queue_k_norm": 1.3983, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8495, "sent_len_1": 66.7225, "sent_max_len_0": 128.0, "sent_max_len_1": 189.845, "stdk": 0.0486, "stdq": 0.0461, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 37600 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.3558, "doc_norm": 1.3934, "encoder_q-embeddings": 1389.0618, "encoder_q-layer.0": 963.3607, "encoder_q-layer.1": 1132.3533, "encoder_q-layer.10": 653.3033, "encoder_q-layer.11": 1422.6047, "encoder_q-layer.2": 1382.0798, "encoder_q-layer.3": 1517.9768, "encoder_q-layer.4": 1650.5132, "encoder_q-layer.5": 1772.2921, "encoder_q-layer.6": 1621.7524, "encoder_q-layer.7": 1624.2843, "encoder_q-layer.8": 1057.1666, "encoder_q-layer.9": 750.3931, "epoch": 0.37, "inbatch_neg_score": 0.1743, "inbatch_pos_score": 0.8203, "learning_rate": 3.4611111111111114e-05, "loss": 3.3558, "norm_diff": 0.0259, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2022.0634, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1746, "query_norm": 1.3689, "queue_k_norm": 1.3992, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9609, "sent_len_1": 66.6854, "sent_max_len_0": 128.0, "sent_max_len_1": 191.8738, "stdk": 0.0481, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 37700 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.3727, "doc_norm": 1.3941, "encoder_q-embeddings": 655.854, "encoder_q-layer.0": 415.7728, "encoder_q-layer.1": 441.4001, "encoder_q-layer.10": 609.4597, "encoder_q-layer.11": 1376.1709, "encoder_q-layer.2": 494.6773, "encoder_q-layer.3": 547.4537, "encoder_q-layer.4": 573.0698, "encoder_q-layer.5": 615.4177, "encoder_q-layer.6": 704.8508, "encoder_q-layer.7": 783.7787, "encoder_q-layer.8": 810.6597, "encoder_q-layer.9": 672.3845, "epoch": 0.37, "inbatch_neg_score": 0.1785, "inbatch_pos_score": 0.772, "learning_rate": 3.4555555555555556e-05, "loss": 3.3727, "norm_diff": 0.0556, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1071.2842, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.177, "query_norm": 1.3386, "queue_k_norm": 1.3998, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7958, "sent_len_1": 66.8048, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6838, "stdk": 0.0481, "stdq": 0.0446, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 37800 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.343, "doc_norm": 1.4059, "encoder_q-embeddings": 1152.7859, "encoder_q-layer.0": 796.3542, "encoder_q-layer.1": 852.8484, "encoder_q-layer.10": 606.1483, "encoder_q-layer.11": 1303.1737, "encoder_q-layer.2": 1012.7851, "encoder_q-layer.3": 1028.9342, "encoder_q-layer.4": 1012.9362, "encoder_q-layer.5": 957.5239, "encoder_q-layer.6": 922.2787, "encoder_q-layer.7": 1007.7191, "encoder_q-layer.8": 990.0455, "encoder_q-layer.9": 676.4854, "epoch": 0.37, "inbatch_neg_score": 0.1735, "inbatch_pos_score": 0.8179, "learning_rate": 3.45e-05, "loss": 3.343, "norm_diff": 0.0565, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1462.337, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1742, "query_norm": 1.3495, "queue_k_norm": 1.3995, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9448, "sent_len_1": 66.834, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1025, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 37900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.3704, "doc_norm": 1.4043, "encoder_q-embeddings": 627.4984, "encoder_q-layer.0": 426.1138, "encoder_q-layer.1": 442.7548, "encoder_q-layer.10": 551.704, "encoder_q-layer.11": 1214.9602, "encoder_q-layer.2": 505.2847, "encoder_q-layer.3": 512.6962, "encoder_q-layer.4": 545.5739, "encoder_q-layer.5": 551.1111, "encoder_q-layer.6": 602.2524, "encoder_q-layer.7": 702.2904, "encoder_q-layer.8": 710.4362, "encoder_q-layer.9": 575.722, "epoch": 0.37, "inbatch_neg_score": 0.1731, "inbatch_pos_score": 0.8525, "learning_rate": 3.444444444444445e-05, "loss": 3.3704, "norm_diff": 0.0437, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 965.783, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1729, "query_norm": 1.3606, "queue_k_norm": 1.4014, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9389, "sent_len_1": 66.9929, "sent_max_len_0": 128.0, "sent_max_len_1": 191.175, "stdk": 0.0485, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 38000 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.3599, "doc_norm": 1.4029, "encoder_q-embeddings": 567.3127, "encoder_q-layer.0": 370.2695, "encoder_q-layer.1": 396.4028, "encoder_q-layer.10": 645.5457, "encoder_q-layer.11": 1353.7666, "encoder_q-layer.2": 444.5764, "encoder_q-layer.3": 480.0852, "encoder_q-layer.4": 518.1058, "encoder_q-layer.5": 566.8663, "encoder_q-layer.6": 625.0043, "encoder_q-layer.7": 731.7353, "encoder_q-layer.8": 823.8107, "encoder_q-layer.9": 691.2753, "epoch": 0.37, "inbatch_neg_score": 0.1783, "inbatch_pos_score": 0.8228, "learning_rate": 3.438888888888889e-05, "loss": 3.3599, "norm_diff": 0.0379, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1008.1262, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1788, "query_norm": 1.3649, "queue_k_norm": 1.4008, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0181, "sent_len_1": 66.8521, "sent_max_len_0": 128.0, "sent_max_len_1": 192.9013, "stdk": 0.0485, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 38100 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3264, "doc_norm": 1.4008, "encoder_q-embeddings": 578.1523, "encoder_q-layer.0": 356.1647, "encoder_q-layer.1": 371.0636, "encoder_q-layer.10": 604.4282, "encoder_q-layer.11": 1393.3163, "encoder_q-layer.2": 407.1126, "encoder_q-layer.3": 447.41, "encoder_q-layer.4": 473.8751, "encoder_q-layer.5": 482.5728, "encoder_q-layer.6": 572.4805, "encoder_q-layer.7": 657.5682, "encoder_q-layer.8": 767.1038, "encoder_q-layer.9": 636.5831, "epoch": 0.37, "inbatch_neg_score": 0.1768, "inbatch_pos_score": 0.8066, "learning_rate": 3.433333333333333e-05, "loss": 3.3264, "norm_diff": 0.0624, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 963.566, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1777, "query_norm": 1.3384, "queue_k_norm": 1.4001, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0683, "sent_len_1": 66.9894, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7562, "stdk": 0.0484, "stdq": 0.0445, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 38200 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.3809, "doc_norm": 1.4059, "encoder_q-embeddings": 3069.3687, "encoder_q-layer.0": 2096.8442, "encoder_q-layer.1": 2468.627, "encoder_q-layer.10": 561.3721, "encoder_q-layer.11": 1172.9507, "encoder_q-layer.2": 2876.323, "encoder_q-layer.3": 2932.1504, "encoder_q-layer.4": 3052.8401, "encoder_q-layer.5": 3077.189, "encoder_q-layer.6": 3328.6929, "encoder_q-layer.7": 3320.8752, "encoder_q-layer.8": 2786.1758, "encoder_q-layer.9": 996.637, "epoch": 0.37, "inbatch_neg_score": 0.1787, "inbatch_pos_score": 0.8477, "learning_rate": 3.427777777777778e-05, "loss": 3.3809, "norm_diff": 0.0392, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3989.5027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.177, "query_norm": 1.3666, "queue_k_norm": 1.4011, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1834, "sent_len_1": 66.6941, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9075, "stdk": 0.0485, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 38300 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.3489, "doc_norm": 1.3992, "encoder_q-embeddings": 711.3175, "encoder_q-layer.0": 497.7701, "encoder_q-layer.1": 566.0946, "encoder_q-layer.10": 577.7365, "encoder_q-layer.11": 1219.6321, "encoder_q-layer.2": 686.2197, "encoder_q-layer.3": 747.7931, "encoder_q-layer.4": 722.5842, "encoder_q-layer.5": 682.689, "encoder_q-layer.6": 728.0964, "encoder_q-layer.7": 798.6857, "encoder_q-layer.8": 770.2348, "encoder_q-layer.9": 608.45, "epoch": 0.37, "inbatch_neg_score": 0.1824, "inbatch_pos_score": 0.8682, "learning_rate": 3.4222222222222224e-05, "loss": 3.3489, "norm_diff": 0.011, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1091.3866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1824, "query_norm": 1.393, "queue_k_norm": 1.4012, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8102, "sent_len_1": 66.6387, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1712, "stdk": 0.0483, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 38400 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3354, "doc_norm": 1.4054, "encoder_q-embeddings": 1226.1396, "encoder_q-layer.0": 947.7741, "encoder_q-layer.1": 1121.4462, "encoder_q-layer.10": 591.4812, "encoder_q-layer.11": 1244.5713, "encoder_q-layer.2": 1264.2577, "encoder_q-layer.3": 1281.4403, "encoder_q-layer.4": 1502.3201, "encoder_q-layer.5": 1513.1021, "encoder_q-layer.6": 1736.5763, "encoder_q-layer.7": 1477.4039, "encoder_q-layer.8": 1559.1649, "encoder_q-layer.9": 1011.7104, "epoch": 0.38, "inbatch_neg_score": 0.1901, "inbatch_pos_score": 0.8379, "learning_rate": 3.4166666666666666e-05, "loss": 3.3354, "norm_diff": 0.0229, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1957.3813, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1895, "query_norm": 1.3825, "queue_k_norm": 1.4044, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0108, "sent_len_1": 66.6671, "sent_max_len_0": 128.0, "sent_max_len_1": 191.12, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 38500 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.3391, "doc_norm": 1.4026, "encoder_q-embeddings": 2252.5225, "encoder_q-layer.0": 1587.0424, "encoder_q-layer.1": 1882.9913, "encoder_q-layer.10": 632.8958, "encoder_q-layer.11": 1290.4493, "encoder_q-layer.2": 2389.4355, "encoder_q-layer.3": 2915.0642, "encoder_q-layer.4": 3456.1443, "encoder_q-layer.5": 4018.9807, "encoder_q-layer.6": 2517.394, "encoder_q-layer.7": 1783.557, "encoder_q-layer.8": 1543.5857, "encoder_q-layer.9": 925.7364, "epoch": 0.38, "inbatch_neg_score": 0.1963, "inbatch_pos_score": 0.875, "learning_rate": 3.411111111111111e-05, "loss": 3.3391, "norm_diff": 0.0192, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3347.3626, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1957, "query_norm": 1.4149, "queue_k_norm": 1.4026, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0125, "sent_len_1": 66.6625, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7388, "stdk": 0.0484, "stdq": 0.0465, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 38600 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3618, "doc_norm": 1.4031, "encoder_q-embeddings": 1291.7607, "encoder_q-layer.0": 868.8148, "encoder_q-layer.1": 1099.7252, "encoder_q-layer.10": 644.2998, "encoder_q-layer.11": 1315.7524, "encoder_q-layer.2": 1228.726, "encoder_q-layer.3": 1275.1582, "encoder_q-layer.4": 1254.2858, "encoder_q-layer.5": 1196.467, "encoder_q-layer.6": 1355.7427, "encoder_q-layer.7": 1523.1218, "encoder_q-layer.8": 2188.8145, "encoder_q-layer.9": 1357.9227, "epoch": 0.38, "inbatch_neg_score": 0.2015, "inbatch_pos_score": 0.8384, "learning_rate": 3.405555555555556e-05, "loss": 3.3618, "norm_diff": 0.0163, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1999.4109, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2007, "query_norm": 1.3872, "queue_k_norm": 1.4017, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.931, "sent_len_1": 66.7699, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6525, "stdk": 0.0483, "stdq": 0.0455, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 38700 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3687, "doc_norm": 1.3955, "encoder_q-embeddings": 603.1904, "encoder_q-layer.0": 386.6649, "encoder_q-layer.1": 414.2968, "encoder_q-layer.10": 599.7798, "encoder_q-layer.11": 1284.0865, "encoder_q-layer.2": 469.7938, "encoder_q-layer.3": 498.8611, "encoder_q-layer.4": 538.4988, "encoder_q-layer.5": 589.4727, "encoder_q-layer.6": 604.4183, "encoder_q-layer.7": 714.5781, "encoder_q-layer.8": 779.7006, "encoder_q-layer.9": 668.3961, "epoch": 0.38, "inbatch_neg_score": 0.2024, "inbatch_pos_score": 0.8682, "learning_rate": 3.4000000000000007e-05, "loss": 3.3687, "norm_diff": 0.0107, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 986.555, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2017, "query_norm": 1.3995, "queue_k_norm": 1.404, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0373, "sent_len_1": 66.8136, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6513, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 38800 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.37, "doc_norm": 1.4062, "encoder_q-embeddings": 571.4634, "encoder_q-layer.0": 374.6999, "encoder_q-layer.1": 384.1917, "encoder_q-layer.10": 582.7562, "encoder_q-layer.11": 1312.4373, "encoder_q-layer.2": 425.2076, "encoder_q-layer.3": 453.1045, "encoder_q-layer.4": 473.7238, "encoder_q-layer.5": 512.3858, "encoder_q-layer.6": 548.1695, "encoder_q-layer.7": 616.3169, "encoder_q-layer.8": 689.299, "encoder_q-layer.9": 587.3215, "epoch": 0.38, "inbatch_neg_score": 0.2108, "inbatch_pos_score": 0.8647, "learning_rate": 3.394444444444444e-05, "loss": 3.37, "norm_diff": 0.0337, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 945.1627, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2111, "query_norm": 1.3725, "queue_k_norm": 1.405, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8132, "sent_len_1": 66.8597, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 38900 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.354, "doc_norm": 1.4063, "encoder_q-embeddings": 790.3129, "encoder_q-layer.0": 527.9492, "encoder_q-layer.1": 594.5428, "encoder_q-layer.10": 606.434, "encoder_q-layer.11": 1264.8159, "encoder_q-layer.2": 740.6779, "encoder_q-layer.3": 797.2274, "encoder_q-layer.4": 830.6208, "encoder_q-layer.5": 859.8293, "encoder_q-layer.6": 891.1483, "encoder_q-layer.7": 1025.2632, "encoder_q-layer.8": 1086.4337, "encoder_q-layer.9": 779.0574, "epoch": 0.38, "inbatch_neg_score": 0.2159, "inbatch_pos_score": 0.9023, "learning_rate": 3.388888888888889e-05, "loss": 3.354, "norm_diff": 0.0137, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1270.3952, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2153, "query_norm": 1.4005, "queue_k_norm": 1.4056, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8772, "sent_len_1": 66.6243, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0938, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 39000 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.3483, "doc_norm": 1.4094, "encoder_q-embeddings": 706.5385, "encoder_q-layer.0": 479.006, "encoder_q-layer.1": 488.6994, "encoder_q-layer.10": 581.1353, "encoder_q-layer.11": 1328.0575, "encoder_q-layer.2": 574.3455, "encoder_q-layer.3": 644.5504, "encoder_q-layer.4": 669.93, "encoder_q-layer.5": 709.689, "encoder_q-layer.6": 783.1199, "encoder_q-layer.7": 768.435, "encoder_q-layer.8": 721.5502, "encoder_q-layer.9": 610.3312, "epoch": 0.38, "inbatch_neg_score": 0.2213, "inbatch_pos_score": 0.8799, "learning_rate": 3.3833333333333334e-05, "loss": 3.3483, "norm_diff": 0.0079, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1095.1661, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2197, "query_norm": 1.4015, "queue_k_norm": 1.4079, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8289, "sent_len_1": 66.6242, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7413, "stdk": 0.0484, "stdq": 0.046, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 39100 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3474, "doc_norm": 1.4031, "encoder_q-embeddings": 2467.8459, "encoder_q-layer.0": 1707.8228, "encoder_q-layer.1": 1668.4395, "encoder_q-layer.10": 653.9602, "encoder_q-layer.11": 1398.0372, "encoder_q-layer.2": 2076.1956, "encoder_q-layer.3": 2235.8005, "encoder_q-layer.4": 2250.3477, "encoder_q-layer.5": 2207.2896, "encoder_q-layer.6": 1859.1075, "encoder_q-layer.7": 1970.1234, "encoder_q-layer.8": 1474.5088, "encoder_q-layer.9": 757.8829, "epoch": 0.38, "inbatch_neg_score": 0.2228, "inbatch_pos_score": 0.8579, "learning_rate": 3.377777777777778e-05, "loss": 3.3474, "norm_diff": 0.0221, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2823.6359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2227, "query_norm": 1.381, "queue_k_norm": 1.4082, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.2848, "sent_len_1": 66.8116, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7962, "stdk": 0.0481, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 39200 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.3685, "doc_norm": 1.4094, "encoder_q-embeddings": 1938.427, "encoder_q-layer.0": 1378.1002, "encoder_q-layer.1": 1603.515, "encoder_q-layer.10": 622.4032, "encoder_q-layer.11": 1359.4736, "encoder_q-layer.2": 1923.1278, "encoder_q-layer.3": 2019.0319, "encoder_q-layer.4": 1965.865, "encoder_q-layer.5": 1972.4304, "encoder_q-layer.6": 1912.1078, "encoder_q-layer.7": 2039.6169, "encoder_q-layer.8": 1310.3932, "encoder_q-layer.9": 717.5338, "epoch": 0.38, "inbatch_neg_score": 0.2195, "inbatch_pos_score": 0.8501, "learning_rate": 3.3722222222222225e-05, "loss": 3.3685, "norm_diff": 0.0463, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2524.7761, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2191, "query_norm": 1.3631, "queue_k_norm": 1.4092, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9994, "sent_len_1": 66.4936, "sent_max_len_0": 128.0, "sent_max_len_1": 186.4525, "stdk": 0.0483, "stdq": 0.0448, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 39300 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.3299, "doc_norm": 1.4127, "encoder_q-embeddings": 757.2884, "encoder_q-layer.0": 496.2839, "encoder_q-layer.1": 511.6798, "encoder_q-layer.10": 584.9152, "encoder_q-layer.11": 1340.7513, "encoder_q-layer.2": 589.9082, "encoder_q-layer.3": 628.8683, "encoder_q-layer.4": 664.424, "encoder_q-layer.5": 702.7582, "encoder_q-layer.6": 714.1782, "encoder_q-layer.7": 697.1252, "encoder_q-layer.8": 732.8589, "encoder_q-layer.9": 607.4174, "epoch": 0.38, "inbatch_neg_score": 0.2175, "inbatch_pos_score": 0.874, "learning_rate": 3.366666666666667e-05, "loss": 3.3299, "norm_diff": 0.0312, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1084.9338, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2173, "query_norm": 1.3815, "queue_k_norm": 1.4108, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9318, "sent_len_1": 66.6555, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9663, "stdk": 0.0484, "stdq": 0.0456, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 39400 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.3419, "doc_norm": 1.4129, "encoder_q-embeddings": 576.2877, "encoder_q-layer.0": 354.9731, "encoder_q-layer.1": 364.5914, "encoder_q-layer.10": 620.0936, "encoder_q-layer.11": 1336.3499, "encoder_q-layer.2": 401.4544, "encoder_q-layer.3": 420.8601, "encoder_q-layer.4": 456.3818, "encoder_q-layer.5": 471.2786, "encoder_q-layer.6": 524.0115, "encoder_q-layer.7": 625.4122, "encoder_q-layer.8": 717.5291, "encoder_q-layer.9": 633.0168, "epoch": 0.39, "inbatch_neg_score": 0.2161, "inbatch_pos_score": 0.8643, "learning_rate": 3.3611111111111116e-05, "loss": 3.3419, "norm_diff": 0.0548, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 932.3785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2151, "query_norm": 1.3581, "queue_k_norm": 1.4093, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8663, "sent_len_1": 66.6706, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7637, "stdk": 0.0484, "stdq": 0.0447, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 39500 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3499, "doc_norm": 1.4145, "encoder_q-embeddings": 1200.2175, "encoder_q-layer.0": 789.0743, "encoder_q-layer.1": 873.2488, "encoder_q-layer.10": 1362.0583, "encoder_q-layer.11": 2638.896, "encoder_q-layer.2": 970.1713, "encoder_q-layer.3": 1034.9135, "encoder_q-layer.4": 1087.2527, "encoder_q-layer.5": 1120.6365, "encoder_q-layer.6": 1201.6266, "encoder_q-layer.7": 1299.5336, "encoder_q-layer.8": 1453.8374, "encoder_q-layer.9": 1245.3826, "epoch": 0.39, "inbatch_neg_score": 0.2117, "inbatch_pos_score": 0.8779, "learning_rate": 3.355555555555556e-05, "loss": 3.3499, "norm_diff": 0.0205, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1983.5373, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2122, "query_norm": 1.394, "queue_k_norm": 1.4131, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.095, "sent_len_1": 66.7927, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6763, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 39600 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.3832, "doc_norm": 1.4123, "encoder_q-embeddings": 1148.5396, "encoder_q-layer.0": 727.2261, "encoder_q-layer.1": 756.4799, "encoder_q-layer.10": 1129.6239, "encoder_q-layer.11": 2590.2024, "encoder_q-layer.2": 856.0329, "encoder_q-layer.3": 883.1859, "encoder_q-layer.4": 930.1815, "encoder_q-layer.5": 934.0659, "encoder_q-layer.6": 1024.8186, "encoder_q-layer.7": 1193.8624, "encoder_q-layer.8": 1351.6421, "encoder_q-layer.9": 1183.5057, "epoch": 0.39, "inbatch_neg_score": 0.2088, "inbatch_pos_score": 0.8931, "learning_rate": 3.35e-05, "loss": 3.3832, "norm_diff": 0.0183, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1830.8072, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2094, "query_norm": 1.394, "queue_k_norm": 1.4125, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.839, "sent_len_1": 66.8073, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9487, "stdk": 0.0484, "stdq": 0.0463, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 39700 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.378, "doc_norm": 1.4117, "encoder_q-embeddings": 1549.6987, "encoder_q-layer.0": 1000.3274, "encoder_q-layer.1": 1121.3597, "encoder_q-layer.10": 1315.4952, "encoder_q-layer.11": 2628.6235, "encoder_q-layer.2": 1297.7834, "encoder_q-layer.3": 1416.7506, "encoder_q-layer.4": 1580.7021, "encoder_q-layer.5": 1724.7223, "encoder_q-layer.6": 1839.4146, "encoder_q-layer.7": 1893.8639, "encoder_q-layer.8": 1692.6525, "encoder_q-layer.9": 1259.5851, "epoch": 0.39, "inbatch_neg_score": 0.204, "inbatch_pos_score": 0.8789, "learning_rate": 3.3444444444444443e-05, "loss": 3.378, "norm_diff": 0.0243, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2417.2484, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2054, "query_norm": 1.3873, "queue_k_norm": 1.4135, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0303, "sent_len_1": 66.6961, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1025, "stdk": 0.0484, "stdq": 0.0461, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 39800 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3403, "doc_norm": 1.4116, "encoder_q-embeddings": 2308.1023, "encoder_q-layer.0": 1552.54, "encoder_q-layer.1": 1704.9979, "encoder_q-layer.10": 1322.7167, "encoder_q-layer.11": 2664.7085, "encoder_q-layer.2": 1801.5869, "encoder_q-layer.3": 1954.4158, "encoder_q-layer.4": 2145.2529, "encoder_q-layer.5": 2204.6934, "encoder_q-layer.6": 2251.2336, "encoder_q-layer.7": 1993.2275, "encoder_q-layer.8": 1663.2617, "encoder_q-layer.9": 1340.5607, "epoch": 0.39, "inbatch_neg_score": 0.2035, "inbatch_pos_score": 0.8428, "learning_rate": 3.338888888888889e-05, "loss": 3.3403, "norm_diff": 0.0643, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2932.819, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2032, "query_norm": 1.3473, "queue_k_norm": 1.413, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1949, "sent_len_1": 66.7856, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7688, "stdk": 0.0483, "stdq": 0.0447, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 39900 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3639, "doc_norm": 1.4198, "encoder_q-embeddings": 1712.2888, "encoder_q-layer.0": 1112.1698, "encoder_q-layer.1": 1183.1207, "encoder_q-layer.10": 1124.5503, "encoder_q-layer.11": 2543.2493, "encoder_q-layer.2": 1286.7252, "encoder_q-layer.3": 1405.7896, "encoder_q-layer.4": 1580.1936, "encoder_q-layer.5": 1619.6897, "encoder_q-layer.6": 1450.9484, "encoder_q-layer.7": 1641.8936, "encoder_q-layer.8": 1520.4315, "encoder_q-layer.9": 1170.9102, "epoch": 0.39, "inbatch_neg_score": 0.1958, "inbatch_pos_score": 0.8613, "learning_rate": 3.3333333333333335e-05, "loss": 3.3639, "norm_diff": 0.0622, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2317.4195, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1974, "query_norm": 1.3576, "queue_k_norm": 1.413, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1821, "sent_len_1": 66.6918, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0213, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 40000 }, { "dev_runtime": 37.3792, "dev_samples_per_second": 1.712, "dev_steps_per_second": 0.027, "epoch": 0.39, "step": 40000, "test_accuracy": 93.9697265625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3441445231437683, "test_doc_norm": 1.415613055229187, "test_inbatch_neg_score": 0.5627651214599609, "test_inbatch_pos_score": 1.512505292892456, "test_loss": 0.3441445231437683, "test_loss_align": 1.0954631567001343, "test_loss_unif": 3.9128928184509277, "test_loss_unif_q@queue": 3.9128928184509277, "test_norm_diff": 0.02933865785598755, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.18375235795974731, "test_query_norm": 1.4449516534805298, "test_queue_k_norm": 1.4133580923080444, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0429464727640152, "test_stdq": 0.042964886873960495, "test_stdqueue_k": 0.048496000468730927, "test_stdqueue_q": 0.0 }, { "dev_runtime": 37.3792, "dev_samples_per_second": 1.712, "dev_steps_per_second": 0.027, "epoch": 0.39, "eval_beir-arguana_ndcg@10": 0.37345, "eval_beir-arguana_recall@10": 0.64011, "eval_beir-arguana_recall@100": 0.92817, "eval_beir-arguana_recall@20": 0.76956, "eval_beir-avg_ndcg@10": 0.3658995, "eval_beir-avg_recall@10": 0.43282233333333336, "eval_beir-avg_recall@100": 0.6096421666666667, "eval_beir-avg_recall@20": 0.4901745833333333, "eval_beir-cqadupstack_ndcg@10": 0.25832499999999997, "eval_beir-cqadupstack_recall@10": 0.34714333333333336, "eval_beir-cqadupstack_recall@100": 0.5682416666666666, "eval_beir-cqadupstack_recall@20": 0.41124583333333337, "eval_beir-fiqa_ndcg@10": 0.2009, "eval_beir-fiqa_recall@10": 0.25622, "eval_beir-fiqa_recall@100": 0.50335, "eval_beir-fiqa_recall@20": 0.31686, "eval_beir-nfcorpus_ndcg@10": 0.27813, "eval_beir-nfcorpus_recall@10": 0.13619, "eval_beir-nfcorpus_recall@100": 0.25987, "eval_beir-nfcorpus_recall@20": 0.16341, "eval_beir-nq_ndcg@10": 0.26477, "eval_beir-nq_recall@10": 0.4383, "eval_beir-nq_recall@100": 0.77622, "eval_beir-nq_recall@20": 0.55808, "eval_beir-quora_ndcg@10": 0.81506, "eval_beir-quora_recall@10": 0.91213, "eval_beir-quora_recall@100": 0.98424, "eval_beir-quora_recall@20": 0.94741, "eval_beir-scidocs_ndcg@10": 0.13649, "eval_beir-scidocs_recall@10": 0.14498, "eval_beir-scidocs_recall@100": 0.3353, "eval_beir-scidocs_recall@20": 0.19777, "eval_beir-scifact_ndcg@10": 0.59783, "eval_beir-scifact_recall@10": 0.75017, "eval_beir-scifact_recall@100": 0.91311, "eval_beir-scifact_recall@20": 0.8125, "eval_beir-trec-covid_ndcg@10": 0.5112, "eval_beir-trec-covid_recall@10": 0.544, "eval_beir-trec-covid_recall@100": 0.3836, "eval_beir-trec-covid_recall@20": 0.504, "eval_beir-webis-touche2020_ndcg@10": 0.22284, "eval_beir-webis-touche2020_recall@10": 0.15898, "eval_beir-webis-touche2020_recall@100": 0.44432, "eval_beir-webis-touche2020_recall@20": 0.22091, "eval_senteval-avg_sts": 0.7385578406967059, "eval_senteval-sickr_spearman": 0.7056836532643718, "eval_senteval-stsb_spearman": 0.77143202812904, "step": 40000, "test_accuracy": 93.9697265625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3441445231437683, "test_doc_norm": 1.415613055229187, "test_inbatch_neg_score": 0.5627651214599609, "test_inbatch_pos_score": 1.512505292892456, "test_loss": 0.3441445231437683, "test_loss_align": 1.0954631567001343, "test_loss_unif": 3.9128928184509277, "test_loss_unif_q@queue": 3.9128928184509277, "test_norm_diff": 0.02933865785598755, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.18375235795974731, "test_query_norm": 1.4449516534805298, "test_queue_k_norm": 1.4133580923080444, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0429464727640152, "test_stdq": 0.042964886873960495, "test_stdqueue_k": 0.048496000468730927, "test_stdqueue_q": 0.0 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.333, "doc_norm": 1.417, "encoder_q-embeddings": 3233.7769, "encoder_q-layer.0": 2166.4883, "encoder_q-layer.1": 2200.9351, "encoder_q-layer.10": 1134.8048, "encoder_q-layer.11": 2597.5698, "encoder_q-layer.2": 2550.4905, "encoder_q-layer.3": 2852.7803, "encoder_q-layer.4": 3008.3757, "encoder_q-layer.5": 2995.5044, "encoder_q-layer.6": 2683.9834, "encoder_q-layer.7": 2649.6411, "encoder_q-layer.8": 2316.0928, "encoder_q-layer.9": 1440.4847, "epoch": 0.39, "inbatch_neg_score": 0.1923, "inbatch_pos_score": 0.8301, "learning_rate": 3.327777777777778e-05, "loss": 3.333, "norm_diff": 0.0711, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3843.158, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1914, "query_norm": 1.3459, "queue_k_norm": 1.414, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.978, "sent_len_1": 66.9881, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2612, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 40100 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.3555, "doc_norm": 1.4154, "encoder_q-embeddings": 1312.0577, "encoder_q-layer.0": 792.7803, "encoder_q-layer.1": 841.4208, "encoder_q-layer.10": 1285.903, "encoder_q-layer.11": 2595.6926, "encoder_q-layer.2": 972.9858, "encoder_q-layer.3": 1014.6182, "encoder_q-layer.4": 1085.9027, "encoder_q-layer.5": 1185.4161, "encoder_q-layer.6": 1236.2623, "encoder_q-layer.7": 1489.2904, "encoder_q-layer.8": 1492.4631, "encoder_q-layer.9": 1355.6554, "epoch": 0.39, "inbatch_neg_score": 0.1856, "inbatch_pos_score": 0.8711, "learning_rate": 3.322222222222222e-05, "loss": 3.3555, "norm_diff": 0.0231, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1995.4671, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1851, "query_norm": 1.3923, "queue_k_norm": 1.413, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.949, "sent_len_1": 66.8894, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6525, "stdk": 0.0485, "stdq": 0.0467, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 40200 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.353, "doc_norm": 1.4148, "encoder_q-embeddings": 1295.3986, "encoder_q-layer.0": 893.2507, "encoder_q-layer.1": 926.0757, "encoder_q-layer.10": 1212.9961, "encoder_q-layer.11": 2667.6152, "encoder_q-layer.2": 1073.6384, "encoder_q-layer.3": 1106.9098, "encoder_q-layer.4": 1318.5913, "encoder_q-layer.5": 1296.5729, "encoder_q-layer.6": 1467.442, "encoder_q-layer.7": 1540.2766, "encoder_q-layer.8": 1585.2925, "encoder_q-layer.9": 1259.6162, "epoch": 0.39, "inbatch_neg_score": 0.1961, "inbatch_pos_score": 0.8359, "learning_rate": 3.316666666666667e-05, "loss": 3.353, "norm_diff": 0.0477, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2148.3447, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1971, "query_norm": 1.3672, "queue_k_norm": 1.4126, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8375, "sent_len_1": 66.7594, "sent_max_len_0": 128.0, "sent_max_len_1": 187.1438, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 40300 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3157, "doc_norm": 1.407, "encoder_q-embeddings": 1499.9557, "encoder_q-layer.0": 945.1443, "encoder_q-layer.1": 1025.5975, "encoder_q-layer.10": 1235.222, "encoder_q-layer.11": 2687.2605, "encoder_q-layer.2": 1163.568, "encoder_q-layer.3": 1280.6097, "encoder_q-layer.4": 1359.3488, "encoder_q-layer.5": 1302.4471, "encoder_q-layer.6": 1506.0605, "encoder_q-layer.7": 1759.688, "encoder_q-layer.8": 1744.7697, "encoder_q-layer.9": 1358.0847, "epoch": 0.39, "inbatch_neg_score": 0.1938, "inbatch_pos_score": 0.8223, "learning_rate": 3.311111111111112e-05, "loss": 3.3157, "norm_diff": 0.054, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2279.0202, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1941, "query_norm": 1.353, "queue_k_norm": 1.4133, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1128, "sent_len_1": 66.8896, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9325, "stdk": 0.0482, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 40400 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.3395, "doc_norm": 1.415, "encoder_q-embeddings": 1482.4994, "encoder_q-layer.0": 1078.2573, "encoder_q-layer.1": 1100.6616, "encoder_q-layer.10": 1059.0797, "encoder_q-layer.11": 2365.8459, "encoder_q-layer.2": 1254.5537, "encoder_q-layer.3": 1342.7682, "encoder_q-layer.4": 1418.6234, "encoder_q-layer.5": 1410.4874, "encoder_q-layer.6": 1386.5547, "encoder_q-layer.7": 1478.4985, "encoder_q-layer.8": 1501.1417, "encoder_q-layer.9": 1201.2959, "epoch": 0.4, "inbatch_neg_score": 0.1989, "inbatch_pos_score": 0.8779, "learning_rate": 3.3055555555555553e-05, "loss": 3.3395, "norm_diff": 0.0332, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2148.7766, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1996, "query_norm": 1.3818, "queue_k_norm": 1.413, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9781, "sent_len_1": 67.1243, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4938, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 40500 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 3.3667, "doc_norm": 1.4157, "encoder_q-embeddings": 5215.4824, "encoder_q-layer.0": 3746.6145, "encoder_q-layer.1": 3617.563, "encoder_q-layer.10": 1191.9928, "encoder_q-layer.11": 2419.4578, "encoder_q-layer.2": 4326.498, "encoder_q-layer.3": 4866.5679, "encoder_q-layer.4": 5382.4336, "encoder_q-layer.5": 4525.0581, "encoder_q-layer.6": 4216.376, "encoder_q-layer.7": 3911.5842, "encoder_q-layer.8": 2579.3855, "encoder_q-layer.9": 1243.4432, "epoch": 0.4, "inbatch_neg_score": 0.1973, "inbatch_pos_score": 0.8955, "learning_rate": 3.3e-05, "loss": 3.3667, "norm_diff": 0.0423, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5978.8301, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1981, "query_norm": 1.3735, "queue_k_norm": 1.4114, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9598, "sent_len_1": 66.6363, "sent_max_len_0": 128.0, "sent_max_len_1": 188.025, "stdk": 0.0486, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 40600 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3321, "doc_norm": 1.4174, "encoder_q-embeddings": 1146.5979, "encoder_q-layer.0": 726.337, "encoder_q-layer.1": 759.117, "encoder_q-layer.10": 1282.8011, "encoder_q-layer.11": 2601.7727, "encoder_q-layer.2": 844.0895, "encoder_q-layer.3": 896.6704, "encoder_q-layer.4": 1015.1094, "encoder_q-layer.5": 1046.1499, "encoder_q-layer.6": 1147.4677, "encoder_q-layer.7": 1204.4374, "encoder_q-layer.8": 1408.2594, "encoder_q-layer.9": 1231.9224, "epoch": 0.4, "inbatch_neg_score": 0.1914, "inbatch_pos_score": 0.856, "learning_rate": 3.2944444444444445e-05, "loss": 3.3321, "norm_diff": 0.0359, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1909.4298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1912, "query_norm": 1.3815, "queue_k_norm": 1.4144, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9772, "sent_len_1": 66.9108, "sent_max_len_0": 128.0, "sent_max_len_1": 189.78, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 40700 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.35, "doc_norm": 1.4146, "encoder_q-embeddings": 5310.4268, "encoder_q-layer.0": 3904.5044, "encoder_q-layer.1": 3830.0532, "encoder_q-layer.10": 1200.8978, "encoder_q-layer.11": 2596.0166, "encoder_q-layer.2": 3857.1084, "encoder_q-layer.3": 3654.3933, "encoder_q-layer.4": 4104.5225, "encoder_q-layer.5": 4191.1934, "encoder_q-layer.6": 4490.9702, "encoder_q-layer.7": 4800.6152, "encoder_q-layer.8": 5841.5347, "encoder_q-layer.9": 2988.4592, "epoch": 0.4, "inbatch_neg_score": 0.1849, "inbatch_pos_score": 0.8213, "learning_rate": 3.2888888888888894e-05, "loss": 3.35, "norm_diff": 0.0525, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6474.436, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1852, "query_norm": 1.3621, "queue_k_norm": 1.4121, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8642, "sent_len_1": 66.8761, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9737, "stdk": 0.0485, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 40800 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3298, "doc_norm": 1.4108, "encoder_q-embeddings": 1585.4557, "encoder_q-layer.0": 1083.2382, "encoder_q-layer.1": 1204.5563, "encoder_q-layer.10": 1216.8109, "encoder_q-layer.11": 2544.6008, "encoder_q-layer.2": 1388.3041, "encoder_q-layer.3": 1431.5133, "encoder_q-layer.4": 1442.0792, "encoder_q-layer.5": 1497.8756, "encoder_q-layer.6": 1708.5956, "encoder_q-layer.7": 1760.917, "encoder_q-layer.8": 1708.4431, "encoder_q-layer.9": 1353.2325, "epoch": 0.4, "inbatch_neg_score": 0.1847, "inbatch_pos_score": 0.8369, "learning_rate": 3.283333333333333e-05, "loss": 3.3298, "norm_diff": 0.0495, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2353.67, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1849, "query_norm": 1.3612, "queue_k_norm": 1.4114, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8692, "sent_len_1": 66.8079, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7363, "stdk": 0.0484, "stdq": 0.0452, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 40900 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.3231, "doc_norm": 1.4149, "encoder_q-embeddings": 1676.5778, "encoder_q-layer.0": 1227.3209, "encoder_q-layer.1": 1347.6742, "encoder_q-layer.10": 1154.902, "encoder_q-layer.11": 2362.2046, "encoder_q-layer.2": 1612.9594, "encoder_q-layer.3": 1774.3479, "encoder_q-layer.4": 2004.0876, "encoder_q-layer.5": 2022.5013, "encoder_q-layer.6": 2109.1221, "encoder_q-layer.7": 2312.6245, "encoder_q-layer.8": 2431.5188, "encoder_q-layer.9": 1453.7163, "epoch": 0.4, "inbatch_neg_score": 0.1846, "inbatch_pos_score": 0.8589, "learning_rate": 3.277777777777778e-05, "loss": 3.3231, "norm_diff": 0.0385, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2715.7326, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1844, "query_norm": 1.3764, "queue_k_norm": 1.4113, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0024, "sent_len_1": 66.7786, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9112, "stdk": 0.0485, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 41000 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3638, "doc_norm": 1.4071, "encoder_q-embeddings": 1596.5911, "encoder_q-layer.0": 1084.8865, "encoder_q-layer.1": 1173.3629, "encoder_q-layer.10": 1214.7642, "encoder_q-layer.11": 2570.7539, "encoder_q-layer.2": 1345.5277, "encoder_q-layer.3": 1478.833, "encoder_q-layer.4": 1576.0919, "encoder_q-layer.5": 1676.6656, "encoder_q-layer.6": 1778.6035, "encoder_q-layer.7": 1941.8195, "encoder_q-layer.8": 1806.7634, "encoder_q-layer.9": 1325.3214, "epoch": 0.4, "inbatch_neg_score": 0.1925, "inbatch_pos_score": 0.855, "learning_rate": 3.272222222222223e-05, "loss": 3.3638, "norm_diff": 0.0138, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2383.9609, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1919, "query_norm": 1.3933, "queue_k_norm": 1.4119, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0518, "sent_len_1": 66.6861, "sent_max_len_0": 128.0, "sent_max_len_1": 188.215, "stdk": 0.0483, "stdq": 0.046, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 41100 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.361, "doc_norm": 1.4181, "encoder_q-embeddings": 651.9974, "encoder_q-layer.0": 454.0811, "encoder_q-layer.1": 481.657, "encoder_q-layer.10": 582.196, "encoder_q-layer.11": 1317.8243, "encoder_q-layer.2": 559.9383, "encoder_q-layer.3": 569.7593, "encoder_q-layer.4": 627.4794, "encoder_q-layer.5": 594.5319, "encoder_q-layer.6": 653.8876, "encoder_q-layer.7": 719.9487, "encoder_q-layer.8": 784.9109, "encoder_q-layer.9": 645.6687, "epoch": 0.4, "inbatch_neg_score": 0.2043, "inbatch_pos_score": 0.8623, "learning_rate": 3.266666666666667e-05, "loss": 3.361, "norm_diff": 0.035, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1054.0056, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2036, "query_norm": 1.3831, "queue_k_norm": 1.4117, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8879, "sent_len_1": 66.867, "sent_max_len_0": 128.0, "sent_max_len_1": 190.18, "stdk": 0.0487, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 41200 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.333, "doc_norm": 1.4172, "encoder_q-embeddings": 881.0007, "encoder_q-layer.0": 627.4488, "encoder_q-layer.1": 682.9316, "encoder_q-layer.10": 588.9636, "encoder_q-layer.11": 1305.8643, "encoder_q-layer.2": 828.9022, "encoder_q-layer.3": 906.1451, "encoder_q-layer.4": 1053.873, "encoder_q-layer.5": 1152.8063, "encoder_q-layer.6": 1304.0927, "encoder_q-layer.7": 1513.939, "encoder_q-layer.8": 1534.394, "encoder_q-layer.9": 975.4919, "epoch": 0.4, "inbatch_neg_score": 0.1964, "inbatch_pos_score": 0.8418, "learning_rate": 3.261111111111111e-05, "loss": 3.333, "norm_diff": 0.0317, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1588.3506, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1963, "query_norm": 1.3855, "queue_k_norm": 1.4127, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.6193, "sent_len_1": 66.6505, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2413, "stdk": 0.0486, "stdq": 0.0456, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 41300 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3662, "doc_norm": 1.4129, "encoder_q-embeddings": 948.6764, "encoder_q-layer.0": 626.0134, "encoder_q-layer.1": 703.9127, "encoder_q-layer.10": 666.0769, "encoder_q-layer.11": 1320.6569, "encoder_q-layer.2": 763.251, "encoder_q-layer.3": 782.788, "encoder_q-layer.4": 818.9739, "encoder_q-layer.5": 901.8481, "encoder_q-layer.6": 969.7435, "encoder_q-layer.7": 1185.9785, "encoder_q-layer.8": 1242.5299, "encoder_q-layer.9": 736.8083, "epoch": 0.4, "inbatch_neg_score": 0.2035, "inbatch_pos_score": 0.8701, "learning_rate": 3.2555555555555555e-05, "loss": 3.3662, "norm_diff": 0.0214, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1376.6371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2041, "query_norm": 1.3997, "queue_k_norm": 1.4138, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.043, "sent_len_1": 66.8378, "sent_max_len_0": 128.0, "sent_max_len_1": 187.81, "stdk": 0.0485, "stdq": 0.046, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 41400 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3347, "doc_norm": 1.41, "encoder_q-embeddings": 709.8692, "encoder_q-layer.0": 478.3379, "encoder_q-layer.1": 504.4007, "encoder_q-layer.10": 681.1744, "encoder_q-layer.11": 1298.7441, "encoder_q-layer.2": 564.3962, "encoder_q-layer.3": 623.1088, "encoder_q-layer.4": 657.1346, "encoder_q-layer.5": 671.4012, "encoder_q-layer.6": 742.2836, "encoder_q-layer.7": 855.5963, "encoder_q-layer.8": 1140.4753, "encoder_q-layer.9": 904.9538, "epoch": 0.41, "inbatch_neg_score": 0.2103, "inbatch_pos_score": 0.8574, "learning_rate": 3.2500000000000004e-05, "loss": 3.3347, "norm_diff": 0.0152, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1165.7722, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.209, "query_norm": 1.3976, "queue_k_norm": 1.4112, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9307, "sent_len_1": 66.7849, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1687, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 41500 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.3183, "doc_norm": 1.4108, "encoder_q-embeddings": 644.4532, "encoder_q-layer.0": 400.3166, "encoder_q-layer.1": 435.7461, "encoder_q-layer.10": 627.6603, "encoder_q-layer.11": 1395.3156, "encoder_q-layer.2": 489.9265, "encoder_q-layer.3": 528.2087, "encoder_q-layer.4": 545.0742, "encoder_q-layer.5": 624.3137, "encoder_q-layer.6": 724.6846, "encoder_q-layer.7": 912.7904, "encoder_q-layer.8": 1157.3857, "encoder_q-layer.9": 904.647, "epoch": 0.41, "inbatch_neg_score": 0.2164, "inbatch_pos_score": 0.8774, "learning_rate": 3.2444444444444446e-05, "loss": 3.3183, "norm_diff": 0.009, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1176.9049, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2161, "query_norm": 1.4058, "queue_k_norm": 1.4118, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8785, "sent_len_1": 66.6637, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7562, "stdk": 0.0484, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 41600 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.358, "doc_norm": 1.4149, "encoder_q-embeddings": 651.5169, "encoder_q-layer.0": 416.6948, "encoder_q-layer.1": 490.3955, "encoder_q-layer.10": 628.271, "encoder_q-layer.11": 1381.8673, "encoder_q-layer.2": 597.9086, "encoder_q-layer.3": 608.101, "encoder_q-layer.4": 628.2769, "encoder_q-layer.5": 660.2829, "encoder_q-layer.6": 683.5198, "encoder_q-layer.7": 759.3759, "encoder_q-layer.8": 748.0917, "encoder_q-layer.9": 616.4691, "epoch": 0.41, "inbatch_neg_score": 0.2182, "inbatch_pos_score": 0.8701, "learning_rate": 3.238888888888889e-05, "loss": 3.358, "norm_diff": 0.0433, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1072.2726, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2183, "query_norm": 1.3716, "queue_k_norm": 1.4129, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7131, "sent_len_1": 66.5699, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5863, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 41700 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3211, "doc_norm": 1.4112, "encoder_q-embeddings": 772.6971, "encoder_q-layer.0": 492.7506, "encoder_q-layer.1": 606.5897, "encoder_q-layer.10": 579.0443, "encoder_q-layer.11": 1296.6702, "encoder_q-layer.2": 726.4323, "encoder_q-layer.3": 789.9049, "encoder_q-layer.4": 784.8657, "encoder_q-layer.5": 777.8354, "encoder_q-layer.6": 835.2467, "encoder_q-layer.7": 824.7648, "encoder_q-layer.8": 786.7727, "encoder_q-layer.9": 625.9105, "epoch": 0.41, "inbatch_neg_score": 0.2192, "inbatch_pos_score": 0.8911, "learning_rate": 3.233333333333333e-05, "loss": 3.3211, "norm_diff": 0.0298, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1166.9606, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.219, "query_norm": 1.3813, "queue_k_norm": 1.4133, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9235, "sent_len_1": 66.7277, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3938, "stdk": 0.0483, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 41800 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3322, "doc_norm": 1.413, "encoder_q-embeddings": 991.7081, "encoder_q-layer.0": 655.4652, "encoder_q-layer.1": 780.5581, "encoder_q-layer.10": 590.3641, "encoder_q-layer.11": 1272.3623, "encoder_q-layer.2": 935.1348, "encoder_q-layer.3": 906.8386, "encoder_q-layer.4": 1054.3077, "encoder_q-layer.5": 1034.5135, "encoder_q-layer.6": 1089.2478, "encoder_q-layer.7": 1194.8716, "encoder_q-layer.8": 931.3466, "encoder_q-layer.9": 642.7172, "epoch": 0.41, "inbatch_neg_score": 0.2181, "inbatch_pos_score": 0.8848, "learning_rate": 3.227777777777778e-05, "loss": 3.3322, "norm_diff": 0.038, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1381.1061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2181, "query_norm": 1.375, "queue_k_norm": 1.4148, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.2039, "sent_len_1": 67.0027, "sent_max_len_0": 128.0, "sent_max_len_1": 188.715, "stdk": 0.0484, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 41900 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.3387, "doc_norm": 1.4151, "encoder_q-embeddings": 685.9819, "encoder_q-layer.0": 434.4364, "encoder_q-layer.1": 463.9839, "encoder_q-layer.10": 568.3271, "encoder_q-layer.11": 1287.0354, "encoder_q-layer.2": 525.3226, "encoder_q-layer.3": 561.5266, "encoder_q-layer.4": 613.5241, "encoder_q-layer.5": 561.6017, "encoder_q-layer.6": 576.6347, "encoder_q-layer.7": 621.4035, "encoder_q-layer.8": 690.1999, "encoder_q-layer.9": 584.9044, "epoch": 0.41, "inbatch_neg_score": 0.2143, "inbatch_pos_score": 0.8462, "learning_rate": 3.222222222222223e-05, "loss": 3.3387, "norm_diff": 0.0571, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 989.1467, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2142, "query_norm": 1.358, "queue_k_norm": 1.4155, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9631, "sent_len_1": 66.8029, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6962, "stdk": 0.0484, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 42000 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.3566, "doc_norm": 1.4181, "encoder_q-embeddings": 871.4882, "encoder_q-layer.0": 606.5714, "encoder_q-layer.1": 643.5759, "encoder_q-layer.10": 621.8083, "encoder_q-layer.11": 1331.6365, "encoder_q-layer.2": 741.368, "encoder_q-layer.3": 781.0949, "encoder_q-layer.4": 826.465, "encoder_q-layer.5": 854.5236, "encoder_q-layer.6": 857.2499, "encoder_q-layer.7": 967.4739, "encoder_q-layer.8": 933.974, "encoder_q-layer.9": 681.6948, "epoch": 0.41, "inbatch_neg_score": 0.2131, "inbatch_pos_score": 0.8438, "learning_rate": 3.2166666666666665e-05, "loss": 3.3566, "norm_diff": 0.061, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1264.5897, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2131, "query_norm": 1.3572, "queue_k_norm": 1.4154, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9644, "sent_len_1": 66.5739, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8725, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 42100 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.3255, "doc_norm": 1.4164, "encoder_q-embeddings": 1921.0294, "encoder_q-layer.0": 1611.7421, "encoder_q-layer.1": 1869.7224, "encoder_q-layer.10": 610.2836, "encoder_q-layer.11": 1311.0249, "encoder_q-layer.2": 1833.1721, "encoder_q-layer.3": 1735.321, "encoder_q-layer.4": 1861.1237, "encoder_q-layer.5": 1904.1305, "encoder_q-layer.6": 2038.0642, "encoder_q-layer.7": 1703.3829, "encoder_q-layer.8": 1303.5468, "encoder_q-layer.9": 678.4182, "epoch": 0.41, "inbatch_neg_score": 0.2137, "inbatch_pos_score": 0.8804, "learning_rate": 3.2111111111111114e-05, "loss": 3.3255, "norm_diff": 0.0469, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2470.1605, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2125, "query_norm": 1.3695, "queue_k_norm": 1.4168, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0218, "sent_len_1": 66.6533, "sent_max_len_0": 128.0, "sent_max_len_1": 190.035, "stdk": 0.0485, "stdq": 0.0455, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 42200 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.3255, "doc_norm": 1.4199, "encoder_q-embeddings": 618.2369, "encoder_q-layer.0": 413.3218, "encoder_q-layer.1": 463.7318, "encoder_q-layer.10": 559.4675, "encoder_q-layer.11": 1218.4253, "encoder_q-layer.2": 592.8334, "encoder_q-layer.3": 627.7633, "encoder_q-layer.4": 688.7883, "encoder_q-layer.5": 698.4971, "encoder_q-layer.6": 744.4785, "encoder_q-layer.7": 824.3997, "encoder_q-layer.8": 816.7671, "encoder_q-layer.9": 610.4033, "epoch": 0.41, "inbatch_neg_score": 0.2068, "inbatch_pos_score": 0.8882, "learning_rate": 3.2055555555555556e-05, "loss": 3.3255, "norm_diff": 0.0258, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1043.2435, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2074, "query_norm": 1.3941, "queue_k_norm": 1.417, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9682, "sent_len_1": 66.7896, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7663, "stdk": 0.0486, "stdq": 0.0465, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 42300 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.3321, "doc_norm": 1.419, "encoder_q-embeddings": 667.4397, "encoder_q-layer.0": 463.5435, "encoder_q-layer.1": 453.595, "encoder_q-layer.10": 602.5529, "encoder_q-layer.11": 1290.707, "encoder_q-layer.2": 513.7106, "encoder_q-layer.3": 541.5753, "encoder_q-layer.4": 557.9749, "encoder_q-layer.5": 586.0375, "encoder_q-layer.6": 642.5804, "encoder_q-layer.7": 727.7101, "encoder_q-layer.8": 757.5709, "encoder_q-layer.9": 596.0701, "epoch": 0.41, "inbatch_neg_score": 0.2033, "inbatch_pos_score": 0.9004, "learning_rate": 3.2000000000000005e-05, "loss": 3.3321, "norm_diff": 0.0354, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1020.3585, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2054, "query_norm": 1.3836, "queue_k_norm": 1.4177, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9438, "sent_len_1": 66.6694, "sent_max_len_0": 128.0, "sent_max_len_1": 190.025, "stdk": 0.0486, "stdq": 0.0463, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 42400 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.3608, "doc_norm": 1.4197, "encoder_q-embeddings": 640.9758, "encoder_q-layer.0": 457.6689, "encoder_q-layer.1": 526.881, "encoder_q-layer.10": 620.9788, "encoder_q-layer.11": 1265.7422, "encoder_q-layer.2": 648.8992, "encoder_q-layer.3": 612.3759, "encoder_q-layer.4": 619.426, "encoder_q-layer.5": 642.4967, "encoder_q-layer.6": 682.1539, "encoder_q-layer.7": 872.7705, "encoder_q-layer.8": 826.871, "encoder_q-layer.9": 670.5676, "epoch": 0.41, "inbatch_neg_score": 0.2048, "inbatch_pos_score": 0.8867, "learning_rate": 3.194444444444444e-05, "loss": 3.3608, "norm_diff": 0.0458, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1075.3256, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2036, "query_norm": 1.3738, "queue_k_norm": 1.416, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9105, "sent_len_1": 66.56, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9762, "stdk": 0.0486, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 42500 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3175, "doc_norm": 1.4142, "encoder_q-embeddings": 888.2637, "encoder_q-layer.0": 628.2617, "encoder_q-layer.1": 696.5986, "encoder_q-layer.10": 646.1299, "encoder_q-layer.11": 1317.4252, "encoder_q-layer.2": 766.0026, "encoder_q-layer.3": 793.8303, "encoder_q-layer.4": 791.4348, "encoder_q-layer.5": 831.7206, "encoder_q-layer.6": 866.4728, "encoder_q-layer.7": 888.8124, "encoder_q-layer.8": 854.0746, "encoder_q-layer.9": 677.5018, "epoch": 0.42, "inbatch_neg_score": 0.2001, "inbatch_pos_score": 0.8564, "learning_rate": 3.188888888888889e-05, "loss": 3.3175, "norm_diff": 0.0518, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1251.3704, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2007, "query_norm": 1.3625, "queue_k_norm": 1.417, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9007, "sent_len_1": 66.724, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7063, "stdk": 0.0484, "stdq": 0.0456, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 42600 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.3132, "doc_norm": 1.4262, "encoder_q-embeddings": 646.3419, "encoder_q-layer.0": 450.6562, "encoder_q-layer.1": 480.3102, "encoder_q-layer.10": 585.9885, "encoder_q-layer.11": 1276.0741, "encoder_q-layer.2": 533.89, "encoder_q-layer.3": 544.4371, "encoder_q-layer.4": 563.3469, "encoder_q-layer.5": 583.7206, "encoder_q-layer.6": 592.3884, "encoder_q-layer.7": 696.0845, "encoder_q-layer.8": 731.8017, "encoder_q-layer.9": 604.5693, "epoch": 0.42, "inbatch_neg_score": 0.2042, "inbatch_pos_score": 0.9004, "learning_rate": 3.183333333333334e-05, "loss": 3.3132, "norm_diff": 0.0546, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1000.3268, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2036, "query_norm": 1.3715, "queue_k_norm": 1.4165, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9338, "sent_len_1": 66.6621, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8638, "stdk": 0.0488, "stdq": 0.0459, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 42700 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.3339, "doc_norm": 1.4142, "encoder_q-embeddings": 565.9507, "encoder_q-layer.0": 356.8761, "encoder_q-layer.1": 382.2411, "encoder_q-layer.10": 622.2243, "encoder_q-layer.11": 1338.1033, "encoder_q-layer.2": 427.2748, "encoder_q-layer.3": 452.442, "encoder_q-layer.4": 468.8776, "encoder_q-layer.5": 483.1323, "encoder_q-layer.6": 559.0262, "encoder_q-layer.7": 670.1913, "encoder_q-layer.8": 742.8456, "encoder_q-layer.9": 647.2837, "epoch": 0.42, "inbatch_neg_score": 0.2058, "inbatch_pos_score": 0.8828, "learning_rate": 3.177777777777778e-05, "loss": 3.3339, "norm_diff": 0.0369, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 945.7569, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2052, "query_norm": 1.3773, "queue_k_norm": 1.418, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9515, "sent_len_1": 66.7725, "sent_max_len_0": 128.0, "sent_max_len_1": 189.22, "stdk": 0.0484, "stdq": 0.0461, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 42800 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.3196, "doc_norm": 1.422, "encoder_q-embeddings": 652.2833, "encoder_q-layer.0": 427.7899, "encoder_q-layer.1": 459.8958, "encoder_q-layer.10": 617.1335, "encoder_q-layer.11": 1294.7386, "encoder_q-layer.2": 526.9139, "encoder_q-layer.3": 587.7241, "encoder_q-layer.4": 629.0897, "encoder_q-layer.5": 649.8214, "encoder_q-layer.6": 735.3761, "encoder_q-layer.7": 901.1779, "encoder_q-layer.8": 939.2888, "encoder_q-layer.9": 766.4161, "epoch": 0.42, "inbatch_neg_score": 0.211, "inbatch_pos_score": 0.8887, "learning_rate": 3.1722222222222224e-05, "loss": 3.3196, "norm_diff": 0.0597, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1103.248, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2109, "query_norm": 1.3622, "queue_k_norm": 1.4168, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9112, "sent_len_1": 66.7907, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0288, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 42900 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3412, "doc_norm": 1.4182, "encoder_q-embeddings": 652.647, "encoder_q-layer.0": 415.5762, "encoder_q-layer.1": 442.8323, "encoder_q-layer.10": 650.6154, "encoder_q-layer.11": 1426.2939, "encoder_q-layer.2": 485.8511, "encoder_q-layer.3": 538.4459, "encoder_q-layer.4": 582.2295, "encoder_q-layer.5": 631.8797, "encoder_q-layer.6": 694.65, "encoder_q-layer.7": 749.7224, "encoder_q-layer.8": 793.1775, "encoder_q-layer.9": 681.6508, "epoch": 0.42, "inbatch_neg_score": 0.2114, "inbatch_pos_score": 0.8467, "learning_rate": 3.1666666666666666e-05, "loss": 3.3412, "norm_diff": 0.0498, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1074.2289, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2113, "query_norm": 1.3684, "queue_k_norm": 1.4196, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9238, "sent_len_1": 66.9152, "sent_max_len_0": 128.0, "sent_max_len_1": 190.59, "stdk": 0.0485, "stdq": 0.0455, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 43000 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.3403, "doc_norm": 1.4247, "encoder_q-embeddings": 324.053, "encoder_q-layer.0": 216.7407, "encoder_q-layer.1": 255.9641, "encoder_q-layer.10": 318.1435, "encoder_q-layer.11": 676.2004, "encoder_q-layer.2": 301.6189, "encoder_q-layer.3": 321.2693, "encoder_q-layer.4": 392.086, "encoder_q-layer.5": 382.9891, "encoder_q-layer.6": 449.8855, "encoder_q-layer.7": 437.3492, "encoder_q-layer.8": 397.9259, "encoder_q-layer.9": 316.1306, "epoch": 0.42, "inbatch_neg_score": 0.2081, "inbatch_pos_score": 0.8545, "learning_rate": 3.1611111111111115e-05, "loss": 3.3403, "norm_diff": 0.0688, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 568.2427, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2076, "query_norm": 1.3559, "queue_k_norm": 1.4182, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9151, "sent_len_1": 66.6933, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5637, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 43100 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.3147, "doc_norm": 1.4173, "encoder_q-embeddings": 642.0334, "encoder_q-layer.0": 447.5197, "encoder_q-layer.1": 441.7097, "encoder_q-layer.10": 308.4878, "encoder_q-layer.11": 668.8505, "encoder_q-layer.2": 490.8317, "encoder_q-layer.3": 509.7571, "encoder_q-layer.4": 579.9095, "encoder_q-layer.5": 565.4186, "encoder_q-layer.6": 627.3035, "encoder_q-layer.7": 767.9724, "encoder_q-layer.8": 640.5216, "encoder_q-layer.9": 335.2906, "epoch": 0.42, "inbatch_neg_score": 0.2048, "inbatch_pos_score": 0.8511, "learning_rate": 3.155555555555556e-05, "loss": 3.3147, "norm_diff": 0.0469, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 828.5536, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2039, "query_norm": 1.3704, "queue_k_norm": 1.4176, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9996, "sent_len_1": 66.5101, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0163, "stdk": 0.0485, "stdq": 0.0458, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 43200 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.3457, "doc_norm": 1.4139, "encoder_q-embeddings": 643.7472, "encoder_q-layer.0": 433.6754, "encoder_q-layer.1": 489.727, "encoder_q-layer.10": 275.3564, "encoder_q-layer.11": 602.9526, "encoder_q-layer.2": 580.1151, "encoder_q-layer.3": 626.0225, "encoder_q-layer.4": 586.2537, "encoder_q-layer.5": 560.5406, "encoder_q-layer.6": 552.6998, "encoder_q-layer.7": 493.8007, "encoder_q-layer.8": 386.7263, "encoder_q-layer.9": 288.9821, "epoch": 0.42, "inbatch_neg_score": 0.1956, "inbatch_pos_score": 0.8784, "learning_rate": 3.15e-05, "loss": 3.3457, "norm_diff": 0.0456, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 786.5784, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1962, "query_norm": 1.3683, "queue_k_norm": 1.4175, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8636, "sent_len_1": 66.7243, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6825, "stdk": 0.0484, "stdq": 0.0458, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 43300 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.2983, "doc_norm": 1.419, "encoder_q-embeddings": 1714.1003, "encoder_q-layer.0": 1258.0769, "encoder_q-layer.1": 1255.7178, "encoder_q-layer.10": 352.252, "encoder_q-layer.11": 672.8743, "encoder_q-layer.2": 1478.099, "encoder_q-layer.3": 1699.9553, "encoder_q-layer.4": 1821.2969, "encoder_q-layer.5": 1572.5426, "encoder_q-layer.6": 1852.5077, "encoder_q-layer.7": 1883.7384, "encoder_q-layer.8": 1264.2515, "encoder_q-layer.9": 418.8791, "epoch": 0.42, "inbatch_neg_score": 0.1932, "inbatch_pos_score": 0.8486, "learning_rate": 3.144444444444445e-05, "loss": 3.2983, "norm_diff": 0.0493, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2184.948, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.194, "query_norm": 1.3697, "queue_k_norm": 1.4172, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0522, "sent_len_1": 66.7646, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9563, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 43400 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.3228, "doc_norm": 1.4201, "encoder_q-embeddings": 547.2619, "encoder_q-layer.0": 370.1842, "encoder_q-layer.1": 368.4086, "encoder_q-layer.10": 319.984, "encoder_q-layer.11": 665.526, "encoder_q-layer.2": 483.1642, "encoder_q-layer.3": 460.4322, "encoder_q-layer.4": 514.5997, "encoder_q-layer.5": 560.1196, "encoder_q-layer.6": 602.4464, "encoder_q-layer.7": 595.7255, "encoder_q-layer.8": 458.3723, "encoder_q-layer.9": 316.4586, "epoch": 0.42, "inbatch_neg_score": 0.1964, "inbatch_pos_score": 0.8682, "learning_rate": 3.138888888888889e-05, "loss": 3.3228, "norm_diff": 0.0584, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 739.394, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1969, "query_norm": 1.3617, "queue_k_norm": 1.4157, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.884, "sent_len_1": 66.8179, "sent_max_len_0": 128.0, "sent_max_len_1": 192.165, "stdk": 0.0486, "stdq": 0.0455, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 43500 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.3038, "doc_norm": 1.4131, "encoder_q-embeddings": 291.5234, "encoder_q-layer.0": 186.2465, "encoder_q-layer.1": 205.7701, "encoder_q-layer.10": 281.7762, "encoder_q-layer.11": 626.8435, "encoder_q-layer.2": 235.476, "encoder_q-layer.3": 257.949, "encoder_q-layer.4": 273.9988, "encoder_q-layer.5": 280.9333, "encoder_q-layer.6": 314.0224, "encoder_q-layer.7": 362.0202, "encoder_q-layer.8": 368.3067, "encoder_q-layer.9": 315.1465, "epoch": 0.43, "inbatch_neg_score": 0.1984, "inbatch_pos_score": 0.8838, "learning_rate": 3.1333333333333334e-05, "loss": 3.3038, "norm_diff": 0.0394, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 480.4719, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1982, "query_norm": 1.3737, "queue_k_norm": 1.4167, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0912, "sent_len_1": 66.9852, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0775, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 43600 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.2844, "doc_norm": 1.4126, "encoder_q-embeddings": 247.6566, "encoder_q-layer.0": 165.9463, "encoder_q-layer.1": 180.7304, "encoder_q-layer.10": 286.089, "encoder_q-layer.11": 639.0943, "encoder_q-layer.2": 201.9543, "encoder_q-layer.3": 205.0382, "encoder_q-layer.4": 220.9973, "encoder_q-layer.5": 230.0922, "encoder_q-layer.6": 260.8263, "encoder_q-layer.7": 298.4917, "encoder_q-layer.8": 338.9518, "encoder_q-layer.9": 296.118, "epoch": 0.43, "inbatch_neg_score": 0.1943, "inbatch_pos_score": 0.8384, "learning_rate": 3.1277777777777776e-05, "loss": 3.2844, "norm_diff": 0.0613, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 440.7413, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1946, "query_norm": 1.3512, "queue_k_norm": 1.4165, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9848, "sent_len_1": 66.7873, "sent_max_len_0": 128.0, "sent_max_len_1": 189.31, "stdk": 0.0484, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 43700 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3191, "doc_norm": 1.4138, "encoder_q-embeddings": 462.9347, "encoder_q-layer.0": 319.8843, "encoder_q-layer.1": 325.8416, "encoder_q-layer.10": 338.8963, "encoder_q-layer.11": 670.9633, "encoder_q-layer.2": 357.0933, "encoder_q-layer.3": 385.9306, "encoder_q-layer.4": 406.1962, "encoder_q-layer.5": 443.9343, "encoder_q-layer.6": 473.923, "encoder_q-layer.7": 460.0476, "encoder_q-layer.8": 429.6974, "encoder_q-layer.9": 327.0101, "epoch": 0.43, "inbatch_neg_score": 0.1965, "inbatch_pos_score": 0.8169, "learning_rate": 3.1222222222222225e-05, "loss": 3.3191, "norm_diff": 0.065, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 634.4755, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1967, "query_norm": 1.3488, "queue_k_norm": 1.4162, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9725, "sent_len_1": 66.7369, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8275, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 43800 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3195, "doc_norm": 1.416, "encoder_q-embeddings": 262.5099, "encoder_q-layer.0": 177.4348, "encoder_q-layer.1": 194.7729, "encoder_q-layer.10": 320.0033, "encoder_q-layer.11": 624.7731, "encoder_q-layer.2": 213.2238, "encoder_q-layer.3": 225.2675, "encoder_q-layer.4": 232.2906, "encoder_q-layer.5": 251.0912, "encoder_q-layer.6": 282.781, "encoder_q-layer.7": 326.2191, "encoder_q-layer.8": 394.0247, "encoder_q-layer.9": 321.9742, "epoch": 0.43, "inbatch_neg_score": 0.1973, "inbatch_pos_score": 0.8535, "learning_rate": 3.116666666666667e-05, "loss": 3.3195, "norm_diff": 0.0349, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 465.0568, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1965, "query_norm": 1.3811, "queue_k_norm": 1.4163, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0631, "sent_len_1": 66.7689, "sent_max_len_0": 128.0, "sent_max_len_1": 190.165, "stdk": 0.0485, "stdq": 0.0463, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 43900 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.3244, "doc_norm": 1.4169, "encoder_q-embeddings": 289.4416, "encoder_q-layer.0": 179.4188, "encoder_q-layer.1": 186.8819, "encoder_q-layer.10": 290.0345, "encoder_q-layer.11": 638.5543, "encoder_q-layer.2": 209.779, "encoder_q-layer.3": 225.065, "encoder_q-layer.4": 247.7596, "encoder_q-layer.5": 259.0912, "encoder_q-layer.6": 291.816, "encoder_q-layer.7": 337.4434, "encoder_q-layer.8": 378.7804, "encoder_q-layer.9": 313.7754, "epoch": 0.43, "inbatch_neg_score": 0.1965, "inbatch_pos_score": 0.8955, "learning_rate": 3.111111111111111e-05, "loss": 3.3244, "norm_diff": 0.0239, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 469.6961, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1959, "query_norm": 1.393, "queue_k_norm": 1.415, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.732, "sent_len_1": 66.8841, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4025, "stdk": 0.0485, "stdq": 0.0466, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 44000 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3419, "doc_norm": 1.4165, "encoder_q-embeddings": 488.8597, "encoder_q-layer.0": 325.5974, "encoder_q-layer.1": 335.3644, "encoder_q-layer.10": 285.6553, "encoder_q-layer.11": 633.9598, "encoder_q-layer.2": 401.3664, "encoder_q-layer.3": 406.3181, "encoder_q-layer.4": 494.4386, "encoder_q-layer.5": 538.5054, "encoder_q-layer.6": 671.9125, "encoder_q-layer.7": 717.4082, "encoder_q-layer.8": 578.7513, "encoder_q-layer.9": 399.9344, "epoch": 0.43, "inbatch_neg_score": 0.1967, "inbatch_pos_score": 0.8296, "learning_rate": 3.105555555555555e-05, "loss": 3.3419, "norm_diff": 0.0591, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 739.6686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1968, "query_norm": 1.3574, "queue_k_norm": 1.4158, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9649, "sent_len_1": 66.6701, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7912, "stdk": 0.0485, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 44100 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.3189, "doc_norm": 1.4208, "encoder_q-embeddings": 286.7896, "encoder_q-layer.0": 184.0093, "encoder_q-layer.1": 200.5699, "encoder_q-layer.10": 296.0781, "encoder_q-layer.11": 608.0043, "encoder_q-layer.2": 229.4677, "encoder_q-layer.3": 237.2131, "encoder_q-layer.4": 241.926, "encoder_q-layer.5": 261.6401, "encoder_q-layer.6": 301.3804, "encoder_q-layer.7": 358.3695, "encoder_q-layer.8": 339.4464, "encoder_q-layer.9": 288.5755, "epoch": 0.43, "inbatch_neg_score": 0.1961, "inbatch_pos_score": 0.9106, "learning_rate": 3.1e-05, "loss": 3.3189, "norm_diff": 0.0292, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 461.4437, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1965, "query_norm": 1.3916, "queue_k_norm": 1.4182, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.112, "sent_len_1": 66.7092, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4162, "stdk": 0.0487, "stdq": 0.0466, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 44200 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.329, "doc_norm": 1.4123, "encoder_q-embeddings": 584.4637, "encoder_q-layer.0": 424.2446, "encoder_q-layer.1": 482.28, "encoder_q-layer.10": 288.3389, "encoder_q-layer.11": 637.2018, "encoder_q-layer.2": 558.9258, "encoder_q-layer.3": 596.7576, "encoder_q-layer.4": 612.652, "encoder_q-layer.5": 626.6475, "encoder_q-layer.6": 596.9974, "encoder_q-layer.7": 638.5096, "encoder_q-layer.8": 550.6765, "encoder_q-layer.9": 308.0601, "epoch": 0.43, "inbatch_neg_score": 0.1981, "inbatch_pos_score": 0.8301, "learning_rate": 3.094444444444445e-05, "loss": 3.329, "norm_diff": 0.0521, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 823.6682, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1975, "query_norm": 1.3602, "queue_k_norm": 1.4159, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.964, "sent_len_1": 66.7947, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1138, "stdk": 0.0484, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 44300 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.2984, "doc_norm": 1.4138, "encoder_q-embeddings": 600.5946, "encoder_q-layer.0": 428.7651, "encoder_q-layer.1": 458.8206, "encoder_q-layer.10": 322.7917, "encoder_q-layer.11": 664.9639, "encoder_q-layer.2": 543.8619, "encoder_q-layer.3": 569.5438, "encoder_q-layer.4": 605.7654, "encoder_q-layer.5": 571.5055, "encoder_q-layer.6": 495.3671, "encoder_q-layer.7": 501.3279, "encoder_q-layer.8": 456.2144, "encoder_q-layer.9": 321.4072, "epoch": 0.43, "inbatch_neg_score": 0.1969, "inbatch_pos_score": 0.8354, "learning_rate": 3.088888888888889e-05, "loss": 3.2984, "norm_diff": 0.039, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 772.0803, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.198, "query_norm": 1.3748, "queue_k_norm": 1.416, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9307, "sent_len_1": 66.8017, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9013, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 44400 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.3144, "doc_norm": 1.4173, "encoder_q-embeddings": 336.8156, "encoder_q-layer.0": 218.2655, "encoder_q-layer.1": 232.7871, "encoder_q-layer.10": 293.2474, "encoder_q-layer.11": 658.017, "encoder_q-layer.2": 268.3653, "encoder_q-layer.3": 274.4176, "encoder_q-layer.4": 297.9587, "encoder_q-layer.5": 310.8348, "encoder_q-layer.6": 332.9094, "encoder_q-layer.7": 367.5707, "encoder_q-layer.8": 388.3803, "encoder_q-layer.9": 317.8849, "epoch": 0.43, "inbatch_neg_score": 0.1983, "inbatch_pos_score": 0.8296, "learning_rate": 3.0833333333333335e-05, "loss": 3.3144, "norm_diff": 0.0472, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 519.2402, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1989, "query_norm": 1.3701, "queue_k_norm": 1.4143, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8951, "sent_len_1": 66.6784, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6875, "stdk": 0.0486, "stdq": 0.0456, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 44500 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.3146, "doc_norm": 1.419, "encoder_q-embeddings": 297.1875, "encoder_q-layer.0": 192.2595, "encoder_q-layer.1": 206.4339, "encoder_q-layer.10": 316.444, "encoder_q-layer.11": 659.6421, "encoder_q-layer.2": 227.7933, "encoder_q-layer.3": 241.623, "encoder_q-layer.4": 266.461, "encoder_q-layer.5": 277.622, "encoder_q-layer.6": 303.7605, "encoder_q-layer.7": 337.3526, "encoder_q-layer.8": 363.2707, "encoder_q-layer.9": 311.3642, "epoch": 0.44, "inbatch_neg_score": 0.2025, "inbatch_pos_score": 0.8647, "learning_rate": 3.077777777777778e-05, "loss": 3.3146, "norm_diff": 0.0509, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 487.4019, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2023, "query_norm": 1.3681, "queue_k_norm": 1.4168, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8687, "sent_len_1": 66.6894, "sent_max_len_0": 128.0, "sent_max_len_1": 188.555, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 44600 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3036, "doc_norm": 1.4166, "encoder_q-embeddings": 374.2867, "encoder_q-layer.0": 253.1553, "encoder_q-layer.1": 272.7285, "encoder_q-layer.10": 285.3405, "encoder_q-layer.11": 637.5209, "encoder_q-layer.2": 325.6043, "encoder_q-layer.3": 357.7886, "encoder_q-layer.4": 351.4905, "encoder_q-layer.5": 338.3356, "encoder_q-layer.6": 387.7497, "encoder_q-layer.7": 430.6668, "encoder_q-layer.8": 378.2032, "encoder_q-layer.9": 285.5622, "epoch": 0.44, "inbatch_neg_score": 0.1996, "inbatch_pos_score": 0.8594, "learning_rate": 3.0722222222222227e-05, "loss": 3.3036, "norm_diff": 0.0399, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 555.4492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1997, "query_norm": 1.3767, "queue_k_norm": 1.4171, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.6456, "sent_len_1": 66.8564, "sent_max_len_0": 128.0, "sent_max_len_1": 193.5762, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 44700 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2875, "doc_norm": 1.4188, "encoder_q-embeddings": 438.8046, "encoder_q-layer.0": 339.4998, "encoder_q-layer.1": 393.7716, "encoder_q-layer.10": 330.1195, "encoder_q-layer.11": 685.0272, "encoder_q-layer.2": 448.0167, "encoder_q-layer.3": 438.2965, "encoder_q-layer.4": 422.6928, "encoder_q-layer.5": 379.284, "encoder_q-layer.6": 363.7701, "encoder_q-layer.7": 384.4494, "encoder_q-layer.8": 422.1562, "encoder_q-layer.9": 330.9393, "epoch": 0.44, "inbatch_neg_score": 0.1997, "inbatch_pos_score": 0.8525, "learning_rate": 3.066666666666667e-05, "loss": 3.2875, "norm_diff": 0.0507, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 630.8868, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1993, "query_norm": 1.3681, "queue_k_norm": 1.4181, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9654, "sent_len_1": 66.8566, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1037, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 44800 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3101, "doc_norm": 1.4072, "encoder_q-embeddings": 1008.6836, "encoder_q-layer.0": 698.0264, "encoder_q-layer.1": 773.3721, "encoder_q-layer.10": 298.5735, "encoder_q-layer.11": 637.9417, "encoder_q-layer.2": 996.0127, "encoder_q-layer.3": 1067.2345, "encoder_q-layer.4": 1207.7457, "encoder_q-layer.5": 1315.9204, "encoder_q-layer.6": 1355.9625, "encoder_q-layer.7": 1271.5652, "encoder_q-layer.8": 865.9797, "encoder_q-layer.9": 358.8655, "epoch": 0.44, "inbatch_neg_score": 0.206, "inbatch_pos_score": 0.8638, "learning_rate": 3.061111111111111e-05, "loss": 3.3101, "norm_diff": 0.017, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1433.2065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2057, "query_norm": 1.3917, "queue_k_norm": 1.4186, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9545, "sent_len_1": 66.8065, "sent_max_len_0": 128.0, "sent_max_len_1": 188.575, "stdk": 0.0482, "stdq": 0.0457, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 44900 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.3055, "doc_norm": 1.4168, "encoder_q-embeddings": 443.9218, "encoder_q-layer.0": 319.0468, "encoder_q-layer.1": 314.9095, "encoder_q-layer.10": 282.2415, "encoder_q-layer.11": 593.1105, "encoder_q-layer.2": 361.2851, "encoder_q-layer.3": 363.5935, "encoder_q-layer.4": 364.8318, "encoder_q-layer.5": 395.7696, "encoder_q-layer.6": 402.6557, "encoder_q-layer.7": 408.2527, "encoder_q-layer.8": 371.869, "encoder_q-layer.9": 285.5411, "epoch": 0.44, "inbatch_neg_score": 0.2135, "inbatch_pos_score": 0.8984, "learning_rate": 3.055555555555556e-05, "loss": 3.3055, "norm_diff": 0.0141, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 576.6874, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2128, "query_norm": 1.4027, "queue_k_norm": 1.4179, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0397, "sent_len_1": 66.8432, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8825, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 45000 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3073, "doc_norm": 1.4158, "encoder_q-embeddings": 2151.3162, "encoder_q-layer.0": 1393.9089, "encoder_q-layer.1": 1658.1257, "encoder_q-layer.10": 574.8478, "encoder_q-layer.11": 1242.0784, "encoder_q-layer.2": 1899.3822, "encoder_q-layer.3": 1791.661, "encoder_q-layer.4": 1771.0856, "encoder_q-layer.5": 1488.2998, "encoder_q-layer.6": 1520.9319, "encoder_q-layer.7": 1455.3865, "encoder_q-layer.8": 1021.2946, "encoder_q-layer.9": 631.4149, "epoch": 0.44, "inbatch_neg_score": 0.2182, "inbatch_pos_score": 0.854, "learning_rate": 3.05e-05, "loss": 3.3073, "norm_diff": 0.0199, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2294.5212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2181, "query_norm": 1.3959, "queue_k_norm": 1.4168, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0006, "sent_len_1": 67.0456, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0387, "stdk": 0.0485, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 45100 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.3143, "doc_norm": 1.4155, "encoder_q-embeddings": 528.5461, "encoder_q-layer.0": 350.3493, "encoder_q-layer.1": 364.1764, "encoder_q-layer.10": 604.6742, "encoder_q-layer.11": 1253.2003, "encoder_q-layer.2": 395.5608, "encoder_q-layer.3": 401.8536, "encoder_q-layer.4": 432.2409, "encoder_q-layer.5": 459.5488, "encoder_q-layer.6": 541.5715, "encoder_q-layer.7": 592.0429, "encoder_q-layer.8": 714.8865, "encoder_q-layer.9": 612.745, "epoch": 0.44, "inbatch_neg_score": 0.2254, "inbatch_pos_score": 0.8999, "learning_rate": 3.044444444444445e-05, "loss": 3.3143, "norm_diff": 0.0089, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 901.3771, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2244, "query_norm": 1.4206, "queue_k_norm": 1.4169, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9405, "sent_len_1": 66.8898, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8625, "stdk": 0.0484, "stdq": 0.0461, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 45200 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.2908, "doc_norm": 1.4138, "encoder_q-embeddings": 707.9913, "encoder_q-layer.0": 499.0506, "encoder_q-layer.1": 552.5721, "encoder_q-layer.10": 602.9816, "encoder_q-layer.11": 1329.1221, "encoder_q-layer.2": 611.2488, "encoder_q-layer.3": 613.9766, "encoder_q-layer.4": 639.7579, "encoder_q-layer.5": 608.3501, "encoder_q-layer.6": 630.3896, "encoder_q-layer.7": 665.3107, "encoder_q-layer.8": 723.4641, "encoder_q-layer.9": 598.5833, "epoch": 0.44, "inbatch_neg_score": 0.2315, "inbatch_pos_score": 0.9136, "learning_rate": 3.0388888888888887e-05, "loss": 3.2908, "norm_diff": 0.0171, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1051.7396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2308, "query_norm": 1.4299, "queue_k_norm": 1.4187, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8462, "sent_len_1": 66.7798, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5175, "stdk": 0.0484, "stdq": 0.0464, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 45300 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.3302, "doc_norm": 1.4223, "encoder_q-embeddings": 572.1978, "encoder_q-layer.0": 388.9594, "encoder_q-layer.1": 427.3064, "encoder_q-layer.10": 616.3926, "encoder_q-layer.11": 1361.8383, "encoder_q-layer.2": 476.388, "encoder_q-layer.3": 501.2525, "encoder_q-layer.4": 529.0745, "encoder_q-layer.5": 549.6539, "encoder_q-layer.6": 562.7726, "encoder_q-layer.7": 627.0963, "encoder_q-layer.8": 712.4395, "encoder_q-layer.9": 608.4832, "epoch": 0.44, "inbatch_neg_score": 0.236, "inbatch_pos_score": 0.9326, "learning_rate": 3.0333333333333337e-05, "loss": 3.3302, "norm_diff": 0.0073, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 993.3553, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.236, "query_norm": 1.4274, "queue_k_norm": 1.4195, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0496, "sent_len_1": 66.9502, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5012, "stdk": 0.0486, "stdq": 0.0463, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 45400 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.3118, "doc_norm": 1.4234, "encoder_q-embeddings": 768.6887, "encoder_q-layer.0": 507.2254, "encoder_q-layer.1": 546.4923, "encoder_q-layer.10": 567.2425, "encoder_q-layer.11": 1220.38, "encoder_q-layer.2": 616.4326, "encoder_q-layer.3": 684.917, "encoder_q-layer.4": 667.4772, "encoder_q-layer.5": 690.4581, "encoder_q-layer.6": 761.5431, "encoder_q-layer.7": 821.6723, "encoder_q-layer.8": 835.9122, "encoder_q-layer.9": 613.6291, "epoch": 0.44, "inbatch_neg_score": 0.2373, "inbatch_pos_score": 0.9258, "learning_rate": 3.0277777777777776e-05, "loss": 3.3118, "norm_diff": 0.0076, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1103.7136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2378, "query_norm": 1.4186, "queue_k_norm": 1.4203, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0134, "sent_len_1": 66.756, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2587, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 45500 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3033, "doc_norm": 1.4213, "encoder_q-embeddings": 704.0043, "encoder_q-layer.0": 485.1963, "encoder_q-layer.1": 500.5159, "encoder_q-layer.10": 615.786, "encoder_q-layer.11": 1365.4988, "encoder_q-layer.2": 553.1611, "encoder_q-layer.3": 564.9895, "encoder_q-layer.4": 589.6924, "encoder_q-layer.5": 598.3147, "encoder_q-layer.6": 618.9741, "encoder_q-layer.7": 719.8652, "encoder_q-layer.8": 725.1595, "encoder_q-layer.9": 615.0979, "epoch": 0.45, "inbatch_neg_score": 0.234, "inbatch_pos_score": 0.8779, "learning_rate": 3.0222222222222225e-05, "loss": 3.3033, "norm_diff": 0.0464, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1046.2606, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2334, "query_norm": 1.3749, "queue_k_norm": 1.4214, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8107, "sent_len_1": 66.8491, "sent_max_len_0": 128.0, "sent_max_len_1": 190.72, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 45600 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.3141, "doc_norm": 1.4181, "encoder_q-embeddings": 823.1373, "encoder_q-layer.0": 601.6436, "encoder_q-layer.1": 659.7247, "encoder_q-layer.10": 609.1884, "encoder_q-layer.11": 1463.3352, "encoder_q-layer.2": 758.6068, "encoder_q-layer.3": 821.4514, "encoder_q-layer.4": 866.547, "encoder_q-layer.5": 928.1512, "encoder_q-layer.6": 921.5577, "encoder_q-layer.7": 1017.5889, "encoder_q-layer.8": 987.1755, "encoder_q-layer.9": 698.1871, "epoch": 0.45, "inbatch_neg_score": 0.2351, "inbatch_pos_score": 0.856, "learning_rate": 3.016666666666667e-05, "loss": 3.3141, "norm_diff": 0.0419, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1329.5789, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2346, "query_norm": 1.3762, "queue_k_norm": 1.4208, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.6507, "sent_len_1": 66.7432, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8425, "stdk": 0.0484, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 45700 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.3072, "doc_norm": 1.4251, "encoder_q-embeddings": 496.9846, "encoder_q-layer.0": 329.057, "encoder_q-layer.1": 344.3408, "encoder_q-layer.10": 576.1889, "encoder_q-layer.11": 1314.952, "encoder_q-layer.2": 376.309, "encoder_q-layer.3": 403.5558, "encoder_q-layer.4": 427.9603, "encoder_q-layer.5": 451.2594, "encoder_q-layer.6": 531.1075, "encoder_q-layer.7": 644.2263, "encoder_q-layer.8": 691.0531, "encoder_q-layer.9": 608.9666, "epoch": 0.45, "inbatch_neg_score": 0.2378, "inbatch_pos_score": 0.9053, "learning_rate": 3.0111111111111113e-05, "loss": 3.3072, "norm_diff": 0.0429, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 907.2869, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2373, "query_norm": 1.3823, "queue_k_norm": 1.4227, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9742, "sent_len_1": 66.8795, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9725, "stdk": 0.0486, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 45800 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2884, "doc_norm": 1.4243, "encoder_q-embeddings": 722.6323, "encoder_q-layer.0": 496.2588, "encoder_q-layer.1": 551.7868, "encoder_q-layer.10": 634.8545, "encoder_q-layer.11": 1325.9576, "encoder_q-layer.2": 691.0233, "encoder_q-layer.3": 764.1688, "encoder_q-layer.4": 883.7959, "encoder_q-layer.5": 923.074, "encoder_q-layer.6": 971.5112, "encoder_q-layer.7": 841.146, "encoder_q-layer.8": 850.5674, "encoder_q-layer.9": 645.7567, "epoch": 0.45, "inbatch_neg_score": 0.2358, "inbatch_pos_score": 0.8779, "learning_rate": 3.005555555555556e-05, "loss": 3.2884, "norm_diff": 0.0347, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1214.4133, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.236, "query_norm": 1.3896, "queue_k_norm": 1.422, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.882, "sent_len_1": 66.7835, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7038, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 45900 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3149, "doc_norm": 1.4218, "encoder_q-embeddings": 2466.2339, "encoder_q-layer.0": 1746.1521, "encoder_q-layer.1": 1474.5081, "encoder_q-layer.10": 600.1024, "encoder_q-layer.11": 1295.2843, "encoder_q-layer.2": 1651.5361, "encoder_q-layer.3": 1723.9873, "encoder_q-layer.4": 1759.2365, "encoder_q-layer.5": 1663.9301, "encoder_q-layer.6": 1569.5308, "encoder_q-layer.7": 1660.8876, "encoder_q-layer.8": 1353.2133, "encoder_q-layer.9": 771.9584, "epoch": 0.45, "inbatch_neg_score": 0.2331, "inbatch_pos_score": 0.8926, "learning_rate": 3e-05, "loss": 3.3149, "norm_diff": 0.0265, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2519.3038, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2336, "query_norm": 1.3952, "queue_k_norm": 1.4261, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7013, "sent_len_1": 66.5543, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1738, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 46000 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.3141, "doc_norm": 1.4199, "encoder_q-embeddings": 538.4189, "encoder_q-layer.0": 358.2054, "encoder_q-layer.1": 376.3449, "encoder_q-layer.10": 622.5236, "encoder_q-layer.11": 1262.4795, "encoder_q-layer.2": 408.5656, "encoder_q-layer.3": 420.966, "encoder_q-layer.4": 470.7821, "encoder_q-layer.5": 485.1684, "encoder_q-layer.6": 525.8845, "encoder_q-layer.7": 608.8171, "encoder_q-layer.8": 715.424, "encoder_q-layer.9": 633.2883, "epoch": 0.45, "inbatch_neg_score": 0.2252, "inbatch_pos_score": 0.8809, "learning_rate": 2.9944444444444446e-05, "loss": 3.3141, "norm_diff": 0.0464, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 918.3071, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2263, "query_norm": 1.3735, "queue_k_norm": 1.4249, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.833, "sent_len_1": 66.5855, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4863, "stdk": 0.0484, "stdq": 0.0455, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 46100 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.2864, "doc_norm": 1.4326, "encoder_q-embeddings": 662.6136, "encoder_q-layer.0": 410.8615, "encoder_q-layer.1": 421.296, "encoder_q-layer.10": 643.6664, "encoder_q-layer.11": 1389.5165, "encoder_q-layer.2": 472.1631, "encoder_q-layer.3": 502.4929, "encoder_q-layer.4": 527.8853, "encoder_q-layer.5": 534.4405, "encoder_q-layer.6": 588.6668, "encoder_q-layer.7": 681.8404, "encoder_q-layer.8": 741.5972, "encoder_q-layer.9": 629.8555, "epoch": 0.45, "inbatch_neg_score": 0.2299, "inbatch_pos_score": 0.8989, "learning_rate": 2.988888888888889e-05, "loss": 3.2864, "norm_diff": 0.0502, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1026.6205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2303, "query_norm": 1.3824, "queue_k_norm": 1.4242, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8486, "sent_len_1": 66.4723, "sent_max_len_0": 128.0, "sent_max_len_1": 187.5737, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 46200 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.2911, "doc_norm": 1.4249, "encoder_q-embeddings": 1285.2937, "encoder_q-layer.0": 940.409, "encoder_q-layer.1": 1091.1738, "encoder_q-layer.10": 619.2592, "encoder_q-layer.11": 1437.351, "encoder_q-layer.2": 1288.751, "encoder_q-layer.3": 1338.6329, "encoder_q-layer.4": 1411.8809, "encoder_q-layer.5": 1377.1304, "encoder_q-layer.6": 1281.236, "encoder_q-layer.7": 1096.6338, "encoder_q-layer.8": 997.6033, "encoder_q-layer.9": 666.907, "epoch": 0.45, "inbatch_neg_score": 0.229, "inbatch_pos_score": 0.8535, "learning_rate": 2.9833333333333335e-05, "loss": 3.2911, "norm_diff": 0.0721, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1744.4094, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2273, "query_norm": 1.3528, "queue_k_norm": 1.4249, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9992, "sent_len_1": 66.848, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9975, "stdk": 0.0485, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 46300 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2975, "doc_norm": 1.4247, "encoder_q-embeddings": 532.2657, "encoder_q-layer.0": 349.8211, "encoder_q-layer.1": 386.0331, "encoder_q-layer.10": 610.9373, "encoder_q-layer.11": 1254.9396, "encoder_q-layer.2": 431.7523, "encoder_q-layer.3": 447.0812, "encoder_q-layer.4": 467.7936, "encoder_q-layer.5": 493.9984, "encoder_q-layer.6": 543.5929, "encoder_q-layer.7": 630.6003, "encoder_q-layer.8": 675.6058, "encoder_q-layer.9": 593.9529, "epoch": 0.45, "inbatch_neg_score": 0.2213, "inbatch_pos_score": 0.9121, "learning_rate": 2.9777777777777777e-05, "loss": 3.2975, "norm_diff": 0.0332, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 912.4508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2208, "query_norm": 1.3915, "queue_k_norm": 1.4283, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9949, "sent_len_1": 66.8478, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3988, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 46400 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.2937, "doc_norm": 1.4248, "encoder_q-embeddings": 516.7308, "encoder_q-layer.0": 336.9787, "encoder_q-layer.1": 341.7676, "encoder_q-layer.10": 543.8725, "encoder_q-layer.11": 1254.1589, "encoder_q-layer.2": 389.2842, "encoder_q-layer.3": 428.2521, "encoder_q-layer.4": 457.6279, "encoder_q-layer.5": 480.2539, "encoder_q-layer.6": 553.2525, "encoder_q-layer.7": 615.6038, "encoder_q-layer.8": 700.2859, "encoder_q-layer.9": 590.9703, "epoch": 0.45, "inbatch_neg_score": 0.2192, "inbatch_pos_score": 0.8804, "learning_rate": 2.9722222222222223e-05, "loss": 3.2937, "norm_diff": 0.0605, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 901.372, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.22, "query_norm": 1.3644, "queue_k_norm": 1.4265, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.6605, "sent_len_1": 66.5996, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2788, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 46500 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3097, "doc_norm": 1.4273, "encoder_q-embeddings": 720.0128, "encoder_q-layer.0": 474.8589, "encoder_q-layer.1": 543.2939, "encoder_q-layer.10": 641.2438, "encoder_q-layer.11": 1376.1637, "encoder_q-layer.2": 631.0128, "encoder_q-layer.3": 660.0961, "encoder_q-layer.4": 753.6788, "encoder_q-layer.5": 821.9664, "encoder_q-layer.6": 889.2244, "encoder_q-layer.7": 887.1025, "encoder_q-layer.8": 993.5014, "encoder_q-layer.9": 768.4984, "epoch": 0.45, "inbatch_neg_score": 0.2199, "inbatch_pos_score": 0.8745, "learning_rate": 2.9666666666666672e-05, "loss": 3.3097, "norm_diff": 0.0529, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1207.3944, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.22, "query_norm": 1.3745, "queue_k_norm": 1.4263, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0609, "sent_len_1": 67.004, "sent_max_len_0": 128.0, "sent_max_len_1": 191.3325, "stdk": 0.0486, "stdq": 0.0455, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 46600 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3014, "doc_norm": 1.4292, "encoder_q-embeddings": 1675.5349, "encoder_q-layer.0": 1184.3787, "encoder_q-layer.1": 1356.4994, "encoder_q-layer.10": 607.3243, "encoder_q-layer.11": 1314.6866, "encoder_q-layer.2": 1538.9652, "encoder_q-layer.3": 1724.3932, "encoder_q-layer.4": 1843.2448, "encoder_q-layer.5": 1823.2882, "encoder_q-layer.6": 1805.8909, "encoder_q-layer.7": 1722.225, "encoder_q-layer.8": 1196.9406, "encoder_q-layer.9": 691.4374, "epoch": 0.46, "inbatch_neg_score": 0.2213, "inbatch_pos_score": 0.8687, "learning_rate": 2.961111111111111e-05, "loss": 3.3014, "norm_diff": 0.055, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2200.3962, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.22, "query_norm": 1.3742, "queue_k_norm": 1.4258, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7671, "sent_len_1": 66.9782, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1987, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 46700 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.317, "doc_norm": 1.4279, "encoder_q-embeddings": 534.8832, "encoder_q-layer.0": 347.0648, "encoder_q-layer.1": 364.2167, "encoder_q-layer.10": 708.2047, "encoder_q-layer.11": 1416.8915, "encoder_q-layer.2": 424.5532, "encoder_q-layer.3": 449.3445, "encoder_q-layer.4": 483.2906, "encoder_q-layer.5": 486.7383, "encoder_q-layer.6": 551.3012, "encoder_q-layer.7": 635.5269, "encoder_q-layer.8": 718.485, "encoder_q-layer.9": 698.8461, "epoch": 0.46, "inbatch_neg_score": 0.2153, "inbatch_pos_score": 0.8643, "learning_rate": 2.955555555555556e-05, "loss": 3.317, "norm_diff": 0.0388, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 964.8262, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2155, "query_norm": 1.3891, "queue_k_norm": 1.4253, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9584, "sent_len_1": 66.6899, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6463, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 46800 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.27, "doc_norm": 1.4294, "encoder_q-embeddings": 514.0528, "encoder_q-layer.0": 340.7193, "encoder_q-layer.1": 344.8975, "encoder_q-layer.10": 598.6968, "encoder_q-layer.11": 1296.3027, "encoder_q-layer.2": 376.4624, "encoder_q-layer.3": 401.5954, "encoder_q-layer.4": 431.6165, "encoder_q-layer.5": 463.8053, "encoder_q-layer.6": 500.7421, "encoder_q-layer.7": 570.1464, "encoder_q-layer.8": 674.3207, "encoder_q-layer.9": 605.6843, "epoch": 0.46, "inbatch_neg_score": 0.221, "inbatch_pos_score": 0.9072, "learning_rate": 2.95e-05, "loss": 3.27, "norm_diff": 0.0265, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 893.4081, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2202, "query_norm": 1.4035, "queue_k_norm": 1.4254, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9031, "sent_len_1": 67.0401, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5662, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 46900 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3077, "doc_norm": 1.4238, "encoder_q-embeddings": 565.1782, "encoder_q-layer.0": 365.8199, "encoder_q-layer.1": 385.2644, "encoder_q-layer.10": 569.3797, "encoder_q-layer.11": 1321.4886, "encoder_q-layer.2": 424.0208, "encoder_q-layer.3": 454.5379, "encoder_q-layer.4": 494.619, "encoder_q-layer.5": 504.0933, "encoder_q-layer.6": 553.5225, "encoder_q-layer.7": 639.169, "encoder_q-layer.8": 696.531, "encoder_q-layer.9": 600.1984, "epoch": 0.46, "inbatch_neg_score": 0.2213, "inbatch_pos_score": 0.9009, "learning_rate": 2.9444444444444448e-05, "loss": 3.3077, "norm_diff": 0.0425, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 952.8839, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2225, "query_norm": 1.3814, "queue_k_norm": 1.4251, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1263, "sent_len_1": 66.9611, "sent_max_len_0": 128.0, "sent_max_len_1": 192.475, "stdk": 0.0486, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 47000 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.2852, "doc_norm": 1.426, "encoder_q-embeddings": 1006.1053, "encoder_q-layer.0": 654.6647, "encoder_q-layer.1": 677.7766, "encoder_q-layer.10": 1211.595, "encoder_q-layer.11": 2727.8237, "encoder_q-layer.2": 756.7513, "encoder_q-layer.3": 759.2128, "encoder_q-layer.4": 825.3746, "encoder_q-layer.5": 863.7272, "encoder_q-layer.6": 983.4937, "encoder_q-layer.7": 1243.5803, "encoder_q-layer.8": 1417.1707, "encoder_q-layer.9": 1266.5186, "epoch": 0.46, "inbatch_neg_score": 0.2237, "inbatch_pos_score": 0.8916, "learning_rate": 2.9388888888888887e-05, "loss": 3.2852, "norm_diff": 0.039, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1828.7839, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.224, "query_norm": 1.387, "queue_k_norm": 1.4259, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8653, "sent_len_1": 66.6842, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1763, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 47100 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3046, "doc_norm": 1.4251, "encoder_q-embeddings": 1916.7208, "encoder_q-layer.0": 1243.9668, "encoder_q-layer.1": 1467.0419, "encoder_q-layer.10": 1309.8733, "encoder_q-layer.11": 2745.0518, "encoder_q-layer.2": 1875.5293, "encoder_q-layer.3": 2231.8564, "encoder_q-layer.4": 2199.4424, "encoder_q-layer.5": 2329.6704, "encoder_q-layer.6": 2657.1677, "encoder_q-layer.7": 3054.7551, "encoder_q-layer.8": 2947.2256, "encoder_q-layer.9": 1735.4291, "epoch": 0.46, "inbatch_neg_score": 0.2208, "inbatch_pos_score": 0.8535, "learning_rate": 2.9333333333333336e-05, "loss": 3.3046, "norm_diff": 0.0395, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3298.7609, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2209, "query_norm": 1.3856, "queue_k_norm": 1.4246, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0405, "sent_len_1": 66.8335, "sent_max_len_0": 128.0, "sent_max_len_1": 192.17, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 47200 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.2956, "doc_norm": 1.421, "encoder_q-embeddings": 1280.3264, "encoder_q-layer.0": 900.5726, "encoder_q-layer.1": 916.1006, "encoder_q-layer.10": 1240.1953, "encoder_q-layer.11": 2969.8765, "encoder_q-layer.2": 1020.7275, "encoder_q-layer.3": 1076.554, "encoder_q-layer.4": 1158.0641, "encoder_q-layer.5": 1221.2217, "encoder_q-layer.6": 1314.1509, "encoder_q-layer.7": 1473.8191, "encoder_q-layer.8": 1543.2629, "encoder_q-layer.9": 1273.686, "epoch": 0.46, "inbatch_neg_score": 0.2282, "inbatch_pos_score": 0.8076, "learning_rate": 2.927777777777778e-05, "loss": 3.2956, "norm_diff": 0.0735, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2199.7201, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2264, "query_norm": 1.3475, "queue_k_norm": 1.4253, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0511, "sent_len_1": 66.8818, "sent_max_len_0": 128.0, "sent_max_len_1": 189.37, "stdk": 0.0484, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 47300 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.2896, "doc_norm": 1.4256, "encoder_q-embeddings": 1336.8907, "encoder_q-layer.0": 893.0792, "encoder_q-layer.1": 971.411, "encoder_q-layer.10": 1185.0334, "encoder_q-layer.11": 2604.0833, "encoder_q-layer.2": 1104.0466, "encoder_q-layer.3": 1098.4115, "encoder_q-layer.4": 1078.5972, "encoder_q-layer.5": 1143.3488, "encoder_q-layer.6": 1240.9667, "encoder_q-layer.7": 1376.4022, "encoder_q-layer.8": 1451.6913, "encoder_q-layer.9": 1168.3917, "epoch": 0.46, "inbatch_neg_score": 0.2244, "inbatch_pos_score": 0.8823, "learning_rate": 2.9222222222222224e-05, "loss": 3.2896, "norm_diff": 0.039, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2028.3423, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2249, "query_norm": 1.3866, "queue_k_norm": 1.4259, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8916, "sent_len_1": 66.5893, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8137, "stdk": 0.0486, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 47400 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.3209, "doc_norm": 1.4286, "encoder_q-embeddings": 1032.4471, "encoder_q-layer.0": 674.157, "encoder_q-layer.1": 696.6463, "encoder_q-layer.10": 1135.1278, "encoder_q-layer.11": 2607.3789, "encoder_q-layer.2": 742.3719, "encoder_q-layer.3": 792.0936, "encoder_q-layer.4": 826.8855, "encoder_q-layer.5": 825.0183, "encoder_q-layer.6": 965.8436, "encoder_q-layer.7": 1138.9368, "encoder_q-layer.8": 1372.0851, "encoder_q-layer.9": 1207.9176, "epoch": 0.46, "inbatch_neg_score": 0.2306, "inbatch_pos_score": 0.9189, "learning_rate": 2.916666666666667e-05, "loss": 3.3209, "norm_diff": 0.024, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1800.0288, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2302, "query_norm": 1.4046, "queue_k_norm": 1.4271, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8666, "sent_len_1": 66.782, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2713, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 47500 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.2891, "doc_norm": 1.4248, "encoder_q-embeddings": 1026.5413, "encoder_q-layer.0": 668.6854, "encoder_q-layer.1": 691.8762, "encoder_q-layer.10": 1150.1545, "encoder_q-layer.11": 2657.7563, "encoder_q-layer.2": 750.4769, "encoder_q-layer.3": 786.4589, "encoder_q-layer.4": 821.9103, "encoder_q-layer.5": 917.7315, "encoder_q-layer.6": 1045.2136, "encoder_q-layer.7": 1177.535, "encoder_q-layer.8": 1365.605, "encoder_q-layer.9": 1187.5286, "epoch": 0.46, "inbatch_neg_score": 0.225, "inbatch_pos_score": 0.8789, "learning_rate": 2.9111111111111112e-05, "loss": 3.2891, "norm_diff": 0.0509, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1834.4356, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2247, "query_norm": 1.3738, "queue_k_norm": 1.4243, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.609, "sent_len_1": 66.4559, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7025, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 47600 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3104, "doc_norm": 1.4303, "encoder_q-embeddings": 1037.6136, "encoder_q-layer.0": 671.5725, "encoder_q-layer.1": 713.489, "encoder_q-layer.10": 1201.4991, "encoder_q-layer.11": 2634.6887, "encoder_q-layer.2": 804.9861, "encoder_q-layer.3": 839.2104, "encoder_q-layer.4": 885.7121, "encoder_q-layer.5": 909.1273, "encoder_q-layer.6": 995.6784, "encoder_q-layer.7": 1189.5145, "encoder_q-layer.8": 1399.7854, "encoder_q-layer.9": 1272.5129, "epoch": 0.47, "inbatch_neg_score": 0.2293, "inbatch_pos_score": 0.8799, "learning_rate": 2.9055555555555558e-05, "loss": 3.3104, "norm_diff": 0.049, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1827.5978, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2302, "query_norm": 1.3813, "queue_k_norm": 1.4249, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8479, "sent_len_1": 66.7309, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4812, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 47700 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.3088, "doc_norm": 1.4273, "encoder_q-embeddings": 1308.8805, "encoder_q-layer.0": 831.4935, "encoder_q-layer.1": 808.0121, "encoder_q-layer.10": 1265.2429, "encoder_q-layer.11": 2889.7161, "encoder_q-layer.2": 862.2923, "encoder_q-layer.3": 920.5638, "encoder_q-layer.4": 977.1323, "encoder_q-layer.5": 1002.98, "encoder_q-layer.6": 1114.0908, "encoder_q-layer.7": 1260.7939, "encoder_q-layer.8": 1482.0543, "encoder_q-layer.9": 1328.9124, "epoch": 0.47, "inbatch_neg_score": 0.2276, "inbatch_pos_score": 0.8579, "learning_rate": 2.9e-05, "loss": 3.3088, "norm_diff": 0.0424, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2045.8082, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2273, "query_norm": 1.3849, "queue_k_norm": 1.4257, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8838, "sent_len_1": 66.7109, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4888, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 47800 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2887, "doc_norm": 1.4283, "encoder_q-embeddings": 1398.6035, "encoder_q-layer.0": 964.3931, "encoder_q-layer.1": 1135.7676, "encoder_q-layer.10": 590.2095, "encoder_q-layer.11": 1305.5054, "encoder_q-layer.2": 1240.3499, "encoder_q-layer.3": 1293.2319, "encoder_q-layer.4": 1400.6847, "encoder_q-layer.5": 1555.4417, "encoder_q-layer.6": 1570.9894, "encoder_q-layer.7": 1513.9745, "encoder_q-layer.8": 1018.3575, "encoder_q-layer.9": 645.3621, "epoch": 0.47, "inbatch_neg_score": 0.235, "inbatch_pos_score": 0.9062, "learning_rate": 2.8944444444444446e-05, "loss": 3.2887, "norm_diff": 0.022, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1848.1127, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.234, "query_norm": 1.4063, "queue_k_norm": 1.4259, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0827, "sent_len_1": 66.8307, "sent_max_len_0": 128.0, "sent_max_len_1": 192.3938, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 47900 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.2759, "doc_norm": 1.4328, "encoder_q-embeddings": 892.9286, "encoder_q-layer.0": 587.9846, "encoder_q-layer.1": 670.3327, "encoder_q-layer.10": 591.7316, "encoder_q-layer.11": 1246.304, "encoder_q-layer.2": 788.8891, "encoder_q-layer.3": 830.2289, "encoder_q-layer.4": 916.0156, "encoder_q-layer.5": 1014.6574, "encoder_q-layer.6": 1033.6362, "encoder_q-layer.7": 1132.2083, "encoder_q-layer.8": 1025.2064, "encoder_q-layer.9": 697.8023, "epoch": 0.47, "inbatch_neg_score": 0.2378, "inbatch_pos_score": 0.9072, "learning_rate": 2.8888888888888888e-05, "loss": 3.2759, "norm_diff": 0.0481, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1349.5552, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2382, "query_norm": 1.3846, "queue_k_norm": 1.4257, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.952, "sent_len_1": 66.7182, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7625, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 48000 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2723, "doc_norm": 1.422, "encoder_q-embeddings": 1356.514, "encoder_q-layer.0": 911.4519, "encoder_q-layer.1": 992.5809, "encoder_q-layer.10": 607.5475, "encoder_q-layer.11": 1265.5281, "encoder_q-layer.2": 1088.2566, "encoder_q-layer.3": 1115.301, "encoder_q-layer.4": 1084.0438, "encoder_q-layer.5": 1178.6055, "encoder_q-layer.6": 1029.9767, "encoder_q-layer.7": 1099.9539, "encoder_q-layer.8": 924.0941, "encoder_q-layer.9": 620.5746, "epoch": 0.47, "inbatch_neg_score": 0.2427, "inbatch_pos_score": 0.9058, "learning_rate": 2.8833333333333334e-05, "loss": 3.2723, "norm_diff": 0.0231, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1596.2087, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2428, "query_norm": 1.3989, "queue_k_norm": 1.4258, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0153, "sent_len_1": 66.6307, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6987, "stdk": 0.0484, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 48100 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.2747, "doc_norm": 1.4288, "encoder_q-embeddings": 546.8296, "encoder_q-layer.0": 348.5963, "encoder_q-layer.1": 371.2564, "encoder_q-layer.10": 648.1476, "encoder_q-layer.11": 1380.618, "encoder_q-layer.2": 409.53, "encoder_q-layer.3": 418.1068, "encoder_q-layer.4": 457.9181, "encoder_q-layer.5": 462.685, "encoder_q-layer.6": 575.0729, "encoder_q-layer.7": 648.1972, "encoder_q-layer.8": 757.8553, "encoder_q-layer.9": 666.8765, "epoch": 0.47, "inbatch_neg_score": 0.246, "inbatch_pos_score": 0.9092, "learning_rate": 2.877777777777778e-05, "loss": 3.2747, "norm_diff": 0.0126, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 961.0177, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2468, "query_norm": 1.4272, "queue_k_norm": 1.4248, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0243, "sent_len_1": 66.7676, "sent_max_len_0": 128.0, "sent_max_len_1": 189.44, "stdk": 0.0487, "stdq": 0.0467, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 48200 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.3041, "doc_norm": 1.4258, "encoder_q-embeddings": 569.5185, "encoder_q-layer.0": 367.6384, "encoder_q-layer.1": 399.3294, "encoder_q-layer.10": 602.2183, "encoder_q-layer.11": 1380.363, "encoder_q-layer.2": 464.5112, "encoder_q-layer.3": 481.264, "encoder_q-layer.4": 539.3295, "encoder_q-layer.5": 545.8685, "encoder_q-layer.6": 590.405, "encoder_q-layer.7": 642.0701, "encoder_q-layer.8": 706.6587, "encoder_q-layer.9": 606.5648, "epoch": 0.47, "inbatch_neg_score": 0.2503, "inbatch_pos_score": 0.8936, "learning_rate": 2.8722222222222222e-05, "loss": 3.3041, "norm_diff": 0.0388, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 992.7243, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2498, "query_norm": 1.387, "queue_k_norm": 1.4285, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9645, "sent_len_1": 66.9948, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9425, "stdk": 0.0485, "stdq": 0.0453, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 48300 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2632, "doc_norm": 1.4301, "encoder_q-embeddings": 553.0736, "encoder_q-layer.0": 362.5482, "encoder_q-layer.1": 390.148, "encoder_q-layer.10": 591.7563, "encoder_q-layer.11": 1281.4136, "encoder_q-layer.2": 434.1026, "encoder_q-layer.3": 452.7967, "encoder_q-layer.4": 509.3332, "encoder_q-layer.5": 526.9597, "encoder_q-layer.6": 550.8844, "encoder_q-layer.7": 618.018, "encoder_q-layer.8": 748.4677, "encoder_q-layer.9": 618.9684, "epoch": 0.47, "inbatch_neg_score": 0.253, "inbatch_pos_score": 0.9487, "learning_rate": 2.8666666666666668e-05, "loss": 3.2632, "norm_diff": 0.0103, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 938.1393, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2524, "query_norm": 1.425, "queue_k_norm": 1.4307, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8994, "sent_len_1": 66.9176, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2388, "stdk": 0.0487, "stdq": 0.0468, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 48400 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 3.2852, "doc_norm": 1.4306, "encoder_q-embeddings": 1498.9424, "encoder_q-layer.0": 1012.7009, "encoder_q-layer.1": 1190.5234, "encoder_q-layer.10": 580.6085, "encoder_q-layer.11": 1252.8499, "encoder_q-layer.2": 1180.6609, "encoder_q-layer.3": 1350.1953, "encoder_q-layer.4": 1283.1575, "encoder_q-layer.5": 1223.6936, "encoder_q-layer.6": 1275.7114, "encoder_q-layer.7": 1182.236, "encoder_q-layer.8": 858.3002, "encoder_q-layer.9": 649.9968, "epoch": 0.47, "inbatch_neg_score": 0.2503, "inbatch_pos_score": 0.9561, "learning_rate": 2.861111111111111e-05, "loss": 3.2852, "norm_diff": 0.0312, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1744.4753, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2524, "query_norm": 1.3994, "queue_k_norm": 1.4308, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8606, "sent_len_1": 66.6842, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9125, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 48500 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.3052, "doc_norm": 1.4311, "encoder_q-embeddings": 549.3686, "encoder_q-layer.0": 350.7133, "encoder_q-layer.1": 354.2456, "encoder_q-layer.10": 570.1596, "encoder_q-layer.11": 1347.975, "encoder_q-layer.2": 396.1655, "encoder_q-layer.3": 417.18, "encoder_q-layer.4": 448.7313, "encoder_q-layer.5": 493.0651, "encoder_q-layer.6": 529.5985, "encoder_q-layer.7": 627.5674, "encoder_q-layer.8": 692.7375, "encoder_q-layer.9": 601.0941, "epoch": 0.47, "inbatch_neg_score": 0.2562, "inbatch_pos_score": 0.9067, "learning_rate": 2.855555555555556e-05, "loss": 3.3052, "norm_diff": 0.0568, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 936.9608, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2571, "query_norm": 1.3744, "queue_k_norm": 1.4305, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9522, "sent_len_1": 66.7514, "sent_max_len_0": 128.0, "sent_max_len_1": 188.025, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 48600 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.2944, "doc_norm": 1.4319, "encoder_q-embeddings": 494.7448, "encoder_q-layer.0": 325.32, "encoder_q-layer.1": 343.7641, "encoder_q-layer.10": 554.3524, "encoder_q-layer.11": 1255.1483, "encoder_q-layer.2": 382.2431, "encoder_q-layer.3": 395.2921, "encoder_q-layer.4": 430.5898, "encoder_q-layer.5": 447.0279, "encoder_q-layer.6": 493.0802, "encoder_q-layer.7": 569.2576, "encoder_q-layer.8": 682.7518, "encoder_q-layer.9": 588.1035, "epoch": 0.48, "inbatch_neg_score": 0.2516, "inbatch_pos_score": 0.9634, "learning_rate": 2.8499999999999998e-05, "loss": 3.2944, "norm_diff": 0.0282, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 884.4212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2522, "query_norm": 1.4063, "queue_k_norm": 1.4314, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8827, "sent_len_1": 66.6237, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5225, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 48700 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.2453, "doc_norm": 1.4333, "encoder_q-embeddings": 712.2699, "encoder_q-layer.0": 491.7051, "encoder_q-layer.1": 536.4409, "encoder_q-layer.10": 558.1549, "encoder_q-layer.11": 1236.0571, "encoder_q-layer.2": 592.5216, "encoder_q-layer.3": 636.8582, "encoder_q-layer.4": 646.5758, "encoder_q-layer.5": 689.6365, "encoder_q-layer.6": 668.5109, "encoder_q-layer.7": 793.3224, "encoder_q-layer.8": 782.2405, "encoder_q-layer.9": 609.9998, "epoch": 0.48, "inbatch_neg_score": 0.2516, "inbatch_pos_score": 0.9639, "learning_rate": 2.8444444444444447e-05, "loss": 3.2453, "norm_diff": 0.0228, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1071.5103, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2527, "query_norm": 1.4104, "queue_k_norm": 1.4314, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9028, "sent_len_1": 66.7626, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9863, "stdk": 0.0487, "stdq": 0.0465, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 48800 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.2805, "doc_norm": 1.4355, "encoder_q-embeddings": 565.7724, "encoder_q-layer.0": 368.4667, "encoder_q-layer.1": 383.0261, "encoder_q-layer.10": 591.6637, "encoder_q-layer.11": 1419.0225, "encoder_q-layer.2": 436.8559, "encoder_q-layer.3": 458.7169, "encoder_q-layer.4": 478.0298, "encoder_q-layer.5": 507.2666, "encoder_q-layer.6": 547.9071, "encoder_q-layer.7": 638.4215, "encoder_q-layer.8": 719.4729, "encoder_q-layer.9": 616.3262, "epoch": 0.48, "inbatch_neg_score": 0.2566, "inbatch_pos_score": 0.8892, "learning_rate": 2.8388888888888893e-05, "loss": 3.2805, "norm_diff": 0.0637, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 976.9166, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2559, "query_norm": 1.3718, "queue_k_norm": 1.4336, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0095, "sent_len_1": 66.8235, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5938, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 48900 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3087, "doc_norm": 1.4296, "encoder_q-embeddings": 573.9216, "encoder_q-layer.0": 380.9545, "encoder_q-layer.1": 411.9406, "encoder_q-layer.10": 645.8431, "encoder_q-layer.11": 1409.3793, "encoder_q-layer.2": 462.9188, "encoder_q-layer.3": 469.1431, "encoder_q-layer.4": 524.4514, "encoder_q-layer.5": 516.006, "encoder_q-layer.6": 583.3431, "encoder_q-layer.7": 673.5248, "encoder_q-layer.8": 744.4202, "encoder_q-layer.9": 648.5178, "epoch": 0.48, "inbatch_neg_score": 0.2572, "inbatch_pos_score": 0.8984, "learning_rate": 2.8333333333333335e-05, "loss": 3.3087, "norm_diff": 0.0378, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1018.6196, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2578, "query_norm": 1.3918, "queue_k_norm": 1.4336, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8964, "sent_len_1": 66.7379, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2988, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 49000 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.254, "doc_norm": 1.4307, "encoder_q-embeddings": 480.1978, "encoder_q-layer.0": 312.885, "encoder_q-layer.1": 328.4211, "encoder_q-layer.10": 595.6025, "encoder_q-layer.11": 1179.4508, "encoder_q-layer.2": 366.0674, "encoder_q-layer.3": 403.5346, "encoder_q-layer.4": 415.4842, "encoder_q-layer.5": 442.2807, "encoder_q-layer.6": 505.0235, "encoder_q-layer.7": 548.2421, "encoder_q-layer.8": 637.4025, "encoder_q-layer.9": 561.2772, "epoch": 0.48, "inbatch_neg_score": 0.2529, "inbatch_pos_score": 0.9331, "learning_rate": 2.827777777777778e-05, "loss": 3.254, "norm_diff": 0.036, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 827.0843, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2522, "query_norm": 1.3947, "queue_k_norm": 1.4348, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7881, "sent_len_1": 66.9515, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9712, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 49100 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 3.2674, "doc_norm": 1.4362, "encoder_q-embeddings": 669.8634, "encoder_q-layer.0": 451.548, "encoder_q-layer.1": 461.2002, "encoder_q-layer.10": 556.2665, "encoder_q-layer.11": 1195.0251, "encoder_q-layer.2": 538.9232, "encoder_q-layer.3": 537.0554, "encoder_q-layer.4": 602.7024, "encoder_q-layer.5": 586.8381, "encoder_q-layer.6": 648.6224, "encoder_q-layer.7": 693.9544, "encoder_q-layer.8": 744.7681, "encoder_q-layer.9": 602.6758, "epoch": 0.48, "inbatch_neg_score": 0.2496, "inbatch_pos_score": 0.9878, "learning_rate": 2.8222222222222223e-05, "loss": 3.2674, "norm_diff": 0.0189, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 996.8091, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2498, "query_norm": 1.4181, "queue_k_norm": 1.4342, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8133, "sent_len_1": 66.7709, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6025, "stdk": 0.0487, "stdq": 0.0469, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 49200 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2776, "doc_norm": 1.438, "encoder_q-embeddings": 743.2507, "encoder_q-layer.0": 499.1469, "encoder_q-layer.1": 528.4717, "encoder_q-layer.10": 575.9436, "encoder_q-layer.11": 1237.1012, "encoder_q-layer.2": 549.3284, "encoder_q-layer.3": 541.8404, "encoder_q-layer.4": 558.2244, "encoder_q-layer.5": 556.8466, "encoder_q-layer.6": 609.2246, "encoder_q-layer.7": 688.6274, "encoder_q-layer.8": 798.4581, "encoder_q-layer.9": 639.2142, "epoch": 0.48, "inbatch_neg_score": 0.2491, "inbatch_pos_score": 0.9307, "learning_rate": 2.816666666666667e-05, "loss": 3.2776, "norm_diff": 0.0384, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1044.7238, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2487, "query_norm": 1.3996, "queue_k_norm": 1.4361, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.146, "sent_len_1": 66.8046, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1413, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 49300 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.2815, "doc_norm": 1.43, "encoder_q-embeddings": 741.7136, "encoder_q-layer.0": 479.366, "encoder_q-layer.1": 568.0845, "encoder_q-layer.10": 599.6125, "encoder_q-layer.11": 1266.66, "encoder_q-layer.2": 649.4241, "encoder_q-layer.3": 808.4289, "encoder_q-layer.4": 824.9398, "encoder_q-layer.5": 843.7504, "encoder_q-layer.6": 936.3667, "encoder_q-layer.7": 995.6174, "encoder_q-layer.8": 887.8491, "encoder_q-layer.9": 609.2858, "epoch": 0.48, "inbatch_neg_score": 0.2515, "inbatch_pos_score": 0.8994, "learning_rate": 2.811111111111111e-05, "loss": 3.2815, "norm_diff": 0.0632, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1209.0235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2522, "query_norm": 1.3668, "queue_k_norm": 1.4353, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8605, "sent_len_1": 66.7924, "sent_max_len_0": 128.0, "sent_max_len_1": 188.065, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 49400 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.261, "doc_norm": 1.4377, "encoder_q-embeddings": 670.2951, "encoder_q-layer.0": 464.3238, "encoder_q-layer.1": 529.0363, "encoder_q-layer.10": 559.6861, "encoder_q-layer.11": 1190.3253, "encoder_q-layer.2": 588.9677, "encoder_q-layer.3": 637.9328, "encoder_q-layer.4": 697.6728, "encoder_q-layer.5": 711.5396, "encoder_q-layer.6": 717.5536, "encoder_q-layer.7": 812.1486, "encoder_q-layer.8": 814.0258, "encoder_q-layer.9": 585.0984, "epoch": 0.48, "inbatch_neg_score": 0.2544, "inbatch_pos_score": 0.9131, "learning_rate": 2.8055555555555557e-05, "loss": 3.261, "norm_diff": 0.0582, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1055.6176, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2542, "query_norm": 1.3795, "queue_k_norm": 1.4351, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8175, "sent_len_1": 66.6153, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5163, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 49500 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.2698, "doc_norm": 1.4374, "encoder_q-embeddings": 639.9524, "encoder_q-layer.0": 433.8493, "encoder_q-layer.1": 477.6505, "encoder_q-layer.10": 596.1375, "encoder_q-layer.11": 1267.7473, "encoder_q-layer.2": 529.642, "encoder_q-layer.3": 549.7073, "encoder_q-layer.4": 576.8627, "encoder_q-layer.5": 609.8173, "encoder_q-layer.6": 636.2068, "encoder_q-layer.7": 693.9382, "encoder_q-layer.8": 758.7448, "encoder_q-layer.9": 601.1351, "epoch": 0.48, "inbatch_neg_score": 0.2551, "inbatch_pos_score": 0.894, "learning_rate": 2.8000000000000003e-05, "loss": 3.2698, "norm_diff": 0.0724, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1014.3912, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2542, "query_norm": 1.365, "queue_k_norm": 1.4362, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9943, "sent_len_1": 66.8838, "sent_max_len_0": 128.0, "sent_max_len_1": 189.58, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 49600 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.2781, "doc_norm": 1.4354, "encoder_q-embeddings": 527.7044, "encoder_q-layer.0": 339.0107, "encoder_q-layer.1": 361.2082, "encoder_q-layer.10": 635.0565, "encoder_q-layer.11": 1340.6644, "encoder_q-layer.2": 404.3798, "encoder_q-layer.3": 410.4211, "encoder_q-layer.4": 450.1935, "encoder_q-layer.5": 469.7354, "encoder_q-layer.6": 555.3041, "encoder_q-layer.7": 629.2714, "encoder_q-layer.8": 734.3181, "encoder_q-layer.9": 647.3668, "epoch": 0.49, "inbatch_neg_score": 0.2541, "inbatch_pos_score": 0.9014, "learning_rate": 2.7944444444444445e-05, "loss": 3.2781, "norm_diff": 0.0473, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 941.5673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2532, "query_norm": 1.3882, "queue_k_norm": 1.4366, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9019, "sent_len_1": 66.6103, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2325, "stdk": 0.0486, "stdq": 0.0457, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 49700 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.2694, "doc_norm": 1.4388, "encoder_q-embeddings": 1114.4673, "encoder_q-layer.0": 790.269, "encoder_q-layer.1": 983.6457, "encoder_q-layer.10": 587.7758, "encoder_q-layer.11": 1348.8569, "encoder_q-layer.2": 1148.7114, "encoder_q-layer.3": 1195.8994, "encoder_q-layer.4": 1274.5671, "encoder_q-layer.5": 1309.0521, "encoder_q-layer.6": 1134.8138, "encoder_q-layer.7": 1142.5459, "encoder_q-layer.8": 928.8678, "encoder_q-layer.9": 643.5111, "epoch": 0.49, "inbatch_neg_score": 0.2569, "inbatch_pos_score": 0.8794, "learning_rate": 2.788888888888889e-05, "loss": 3.2694, "norm_diff": 0.0729, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1597.4312, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2559, "query_norm": 1.366, "queue_k_norm": 1.4353, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8295, "sent_len_1": 66.595, "sent_max_len_0": 128.0, "sent_max_len_1": 186.665, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 49800 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.2659, "doc_norm": 1.434, "encoder_q-embeddings": 1040.2062, "encoder_q-layer.0": 676.837, "encoder_q-layer.1": 714.893, "encoder_q-layer.10": 1234.5951, "encoder_q-layer.11": 2755.4761, "encoder_q-layer.2": 794.3683, "encoder_q-layer.3": 821.9664, "encoder_q-layer.4": 874.4783, "encoder_q-layer.5": 948.3627, "encoder_q-layer.6": 1071.0786, "encoder_q-layer.7": 1297.5349, "encoder_q-layer.8": 1513.7731, "encoder_q-layer.9": 1282.4891, "epoch": 0.49, "inbatch_neg_score": 0.2594, "inbatch_pos_score": 0.9253, "learning_rate": 2.7833333333333333e-05, "loss": 3.2659, "norm_diff": 0.041, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1911.9599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2583, "query_norm": 1.393, "queue_k_norm": 1.436, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.974, "sent_len_1": 66.7733, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9162, "stdk": 0.0485, "stdq": 0.0458, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 49900 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.2853, "doc_norm": 1.4345, "encoder_q-embeddings": 1055.1362, "encoder_q-layer.0": 684.3519, "encoder_q-layer.1": 746.9404, "encoder_q-layer.10": 1129.9023, "encoder_q-layer.11": 2476.2202, "encoder_q-layer.2": 855.759, "encoder_q-layer.3": 920.9887, "encoder_q-layer.4": 1028.2083, "encoder_q-layer.5": 993.6367, "encoder_q-layer.6": 1116.8638, "encoder_q-layer.7": 1245.4443, "encoder_q-layer.8": 1435.1512, "encoder_q-layer.9": 1256.2905, "epoch": 0.49, "inbatch_neg_score": 0.2621, "inbatch_pos_score": 0.9404, "learning_rate": 2.777777777777778e-05, "loss": 3.2853, "norm_diff": 0.0289, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1847.4731, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2607, "query_norm": 1.4056, "queue_k_norm": 1.4362, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7732, "sent_len_1": 66.7012, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3587, "stdk": 0.0486, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 50000 }, { "dev_runtime": 37.2579, "dev_samples_per_second": 1.718, "dev_steps_per_second": 0.027, "epoch": 0.49, "step": 50000, "test_accuracy": 94.2626953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.32980793714523315, "test_doc_norm": 1.4416618347167969, "test_inbatch_neg_score": 0.6341235637664795, "test_inbatch_pos_score": 1.6145732402801514, "test_loss": 0.32980793714523315, "test_loss_align": 1.106376051902771, "test_loss_unif": 3.8641538619995117, "test_loss_unif_q@queue": 3.8641536235809326, "test_norm_diff": 0.05056659132242203, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.243794783949852, "test_query_norm": 1.4922285079956055, "test_queue_k_norm": 1.4359393119812012, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04328026995062828, "test_stdq": 0.04397352784872055, "test_stdqueue_k": 0.048719923943281174, "test_stdqueue_q": 0.0 }, { "dev_runtime": 37.2579, "dev_samples_per_second": 1.718, "dev_steps_per_second": 0.027, "epoch": 0.49, "eval_beir-arguana_ndcg@10": 0.35385, "eval_beir-arguana_recall@10": 0.60953, "eval_beir-arguana_recall@100": 0.9111, "eval_beir-arguana_recall@20": 0.74395, "eval_beir-avg_ndcg@10": 0.3668061666666667, "eval_beir-avg_recall@10": 0.433861, "eval_beir-avg_recall@100": 0.606643, "eval_beir-avg_recall@20": 0.4888535, "eval_beir-cqadupstack_ndcg@10": 0.25952166666666665, "eval_beir-cqadupstack_recall@10": 0.34896, "eval_beir-cqadupstack_recall@100": 0.5782299999999999, "eval_beir-cqadupstack_recall@20": 0.411715, "eval_beir-fiqa_ndcg@10": 0.21034, "eval_beir-fiqa_recall@10": 0.26955, "eval_beir-fiqa_recall@100": 0.50806, "eval_beir-fiqa_recall@20": 0.33142, "eval_beir-nfcorpus_ndcg@10": 0.27642, "eval_beir-nfcorpus_recall@10": 0.13751, "eval_beir-nfcorpus_recall@100": 0.25811, "eval_beir-nfcorpus_recall@20": 0.16814, "eval_beir-nq_ndcg@10": 0.26792, "eval_beir-nq_recall@10": 0.43839, "eval_beir-nq_recall@100": 0.77083, "eval_beir-nq_recall@20": 0.55123, "eval_beir-quora_ndcg@10": 0.81686, "eval_beir-quora_recall@10": 0.91333, "eval_beir-quora_recall@100": 0.98438, "eval_beir-quora_recall@20": 0.94872, "eval_beir-scidocs_ndcg@10": 0.13662, "eval_beir-scidocs_recall@10": 0.14343, "eval_beir-scidocs_recall@100": 0.33678, "eval_beir-scidocs_recall@20": 0.19703, "eval_beir-scifact_ndcg@10": 0.59899, "eval_beir-scifact_recall@10": 0.7465, "eval_beir-scifact_recall@100": 0.91522, "eval_beir-scifact_recall@20": 0.80067, "eval_beir-trec-covid_ndcg@10": 0.56107, "eval_beir-trec-covid_recall@10": 0.592, "eval_beir-trec-covid_recall@100": 0.394, "eval_beir-trec-covid_recall@20": 0.525, "eval_beir-webis-touche2020_ndcg@10": 0.18647, "eval_beir-webis-touche2020_recall@10": 0.13941, "eval_beir-webis-touche2020_recall@100": 0.40972, "eval_beir-webis-touche2020_recall@20": 0.21066, "eval_senteval-avg_sts": 0.7340157330939798, "eval_senteval-sickr_spearman": 0.7026975116568629, "eval_senteval-stsb_spearman": 0.7653339545310969, "step": 50000, "test_accuracy": 94.2626953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.32980793714523315, "test_doc_norm": 1.4416618347167969, "test_inbatch_neg_score": 0.6341235637664795, "test_inbatch_pos_score": 1.6145732402801514, "test_loss": 0.32980793714523315, "test_loss_align": 1.106376051902771, "test_loss_unif": 3.8641538619995117, "test_loss_unif_q@queue": 3.8641536235809326, "test_norm_diff": 0.05056659132242203, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.243794783949852, "test_query_norm": 1.4922285079956055, "test_queue_k_norm": 1.4359393119812012, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04328026995062828, "test_stdq": 0.04397352784872055, "test_stdqueue_k": 0.048719923943281174, "test_stdqueue_q": 0.0 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2947, "doc_norm": 1.4341, "encoder_q-embeddings": 1211.6646, "encoder_q-layer.0": 792.7898, "encoder_q-layer.1": 899.413, "encoder_q-layer.10": 1147.7826, "encoder_q-layer.11": 2531.7803, "encoder_q-layer.2": 1033.4694, "encoder_q-layer.3": 1052.1979, "encoder_q-layer.4": 1126.5465, "encoder_q-layer.5": 1142.4198, "encoder_q-layer.6": 1304.8063, "encoder_q-layer.7": 1445.0179, "encoder_q-layer.8": 1658.5203, "encoder_q-layer.9": 1365.713, "epoch": 0.49, "inbatch_neg_score": 0.2606, "inbatch_pos_score": 0.9136, "learning_rate": 2.772222222222222e-05, "loss": 3.2947, "norm_diff": 0.0481, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2024.693, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2612, "query_norm": 1.3861, "queue_k_norm": 1.4358, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8835, "sent_len_1": 66.579, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3088, "stdk": 0.0485, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 50100 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.2655, "doc_norm": 1.4414, "encoder_q-embeddings": 1207.978, "encoder_q-layer.0": 744.3564, "encoder_q-layer.1": 795.8477, "encoder_q-layer.10": 1262.3108, "encoder_q-layer.11": 2605.5647, "encoder_q-layer.2": 899.9904, "encoder_q-layer.3": 969.5113, "encoder_q-layer.4": 1031.207, "encoder_q-layer.5": 1077.5168, "encoder_q-layer.6": 1199.4794, "encoder_q-layer.7": 1361.3185, "encoder_q-layer.8": 1563.3794, "encoder_q-layer.9": 1360.3708, "epoch": 0.49, "inbatch_neg_score": 0.258, "inbatch_pos_score": 0.9175, "learning_rate": 2.7666666666666667e-05, "loss": 3.2655, "norm_diff": 0.0542, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1957.6102, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2573, "query_norm": 1.3873, "queue_k_norm": 1.4368, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8582, "sent_len_1": 66.7203, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1087, "stdk": 0.0488, "stdq": 0.0455, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 50200 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.2784, "doc_norm": 1.4374, "encoder_q-embeddings": 1765.6559, "encoder_q-layer.0": 1168.9316, "encoder_q-layer.1": 1422.0564, "encoder_q-layer.10": 1167.4447, "encoder_q-layer.11": 2598.1196, "encoder_q-layer.2": 1700.7476, "encoder_q-layer.3": 1859.4659, "encoder_q-layer.4": 1934.5746, "encoder_q-layer.5": 2215.3433, "encoder_q-layer.6": 2326.8479, "encoder_q-layer.7": 2360.5522, "encoder_q-layer.8": 2127.3074, "encoder_q-layer.9": 1484.8362, "epoch": 0.49, "inbatch_neg_score": 0.2588, "inbatch_pos_score": 0.936, "learning_rate": 2.761111111111111e-05, "loss": 3.2784, "norm_diff": 0.0455, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2848.7958, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2598, "query_norm": 1.3919, "queue_k_norm": 1.4381, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9632, "sent_len_1": 66.783, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1813, "stdk": 0.0486, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 50300 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.2976, "doc_norm": 1.4375, "encoder_q-embeddings": 954.5184, "encoder_q-layer.0": 635.1726, "encoder_q-layer.1": 675.2886, "encoder_q-layer.10": 1310.5186, "encoder_q-layer.11": 2435.7488, "encoder_q-layer.2": 767.007, "encoder_q-layer.3": 800.6089, "encoder_q-layer.4": 831.1558, "encoder_q-layer.5": 877.3548, "encoder_q-layer.6": 998.9734, "encoder_q-layer.7": 1153.4221, "encoder_q-layer.8": 1359.4242, "encoder_q-layer.9": 1198.1189, "epoch": 0.49, "inbatch_neg_score": 0.2615, "inbatch_pos_score": 0.9043, "learning_rate": 2.7555555555555555e-05, "loss": 3.2976, "norm_diff": 0.0406, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1725.3252, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2617, "query_norm": 1.3969, "queue_k_norm": 1.4382, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9739, "sent_len_1": 66.8045, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2537, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 50400 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2802, "doc_norm": 1.4408, "encoder_q-embeddings": 1428.1772, "encoder_q-layer.0": 1018.8244, "encoder_q-layer.1": 1098.9192, "encoder_q-layer.10": 1123.7081, "encoder_q-layer.11": 2463.4946, "encoder_q-layer.2": 1337.5638, "encoder_q-layer.3": 1381.8877, "encoder_q-layer.4": 1373.7654, "encoder_q-layer.5": 1310.3585, "encoder_q-layer.6": 1230.3354, "encoder_q-layer.7": 1378.9727, "encoder_q-layer.8": 1487.8324, "encoder_q-layer.9": 1234.2593, "epoch": 0.49, "inbatch_neg_score": 0.2674, "inbatch_pos_score": 0.9287, "learning_rate": 2.7500000000000004e-05, "loss": 3.2802, "norm_diff": 0.0535, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2114.4005, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2664, "query_norm": 1.3873, "queue_k_norm": 1.439, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0151, "sent_len_1": 66.7572, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1413, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 50500 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.3005, "doc_norm": 1.4325, "encoder_q-embeddings": 1153.769, "encoder_q-layer.0": 759.8857, "encoder_q-layer.1": 803.3187, "encoder_q-layer.10": 1167.4299, "encoder_q-layer.11": 2452.1709, "encoder_q-layer.2": 883.792, "encoder_q-layer.3": 926.6732, "encoder_q-layer.4": 986.6658, "encoder_q-layer.5": 1102.5579, "encoder_q-layer.6": 1163.6055, "encoder_q-layer.7": 1263.8777, "encoder_q-layer.8": 1357.5933, "encoder_q-layer.9": 1128.3422, "epoch": 0.49, "inbatch_neg_score": 0.2708, "inbatch_pos_score": 0.9409, "learning_rate": 2.7444444444444443e-05, "loss": 3.3005, "norm_diff": 0.0338, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1838.3568, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2705, "query_norm": 1.3987, "queue_k_norm": 1.4403, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7369, "sent_len_1": 66.7605, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0462, "stdk": 0.0484, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 50600 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2622, "doc_norm": 1.4413, "encoder_q-embeddings": 1097.0942, "encoder_q-layer.0": 690.6096, "encoder_q-layer.1": 723.7868, "encoder_q-layer.10": 1250.9301, "encoder_q-layer.11": 2711.6902, "encoder_q-layer.2": 802.0071, "encoder_q-layer.3": 865.0005, "encoder_q-layer.4": 944.0429, "encoder_q-layer.5": 1006.4625, "encoder_q-layer.6": 1092.9701, "encoder_q-layer.7": 1243.7129, "encoder_q-layer.8": 1426.7479, "encoder_q-layer.9": 1237.1437, "epoch": 0.49, "inbatch_neg_score": 0.2763, "inbatch_pos_score": 0.9658, "learning_rate": 2.7388888888888892e-05, "loss": 3.2622, "norm_diff": 0.0268, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1908.1977, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2756, "query_norm": 1.4145, "queue_k_norm": 1.4406, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9272, "sent_len_1": 66.982, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7325, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 50700 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.2665, "doc_norm": 1.4479, "encoder_q-embeddings": 1019.4509, "encoder_q-layer.0": 686.3734, "encoder_q-layer.1": 727.0787, "encoder_q-layer.10": 1128.7607, "encoder_q-layer.11": 2583.7151, "encoder_q-layer.2": 813.1434, "encoder_q-layer.3": 845.6356, "encoder_q-layer.4": 919.1758, "encoder_q-layer.5": 912.0933, "encoder_q-layer.6": 1089.1222, "encoder_q-layer.7": 1226.6792, "encoder_q-layer.8": 1466.3184, "encoder_q-layer.9": 1266.4097, "epoch": 0.5, "inbatch_neg_score": 0.2804, "inbatch_pos_score": 0.978, "learning_rate": 2.733333333333333e-05, "loss": 3.2665, "norm_diff": 0.0183, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1842.2471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.281, "query_norm": 1.4296, "queue_k_norm": 1.4423, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9366, "sent_len_1": 66.8677, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8613, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 50800 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.2491, "doc_norm": 1.4432, "encoder_q-embeddings": 1123.4386, "encoder_q-layer.0": 790.5886, "encoder_q-layer.1": 844.0469, "encoder_q-layer.10": 1426.6189, "encoder_q-layer.11": 2770.3386, "encoder_q-layer.2": 925.0001, "encoder_q-layer.3": 945.0209, "encoder_q-layer.4": 1034.1024, "encoder_q-layer.5": 1089.2164, "encoder_q-layer.6": 1322.8473, "encoder_q-layer.7": 1510.824, "encoder_q-layer.8": 1740.3009, "encoder_q-layer.9": 1489.2295, "epoch": 0.5, "inbatch_neg_score": 0.2869, "inbatch_pos_score": 0.9414, "learning_rate": 2.727777777777778e-05, "loss": 3.2491, "norm_diff": 0.0496, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2064.3833, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2869, "query_norm": 1.3936, "queue_k_norm": 1.4403, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.822, "sent_len_1": 66.5197, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4538, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 50900 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.2815, "doc_norm": 1.438, "encoder_q-embeddings": 2220.0398, "encoder_q-layer.0": 1568.1479, "encoder_q-layer.1": 1792.9387, "encoder_q-layer.10": 1147.9126, "encoder_q-layer.11": 2522.082, "encoder_q-layer.2": 2104.2612, "encoder_q-layer.3": 2024.3868, "encoder_q-layer.4": 2317.907, "encoder_q-layer.5": 2161.7134, "encoder_q-layer.6": 2181.7244, "encoder_q-layer.7": 1956.7344, "encoder_q-layer.8": 1953.9933, "encoder_q-layer.9": 1487.8584, "epoch": 0.5, "inbatch_neg_score": 0.2899, "inbatch_pos_score": 0.9741, "learning_rate": 2.7222222222222223e-05, "loss": 3.2815, "norm_diff": 0.0069, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2952.3577, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2893, "query_norm": 1.4377, "queue_k_norm": 1.4426, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7962, "sent_len_1": 66.6016, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1037, "stdk": 0.0485, "stdq": 0.0465, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 51000 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.2602, "doc_norm": 1.4486, "encoder_q-embeddings": 1009.5827, "encoder_q-layer.0": 636.9775, "encoder_q-layer.1": 688.6617, "encoder_q-layer.10": 1254.7466, "encoder_q-layer.11": 2659.0186, "encoder_q-layer.2": 753.02, "encoder_q-layer.3": 779.0068, "encoder_q-layer.4": 807.4285, "encoder_q-layer.5": 903.6761, "encoder_q-layer.6": 995.0179, "encoder_q-layer.7": 1139.2833, "encoder_q-layer.8": 1404.1119, "encoder_q-layer.9": 1196.4259, "epoch": 0.5, "inbatch_neg_score": 0.2901, "inbatch_pos_score": 0.9688, "learning_rate": 2.716666666666667e-05, "loss": 3.2602, "norm_diff": 0.0159, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1802.0676, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2898, "query_norm": 1.438, "queue_k_norm": 1.4454, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0127, "sent_len_1": 66.8526, "sent_max_len_0": 128.0, "sent_max_len_1": 188.365, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 51100 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.276, "doc_norm": 1.4508, "encoder_q-embeddings": 1723.6058, "encoder_q-layer.0": 1122.0077, "encoder_q-layer.1": 1274.3641, "encoder_q-layer.10": 1170.4948, "encoder_q-layer.11": 2614.2214, "encoder_q-layer.2": 1557.7628, "encoder_q-layer.3": 1737.8528, "encoder_q-layer.4": 1749.2527, "encoder_q-layer.5": 1836.8586, "encoder_q-layer.6": 1768.502, "encoder_q-layer.7": 1814.5582, "encoder_q-layer.8": 1624.8115, "encoder_q-layer.9": 1199.8207, "epoch": 0.5, "inbatch_neg_score": 0.2922, "inbatch_pos_score": 0.9756, "learning_rate": 2.7111111111111114e-05, "loss": 3.276, "norm_diff": 0.0315, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2528.7995, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.4192, "queue_k_norm": 1.4435, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8029, "sent_len_1": 66.8065, "sent_max_len_0": 128.0, "sent_max_len_1": 192.38, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 51200 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3002, "doc_norm": 1.4441, "encoder_q-embeddings": 1338.3572, "encoder_q-layer.0": 900.2236, "encoder_q-layer.1": 967.2972, "encoder_q-layer.10": 1194.7307, "encoder_q-layer.11": 2587.0698, "encoder_q-layer.2": 1060.7356, "encoder_q-layer.3": 1107.3979, "encoder_q-layer.4": 1201.5321, "encoder_q-layer.5": 1228.3876, "encoder_q-layer.6": 1302.9744, "encoder_q-layer.7": 1426.8362, "encoder_q-layer.8": 1510.8982, "encoder_q-layer.9": 1284.0896, "epoch": 0.5, "inbatch_neg_score": 0.291, "inbatch_pos_score": 0.9414, "learning_rate": 2.7055555555555557e-05, "loss": 3.3002, "norm_diff": 0.0403, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2075.3779, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2903, "query_norm": 1.4039, "queue_k_norm": 1.4441, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8996, "sent_len_1": 66.4845, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0975, "stdk": 0.0486, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 51300 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.2871, "doc_norm": 1.4496, "encoder_q-embeddings": 2659.5273, "encoder_q-layer.0": 1715.2466, "encoder_q-layer.1": 1758.1676, "encoder_q-layer.10": 1199.6017, "encoder_q-layer.11": 2804.0176, "encoder_q-layer.2": 2002.5017, "encoder_q-layer.3": 2171.3596, "encoder_q-layer.4": 2447.6096, "encoder_q-layer.5": 2344.2251, "encoder_q-layer.6": 2013.2656, "encoder_q-layer.7": 1987.2859, "encoder_q-layer.8": 1710.4736, "encoder_q-layer.9": 1271.9417, "epoch": 0.5, "inbatch_neg_score": 0.2979, "inbatch_pos_score": 0.9336, "learning_rate": 2.7000000000000002e-05, "loss": 3.2871, "norm_diff": 0.0484, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3144.9979, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2971, "query_norm": 1.4011, "queue_k_norm": 1.4444, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7978, "sent_len_1": 66.6128, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8375, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 51400 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2952, "doc_norm": 1.4472, "encoder_q-embeddings": 1237.239, "encoder_q-layer.0": 791.0626, "encoder_q-layer.1": 828.5809, "encoder_q-layer.10": 1214.4211, "encoder_q-layer.11": 2795.3115, "encoder_q-layer.2": 928.469, "encoder_q-layer.3": 998.6166, "encoder_q-layer.4": 1062.6223, "encoder_q-layer.5": 1077.6219, "encoder_q-layer.6": 1169.0847, "encoder_q-layer.7": 1389.251, "encoder_q-layer.8": 1468.6864, "encoder_q-layer.9": 1223.3564, "epoch": 0.5, "inbatch_neg_score": 0.2958, "inbatch_pos_score": 0.9712, "learning_rate": 2.6944444444444445e-05, "loss": 3.2952, "norm_diff": 0.0304, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2013.6185, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2961, "query_norm": 1.4167, "queue_k_norm": 1.4474, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9549, "sent_len_1": 66.7634, "sent_max_len_0": 128.0, "sent_max_len_1": 187.81, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 51500 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2544, "doc_norm": 1.4486, "encoder_q-embeddings": 2165.8784, "encoder_q-layer.0": 1487.7036, "encoder_q-layer.1": 1703.0875, "encoder_q-layer.10": 1157.6433, "encoder_q-layer.11": 2769.354, "encoder_q-layer.2": 1987.9492, "encoder_q-layer.3": 2278.6904, "encoder_q-layer.4": 2472.2561, "encoder_q-layer.5": 2458.7173, "encoder_q-layer.6": 2414.5176, "encoder_q-layer.7": 2220.2673, "encoder_q-layer.8": 2168.0903, "encoder_q-layer.9": 1457.9832, "epoch": 0.5, "inbatch_neg_score": 0.2993, "inbatch_pos_score": 0.9707, "learning_rate": 2.688888888888889e-05, "loss": 3.2544, "norm_diff": 0.0435, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3209.2698, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3, "query_norm": 1.4051, "queue_k_norm": 1.4478, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1709, "sent_len_1": 66.7817, "sent_max_len_0": 128.0, "sent_max_len_1": 188.88, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 51600 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2536, "doc_norm": 1.4475, "encoder_q-embeddings": 969.1141, "encoder_q-layer.0": 642.3048, "encoder_q-layer.1": 664.6626, "encoder_q-layer.10": 1174.2742, "encoder_q-layer.11": 2858.5254, "encoder_q-layer.2": 753.7573, "encoder_q-layer.3": 768.1957, "encoder_q-layer.4": 818.3869, "encoder_q-layer.5": 855.6999, "encoder_q-layer.6": 988.6628, "encoder_q-layer.7": 1162.6769, "encoder_q-layer.8": 1373.1758, "encoder_q-layer.9": 1195.8572, "epoch": 0.5, "inbatch_neg_score": 0.3022, "inbatch_pos_score": 0.9409, "learning_rate": 2.6833333333333333e-05, "loss": 3.2536, "norm_diff": 0.058, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1877.6536, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.302, "query_norm": 1.3895, "queue_k_norm": 1.4486, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.2201, "sent_len_1": 67.0927, "sent_max_len_0": 128.0, "sent_max_len_1": 192.0437, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 51700 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.2774, "doc_norm": 1.4489, "encoder_q-embeddings": 1256.1582, "encoder_q-layer.0": 833.2198, "encoder_q-layer.1": 924.1592, "encoder_q-layer.10": 1233.8258, "encoder_q-layer.11": 2581.1216, "encoder_q-layer.2": 1032.7875, "encoder_q-layer.3": 1147.6764, "encoder_q-layer.4": 1222.186, "encoder_q-layer.5": 1325.7236, "encoder_q-layer.6": 1368.3606, "encoder_q-layer.7": 1427.3041, "encoder_q-layer.8": 1489.9845, "encoder_q-layer.9": 1160.4004, "epoch": 0.51, "inbatch_neg_score": 0.3013, "inbatch_pos_score": 0.9595, "learning_rate": 2.677777777777778e-05, "loss": 3.2774, "norm_diff": 0.0626, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2066.7077, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3013, "query_norm": 1.3864, "queue_k_norm": 1.4496, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9184, "sent_len_1": 67.0613, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6962, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 51800 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.2684, "doc_norm": 1.4526, "encoder_q-embeddings": 2529.9556, "encoder_q-layer.0": 1686.6755, "encoder_q-layer.1": 1853.4084, "encoder_q-layer.10": 2231.1294, "encoder_q-layer.11": 5462.7642, "encoder_q-layer.2": 2087.5757, "encoder_q-layer.3": 2233.9949, "encoder_q-layer.4": 2348.4836, "encoder_q-layer.5": 2384.6833, "encoder_q-layer.6": 2538.8872, "encoder_q-layer.7": 2657.7949, "encoder_q-layer.8": 2804.5466, "encoder_q-layer.9": 2362.1243, "epoch": 0.51, "inbatch_neg_score": 0.3038, "inbatch_pos_score": 0.9629, "learning_rate": 2.6722222222222228e-05, "loss": 3.2684, "norm_diff": 0.0539, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4013.4874, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3049, "query_norm": 1.3986, "queue_k_norm": 1.4503, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0697, "sent_len_1": 66.7456, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5012, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 51900 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.259, "doc_norm": 1.4467, "encoder_q-embeddings": 3033.0173, "encoder_q-layer.0": 2047.2709, "encoder_q-layer.1": 2433.8833, "encoder_q-layer.10": 2427.7825, "encoder_q-layer.11": 5564.5396, "encoder_q-layer.2": 2812.0322, "encoder_q-layer.3": 2979.5, "encoder_q-layer.4": 3297.4167, "encoder_q-layer.5": 3086.604, "encoder_q-layer.6": 3170.0203, "encoder_q-layer.7": 3106.5581, "encoder_q-layer.8": 2979.2588, "encoder_q-layer.9": 2399.1279, "epoch": 0.51, "inbatch_neg_score": 0.3015, "inbatch_pos_score": 0.9614, "learning_rate": 2.6666666666666667e-05, "loss": 3.259, "norm_diff": 0.0498, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4684.1284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3027, "query_norm": 1.3969, "queue_k_norm": 1.4504, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9123, "sent_len_1": 66.7628, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5425, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 52000 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2551, "doc_norm": 1.4589, "encoder_q-embeddings": 2361.54, "encoder_q-layer.0": 1590.246, "encoder_q-layer.1": 1831.9202, "encoder_q-layer.10": 2434.6265, "encoder_q-layer.11": 5340.8027, "encoder_q-layer.2": 2049.5586, "encoder_q-layer.3": 2122.2317, "encoder_q-layer.4": 2228.5354, "encoder_q-layer.5": 2277.3682, "encoder_q-layer.6": 2430.8271, "encoder_q-layer.7": 2695.4519, "encoder_q-layer.8": 2876.448, "encoder_q-layer.9": 2487.7043, "epoch": 0.51, "inbatch_neg_score": 0.3062, "inbatch_pos_score": 0.9907, "learning_rate": 2.6611111111111116e-05, "loss": 3.2551, "norm_diff": 0.0558, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4035.6363, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3062, "query_norm": 1.4031, "queue_k_norm": 1.4502, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.2445, "sent_len_1": 66.8362, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0825, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 52100 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2885, "doc_norm": 1.4576, "encoder_q-embeddings": 2441.5417, "encoder_q-layer.0": 1555.229, "encoder_q-layer.1": 1641.8291, "encoder_q-layer.10": 2439.7886, "encoder_q-layer.11": 5521.5518, "encoder_q-layer.2": 1849.7288, "encoder_q-layer.3": 1942.5228, "encoder_q-layer.4": 2137.7949, "encoder_q-layer.5": 2126.2378, "encoder_q-layer.6": 2435.2314, "encoder_q-layer.7": 2958.1292, "encoder_q-layer.8": 3134.2014, "encoder_q-layer.9": 2578.3206, "epoch": 0.51, "inbatch_neg_score": 0.3043, "inbatch_pos_score": 0.9595, "learning_rate": 2.6555555555555555e-05, "loss": 3.2885, "norm_diff": 0.058, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4103.4679, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.304, "query_norm": 1.3995, "queue_k_norm": 1.452, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9268, "sent_len_1": 66.894, "sent_max_len_0": 128.0, "sent_max_len_1": 193.245, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 52200 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.2659, "doc_norm": 1.4536, "encoder_q-embeddings": 2475.5537, "encoder_q-layer.0": 1583.9683, "encoder_q-layer.1": 1677.0433, "encoder_q-layer.10": 2380.0686, "encoder_q-layer.11": 5387.8457, "encoder_q-layer.2": 1963.1552, "encoder_q-layer.3": 2071.1589, "encoder_q-layer.4": 2141.7131, "encoder_q-layer.5": 2152.915, "encoder_q-layer.6": 2380.1091, "encoder_q-layer.7": 2629.6094, "encoder_q-layer.8": 2887.5278, "encoder_q-layer.9": 2459.5403, "epoch": 0.51, "inbatch_neg_score": 0.3111, "inbatch_pos_score": 0.9497, "learning_rate": 2.6500000000000004e-05, "loss": 3.2659, "norm_diff": 0.0515, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4031.8182, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3105, "query_norm": 1.402, "queue_k_norm": 1.452, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8795, "sent_len_1": 66.6744, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4625, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 52300 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.2809, "doc_norm": 1.4559, "encoder_q-embeddings": 1941.0386, "encoder_q-layer.0": 1242.5665, "encoder_q-layer.1": 1299.5121, "encoder_q-layer.10": 2304.7559, "encoder_q-layer.11": 5088.1494, "encoder_q-layer.2": 1467.5684, "encoder_q-layer.3": 1581.2965, "encoder_q-layer.4": 1678.145, "encoder_q-layer.5": 1670.7913, "encoder_q-layer.6": 1954.3693, "encoder_q-layer.7": 2251.0762, "encoder_q-layer.8": 2541.8018, "encoder_q-layer.9": 2363.2793, "epoch": 0.51, "inbatch_neg_score": 0.3086, "inbatch_pos_score": 0.9956, "learning_rate": 2.6444444444444443e-05, "loss": 3.2809, "norm_diff": 0.0404, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3520.0242, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3091, "query_norm": 1.4155, "queue_k_norm": 1.4527, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9978, "sent_len_1": 66.6688, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8162, "stdk": 0.0489, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 52400 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2383, "doc_norm": 1.45, "encoder_q-embeddings": 3780.7537, "encoder_q-layer.0": 2482.3518, "encoder_q-layer.1": 2575.7002, "encoder_q-layer.10": 2306.2466, "encoder_q-layer.11": 5196.2178, "encoder_q-layer.2": 3109.2112, "encoder_q-layer.3": 3506.55, "encoder_q-layer.4": 3633.5725, "encoder_q-layer.5": 3547.5344, "encoder_q-layer.6": 3566.6033, "encoder_q-layer.7": 3479.3955, "encoder_q-layer.8": 3368.0918, "encoder_q-layer.9": 2499.9163, "epoch": 0.51, "inbatch_neg_score": 0.3083, "inbatch_pos_score": 0.9741, "learning_rate": 2.6388888888888892e-05, "loss": 3.2383, "norm_diff": 0.0386, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5142.7177, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3066, "query_norm": 1.4114, "queue_k_norm": 1.4536, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9209, "sent_len_1": 66.9175, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4038, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 52500 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.2823, "doc_norm": 1.4598, "encoder_q-embeddings": 2638.0964, "encoder_q-layer.0": 1720.6609, "encoder_q-layer.1": 1858.2151, "encoder_q-layer.10": 2358.6245, "encoder_q-layer.11": 5248.0176, "encoder_q-layer.2": 2119.7834, "encoder_q-layer.3": 2252.0027, "encoder_q-layer.4": 2408.9929, "encoder_q-layer.5": 2546.8865, "encoder_q-layer.6": 2687.0647, "encoder_q-layer.7": 3328.6409, "encoder_q-layer.8": 3383.615, "encoder_q-layer.9": 2583.5813, "epoch": 0.51, "inbatch_neg_score": 0.3054, "inbatch_pos_score": 0.9468, "learning_rate": 2.633333333333333e-05, "loss": 3.2823, "norm_diff": 0.059, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4286.1376, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3047, "query_norm": 1.4008, "queue_k_norm": 1.4528, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7756, "sent_len_1": 66.692, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1525, "stdk": 0.049, "stdq": 0.0455, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 52600 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.2822, "doc_norm": 1.4599, "encoder_q-embeddings": 4765.2139, "encoder_q-layer.0": 3521.3997, "encoder_q-layer.1": 3877.8953, "encoder_q-layer.10": 2457.4885, "encoder_q-layer.11": 5721.958, "encoder_q-layer.2": 4464.8438, "encoder_q-layer.3": 4443.0586, "encoder_q-layer.4": 4301.6904, "encoder_q-layer.5": 4207.5479, "encoder_q-layer.6": 4598.3896, "encoder_q-layer.7": 4283.707, "encoder_q-layer.8": 3743.491, "encoder_q-layer.9": 2870.7637, "epoch": 0.51, "inbatch_neg_score": 0.3104, "inbatch_pos_score": 0.96, "learning_rate": 2.627777777777778e-05, "loss": 3.2822, "norm_diff": 0.0385, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6331.3822, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3098, "query_norm": 1.4214, "queue_k_norm": 1.4527, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8837, "sent_len_1": 66.8305, "sent_max_len_0": 128.0, "sent_max_len_1": 188.955, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 52700 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.2553, "doc_norm": 1.4525, "encoder_q-embeddings": 2021.922, "encoder_q-layer.0": 1325.9564, "encoder_q-layer.1": 1414.6069, "encoder_q-layer.10": 2595.0339, "encoder_q-layer.11": 5686.7417, "encoder_q-layer.2": 1664.1935, "encoder_q-layer.3": 1721.0531, "encoder_q-layer.4": 1866.9073, "encoder_q-layer.5": 1957.0818, "encoder_q-layer.6": 2246.4702, "encoder_q-layer.7": 2743.0239, "encoder_q-layer.8": 2966.917, "encoder_q-layer.9": 2690.3845, "epoch": 0.52, "inbatch_neg_score": 0.3113, "inbatch_pos_score": 0.9668, "learning_rate": 2.6222222222222226e-05, "loss": 3.2553, "norm_diff": 0.0432, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3861.1265, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3105, "query_norm": 1.4094, "queue_k_norm": 1.4545, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7171, "sent_len_1": 66.5659, "sent_max_len_0": 128.0, "sent_max_len_1": 189.77, "stdk": 0.0486, "stdq": 0.0457, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 52800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.2451, "doc_norm": 1.4565, "encoder_q-embeddings": 3015.7744, "encoder_q-layer.0": 1996.3484, "encoder_q-layer.1": 2244.9617, "encoder_q-layer.10": 2590.7407, "encoder_q-layer.11": 5759.564, "encoder_q-layer.2": 2624.3474, "encoder_q-layer.3": 2862.9958, "encoder_q-layer.4": 3108.7222, "encoder_q-layer.5": 3161.0122, "encoder_q-layer.6": 3328.1479, "encoder_q-layer.7": 3686.8052, "encoder_q-layer.8": 3903.0813, "encoder_q-layer.9": 3088.0574, "epoch": 0.52, "inbatch_neg_score": 0.3055, "inbatch_pos_score": 0.9917, "learning_rate": 2.6166666666666668e-05, "loss": 3.2451, "norm_diff": 0.0202, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4926.5298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3066, "query_norm": 1.4389, "queue_k_norm": 1.4539, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.832, "sent_len_1": 66.8759, "sent_max_len_0": 128.0, "sent_max_len_1": 190.91, "stdk": 0.0488, "stdq": 0.0469, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 52900 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.2678, "doc_norm": 1.4541, "encoder_q-embeddings": 2934.2634, "encoder_q-layer.0": 1905.2008, "encoder_q-layer.1": 2104.4646, "encoder_q-layer.10": 2431.6289, "encoder_q-layer.11": 5374.3926, "encoder_q-layer.2": 2326.0884, "encoder_q-layer.3": 2422.9021, "encoder_q-layer.4": 2541.1909, "encoder_q-layer.5": 2700.042, "encoder_q-layer.6": 2792.2139, "encoder_q-layer.7": 2876.5571, "encoder_q-layer.8": 3180.6663, "encoder_q-layer.9": 2570.0298, "epoch": 0.52, "inbatch_neg_score": 0.3093, "inbatch_pos_score": 0.978, "learning_rate": 2.6111111111111114e-05, "loss": 3.2678, "norm_diff": 0.0397, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4361.9361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3086, "query_norm": 1.4144, "queue_k_norm": 1.4554, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0636, "sent_len_1": 66.8272, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9375, "stdk": 0.0487, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 53000 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.2631, "doc_norm": 1.4556, "encoder_q-embeddings": 1864.6393, "encoder_q-layer.0": 1180.8322, "encoder_q-layer.1": 1250.1144, "encoder_q-layer.10": 2364.3118, "encoder_q-layer.11": 5327.8789, "encoder_q-layer.2": 1402.3622, "encoder_q-layer.3": 1481.8773, "encoder_q-layer.4": 1607.0627, "encoder_q-layer.5": 1653.6544, "encoder_q-layer.6": 1921.6986, "encoder_q-layer.7": 2236.167, "encoder_q-layer.8": 2647.5989, "encoder_q-layer.9": 2396.5813, "epoch": 0.52, "inbatch_neg_score": 0.3038, "inbatch_pos_score": 0.9771, "learning_rate": 2.6055555555555556e-05, "loss": 3.2631, "norm_diff": 0.0549, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3563.4463, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3044, "query_norm": 1.4007, "queue_k_norm": 1.4575, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0608, "sent_len_1": 66.7487, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3625, "stdk": 0.0487, "stdq": 0.0455, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 53100 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.2623, "doc_norm": 1.4541, "encoder_q-embeddings": 1289.6663, "encoder_q-layer.0": 855.8948, "encoder_q-layer.1": 903.3082, "encoder_q-layer.10": 1281.5227, "encoder_q-layer.11": 2682.9382, "encoder_q-layer.2": 1033.2267, "encoder_q-layer.3": 1123.7324, "encoder_q-layer.4": 1255.8892, "encoder_q-layer.5": 1259.7306, "encoder_q-layer.6": 1341.1338, "encoder_q-layer.7": 1394.8898, "encoder_q-layer.8": 1537.7391, "encoder_q-layer.9": 1323.8893, "epoch": 0.52, "inbatch_neg_score": 0.3047, "inbatch_pos_score": 0.9707, "learning_rate": 2.6000000000000002e-05, "loss": 3.2623, "norm_diff": 0.0575, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2088.1066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3047, "query_norm": 1.3966, "queue_k_norm": 1.4556, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7924, "sent_len_1": 66.6957, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2738, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 53200 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.257, "doc_norm": 1.4485, "encoder_q-embeddings": 1919.9004, "encoder_q-layer.0": 1310.1681, "encoder_q-layer.1": 1275.0701, "encoder_q-layer.10": 1266.5015, "encoder_q-layer.11": 2817.6836, "encoder_q-layer.2": 1515.9335, "encoder_q-layer.3": 1701.0081, "encoder_q-layer.4": 1870.8693, "encoder_q-layer.5": 1711.2993, "encoder_q-layer.6": 1760.2579, "encoder_q-layer.7": 1712.668, "encoder_q-layer.8": 1718.6309, "encoder_q-layer.9": 1293.0289, "epoch": 0.52, "inbatch_neg_score": 0.3053, "inbatch_pos_score": 0.9717, "learning_rate": 2.5944444444444444e-05, "loss": 3.257, "norm_diff": 0.0396, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2652.2284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3054, "query_norm": 1.4089, "queue_k_norm": 1.455, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8865, "sent_len_1": 66.7807, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6875, "stdk": 0.0484, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 53300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2503, "doc_norm": 1.4535, "encoder_q-embeddings": 2664.2913, "encoder_q-layer.0": 1682.5398, "encoder_q-layer.1": 1990.8265, "encoder_q-layer.10": 1276.0438, "encoder_q-layer.11": 2669.5229, "encoder_q-layer.2": 2200.8228, "encoder_q-layer.3": 2317.3169, "encoder_q-layer.4": 2563.3738, "encoder_q-layer.5": 2814.3625, "encoder_q-layer.6": 2937.0488, "encoder_q-layer.7": 2968.968, "encoder_q-layer.8": 2961.187, "encoder_q-layer.9": 1879.399, "epoch": 0.52, "inbatch_neg_score": 0.305, "inbatch_pos_score": 0.9751, "learning_rate": 2.588888888888889e-05, "loss": 3.2503, "norm_diff": 0.0395, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3696.9173, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3032, "query_norm": 1.414, "queue_k_norm": 1.4559, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0034, "sent_len_1": 66.8238, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2562, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 53400 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.2516, "doc_norm": 1.4573, "encoder_q-embeddings": 679.1169, "encoder_q-layer.0": 465.4492, "encoder_q-layer.1": 519.6705, "encoder_q-layer.10": 609.7736, "encoder_q-layer.11": 1306.3805, "encoder_q-layer.2": 573.0762, "encoder_q-layer.3": 608.7589, "encoder_q-layer.4": 620.0797, "encoder_q-layer.5": 622.5662, "encoder_q-layer.6": 600.4145, "encoder_q-layer.7": 678.2234, "encoder_q-layer.8": 772.5187, "encoder_q-layer.9": 642.8572, "epoch": 0.52, "inbatch_neg_score": 0.3083, "inbatch_pos_score": 1.0186, "learning_rate": 2.5833333333333336e-05, "loss": 3.2516, "norm_diff": 0.0174, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1046.9606, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3062, "query_norm": 1.4455, "queue_k_norm": 1.4563, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1461, "sent_len_1": 66.995, "sent_max_len_0": 128.0, "sent_max_len_1": 189.255, "stdk": 0.0487, "stdq": 0.0471, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 53500 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.264, "doc_norm": 1.4515, "encoder_q-embeddings": 532.1332, "encoder_q-layer.0": 339.4643, "encoder_q-layer.1": 364.5318, "encoder_q-layer.10": 579.0833, "encoder_q-layer.11": 1255.015, "encoder_q-layer.2": 403.6624, "encoder_q-layer.3": 410.9844, "encoder_q-layer.4": 443.556, "encoder_q-layer.5": 467.5862, "encoder_q-layer.6": 542.5486, "encoder_q-layer.7": 614.436, "encoder_q-layer.8": 692.5894, "encoder_q-layer.9": 597.0106, "epoch": 0.52, "inbatch_neg_score": 0.3095, "inbatch_pos_score": 0.9956, "learning_rate": 2.5777777777777778e-05, "loss": 3.264, "norm_diff": 0.0295, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 907.4439, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3105, "query_norm": 1.422, "queue_k_norm": 1.4564, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9817, "sent_len_1": 66.6722, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9313, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 53600 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.2855, "doc_norm": 1.4492, "encoder_q-embeddings": 978.6429, "encoder_q-layer.0": 664.0924, "encoder_q-layer.1": 663.9759, "encoder_q-layer.10": 596.2518, "encoder_q-layer.11": 1356.7678, "encoder_q-layer.2": 813.6729, "encoder_q-layer.3": 820.6057, "encoder_q-layer.4": 987.0893, "encoder_q-layer.5": 988.9365, "encoder_q-layer.6": 1206.7094, "encoder_q-layer.7": 1346.1925, "encoder_q-layer.8": 1388.4393, "encoder_q-layer.9": 1030.9988, "epoch": 0.52, "inbatch_neg_score": 0.3138, "inbatch_pos_score": 0.9497, "learning_rate": 2.5722222222222224e-05, "loss": 3.2855, "norm_diff": 0.0405, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1550.6922, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3142, "query_norm": 1.4088, "queue_k_norm": 1.4566, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8617, "sent_len_1": 66.857, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1062, "stdk": 0.0484, "stdq": 0.0455, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 53700 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.2475, "doc_norm": 1.4602, "encoder_q-embeddings": 518.2703, "encoder_q-layer.0": 344.2548, "encoder_q-layer.1": 355.8439, "encoder_q-layer.10": 613.1191, "encoder_q-layer.11": 1371.0914, "encoder_q-layer.2": 391.3366, "encoder_q-layer.3": 425.5739, "encoder_q-layer.4": 459.1058, "encoder_q-layer.5": 483.7762, "encoder_q-layer.6": 533.8994, "encoder_q-layer.7": 659.2462, "encoder_q-layer.8": 702.2338, "encoder_q-layer.9": 618.1033, "epoch": 0.53, "inbatch_neg_score": 0.3086, "inbatch_pos_score": 0.9546, "learning_rate": 2.5666666666666666e-05, "loss": 3.2475, "norm_diff": 0.0689, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 960.9503, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3083, "query_norm": 1.3914, "queue_k_norm": 1.4588, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8782, "sent_len_1": 66.8841, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0163, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 53800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.2615, "doc_norm": 1.4474, "encoder_q-embeddings": 875.852, "encoder_q-layer.0": 589.3999, "encoder_q-layer.1": 707.1741, "encoder_q-layer.10": 627.925, "encoder_q-layer.11": 1272.8533, "encoder_q-layer.2": 802.228, "encoder_q-layer.3": 842.9493, "encoder_q-layer.4": 877.1678, "encoder_q-layer.5": 892.139, "encoder_q-layer.6": 957.5679, "encoder_q-layer.7": 1001.8243, "encoder_q-layer.8": 855.2566, "encoder_q-layer.9": 640.3227, "epoch": 0.53, "inbatch_neg_score": 0.3069, "inbatch_pos_score": 0.9873, "learning_rate": 2.5611111111111115e-05, "loss": 3.2615, "norm_diff": 0.0274, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1268.4015, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3062, "query_norm": 1.42, "queue_k_norm": 1.4567, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9722, "sent_len_1": 66.6646, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6863, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 53900 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.2351, "doc_norm": 1.4623, "encoder_q-embeddings": 650.297, "encoder_q-layer.0": 442.5269, "encoder_q-layer.1": 485.9955, "encoder_q-layer.10": 581.5555, "encoder_q-layer.11": 1290.7361, "encoder_q-layer.2": 523.8799, "encoder_q-layer.3": 533.4045, "encoder_q-layer.4": 596.726, "encoder_q-layer.5": 618.5184, "encoder_q-layer.6": 660.2542, "encoder_q-layer.7": 738.7957, "encoder_q-layer.8": 715.33, "encoder_q-layer.9": 586.7059, "epoch": 0.53, "inbatch_neg_score": 0.3061, "inbatch_pos_score": 0.9785, "learning_rate": 2.5555555555555554e-05, "loss": 3.2351, "norm_diff": 0.0534, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1028.0005, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3057, "query_norm": 1.4089, "queue_k_norm": 1.4573, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1747, "sent_len_1": 67.0577, "sent_max_len_0": 128.0, "sent_max_len_1": 191.32, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 54000 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.2615, "doc_norm": 1.4531, "encoder_q-embeddings": 549.6766, "encoder_q-layer.0": 349.2867, "encoder_q-layer.1": 379.9253, "encoder_q-layer.10": 597.7563, "encoder_q-layer.11": 1404.0586, "encoder_q-layer.2": 439.146, "encoder_q-layer.3": 484.714, "encoder_q-layer.4": 497.5535, "encoder_q-layer.5": 496.144, "encoder_q-layer.6": 595.456, "encoder_q-layer.7": 647.8751, "encoder_q-layer.8": 707.4572, "encoder_q-layer.9": 617.5822, "epoch": 0.53, "inbatch_neg_score": 0.312, "inbatch_pos_score": 0.9761, "learning_rate": 2.5500000000000003e-05, "loss": 3.2615, "norm_diff": 0.0402, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 979.6412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3123, "query_norm": 1.413, "queue_k_norm": 1.4563, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0144, "sent_len_1": 66.9094, "sent_max_len_0": 128.0, "sent_max_len_1": 189.88, "stdk": 0.0486, "stdq": 0.0457, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 54100 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 3.2704, "doc_norm": 1.4629, "encoder_q-embeddings": 1223.2585, "encoder_q-layer.0": 812.2845, "encoder_q-layer.1": 958.4854, "encoder_q-layer.10": 617.2843, "encoder_q-layer.11": 1323.0123, "encoder_q-layer.2": 1127.2628, "encoder_q-layer.3": 1329.5723, "encoder_q-layer.4": 1505.9457, "encoder_q-layer.5": 1491.264, "encoder_q-layer.6": 1679.8549, "encoder_q-layer.7": 1439.2255, "encoder_q-layer.8": 977.5849, "encoder_q-layer.9": 631.3735, "epoch": 0.53, "inbatch_neg_score": 0.3136, "inbatch_pos_score": 1.0449, "learning_rate": 2.5444444444444442e-05, "loss": 3.2704, "norm_diff": 0.0187, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1775.3638, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3135, "query_norm": 1.4455, "queue_k_norm": 1.4572, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9794, "sent_len_1": 66.7084, "sent_max_len_0": 128.0, "sent_max_len_1": 187.4625, "stdk": 0.049, "stdq": 0.0469, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 54200 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.2603, "doc_norm": 1.4638, "encoder_q-embeddings": 527.3731, "encoder_q-layer.0": 348.3551, "encoder_q-layer.1": 373.1879, "encoder_q-layer.10": 566.4696, "encoder_q-layer.11": 1251.1072, "encoder_q-layer.2": 440.6123, "encoder_q-layer.3": 447.9819, "encoder_q-layer.4": 445.7251, "encoder_q-layer.5": 440.5097, "encoder_q-layer.6": 488.3738, "encoder_q-layer.7": 553.5131, "encoder_q-layer.8": 651.3451, "encoder_q-layer.9": 576.3845, "epoch": 0.53, "inbatch_neg_score": 0.3145, "inbatch_pos_score": 1.0117, "learning_rate": 2.538888888888889e-05, "loss": 3.2603, "norm_diff": 0.0472, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 902.265, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3149, "query_norm": 1.4166, "queue_k_norm": 1.4567, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0504, "sent_len_1": 66.9104, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8025, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 54300 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 3.2337, "doc_norm": 1.4575, "encoder_q-embeddings": 561.8256, "encoder_q-layer.0": 380.8225, "encoder_q-layer.1": 404.3314, "encoder_q-layer.10": 566.2388, "encoder_q-layer.11": 1287.0874, "encoder_q-layer.2": 482.0276, "encoder_q-layer.3": 498.191, "encoder_q-layer.4": 540.1487, "encoder_q-layer.5": 563.5172, "encoder_q-layer.6": 565.3632, "encoder_q-layer.7": 621.2231, "encoder_q-layer.8": 704.9969, "encoder_q-layer.9": 602.4878, "epoch": 0.53, "inbatch_neg_score": 0.3181, "inbatch_pos_score": 1.0527, "learning_rate": 2.5333333333333337e-05, "loss": 3.2337, "norm_diff": 0.0197, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 952.2864, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3181, "query_norm": 1.4378, "queue_k_norm": 1.4581, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9668, "sent_len_1": 66.777, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6337, "stdk": 0.0487, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 54400 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.2583, "doc_norm": 1.4588, "encoder_q-embeddings": 515.7585, "encoder_q-layer.0": 332.3135, "encoder_q-layer.1": 344.2042, "encoder_q-layer.10": 610.6855, "encoder_q-layer.11": 1274.7162, "encoder_q-layer.2": 403.2712, "encoder_q-layer.3": 417.4149, "encoder_q-layer.4": 453.1838, "encoder_q-layer.5": 456.5516, "encoder_q-layer.6": 519.8853, "encoder_q-layer.7": 634.5446, "encoder_q-layer.8": 672.9125, "encoder_q-layer.9": 581.3373, "epoch": 0.53, "inbatch_neg_score": 0.3202, "inbatch_pos_score": 1.0088, "learning_rate": 2.527777777777778e-05, "loss": 3.2583, "norm_diff": 0.0461, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 911.3234, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3203, "query_norm": 1.4127, "queue_k_norm": 1.4586, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0535, "sent_len_1": 67.0125, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7862, "stdk": 0.0488, "stdq": 0.0457, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 54500 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 3.243, "doc_norm": 1.4581, "encoder_q-embeddings": 815.7733, "encoder_q-layer.0": 529.7228, "encoder_q-layer.1": 610.9584, "encoder_q-layer.10": 563.5249, "encoder_q-layer.11": 1247.4277, "encoder_q-layer.2": 718.024, "encoder_q-layer.3": 757.3364, "encoder_q-layer.4": 791.0568, "encoder_q-layer.5": 823.9933, "encoder_q-layer.6": 730.6739, "encoder_q-layer.7": 698.1954, "encoder_q-layer.8": 706.26, "encoder_q-layer.9": 584.9048, "epoch": 0.53, "inbatch_neg_score": 0.3247, "inbatch_pos_score": 1.0283, "learning_rate": 2.5222222222222225e-05, "loss": 3.243, "norm_diff": 0.0357, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1143.8077, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3252, "query_norm": 1.4224, "queue_k_norm": 1.458, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9519, "sent_len_1": 67.0095, "sent_max_len_0": 128.0, "sent_max_len_1": 190.065, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 54600 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2385, "doc_norm": 1.4587, "encoder_q-embeddings": 691.7759, "encoder_q-layer.0": 468.3908, "encoder_q-layer.1": 538.395, "encoder_q-layer.10": 585.0952, "encoder_q-layer.11": 1337.7781, "encoder_q-layer.2": 592.1096, "encoder_q-layer.3": 610.0435, "encoder_q-layer.4": 673.6176, "encoder_q-layer.5": 665.4385, "encoder_q-layer.6": 695.9367, "encoder_q-layer.7": 738.8861, "encoder_q-layer.8": 737.2371, "encoder_q-layer.9": 624.432, "epoch": 0.53, "inbatch_neg_score": 0.3288, "inbatch_pos_score": 0.9961, "learning_rate": 2.5166666666666667e-05, "loss": 3.2385, "norm_diff": 0.0438, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1069.2969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3281, "query_norm": 1.4149, "queue_k_norm": 1.4594, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0694, "sent_len_1": 66.8877, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1113, "stdk": 0.0487, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 54700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.234, "doc_norm": 1.4514, "encoder_q-embeddings": 657.139, "encoder_q-layer.0": 428.9868, "encoder_q-layer.1": 474.3978, "encoder_q-layer.10": 613.6863, "encoder_q-layer.11": 1414.6897, "encoder_q-layer.2": 541.6478, "encoder_q-layer.3": 559.39, "encoder_q-layer.4": 596.8987, "encoder_q-layer.5": 634.2571, "encoder_q-layer.6": 648.9056, "encoder_q-layer.7": 675.4987, "encoder_q-layer.8": 752.1203, "encoder_q-layer.9": 656.2859, "epoch": 0.54, "inbatch_neg_score": 0.3333, "inbatch_pos_score": 1.0078, "learning_rate": 2.5111111111111113e-05, "loss": 3.234, "norm_diff": 0.0217, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1051.717, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3337, "query_norm": 1.4296, "queue_k_norm": 1.4603, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9689, "sent_len_1": 66.8485, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7287, "stdk": 0.0484, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 54800 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.2684, "doc_norm": 1.4626, "encoder_q-embeddings": 1019.385, "encoder_q-layer.0": 689.5947, "encoder_q-layer.1": 746.9528, "encoder_q-layer.10": 610.3276, "encoder_q-layer.11": 1420.1444, "encoder_q-layer.2": 849.4861, "encoder_q-layer.3": 882.6152, "encoder_q-layer.4": 957.4644, "encoder_q-layer.5": 1002.7396, "encoder_q-layer.6": 1097.0497, "encoder_q-layer.7": 1033.2451, "encoder_q-layer.8": 1057.6633, "encoder_q-layer.9": 709.7014, "epoch": 0.54, "inbatch_neg_score": 0.3299, "inbatch_pos_score": 0.9673, "learning_rate": 2.5055555555555555e-05, "loss": 3.2684, "norm_diff": 0.0582, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1451.6316, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3306, "query_norm": 1.4044, "queue_k_norm": 1.4617, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7743, "sent_len_1": 66.7653, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4888, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 54900 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.2528, "doc_norm": 1.4647, "encoder_q-embeddings": 706.4089, "encoder_q-layer.0": 472.3781, "encoder_q-layer.1": 528.5988, "encoder_q-layer.10": 571.3318, "encoder_q-layer.11": 1289.877, "encoder_q-layer.2": 622.8236, "encoder_q-layer.3": 635.8906, "encoder_q-layer.4": 628.5106, "encoder_q-layer.5": 626.7506, "encoder_q-layer.6": 642.0452, "encoder_q-layer.7": 664.2176, "encoder_q-layer.8": 708.6227, "encoder_q-layer.9": 578.4404, "epoch": 0.54, "inbatch_neg_score": 0.3312, "inbatch_pos_score": 1.0361, "learning_rate": 2.5e-05, "loss": 3.2528, "norm_diff": 0.0415, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1049.1153, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.332, "query_norm": 1.4232, "queue_k_norm": 1.4609, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9241, "sent_len_1": 66.6066, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6087, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 55000 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.2409, "doc_norm": 1.4663, "encoder_q-embeddings": 552.5273, "encoder_q-layer.0": 354.8134, "encoder_q-layer.1": 370.6664, "encoder_q-layer.10": 652.8088, "encoder_q-layer.11": 1386.8669, "encoder_q-layer.2": 431.3096, "encoder_q-layer.3": 457.5678, "encoder_q-layer.4": 485.9171, "encoder_q-layer.5": 507.1512, "encoder_q-layer.6": 559.1821, "encoder_q-layer.7": 650.7742, "encoder_q-layer.8": 731.1226, "encoder_q-layer.9": 659.0688, "epoch": 0.54, "inbatch_neg_score": 0.3332, "inbatch_pos_score": 1.0146, "learning_rate": 2.4944444444444447e-05, "loss": 3.2409, "norm_diff": 0.0579, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 977.4664, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.334, "query_norm": 1.4084, "queue_k_norm": 1.46, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8752, "sent_len_1": 66.6037, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5037, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 55100 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.2534, "doc_norm": 1.4579, "encoder_q-embeddings": 605.8903, "encoder_q-layer.0": 399.2472, "encoder_q-layer.1": 433.6851, "encoder_q-layer.10": 625.2964, "encoder_q-layer.11": 1312.9022, "encoder_q-layer.2": 489.0856, "encoder_q-layer.3": 540.7819, "encoder_q-layer.4": 572.4464, "encoder_q-layer.5": 604.6945, "encoder_q-layer.6": 659.9494, "encoder_q-layer.7": 702.3077, "encoder_q-layer.8": 710.2654, "encoder_q-layer.9": 612.6099, "epoch": 0.54, "inbatch_neg_score": 0.338, "inbatch_pos_score": 1.0166, "learning_rate": 2.488888888888889e-05, "loss": 3.2534, "norm_diff": 0.0443, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1001.4008, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3372, "query_norm": 1.4137, "queue_k_norm": 1.4628, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0638, "sent_len_1": 66.8353, "sent_max_len_0": 128.0, "sent_max_len_1": 191.095, "stdk": 0.0486, "stdq": 0.0455, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 55200 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2419, "doc_norm": 1.4686, "encoder_q-embeddings": 604.7387, "encoder_q-layer.0": 396.3202, "encoder_q-layer.1": 445.6085, "encoder_q-layer.10": 623.955, "encoder_q-layer.11": 1393.0629, "encoder_q-layer.2": 534.3174, "encoder_q-layer.3": 533.3201, "encoder_q-layer.4": 534.2905, "encoder_q-layer.5": 562.9941, "encoder_q-layer.6": 579.3804, "encoder_q-layer.7": 619.6907, "encoder_q-layer.8": 695.8157, "encoder_q-layer.9": 610.1759, "epoch": 0.54, "inbatch_neg_score": 0.3383, "inbatch_pos_score": 0.9932, "learning_rate": 2.4833333333333335e-05, "loss": 3.2419, "norm_diff": 0.0414, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1009.7636, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3379, "query_norm": 1.4272, "queue_k_norm": 1.4653, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.2167, "sent_len_1": 66.997, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4162, "stdk": 0.0489, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 55300 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.2253, "doc_norm": 1.4706, "encoder_q-embeddings": 761.5033, "encoder_q-layer.0": 514.7718, "encoder_q-layer.1": 568.1295, "encoder_q-layer.10": 611.9531, "encoder_q-layer.11": 1348.5198, "encoder_q-layer.2": 648.4449, "encoder_q-layer.3": 665.0541, "encoder_q-layer.4": 748.3412, "encoder_q-layer.5": 818.2358, "encoder_q-layer.6": 804.3041, "encoder_q-layer.7": 824.9991, "encoder_q-layer.8": 818.051, "encoder_q-layer.9": 662.1336, "epoch": 0.54, "inbatch_neg_score": 0.3374, "inbatch_pos_score": 1.0439, "learning_rate": 2.477777777777778e-05, "loss": 3.2253, "norm_diff": 0.0352, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1168.4945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3374, "query_norm": 1.4355, "queue_k_norm": 1.4662, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1988, "sent_len_1": 66.9594, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5087, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 55400 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.2539, "doc_norm": 1.4596, "encoder_q-embeddings": 1113.8564, "encoder_q-layer.0": 691.0915, "encoder_q-layer.1": 765.1619, "encoder_q-layer.10": 1328.6445, "encoder_q-layer.11": 2629.5701, "encoder_q-layer.2": 868.0745, "encoder_q-layer.3": 904.5007, "encoder_q-layer.4": 1055.4968, "encoder_q-layer.5": 1085.4646, "encoder_q-layer.6": 1191.2274, "encoder_q-layer.7": 1353.3383, "encoder_q-layer.8": 1532.7472, "encoder_q-layer.9": 1260.0905, "epoch": 0.54, "inbatch_neg_score": 0.3382, "inbatch_pos_score": 1.0234, "learning_rate": 2.4722222222222223e-05, "loss": 3.2539, "norm_diff": 0.0341, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1919.0622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3386, "query_norm": 1.4254, "queue_k_norm": 1.4655, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9384, "sent_len_1": 66.7859, "sent_max_len_0": 128.0, "sent_max_len_1": 190.065, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 55500 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.2218, "doc_norm": 1.4635, "encoder_q-embeddings": 1076.5718, "encoder_q-layer.0": 724.0466, "encoder_q-layer.1": 809.8141, "encoder_q-layer.10": 1147.8977, "encoder_q-layer.11": 2659.3982, "encoder_q-layer.2": 907.1464, "encoder_q-layer.3": 973.7999, "encoder_q-layer.4": 997.3129, "encoder_q-layer.5": 1053.1294, "encoder_q-layer.6": 1183.2086, "encoder_q-layer.7": 1301.6, "encoder_q-layer.8": 1415.193, "encoder_q-layer.9": 1259.3058, "epoch": 0.54, "inbatch_neg_score": 0.3392, "inbatch_pos_score": 1.0264, "learning_rate": 2.466666666666667e-05, "loss": 3.2218, "norm_diff": 0.0335, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1951.9241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3396, "query_norm": 1.43, "queue_k_norm": 1.4663, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8371, "sent_len_1": 66.8118, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1387, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 55600 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2619, "doc_norm": 1.4663, "encoder_q-embeddings": 1158.2615, "encoder_q-layer.0": 772.6184, "encoder_q-layer.1": 860.2629, "encoder_q-layer.10": 1223.9451, "encoder_q-layer.11": 2740.335, "encoder_q-layer.2": 959.3987, "encoder_q-layer.3": 1035.8611, "encoder_q-layer.4": 1092.4092, "encoder_q-layer.5": 1155.2024, "encoder_q-layer.6": 1288.3085, "encoder_q-layer.7": 1454.7865, "encoder_q-layer.8": 1492.7397, "encoder_q-layer.9": 1297.1665, "epoch": 0.54, "inbatch_neg_score": 0.3454, "inbatch_pos_score": 1.0059, "learning_rate": 2.461111111111111e-05, "loss": 3.2619, "norm_diff": 0.0361, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2028.941, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3447, "query_norm": 1.4302, "queue_k_norm": 1.4652, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7971, "sent_len_1": 66.385, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2912, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 55700 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.2371, "doc_norm": 1.4674, "encoder_q-embeddings": 1214.3668, "encoder_q-layer.0": 774.3855, "encoder_q-layer.1": 867.9693, "encoder_q-layer.10": 1221.4121, "encoder_q-layer.11": 2695.4668, "encoder_q-layer.2": 1015.8002, "encoder_q-layer.3": 1072.7611, "encoder_q-layer.4": 1164.3916, "encoder_q-layer.5": 1224.7582, "encoder_q-layer.6": 1244.7915, "encoder_q-layer.7": 1541.5612, "encoder_q-layer.8": 1601.4204, "encoder_q-layer.9": 1288.6572, "epoch": 0.54, "inbatch_neg_score": 0.3461, "inbatch_pos_score": 1.0234, "learning_rate": 2.4555555555555557e-05, "loss": 3.2371, "norm_diff": 0.0339, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2069.9696, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3462, "query_norm": 1.4336, "queue_k_norm": 1.4677, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9736, "sent_len_1": 66.8878, "sent_max_len_0": 128.0, "sent_max_len_1": 192.2562, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 55800 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.2729, "doc_norm": 1.4699, "encoder_q-embeddings": 1553.7365, "encoder_q-layer.0": 1056.7343, "encoder_q-layer.1": 1262.2657, "encoder_q-layer.10": 1257.2861, "encoder_q-layer.11": 2634.0977, "encoder_q-layer.2": 1579.9639, "encoder_q-layer.3": 1606.0625, "encoder_q-layer.4": 1892.3091, "encoder_q-layer.5": 2129.9036, "encoder_q-layer.6": 2344.4836, "encoder_q-layer.7": 2306.573, "encoder_q-layer.8": 2512.6797, "encoder_q-layer.9": 1673.5944, "epoch": 0.55, "inbatch_neg_score": 0.3431, "inbatch_pos_score": 1.0, "learning_rate": 2.45e-05, "loss": 3.2729, "norm_diff": 0.0653, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2785.8197, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3428, "query_norm": 1.4046, "queue_k_norm": 1.4664, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0716, "sent_len_1": 66.7217, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5762, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 55900 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.245, "doc_norm": 1.4685, "encoder_q-embeddings": 1941.3864, "encoder_q-layer.0": 1392.0302, "encoder_q-layer.1": 1502.5179, "encoder_q-layer.10": 1241.9531, "encoder_q-layer.11": 2620.4133, "encoder_q-layer.2": 1934.3741, "encoder_q-layer.3": 1985.3712, "encoder_q-layer.4": 2089.7507, "encoder_q-layer.5": 1979.4423, "encoder_q-layer.6": 1786.5112, "encoder_q-layer.7": 1956.6042, "encoder_q-layer.8": 1948.205, "encoder_q-layer.9": 1381.2026, "epoch": 0.55, "inbatch_neg_score": 0.3496, "inbatch_pos_score": 1.0166, "learning_rate": 2.4444444444444445e-05, "loss": 3.245, "norm_diff": 0.0638, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2816.2145, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3489, "query_norm": 1.4047, "queue_k_norm": 1.4688, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1076, "sent_len_1": 66.8001, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5125, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 56000 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.2302, "doc_norm": 1.4699, "encoder_q-embeddings": 1932.437, "encoder_q-layer.0": 1331.4807, "encoder_q-layer.1": 1426.1594, "encoder_q-layer.10": 1218.0751, "encoder_q-layer.11": 2637.4973, "encoder_q-layer.2": 1699.9277, "encoder_q-layer.3": 1497.6577, "encoder_q-layer.4": 1475.2361, "encoder_q-layer.5": 1453.0848, "encoder_q-layer.6": 1614.3583, "encoder_q-layer.7": 1646.2424, "encoder_q-layer.8": 1500.7961, "encoder_q-layer.9": 1292.0261, "epoch": 0.55, "inbatch_neg_score": 0.3515, "inbatch_pos_score": 1.0176, "learning_rate": 2.4388888888888887e-05, "loss": 3.2302, "norm_diff": 0.0476, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2485.5265, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3516, "query_norm": 1.4224, "queue_k_norm": 1.4687, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0403, "sent_len_1": 66.9177, "sent_max_len_0": 128.0, "sent_max_len_1": 189.845, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 56100 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.285, "doc_norm": 1.4631, "encoder_q-embeddings": 2873.1699, "encoder_q-layer.0": 1884.551, "encoder_q-layer.1": 2305.9473, "encoder_q-layer.10": 1245.3198, "encoder_q-layer.11": 2764.3979, "encoder_q-layer.2": 2905.4241, "encoder_q-layer.3": 2849.0935, "encoder_q-layer.4": 2837.6611, "encoder_q-layer.5": 3001.2612, "encoder_q-layer.6": 3488.2278, "encoder_q-layer.7": 2959.9648, "encoder_q-layer.8": 2173.813, "encoder_q-layer.9": 1216.2323, "epoch": 0.55, "inbatch_neg_score": 0.3532, "inbatch_pos_score": 1.0244, "learning_rate": 2.4333333333333336e-05, "loss": 3.285, "norm_diff": 0.0354, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3892.8125, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.353, "query_norm": 1.4277, "queue_k_norm": 1.4706, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7505, "sent_len_1": 66.6796, "sent_max_len_0": 128.0, "sent_max_len_1": 188.39, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 56200 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.2536, "doc_norm": 1.4674, "encoder_q-embeddings": 1937.3875, "encoder_q-layer.0": 1347.7516, "encoder_q-layer.1": 1551.9657, "encoder_q-layer.10": 1190.6985, "encoder_q-layer.11": 2704.7681, "encoder_q-layer.2": 1754.1162, "encoder_q-layer.3": 2020.4833, "encoder_q-layer.4": 2139.5757, "encoder_q-layer.5": 2382.2102, "encoder_q-layer.6": 2714.1995, "encoder_q-layer.7": 3217.2952, "encoder_q-layer.8": 2676.5774, "encoder_q-layer.9": 1323.2303, "epoch": 0.55, "inbatch_neg_score": 0.3522, "inbatch_pos_score": 1.0293, "learning_rate": 2.427777777777778e-05, "loss": 3.2536, "norm_diff": 0.0346, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3183.0784, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3528, "query_norm": 1.4328, "queue_k_norm": 1.4692, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9175, "sent_len_1": 66.7791, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0263, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 56300 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.2445, "doc_norm": 1.4707, "encoder_q-embeddings": 1114.9711, "encoder_q-layer.0": 696.4008, "encoder_q-layer.1": 770.7186, "encoder_q-layer.10": 1201.3453, "encoder_q-layer.11": 2769.646, "encoder_q-layer.2": 920.1508, "encoder_q-layer.3": 948.6283, "encoder_q-layer.4": 1023.6074, "encoder_q-layer.5": 1050.1355, "encoder_q-layer.6": 1200.1552, "encoder_q-layer.7": 1213.83, "encoder_q-layer.8": 1397.7942, "encoder_q-layer.9": 1224.0399, "epoch": 0.55, "inbatch_neg_score": 0.3521, "inbatch_pos_score": 1.0059, "learning_rate": 2.4222222222222224e-05, "loss": 3.2445, "norm_diff": 0.0527, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1981.2985, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.353, "query_norm": 1.418, "queue_k_norm": 1.4706, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8415, "sent_len_1": 66.5935, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4512, "stdk": 0.0488, "stdq": 0.0455, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 56400 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.2412, "doc_norm": 1.4785, "encoder_q-embeddings": 1165.6338, "encoder_q-layer.0": 705.7196, "encoder_q-layer.1": 746.6288, "encoder_q-layer.10": 1233.3812, "encoder_q-layer.11": 2798.0444, "encoder_q-layer.2": 816.2272, "encoder_q-layer.3": 904.0092, "encoder_q-layer.4": 928.2448, "encoder_q-layer.5": 990.3612, "encoder_q-layer.6": 1103.5435, "encoder_q-layer.7": 1252.2617, "encoder_q-layer.8": 1427.126, "encoder_q-layer.9": 1239.7184, "epoch": 0.55, "inbatch_neg_score": 0.3555, "inbatch_pos_score": 1.0537, "learning_rate": 2.4166666666666667e-05, "loss": 3.2412, "norm_diff": 0.0333, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1967.5881, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.355, "query_norm": 1.4451, "queue_k_norm": 1.4692, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9538, "sent_len_1": 66.5102, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7825, "stdk": 0.049, "stdq": 0.0466, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 56500 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.226, "doc_norm": 1.4694, "encoder_q-embeddings": 1471.0878, "encoder_q-layer.0": 943.5052, "encoder_q-layer.1": 1096.6223, "encoder_q-layer.10": 1203.8684, "encoder_q-layer.11": 2615.9224, "encoder_q-layer.2": 1292.9255, "encoder_q-layer.3": 1445.2753, "encoder_q-layer.4": 1714.2607, "encoder_q-layer.5": 1766.5958, "encoder_q-layer.6": 1883.7856, "encoder_q-layer.7": 2075.2065, "encoder_q-layer.8": 2102.5415, "encoder_q-layer.9": 1484.486, "epoch": 0.55, "inbatch_neg_score": 0.3575, "inbatch_pos_score": 1.0234, "learning_rate": 2.4111111111111113e-05, "loss": 3.226, "norm_diff": 0.0509, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2538.3023, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3577, "query_norm": 1.4185, "queue_k_norm": 1.4717, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0624, "sent_len_1": 66.7981, "sent_max_len_0": 128.0, "sent_max_len_1": 188.105, "stdk": 0.0487, "stdq": 0.0455, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 56600 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2211, "doc_norm": 1.4721, "encoder_q-embeddings": 1889.5103, "encoder_q-layer.0": 1294.5557, "encoder_q-layer.1": 1542.1815, "encoder_q-layer.10": 1257.0962, "encoder_q-layer.11": 2835.8401, "encoder_q-layer.2": 1802.87, "encoder_q-layer.3": 1774.9858, "encoder_q-layer.4": 1897.3698, "encoder_q-layer.5": 2241.6592, "encoder_q-layer.6": 2451.1428, "encoder_q-layer.7": 2836.6235, "encoder_q-layer.8": 2697.3149, "encoder_q-layer.9": 1777.7114, "epoch": 0.55, "inbatch_neg_score": 0.3583, "inbatch_pos_score": 1.043, "learning_rate": 2.4055555555555555e-05, "loss": 3.2211, "norm_diff": 0.0314, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3141.3918, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3589, "query_norm": 1.4407, "queue_k_norm": 1.4731, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0373, "sent_len_1": 66.7911, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7063, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 56700 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.2362, "doc_norm": 1.4671, "encoder_q-embeddings": 1022.0084, "encoder_q-layer.0": 692.3594, "encoder_q-layer.1": 706.1815, "encoder_q-layer.10": 1114.4156, "encoder_q-layer.11": 2529.1321, "encoder_q-layer.2": 805.1382, "encoder_q-layer.3": 831.0999, "encoder_q-layer.4": 901.4658, "encoder_q-layer.5": 916.0894, "encoder_q-layer.6": 1032.7516, "encoder_q-layer.7": 1185.2068, "encoder_q-layer.8": 1344.0674, "encoder_q-layer.9": 1157.3574, "epoch": 0.55, "inbatch_neg_score": 0.3622, "inbatch_pos_score": 1.0029, "learning_rate": 2.4e-05, "loss": 3.2362, "norm_diff": 0.0452, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1803.9718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3625, "query_norm": 1.4219, "queue_k_norm": 1.472, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0519, "sent_len_1": 66.7586, "sent_max_len_0": 128.0, "sent_max_len_1": 187.95, "stdk": 0.0486, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 56800 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2309, "doc_norm": 1.4739, "encoder_q-embeddings": 1585.8496, "encoder_q-layer.0": 1070.0491, "encoder_q-layer.1": 1150.5988, "encoder_q-layer.10": 1192.8466, "encoder_q-layer.11": 2553.2424, "encoder_q-layer.2": 1374.6466, "encoder_q-layer.3": 1394.5096, "encoder_q-layer.4": 1543.5957, "encoder_q-layer.5": 1528.7664, "encoder_q-layer.6": 1637.9849, "encoder_q-layer.7": 1781.6418, "encoder_q-layer.8": 1616.5585, "encoder_q-layer.9": 1202.2461, "epoch": 0.56, "inbatch_neg_score": 0.3612, "inbatch_pos_score": 1.0117, "learning_rate": 2.3944444444444443e-05, "loss": 3.2309, "norm_diff": 0.0636, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2346.0515, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3604, "query_norm": 1.4103, "queue_k_norm": 1.4728, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.2413, "sent_len_1": 66.9302, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0913, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 56900 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2616, "doc_norm": 1.4701, "encoder_q-embeddings": 1990.4347, "encoder_q-layer.0": 1418.0406, "encoder_q-layer.1": 1498.2249, "encoder_q-layer.10": 1191.0461, "encoder_q-layer.11": 2716.2476, "encoder_q-layer.2": 1675.337, "encoder_q-layer.3": 1690.751, "encoder_q-layer.4": 1829.911, "encoder_q-layer.5": 1858.556, "encoder_q-layer.6": 2055.2085, "encoder_q-layer.7": 2017.7617, "encoder_q-layer.8": 1873.3562, "encoder_q-layer.9": 1345.9382, "epoch": 0.56, "inbatch_neg_score": 0.3613, "inbatch_pos_score": 1.0078, "learning_rate": 2.3888888888888892e-05, "loss": 3.2616, "norm_diff": 0.057, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2762.3642, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3608, "query_norm": 1.4131, "queue_k_norm": 1.4751, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7819, "sent_len_1": 66.763, "sent_max_len_0": 128.0, "sent_max_len_1": 188.88, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 57000 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.2021, "doc_norm": 1.4722, "encoder_q-embeddings": 1882.9877, "encoder_q-layer.0": 1342.3016, "encoder_q-layer.1": 1435.2957, "encoder_q-layer.10": 1259.845, "encoder_q-layer.11": 2448.5249, "encoder_q-layer.2": 1665.7238, "encoder_q-layer.3": 1803.7357, "encoder_q-layer.4": 1995.1848, "encoder_q-layer.5": 2028.9752, "encoder_q-layer.6": 1976.2976, "encoder_q-layer.7": 2113.6604, "encoder_q-layer.8": 2044.2844, "encoder_q-layer.9": 1319.6027, "epoch": 0.56, "inbatch_neg_score": 0.3635, "inbatch_pos_score": 1.0469, "learning_rate": 2.3833333333333334e-05, "loss": 3.2021, "norm_diff": 0.036, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2767.1173, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3635, "query_norm": 1.4363, "queue_k_norm": 1.4747, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9079, "sent_len_1": 66.5744, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6513, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 57100 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 3.2662, "doc_norm": 1.4785, "encoder_q-embeddings": 1689.5511, "encoder_q-layer.0": 1115.2408, "encoder_q-layer.1": 1284.0946, "encoder_q-layer.10": 1259.921, "encoder_q-layer.11": 2610.3503, "encoder_q-layer.2": 1464.1357, "encoder_q-layer.3": 1479.0961, "encoder_q-layer.4": 1553.5214, "encoder_q-layer.5": 1764.0682, "encoder_q-layer.6": 1847.0388, "encoder_q-layer.7": 1997.9785, "encoder_q-layer.8": 1867.5741, "encoder_q-layer.9": 1345.8416, "epoch": 0.56, "inbatch_neg_score": 0.3679, "inbatch_pos_score": 1.0781, "learning_rate": 2.377777777777778e-05, "loss": 3.2662, "norm_diff": 0.0306, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2500.8051, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3674, "query_norm": 1.4478, "queue_k_norm": 1.4749, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8777, "sent_len_1": 66.5608, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7587, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 57200 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.2094, "doc_norm": 1.4762, "encoder_q-embeddings": 1398.6498, "encoder_q-layer.0": 967.5212, "encoder_q-layer.1": 1024.116, "encoder_q-layer.10": 1236.739, "encoder_q-layer.11": 2643.6528, "encoder_q-layer.2": 1221.7419, "encoder_q-layer.3": 1220.7189, "encoder_q-layer.4": 1344.1329, "encoder_q-layer.5": 1256.6411, "encoder_q-layer.6": 1434.6155, "encoder_q-layer.7": 1670.2131, "encoder_q-layer.8": 1798.4407, "encoder_q-layer.9": 1404.6104, "epoch": 0.56, "inbatch_neg_score": 0.3678, "inbatch_pos_score": 1.0195, "learning_rate": 2.3722222222222222e-05, "loss": 3.2094, "norm_diff": 0.0458, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2260.7453, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3682, "query_norm": 1.4305, "queue_k_norm": 1.4759, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8131, "sent_len_1": 66.6776, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6788, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 57300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2339, "doc_norm": 1.4811, "encoder_q-embeddings": 1406.8981, "encoder_q-layer.0": 940.3824, "encoder_q-layer.1": 1032.658, "encoder_q-layer.10": 1105.8925, "encoder_q-layer.11": 2628.7004, "encoder_q-layer.2": 1269.3998, "encoder_q-layer.3": 1275.3231, "encoder_q-layer.4": 1400.9695, "encoder_q-layer.5": 1297.3739, "encoder_q-layer.6": 1451.3796, "encoder_q-layer.7": 1428.373, "encoder_q-layer.8": 1495.0306, "encoder_q-layer.9": 1221.3386, "epoch": 0.56, "inbatch_neg_score": 0.3746, "inbatch_pos_score": 1.0117, "learning_rate": 2.3666666666666668e-05, "loss": 3.2339, "norm_diff": 0.0682, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2169.1873, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3735, "query_norm": 1.4129, "queue_k_norm": 1.4781, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8301, "sent_len_1": 66.879, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3525, "stdk": 0.049, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 57400 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.2502, "doc_norm": 1.4793, "encoder_q-embeddings": 2560.502, "encoder_q-layer.0": 1708.2919, "encoder_q-layer.1": 1908.1431, "encoder_q-layer.10": 2291.0767, "encoder_q-layer.11": 5382.4878, "encoder_q-layer.2": 2125.9587, "encoder_q-layer.3": 2118.2417, "encoder_q-layer.4": 2224.906, "encoder_q-layer.5": 2344.0801, "encoder_q-layer.6": 2548.8296, "encoder_q-layer.7": 2629.7319, "encoder_q-layer.8": 2846.8865, "encoder_q-layer.9": 2426.5562, "epoch": 0.56, "inbatch_neg_score": 0.3737, "inbatch_pos_score": 1.0215, "learning_rate": 2.361111111111111e-05, "loss": 3.2502, "norm_diff": 0.0611, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4040.1044, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3728, "query_norm": 1.4182, "queue_k_norm": 1.4788, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8497, "sent_len_1": 66.8904, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5775, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 57500 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.2333, "doc_norm": 1.4802, "encoder_q-embeddings": 2829.7754, "encoder_q-layer.0": 1844.2151, "encoder_q-layer.1": 2227.9456, "encoder_q-layer.10": 2534.1079, "encoder_q-layer.11": 5530.6963, "encoder_q-layer.2": 2585.2915, "encoder_q-layer.3": 2710.5222, "encoder_q-layer.4": 2961.1777, "encoder_q-layer.5": 2924.7153, "encoder_q-layer.6": 3041.9534, "encoder_q-layer.7": 3525.104, "encoder_q-layer.8": 3328.5366, "encoder_q-layer.9": 2599.9705, "epoch": 0.56, "inbatch_neg_score": 0.369, "inbatch_pos_score": 1.0459, "learning_rate": 2.3555555555555556e-05, "loss": 3.2333, "norm_diff": 0.031, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4635.5641, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3696, "query_norm": 1.4492, "queue_k_norm": 1.4792, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8333, "sent_len_1": 66.8105, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6813, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 57600 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.2545, "doc_norm": 1.4832, "encoder_q-embeddings": 2785.8882, "encoder_q-layer.0": 2021.7037, "encoder_q-layer.1": 2260.6204, "encoder_q-layer.10": 2405.5547, "encoder_q-layer.11": 5367.0312, "encoder_q-layer.2": 2529.4033, "encoder_q-layer.3": 2542.6443, "encoder_q-layer.4": 2651.0549, "encoder_q-layer.5": 2578.0908, "encoder_q-layer.6": 2841.6814, "encoder_q-layer.7": 3235.6157, "encoder_q-layer.8": 3125.2593, "encoder_q-layer.9": 2532.2778, "epoch": 0.56, "inbatch_neg_score": 0.3728, "inbatch_pos_score": 1.0791, "learning_rate": 2.35e-05, "loss": 3.2545, "norm_diff": 0.0341, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4375.5643, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3733, "query_norm": 1.449, "queue_k_norm": 1.4816, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7784, "sent_len_1": 67.0284, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7262, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 57700 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.236, "doc_norm": 1.4739, "encoder_q-embeddings": 2274.3547, "encoder_q-layer.0": 1495.259, "encoder_q-layer.1": 1597.5135, "encoder_q-layer.10": 2488.0962, "encoder_q-layer.11": 5229.4512, "encoder_q-layer.2": 1798.0956, "encoder_q-layer.3": 1884.2036, "encoder_q-layer.4": 2097.9409, "encoder_q-layer.5": 2123.1375, "encoder_q-layer.6": 2239.3013, "encoder_q-layer.7": 2496.3826, "encoder_q-layer.8": 2800.6245, "encoder_q-layer.9": 2440.5247, "epoch": 0.56, "inbatch_neg_score": 0.3785, "inbatch_pos_score": 1.0684, "learning_rate": 2.3444444444444448e-05, "loss": 3.236, "norm_diff": 0.0385, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3822.1988, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3792, "query_norm": 1.4354, "queue_k_norm": 1.4808, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9136, "sent_len_1": 66.7715, "sent_max_len_0": 128.0, "sent_max_len_1": 191.185, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 57800 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.2314, "doc_norm": 1.4851, "encoder_q-embeddings": 2246.7974, "encoder_q-layer.0": 1501.1688, "encoder_q-layer.1": 1530.6754, "encoder_q-layer.10": 2284.792, "encoder_q-layer.11": 5251.3647, "encoder_q-layer.2": 1800.0486, "encoder_q-layer.3": 1910.7146, "encoder_q-layer.4": 2060.6382, "encoder_q-layer.5": 2136.5322, "encoder_q-layer.6": 2385.7773, "encoder_q-layer.7": 2694.835, "encoder_q-layer.8": 2891.0928, "encoder_q-layer.9": 2402.4233, "epoch": 0.57, "inbatch_neg_score": 0.3741, "inbatch_pos_score": 1.0146, "learning_rate": 2.338888888888889e-05, "loss": 3.2314, "norm_diff": 0.055, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3859.7088, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.375, "query_norm": 1.4301, "queue_k_norm": 1.4805, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.2674, "sent_len_1": 66.8367, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9487, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 57900 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.2392, "doc_norm": 1.4779, "encoder_q-embeddings": 2111.0615, "encoder_q-layer.0": 1433.8735, "encoder_q-layer.1": 1551.6718, "encoder_q-layer.10": 2258.9329, "encoder_q-layer.11": 5258.7676, "encoder_q-layer.2": 1778.7115, "encoder_q-layer.3": 1793.0382, "encoder_q-layer.4": 1876.6757, "encoder_q-layer.5": 2027.6704, "encoder_q-layer.6": 2163.843, "encoder_q-layer.7": 2474.1528, "encoder_q-layer.8": 2663.5017, "encoder_q-layer.9": 2345.3064, "epoch": 0.57, "inbatch_neg_score": 0.3727, "inbatch_pos_score": 1.0615, "learning_rate": 2.3333333333333336e-05, "loss": 3.2392, "norm_diff": 0.0313, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3721.6473, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3728, "query_norm": 1.4466, "queue_k_norm": 1.4816, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.814, "sent_len_1": 66.7058, "sent_max_len_0": 128.0, "sent_max_len_1": 190.68, "stdk": 0.0487, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 58000 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2115, "doc_norm": 1.4836, "encoder_q-embeddings": 2129.3435, "encoder_q-layer.0": 1374.0822, "encoder_q-layer.1": 1514.7676, "encoder_q-layer.10": 2279.9531, "encoder_q-layer.11": 5323.4619, "encoder_q-layer.2": 1764.7936, "encoder_q-layer.3": 1831.2867, "encoder_q-layer.4": 1935.9261, "encoder_q-layer.5": 2139.7332, "encoder_q-layer.6": 2204.0898, "encoder_q-layer.7": 2563.1211, "encoder_q-layer.8": 2944.6265, "encoder_q-layer.9": 2434.7625, "epoch": 0.57, "inbatch_neg_score": 0.3721, "inbatch_pos_score": 1.043, "learning_rate": 2.3277777777777778e-05, "loss": 3.2115, "norm_diff": 0.0557, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3809.5045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3726, "query_norm": 1.4279, "queue_k_norm": 1.4832, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0499, "sent_len_1": 66.6467, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1962, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 58100 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.2161, "doc_norm": 1.481, "encoder_q-embeddings": 1807.8369, "encoder_q-layer.0": 1242.4614, "encoder_q-layer.1": 1430.5211, "encoder_q-layer.10": 1256.3737, "encoder_q-layer.11": 2623.9595, "encoder_q-layer.2": 1686.5399, "encoder_q-layer.3": 1754.0078, "encoder_q-layer.4": 1867.3206, "encoder_q-layer.5": 1844.3683, "encoder_q-layer.6": 1865.9663, "encoder_q-layer.7": 2046.4343, "encoder_q-layer.8": 1925.2169, "encoder_q-layer.9": 1346.9208, "epoch": 0.57, "inbatch_neg_score": 0.375, "inbatch_pos_score": 1.0342, "learning_rate": 2.3222222222222224e-05, "loss": 3.2161, "norm_diff": 0.0765, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2674.5121, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3752, "query_norm": 1.4046, "queue_k_norm": 1.4815, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.2143, "sent_len_1": 67.0368, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5037, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 58200 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.2729, "doc_norm": 1.485, "encoder_q-embeddings": 1234.8131, "encoder_q-layer.0": 840.254, "encoder_q-layer.1": 1021.2996, "encoder_q-layer.10": 1307.8557, "encoder_q-layer.11": 2770.2949, "encoder_q-layer.2": 1118.8516, "encoder_q-layer.3": 1211.8312, "encoder_q-layer.4": 1249.9968, "encoder_q-layer.5": 1145.2371, "encoder_q-layer.6": 1261.4974, "encoder_q-layer.7": 1368.9902, "encoder_q-layer.8": 1532.0051, "encoder_q-layer.9": 1262.4761, "epoch": 0.57, "inbatch_neg_score": 0.3731, "inbatch_pos_score": 1.0322, "learning_rate": 2.3166666666666666e-05, "loss": 3.2729, "norm_diff": 0.0683, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2098.4774, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3733, "query_norm": 1.4167, "queue_k_norm": 1.4851, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8897, "sent_len_1": 66.8773, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1738, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 58300 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2269, "doc_norm": 1.4747, "encoder_q-embeddings": 1165.3013, "encoder_q-layer.0": 782.6828, "encoder_q-layer.1": 882.038, "encoder_q-layer.10": 1266.0754, "encoder_q-layer.11": 2701.8633, "encoder_q-layer.2": 984.8092, "encoder_q-layer.3": 1058.887, "encoder_q-layer.4": 1119.1152, "encoder_q-layer.5": 1286.0273, "encoder_q-layer.6": 1387.5836, "encoder_q-layer.7": 1553.72, "encoder_q-layer.8": 1611.3322, "encoder_q-layer.9": 1382.2734, "epoch": 0.57, "inbatch_neg_score": 0.3708, "inbatch_pos_score": 1.0215, "learning_rate": 2.3111111111111112e-05, "loss": 3.2269, "norm_diff": 0.0631, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2098.5018, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3706, "query_norm": 1.4115, "queue_k_norm": 1.483, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0208, "sent_len_1": 66.7304, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1813, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 58400 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.2365, "doc_norm": 1.481, "encoder_q-embeddings": 1296.2507, "encoder_q-layer.0": 839.9404, "encoder_q-layer.1": 940.9912, "encoder_q-layer.10": 1315.7197, "encoder_q-layer.11": 2653.3301, "encoder_q-layer.2": 1063.17, "encoder_q-layer.3": 1148.7961, "encoder_q-layer.4": 1197.1791, "encoder_q-layer.5": 1294.4727, "encoder_q-layer.6": 1389.9193, "encoder_q-layer.7": 1494.5872, "encoder_q-layer.8": 1543.0814, "encoder_q-layer.9": 1190.5802, "epoch": 0.57, "inbatch_neg_score": 0.3733, "inbatch_pos_score": 1.0273, "learning_rate": 2.3055555555555558e-05, "loss": 3.2365, "norm_diff": 0.0693, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2114.7149, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3733, "query_norm": 1.4117, "queue_k_norm": 1.4842, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.099, "sent_len_1": 66.5761, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9812, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 58500 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.2343, "doc_norm": 1.4811, "encoder_q-embeddings": 6741.2349, "encoder_q-layer.0": 4982.4414, "encoder_q-layer.1": 5326.1895, "encoder_q-layer.10": 1236.051, "encoder_q-layer.11": 2738.7795, "encoder_q-layer.2": 6098.9258, "encoder_q-layer.3": 7158.2446, "encoder_q-layer.4": 7763.5508, "encoder_q-layer.5": 8164.394, "encoder_q-layer.6": 7681.416, "encoder_q-layer.7": 6992.5737, "encoder_q-layer.8": 5510.6802, "encoder_q-layer.9": 1714.272, "epoch": 0.57, "inbatch_neg_score": 0.3693, "inbatch_pos_score": 1.0244, "learning_rate": 2.3000000000000003e-05, "loss": 3.2343, "norm_diff": 0.0672, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9072.5265, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3699, "query_norm": 1.4139, "queue_k_norm": 1.4824, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9241, "sent_len_1": 66.7294, "sent_max_len_0": 128.0, "sent_max_len_1": 189.785, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 58600 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2425, "doc_norm": 1.4782, "encoder_q-embeddings": 2108.739, "encoder_q-layer.0": 1430.9731, "encoder_q-layer.1": 1657.4059, "encoder_q-layer.10": 1294.033, "encoder_q-layer.11": 2643.9075, "encoder_q-layer.2": 2083.916, "encoder_q-layer.3": 2316.9341, "encoder_q-layer.4": 2375.9473, "encoder_q-layer.5": 2478.2507, "encoder_q-layer.6": 2079.1797, "encoder_q-layer.7": 2011.8567, "encoder_q-layer.8": 1840.9968, "encoder_q-layer.9": 1321.1821, "epoch": 0.57, "inbatch_neg_score": 0.3695, "inbatch_pos_score": 1.0566, "learning_rate": 2.2944444444444446e-05, "loss": 3.2425, "norm_diff": 0.0406, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2989.1616, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3701, "query_norm": 1.4376, "queue_k_norm": 1.4838, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.008, "sent_len_1": 66.7881, "sent_max_len_0": 128.0, "sent_max_len_1": 187.77, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 58700 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.246, "doc_norm": 1.482, "encoder_q-embeddings": 1426.3964, "encoder_q-layer.0": 943.2488, "encoder_q-layer.1": 1036.9214, "encoder_q-layer.10": 1266.7295, "encoder_q-layer.11": 2673.0183, "encoder_q-layer.2": 1252.7764, "encoder_q-layer.3": 1256.1726, "encoder_q-layer.4": 1378.7083, "encoder_q-layer.5": 1540.4275, "encoder_q-layer.6": 1831.3196, "encoder_q-layer.7": 2224.5322, "encoder_q-layer.8": 1970.309, "encoder_q-layer.9": 1238.3535, "epoch": 0.57, "inbatch_neg_score": 0.3676, "inbatch_pos_score": 1.0244, "learning_rate": 2.288888888888889e-05, "loss": 3.246, "norm_diff": 0.0563, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2425.4736, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3667, "query_norm": 1.4257, "queue_k_norm": 1.4819, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9281, "sent_len_1": 66.6198, "sent_max_len_0": 128.0, "sent_max_len_1": 188.63, "stdk": 0.0488, "stdq": 0.0457, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 58800 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.2341, "doc_norm": 1.4845, "encoder_q-embeddings": 2705.8613, "encoder_q-layer.0": 1828.7273, "encoder_q-layer.1": 1965.0187, "encoder_q-layer.10": 1326.0631, "encoder_q-layer.11": 2662.0981, "encoder_q-layer.2": 2359.1328, "encoder_q-layer.3": 2489.8345, "encoder_q-layer.4": 2592.4551, "encoder_q-layer.5": 2535.875, "encoder_q-layer.6": 2343.9319, "encoder_q-layer.7": 1920.0669, "encoder_q-layer.8": 1701.1536, "encoder_q-layer.9": 1322.4008, "epoch": 0.58, "inbatch_neg_score": 0.3729, "inbatch_pos_score": 1.0527, "learning_rate": 2.2833333333333334e-05, "loss": 3.2341, "norm_diff": 0.0451, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3265.5412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3728, "query_norm": 1.4394, "queue_k_norm": 1.4817, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9412, "sent_len_1": 66.6572, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9437, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 58900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.2481, "doc_norm": 1.4834, "encoder_q-embeddings": 951.6925, "encoder_q-layer.0": 609.4625, "encoder_q-layer.1": 629.248, "encoder_q-layer.10": 1215.9781, "encoder_q-layer.11": 2663.083, "encoder_q-layer.2": 694.0403, "encoder_q-layer.3": 719.4877, "encoder_q-layer.4": 772.3863, "encoder_q-layer.5": 810.8699, "encoder_q-layer.6": 955.1356, "encoder_q-layer.7": 1122.6117, "encoder_q-layer.8": 1312.9832, "encoder_q-layer.9": 1159.7871, "epoch": 0.58, "inbatch_neg_score": 0.3723, "inbatch_pos_score": 1.041, "learning_rate": 2.277777777777778e-05, "loss": 3.2481, "norm_diff": 0.0514, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1783.2123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3718, "query_norm": 1.432, "queue_k_norm": 1.4833, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0052, "sent_len_1": 66.6221, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5538, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 59000 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.2456, "doc_norm": 1.4894, "encoder_q-embeddings": 2328.9441, "encoder_q-layer.0": 1706.8611, "encoder_q-layer.1": 1992.8762, "encoder_q-layer.10": 1246.041, "encoder_q-layer.11": 2766.1099, "encoder_q-layer.2": 2393.1824, "encoder_q-layer.3": 2425.9644, "encoder_q-layer.4": 2716.353, "encoder_q-layer.5": 2572.9836, "encoder_q-layer.6": 2740.2017, "encoder_q-layer.7": 3095.4944, "encoder_q-layer.8": 2534.9001, "encoder_q-layer.9": 1299.7183, "epoch": 0.58, "inbatch_neg_score": 0.3697, "inbatch_pos_score": 1.0518, "learning_rate": 2.2722222222222222e-05, "loss": 3.2456, "norm_diff": 0.0636, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3575.4678, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3689, "query_norm": 1.4257, "queue_k_norm": 1.4834, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9533, "sent_len_1": 66.776, "sent_max_len_0": 128.0, "sent_max_len_1": 191.3638, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 59100 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.2409, "doc_norm": 1.475, "encoder_q-embeddings": 1776.7966, "encoder_q-layer.0": 1249.5449, "encoder_q-layer.1": 1346.5496, "encoder_q-layer.10": 1193.3099, "encoder_q-layer.11": 2656.3909, "encoder_q-layer.2": 1609.2906, "encoder_q-layer.3": 1701.645, "encoder_q-layer.4": 1974.1188, "encoder_q-layer.5": 1910.5322, "encoder_q-layer.6": 2027.8569, "encoder_q-layer.7": 2061.4094, "encoder_q-layer.8": 1800.928, "encoder_q-layer.9": 1321.6943, "epoch": 0.58, "inbatch_neg_score": 0.372, "inbatch_pos_score": 1.0068, "learning_rate": 2.2666666666666668e-05, "loss": 3.2409, "norm_diff": 0.0629, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2671.9452, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3723, "query_norm": 1.4121, "queue_k_norm": 1.4823, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7839, "sent_len_1": 66.5178, "sent_max_len_0": 128.0, "sent_max_len_1": 187.4712, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 59200 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2437, "doc_norm": 1.4803, "encoder_q-embeddings": 1678.2751, "encoder_q-layer.0": 1152.5555, "encoder_q-layer.1": 1262.5835, "encoder_q-layer.10": 1259.549, "encoder_q-layer.11": 2698.4316, "encoder_q-layer.2": 1416.7975, "encoder_q-layer.3": 1567.031, "encoder_q-layer.4": 1614.1057, "encoder_q-layer.5": 1691.6161, "encoder_q-layer.6": 1771.9006, "encoder_q-layer.7": 2091.0752, "encoder_q-layer.8": 1797.8201, "encoder_q-layer.9": 1212.7612, "epoch": 0.58, "inbatch_neg_score": 0.373, "inbatch_pos_score": 1.0371, "learning_rate": 2.2611111111111113e-05, "loss": 3.2437, "norm_diff": 0.0574, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2508.3799, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3726, "query_norm": 1.4229, "queue_k_norm": 1.4818, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9721, "sent_len_1": 66.5892, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1463, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 59300 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.2208, "doc_norm": 1.4736, "encoder_q-embeddings": 1100.4573, "encoder_q-layer.0": 693.9564, "encoder_q-layer.1": 763.4878, "encoder_q-layer.10": 1211.9359, "encoder_q-layer.11": 2610.5527, "encoder_q-layer.2": 887.7753, "encoder_q-layer.3": 877.621, "encoder_q-layer.4": 925.3334, "encoder_q-layer.5": 1006.9426, "encoder_q-layer.6": 1057.1116, "encoder_q-layer.7": 1244.0792, "encoder_q-layer.8": 1334.5861, "encoder_q-layer.9": 1190.9852, "epoch": 0.58, "inbatch_neg_score": 0.3735, "inbatch_pos_score": 1.084, "learning_rate": 2.255555555555556e-05, "loss": 3.2208, "norm_diff": 0.0111, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1854.6283, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3726, "query_norm": 1.4669, "queue_k_norm": 1.482, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8148, "sent_len_1": 66.871, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0513, "stdk": 0.0485, "stdq": 0.0469, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 59400 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2342, "doc_norm": 1.4815, "encoder_q-embeddings": 1292.8199, "encoder_q-layer.0": 867.569, "encoder_q-layer.1": 905.5621, "encoder_q-layer.10": 1218.9939, "encoder_q-layer.11": 2686.3599, "encoder_q-layer.2": 1066.9955, "encoder_q-layer.3": 1168.7935, "encoder_q-layer.4": 1214.636, "encoder_q-layer.5": 1335.2168, "encoder_q-layer.6": 1350.2461, "encoder_q-layer.7": 1456.5557, "encoder_q-layer.8": 1554.0271, "encoder_q-layer.9": 1257.1686, "epoch": 0.58, "inbatch_neg_score": 0.3737, "inbatch_pos_score": 1.043, "learning_rate": 2.25e-05, "loss": 3.2342, "norm_diff": 0.0371, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2123.2992, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3735, "query_norm": 1.4444, "queue_k_norm": 1.4836, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8757, "sent_len_1": 67.022, "sent_max_len_0": 128.0, "sent_max_len_1": 189.875, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 59500 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.2193, "doc_norm": 1.4848, "encoder_q-embeddings": 1382.3181, "encoder_q-layer.0": 919.9025, "encoder_q-layer.1": 989.8084, "encoder_q-layer.10": 1343.5951, "encoder_q-layer.11": 2863.3235, "encoder_q-layer.2": 1120.0734, "encoder_q-layer.3": 1237.583, "encoder_q-layer.4": 1276.7999, "encoder_q-layer.5": 1358.321, "encoder_q-layer.6": 1544.3029, "encoder_q-layer.7": 1628.8215, "encoder_q-layer.8": 1637.9274, "encoder_q-layer.9": 1326.272, "epoch": 0.58, "inbatch_neg_score": 0.3773, "inbatch_pos_score": 1.0371, "learning_rate": 2.2444444444444447e-05, "loss": 3.2193, "norm_diff": 0.0554, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2257.0988, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3774, "query_norm": 1.4293, "queue_k_norm": 1.4825, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.956, "sent_len_1": 66.7029, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4338, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 59600 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.2154, "doc_norm": 1.4796, "encoder_q-embeddings": 6987.8022, "encoder_q-layer.0": 5000.0127, "encoder_q-layer.1": 6008.4004, "encoder_q-layer.10": 1294.0751, "encoder_q-layer.11": 2693.0283, "encoder_q-layer.2": 6661.1401, "encoder_q-layer.3": 7780.8237, "encoder_q-layer.4": 8896.7012, "encoder_q-layer.5": 11413.7227, "encoder_q-layer.6": 12818.9053, "encoder_q-layer.7": 12924.4307, "encoder_q-layer.8": 11276.8164, "encoder_q-layer.9": 4820.6514, "epoch": 0.58, "inbatch_neg_score": 0.3782, "inbatch_pos_score": 1.0293, "learning_rate": 2.238888888888889e-05, "loss": 3.2154, "norm_diff": 0.0318, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12561.0169, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3782, "query_norm": 1.4477, "queue_k_norm": 1.4828, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9107, "sent_len_1": 66.7106, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6438, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 59700 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.2105, "doc_norm": 1.4812, "encoder_q-embeddings": 1201.7867, "encoder_q-layer.0": 815.59, "encoder_q-layer.1": 890.6713, "encoder_q-layer.10": 1280.1959, "encoder_q-layer.11": 2720.9807, "encoder_q-layer.2": 998.9742, "encoder_q-layer.3": 1089.7461, "encoder_q-layer.4": 1167.8387, "encoder_q-layer.5": 1241.303, "encoder_q-layer.6": 1370.0525, "encoder_q-layer.7": 1567.6742, "encoder_q-layer.8": 1665.712, "encoder_q-layer.9": 1286.3778, "epoch": 0.58, "inbatch_neg_score": 0.3828, "inbatch_pos_score": 1.04, "learning_rate": 2.2333333333333335e-05, "loss": 3.2105, "norm_diff": 0.0463, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2135.8815, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3828, "query_norm": 1.435, "queue_k_norm": 1.4834, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9763, "sent_len_1": 66.781, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8487, "stdk": 0.0488, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 59800 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.2106, "doc_norm": 1.4859, "encoder_q-embeddings": 984.3124, "encoder_q-layer.0": 649.7353, "encoder_q-layer.1": 686.4484, "encoder_q-layer.10": 1319.3224, "encoder_q-layer.11": 2672.3264, "encoder_q-layer.2": 774.4699, "encoder_q-layer.3": 810.8926, "encoder_q-layer.4": 875.1655, "encoder_q-layer.5": 956.8505, "encoder_q-layer.6": 1092.3892, "encoder_q-layer.7": 1333.5803, "encoder_q-layer.8": 1555.5552, "encoder_q-layer.9": 1385.8132, "epoch": 0.58, "inbatch_neg_score": 0.3868, "inbatch_pos_score": 1.043, "learning_rate": 2.2277777777777778e-05, "loss": 3.2106, "norm_diff": 0.0781, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1887.7647, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3872, "query_norm": 1.4078, "queue_k_norm": 1.4836, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9694, "sent_len_1": 66.8533, "sent_max_len_0": 128.0, "sent_max_len_1": 188.34, "stdk": 0.049, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 59900 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.2265, "doc_norm": 1.4871, "encoder_q-embeddings": 647.5739, "encoder_q-layer.0": 453.8141, "encoder_q-layer.1": 496.2095, "encoder_q-layer.10": 601.7897, "encoder_q-layer.11": 1414.9572, "encoder_q-layer.2": 577.6119, "encoder_q-layer.3": 595.0921, "encoder_q-layer.4": 663.7764, "encoder_q-layer.5": 711.8868, "encoder_q-layer.6": 723.6525, "encoder_q-layer.7": 722.7355, "encoder_q-layer.8": 742.2968, "encoder_q-layer.9": 619.955, "epoch": 0.59, "inbatch_neg_score": 0.3913, "inbatch_pos_score": 1.084, "learning_rate": 2.2222222222222223e-05, "loss": 3.2265, "norm_diff": 0.0365, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1092.5661, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3916, "query_norm": 1.4506, "queue_k_norm": 1.4828, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.823, "sent_len_1": 66.6466, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9162, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 60000 }, { "dev_runtime": 38.5299, "dev_samples_per_second": 1.661, "dev_steps_per_second": 0.026, "epoch": 0.59, "step": 60000, "test_accuracy": 94.4580078125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3176065683364868, "test_doc_norm": 1.4854023456573486, "test_inbatch_neg_score": 0.759244441986084, "test_inbatch_pos_score": 1.731167197227478, "test_loss": 0.3176065683364868, "test_loss_align": 1.0831103324890137, "test_loss_unif": 3.7100939750671387, "test_loss_unif_q@queue": 3.7100939750671387, "test_norm_diff": 0.03488502651453018, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3748781085014343, "test_query_norm": 1.520287275314331, "test_queue_k_norm": 1.482785940170288, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04331099987030029, "test_stdq": 0.04362639784812927, "test_stdqueue_k": 0.04889556020498276, "test_stdqueue_q": 0.0 }, { "dev_runtime": 38.5299, "dev_samples_per_second": 1.661, "dev_steps_per_second": 0.026, "epoch": 0.59, "eval_beir-arguana_ndcg@10": 0.37413, "eval_beir-arguana_recall@10": 0.63585, "eval_beir-arguana_recall@100": 0.9239, "eval_beir-arguana_recall@20": 0.77383, "eval_beir-avg_ndcg@10": 0.3619894166666667, "eval_beir-avg_recall@10": 0.4269369166666667, "eval_beir-avg_recall@100": 0.6093983333333333, "eval_beir-avg_recall@20": 0.48618683333333335, "eval_beir-cqadupstack_ndcg@10": 0.26613416666666667, "eval_beir-cqadupstack_recall@10": 0.3563691666666667, "eval_beir-cqadupstack_recall@100": 0.5823433333333333, "eval_beir-cqadupstack_recall@20": 0.42005833333333337, "eval_beir-fiqa_ndcg@10": 0.20731, "eval_beir-fiqa_recall@10": 0.26291, "eval_beir-fiqa_recall@100": 0.52885, "eval_beir-fiqa_recall@20": 0.32106, "eval_beir-nfcorpus_ndcg@10": 0.28619, "eval_beir-nfcorpus_recall@10": 0.14512, "eval_beir-nfcorpus_recall@100": 0.25994, "eval_beir-nfcorpus_recall@20": 0.17284, "eval_beir-nq_ndcg@10": 0.26354, "eval_beir-nq_recall@10": 0.42862, "eval_beir-nq_recall@100": 0.77851, "eval_beir-nq_recall@20": 0.55552, "eval_beir-quora_ndcg@10": 0.82317, "eval_beir-quora_recall@10": 0.91571, "eval_beir-quora_recall@100": 0.9848, "eval_beir-quora_recall@20": 0.94957, "eval_beir-scidocs_ndcg@10": 0.14061, "eval_beir-scidocs_recall@10": 0.14773, "eval_beir-scidocs_recall@100": 0.34017, "eval_beir-scidocs_recall@20": 0.19728, "eval_beir-scifact_ndcg@10": 0.60123, "eval_beir-scifact_recall@10": 0.74639, "eval_beir-scifact_recall@100": 0.89656, "eval_beir-scifact_recall@20": 0.79483, "eval_beir-trec-covid_ndcg@10": 0.47732, "eval_beir-trec-covid_recall@10": 0.502, "eval_beir-trec-covid_recall@100": 0.383, "eval_beir-trec-covid_recall@20": 0.478, "eval_beir-webis-touche2020_ndcg@10": 0.18026, "eval_beir-webis-touche2020_recall@10": 0.12867, "eval_beir-webis-touche2020_recall@100": 0.41591, "eval_beir-webis-touche2020_recall@20": 0.19888, "eval_senteval-avg_sts": 0.7327010099677189, "eval_senteval-sickr_spearman": 0.7026664835653339, "eval_senteval-stsb_spearman": 0.7627355363701038, "step": 60000, "test_accuracy": 94.4580078125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3176065683364868, "test_doc_norm": 1.4854023456573486, "test_inbatch_neg_score": 0.759244441986084, "test_inbatch_pos_score": 1.731167197227478, "test_loss": 0.3176065683364868, "test_loss_align": 1.0831103324890137, "test_loss_unif": 3.7100939750671387, "test_loss_unif_q@queue": 3.7100939750671387, "test_norm_diff": 0.03488502651453018, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3748781085014343, "test_query_norm": 1.520287275314331, "test_queue_k_norm": 1.482785940170288, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04331099987030029, "test_stdq": 0.04362639784812927, "test_stdqueue_k": 0.04889556020498276, "test_stdqueue_q": 0.0 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.2325, "doc_norm": 1.4858, "encoder_q-embeddings": 530.4781, "encoder_q-layer.0": 355.0197, "encoder_q-layer.1": 399.8825, "encoder_q-layer.10": 591.3457, "encoder_q-layer.11": 1344.3657, "encoder_q-layer.2": 471.4402, "encoder_q-layer.3": 500.9124, "encoder_q-layer.4": 531.5427, "encoder_q-layer.5": 564.7137, "encoder_q-layer.6": 600.617, "encoder_q-layer.7": 666.225, "encoder_q-layer.8": 702.0323, "encoder_q-layer.9": 610.3096, "epoch": 0.59, "inbatch_neg_score": 0.3979, "inbatch_pos_score": 1.1025, "learning_rate": 2.216666666666667e-05, "loss": 3.2325, "norm_diff": 0.0382, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 975.2374, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3979, "query_norm": 1.4475, "queue_k_norm": 1.4848, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9699, "sent_len_1": 66.7978, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7713, "stdk": 0.0489, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 60100 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2171, "doc_norm": 1.4798, "encoder_q-embeddings": 653.9945, "encoder_q-layer.0": 435.5611, "encoder_q-layer.1": 495.7063, "encoder_q-layer.10": 572.1553, "encoder_q-layer.11": 1337.6598, "encoder_q-layer.2": 570.6015, "encoder_q-layer.3": 574.9038, "encoder_q-layer.4": 627.9908, "encoder_q-layer.5": 702.2622, "encoder_q-layer.6": 710.2473, "encoder_q-layer.7": 714.8608, "encoder_q-layer.8": 732.435, "encoder_q-layer.9": 615.8098, "epoch": 0.59, "inbatch_neg_score": 0.4001, "inbatch_pos_score": 1.082, "learning_rate": 2.211111111111111e-05, "loss": 3.2171, "norm_diff": 0.0415, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1055.8863, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4014, "query_norm": 1.4383, "queue_k_norm": 1.4855, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9608, "sent_len_1": 67.0308, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3363, "stdk": 0.0486, "stdq": 0.0456, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 60200 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.2371, "doc_norm": 1.4826, "encoder_q-embeddings": 510.7079, "encoder_q-layer.0": 352.5789, "encoder_q-layer.1": 364.5461, "encoder_q-layer.10": 605.526, "encoder_q-layer.11": 1349.403, "encoder_q-layer.2": 408.748, "encoder_q-layer.3": 427.6019, "encoder_q-layer.4": 478.2571, "encoder_q-layer.5": 480.059, "encoder_q-layer.6": 554.2308, "encoder_q-layer.7": 642.988, "encoder_q-layer.8": 707.9402, "encoder_q-layer.9": 618.1706, "epoch": 0.59, "inbatch_neg_score": 0.3982, "inbatch_pos_score": 1.0762, "learning_rate": 2.2055555555555557e-05, "loss": 3.2371, "norm_diff": 0.0436, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 940.7401, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3989, "query_norm": 1.439, "queue_k_norm": 1.485, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0682, "sent_len_1": 66.6661, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1887, "stdk": 0.0487, "stdq": 0.0459, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 60300 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2376, "doc_norm": 1.4857, "encoder_q-embeddings": 493.6274, "encoder_q-layer.0": 324.3079, "encoder_q-layer.1": 341.9132, "encoder_q-layer.10": 639.0984, "encoder_q-layer.11": 1385.0693, "encoder_q-layer.2": 385.2509, "encoder_q-layer.3": 403.1235, "encoder_q-layer.4": 424.8866, "encoder_q-layer.5": 472.687, "encoder_q-layer.6": 523.8005, "encoder_q-layer.7": 633.8199, "encoder_q-layer.8": 704.7192, "encoder_q-layer.9": 625.6462, "epoch": 0.59, "inbatch_neg_score": 0.403, "inbatch_pos_score": 1.0928, "learning_rate": 2.2000000000000003e-05, "loss": 3.2376, "norm_diff": 0.0357, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 944.7979, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4048, "query_norm": 1.45, "queue_k_norm": 1.485, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0087, "sent_len_1": 66.7212, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2575, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 60400 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.2405, "doc_norm": 1.4857, "encoder_q-embeddings": 1453.0599, "encoder_q-layer.0": 1002.1303, "encoder_q-layer.1": 1034.2115, "encoder_q-layer.10": 623.3882, "encoder_q-layer.11": 1345.3962, "encoder_q-layer.2": 1136.8206, "encoder_q-layer.3": 1281.3127, "encoder_q-layer.4": 1491.3202, "encoder_q-layer.5": 1544.5057, "encoder_q-layer.6": 1665.1313, "encoder_q-layer.7": 1954.3557, "encoder_q-layer.8": 1770.9849, "encoder_q-layer.9": 955.7221, "epoch": 0.59, "inbatch_neg_score": 0.3997, "inbatch_pos_score": 1.0664, "learning_rate": 2.1944444444444445e-05, "loss": 3.2405, "norm_diff": 0.0433, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2085.6963, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3994, "query_norm": 1.4424, "queue_k_norm": 1.4866, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9525, "sent_len_1": 66.699, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9725, "stdk": 0.0488, "stdq": 0.0459, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 60500 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2077, "doc_norm": 1.4866, "encoder_q-embeddings": 614.1601, "encoder_q-layer.0": 404.1742, "encoder_q-layer.1": 424.0622, "encoder_q-layer.10": 641.1713, "encoder_q-layer.11": 1399.4604, "encoder_q-layer.2": 507.6924, "encoder_q-layer.3": 540.2646, "encoder_q-layer.4": 592.9575, "encoder_q-layer.5": 633.5609, "encoder_q-layer.6": 604.8401, "encoder_q-layer.7": 702.7231, "encoder_q-layer.8": 743.9575, "encoder_q-layer.9": 626.9386, "epoch": 0.59, "inbatch_neg_score": 0.4041, "inbatch_pos_score": 1.0566, "learning_rate": 2.188888888888889e-05, "loss": 3.2077, "norm_diff": 0.0574, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1046.3221, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4055, "query_norm": 1.4292, "queue_k_norm": 1.4897, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.942, "sent_len_1": 67.0051, "sent_max_len_0": 128.0, "sent_max_len_1": 190.055, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 60600 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.2203, "doc_norm": 1.4877, "encoder_q-embeddings": 560.2883, "encoder_q-layer.0": 345.4669, "encoder_q-layer.1": 367.6927, "encoder_q-layer.10": 595.5017, "encoder_q-layer.11": 1308.3966, "encoder_q-layer.2": 433.157, "encoder_q-layer.3": 455.5255, "encoder_q-layer.4": 484.6642, "encoder_q-layer.5": 492.1579, "encoder_q-layer.6": 543.5474, "encoder_q-layer.7": 620.3129, "encoder_q-layer.8": 720.0184, "encoder_q-layer.9": 622.7245, "epoch": 0.59, "inbatch_neg_score": 0.4076, "inbatch_pos_score": 1.0947, "learning_rate": 2.1833333333333333e-05, "loss": 3.2203, "norm_diff": 0.0391, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 952.4538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4082, "query_norm": 1.4486, "queue_k_norm": 1.4885, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0906, "sent_len_1": 66.987, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7312, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 60700 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2407, "doc_norm": 1.4864, "encoder_q-embeddings": 574.1752, "encoder_q-layer.0": 389.5114, "encoder_q-layer.1": 450.4989, "encoder_q-layer.10": 588.3186, "encoder_q-layer.11": 1413.1317, "encoder_q-layer.2": 512.5, "encoder_q-layer.3": 539.125, "encoder_q-layer.4": 604.0533, "encoder_q-layer.5": 593.9445, "encoder_q-layer.6": 590.8491, "encoder_q-layer.7": 658.3473, "encoder_q-layer.8": 707.8538, "encoder_q-layer.9": 641.3834, "epoch": 0.59, "inbatch_neg_score": 0.4083, "inbatch_pos_score": 1.0752, "learning_rate": 2.177777777777778e-05, "loss": 3.2407, "norm_diff": 0.0424, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1022.6137, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4082, "query_norm": 1.4439, "queue_k_norm": 1.49, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.918, "sent_len_1": 66.7307, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1175, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 60800 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.2262, "doc_norm": 1.4936, "encoder_q-embeddings": 1277.3589, "encoder_q-layer.0": 876.8021, "encoder_q-layer.1": 1020.5884, "encoder_q-layer.10": 616.385, "encoder_q-layer.11": 1345.793, "encoder_q-layer.2": 1208.593, "encoder_q-layer.3": 1374.6139, "encoder_q-layer.4": 1490.0122, "encoder_q-layer.5": 1817.6262, "encoder_q-layer.6": 1771.2329, "encoder_q-layer.7": 1392.684, "encoder_q-layer.8": 1078.6316, "encoder_q-layer.9": 635.0426, "epoch": 0.59, "inbatch_neg_score": 0.4063, "inbatch_pos_score": 1.0615, "learning_rate": 2.1722222222222225e-05, "loss": 3.2262, "norm_diff": 0.0658, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1864.8315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4072, "query_norm": 1.4278, "queue_k_norm": 1.4894, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9605, "sent_len_1": 66.3643, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0513, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 60900 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.2252, "doc_norm": 1.4845, "encoder_q-embeddings": 484.8877, "encoder_q-layer.0": 318.5356, "encoder_q-layer.1": 344.3434, "encoder_q-layer.10": 567.2136, "encoder_q-layer.11": 1309.7268, "encoder_q-layer.2": 390.9054, "encoder_q-layer.3": 409.8229, "encoder_q-layer.4": 450.6829, "encoder_q-layer.5": 485.9786, "encoder_q-layer.6": 531.7285, "encoder_q-layer.7": 580.5843, "encoder_q-layer.8": 679.1954, "encoder_q-layer.9": 598.4219, "epoch": 0.6, "inbatch_neg_score": 0.4077, "inbatch_pos_score": 1.0986, "learning_rate": 2.1666666666666667e-05, "loss": 3.2252, "norm_diff": 0.0484, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 918.7627, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.407, "query_norm": 1.4361, "queue_k_norm": 1.4914, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9454, "sent_len_1": 66.7018, "sent_max_len_0": 128.0, "sent_max_len_1": 189.97, "stdk": 0.0486, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 61000 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 3.2145, "doc_norm": 1.4894, "encoder_q-embeddings": 506.3975, "encoder_q-layer.0": 314.7016, "encoder_q-layer.1": 341.4273, "encoder_q-layer.10": 607.3353, "encoder_q-layer.11": 1356.3552, "encoder_q-layer.2": 390.8464, "encoder_q-layer.3": 403.01, "encoder_q-layer.4": 434.1795, "encoder_q-layer.5": 461.9469, "encoder_q-layer.6": 510.4001, "encoder_q-layer.7": 621.5029, "encoder_q-layer.8": 699.9742, "encoder_q-layer.9": 612.8306, "epoch": 0.6, "inbatch_neg_score": 0.4051, "inbatch_pos_score": 1.126, "learning_rate": 2.1611111111111113e-05, "loss": 3.2145, "norm_diff": 0.0391, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 925.8093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4065, "query_norm": 1.4503, "queue_k_norm": 1.491, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8971, "sent_len_1": 66.6262, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2775, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 61100 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.2422, "doc_norm": 1.4854, "encoder_q-embeddings": 557.4802, "encoder_q-layer.0": 376.6745, "encoder_q-layer.1": 394.8343, "encoder_q-layer.10": 717.3892, "encoder_q-layer.11": 1486.9265, "encoder_q-layer.2": 433.4803, "encoder_q-layer.3": 467.5325, "encoder_q-layer.4": 489.344, "encoder_q-layer.5": 493.2342, "encoder_q-layer.6": 535.9312, "encoder_q-layer.7": 637.6556, "encoder_q-layer.8": 735.0128, "encoder_q-layer.9": 686.1616, "epoch": 0.6, "inbatch_neg_score": 0.4107, "inbatch_pos_score": 1.0625, "learning_rate": 2.1555555555555555e-05, "loss": 3.2422, "norm_diff": 0.0393, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1014.3018, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4114, "query_norm": 1.4464, "queue_k_norm": 1.492, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8698, "sent_len_1": 66.6163, "sent_max_len_0": 128.0, "sent_max_len_1": 187.71, "stdk": 0.0486, "stdq": 0.0459, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 61200 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.2295, "doc_norm": 1.494, "encoder_q-embeddings": 621.9669, "encoder_q-layer.0": 415.8045, "encoder_q-layer.1": 461.6447, "encoder_q-layer.10": 616.7747, "encoder_q-layer.11": 1280.5724, "encoder_q-layer.2": 531.1612, "encoder_q-layer.3": 571.1211, "encoder_q-layer.4": 622.1876, "encoder_q-layer.5": 634.5808, "encoder_q-layer.6": 675.7037, "encoder_q-layer.7": 743.5679, "encoder_q-layer.8": 800.166, "encoder_q-layer.9": 642.1469, "epoch": 0.6, "inbatch_neg_score": 0.4078, "inbatch_pos_score": 1.1338, "learning_rate": 2.15e-05, "loss": 3.2295, "norm_diff": 0.0252, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1028.6666, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.408, "query_norm": 1.4688, "queue_k_norm": 1.4911, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9564, "sent_len_1": 66.7887, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6775, "stdk": 0.0489, "stdq": 0.0469, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 61300 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.2301, "doc_norm": 1.4866, "encoder_q-embeddings": 599.0319, "encoder_q-layer.0": 410.1107, "encoder_q-layer.1": 458.7542, "encoder_q-layer.10": 653.601, "encoder_q-layer.11": 1391.1235, "encoder_q-layer.2": 571.0105, "encoder_q-layer.3": 581.2432, "encoder_q-layer.4": 638.358, "encoder_q-layer.5": 631.2519, "encoder_q-layer.6": 637.5406, "encoder_q-layer.7": 731.2791, "encoder_q-layer.8": 798.1645, "encoder_q-layer.9": 676.6208, "epoch": 0.6, "inbatch_neg_score": 0.4021, "inbatch_pos_score": 1.083, "learning_rate": 2.1444444444444443e-05, "loss": 3.2301, "norm_diff": 0.0516, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1080.6652, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4033, "query_norm": 1.4349, "queue_k_norm": 1.4944, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8257, "sent_len_1": 66.7181, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1675, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 61400 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.241, "doc_norm": 1.4949, "encoder_q-embeddings": 536.0862, "encoder_q-layer.0": 370.6507, "encoder_q-layer.1": 392.9736, "encoder_q-layer.10": 628.5786, "encoder_q-layer.11": 1275.9366, "encoder_q-layer.2": 477.4033, "encoder_q-layer.3": 528.0164, "encoder_q-layer.4": 578.7834, "encoder_q-layer.5": 637.3351, "encoder_q-layer.6": 667.1328, "encoder_q-layer.7": 762.3666, "encoder_q-layer.8": 851.7795, "encoder_q-layer.9": 667.1727, "epoch": 0.6, "inbatch_neg_score": 0.4073, "inbatch_pos_score": 1.1201, "learning_rate": 2.138888888888889e-05, "loss": 3.241, "norm_diff": 0.0383, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1027.5928, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4067, "query_norm": 1.4566, "queue_k_norm": 1.4916, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0553, "sent_len_1": 66.7543, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8988, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 61500 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.2343, "doc_norm": 1.4919, "encoder_q-embeddings": 667.0848, "encoder_q-layer.0": 437.1859, "encoder_q-layer.1": 490.4369, "encoder_q-layer.10": 694.2335, "encoder_q-layer.11": 1401.502, "encoder_q-layer.2": 601.8382, "encoder_q-layer.3": 664.1978, "encoder_q-layer.4": 725.0717, "encoder_q-layer.5": 794.6619, "encoder_q-layer.6": 857.9032, "encoder_q-layer.7": 1071.8683, "encoder_q-layer.8": 1138.5554, "encoder_q-layer.9": 817.1213, "epoch": 0.6, "inbatch_neg_score": 0.4091, "inbatch_pos_score": 1.1221, "learning_rate": 2.1333333333333335e-05, "loss": 3.2343, "norm_diff": 0.0331, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1260.2492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4097, "query_norm": 1.4588, "queue_k_norm": 1.4933, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8349, "sent_len_1": 66.7645, "sent_max_len_0": 128.0, "sent_max_len_1": 188.835, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 61600 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.2158, "doc_norm": 1.4923, "encoder_q-embeddings": 1083.8667, "encoder_q-layer.0": 758.9235, "encoder_q-layer.1": 910.6014, "encoder_q-layer.10": 591.7983, "encoder_q-layer.11": 1288.4182, "encoder_q-layer.2": 1150.1748, "encoder_q-layer.3": 1272.949, "encoder_q-layer.4": 1459.0439, "encoder_q-layer.5": 1364.908, "encoder_q-layer.6": 1161.2164, "encoder_q-layer.7": 1049.7815, "encoder_q-layer.8": 951.7767, "encoder_q-layer.9": 672.9375, "epoch": 0.6, "inbatch_neg_score": 0.41, "inbatch_pos_score": 1.1133, "learning_rate": 2.127777777777778e-05, "loss": 3.2158, "norm_diff": 0.0362, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1661.6763, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4102, "query_norm": 1.4561, "queue_k_norm": 1.4929, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9259, "sent_len_1": 66.8487, "sent_max_len_0": 128.0, "sent_max_len_1": 188.115, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 61700 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.2212, "doc_norm": 1.4916, "encoder_q-embeddings": 475.0765, "encoder_q-layer.0": 301.861, "encoder_q-layer.1": 317.2011, "encoder_q-layer.10": 632.4028, "encoder_q-layer.11": 1306.8694, "encoder_q-layer.2": 352.614, "encoder_q-layer.3": 378.7502, "encoder_q-layer.4": 387.0456, "encoder_q-layer.5": 410.9973, "encoder_q-layer.6": 463.8963, "encoder_q-layer.7": 550.1902, "encoder_q-layer.8": 639.121, "encoder_q-layer.9": 594.9974, "epoch": 0.6, "inbatch_neg_score": 0.4126, "inbatch_pos_score": 1.1016, "learning_rate": 2.1222222222222223e-05, "loss": 3.2212, "norm_diff": 0.0453, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 880.2749, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4126, "query_norm": 1.4463, "queue_k_norm": 1.4931, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8579, "sent_len_1": 66.7185, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5488, "stdk": 0.0488, "stdq": 0.0459, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 61800 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.229, "doc_norm": 1.4868, "encoder_q-embeddings": 610.4066, "encoder_q-layer.0": 381.1989, "encoder_q-layer.1": 442.1959, "encoder_q-layer.10": 640.3755, "encoder_q-layer.11": 1377.1462, "encoder_q-layer.2": 517.2002, "encoder_q-layer.3": 550.1354, "encoder_q-layer.4": 577.9149, "encoder_q-layer.5": 594.0143, "encoder_q-layer.6": 664.7084, "encoder_q-layer.7": 730.6105, "encoder_q-layer.8": 770.4783, "encoder_q-layer.9": 663.2803, "epoch": 0.6, "inbatch_neg_score": 0.412, "inbatch_pos_score": 1.0654, "learning_rate": 2.116666666666667e-05, "loss": 3.229, "norm_diff": 0.0647, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1036.7934, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4106, "query_norm": 1.4221, "queue_k_norm": 1.4947, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.707, "sent_len_1": 66.7267, "sent_max_len_0": 128.0, "sent_max_len_1": 189.975, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 61900 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 3.2138, "doc_norm": 1.4904, "encoder_q-embeddings": 1226.7847, "encoder_q-layer.0": 826.5023, "encoder_q-layer.1": 916.6356, "encoder_q-layer.10": 1145.8489, "encoder_q-layer.11": 2553.5193, "encoder_q-layer.2": 1082.7368, "encoder_q-layer.3": 1203.5801, "encoder_q-layer.4": 1319.2076, "encoder_q-layer.5": 1371.1425, "encoder_q-layer.6": 1416.1914, "encoder_q-layer.7": 1504.1034, "encoder_q-layer.8": 1591.259, "encoder_q-layer.9": 1257.5867, "epoch": 0.61, "inbatch_neg_score": 0.4128, "inbatch_pos_score": 1.1191, "learning_rate": 2.111111111111111e-05, "loss": 3.2138, "norm_diff": 0.0461, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2091.3718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4141, "query_norm": 1.4444, "queue_k_norm": 1.4938, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1047, "sent_len_1": 66.7884, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8175, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 62000 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.2332, "doc_norm": 1.4945, "encoder_q-embeddings": 1254.5432, "encoder_q-layer.0": 841.0117, "encoder_q-layer.1": 930.2258, "encoder_q-layer.10": 1183.9987, "encoder_q-layer.11": 2748.7329, "encoder_q-layer.2": 1083.6008, "encoder_q-layer.3": 1142.5891, "encoder_q-layer.4": 1235.5709, "encoder_q-layer.5": 1318.4954, "encoder_q-layer.6": 1374.0624, "encoder_q-layer.7": 1463.4292, "encoder_q-layer.8": 1490.2526, "encoder_q-layer.9": 1275.7299, "epoch": 0.61, "inbatch_neg_score": 0.4169, "inbatch_pos_score": 1.0693, "learning_rate": 2.1055555555555556e-05, "loss": 3.2332, "norm_diff": 0.0557, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2138.6191, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4165, "query_norm": 1.4387, "queue_k_norm": 1.4955, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0998, "sent_len_1": 66.5155, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9888, "stdk": 0.0488, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 62100 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1969, "doc_norm": 1.4985, "encoder_q-embeddings": 1754.0603, "encoder_q-layer.0": 1217.9141, "encoder_q-layer.1": 1302.5131, "encoder_q-layer.10": 1234.6783, "encoder_q-layer.11": 2631.7412, "encoder_q-layer.2": 1397.5811, "encoder_q-layer.3": 1398.1086, "encoder_q-layer.4": 1355.611, "encoder_q-layer.5": 1365.6409, "encoder_q-layer.6": 1422.6722, "encoder_q-layer.7": 1522.9847, "encoder_q-layer.8": 1515.7716, "encoder_q-layer.9": 1326.7992, "epoch": 0.61, "inbatch_neg_score": 0.4181, "inbatch_pos_score": 1.0977, "learning_rate": 2.1e-05, "loss": 3.1969, "norm_diff": 0.0448, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2334.5142, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.417, "query_norm": 1.4537, "queue_k_norm": 1.4941, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.953, "sent_len_1": 66.5143, "sent_max_len_0": 128.0, "sent_max_len_1": 187.62, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 62200 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.22, "doc_norm": 1.4985, "encoder_q-embeddings": 1016.699, "encoder_q-layer.0": 657.991, "encoder_q-layer.1": 736.7773, "encoder_q-layer.10": 1311.6809, "encoder_q-layer.11": 2730.5933, "encoder_q-layer.2": 830.534, "encoder_q-layer.3": 837.9756, "encoder_q-layer.4": 899.2057, "encoder_q-layer.5": 961.806, "encoder_q-layer.6": 1078.0076, "encoder_q-layer.7": 1258.1823, "encoder_q-layer.8": 1453.8092, "encoder_q-layer.9": 1271.5115, "epoch": 0.61, "inbatch_neg_score": 0.4173, "inbatch_pos_score": 1.0957, "learning_rate": 2.0944444444444445e-05, "loss": 3.22, "norm_diff": 0.0421, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1922.2442, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4172, "query_norm": 1.4564, "queue_k_norm": 1.4944, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9377, "sent_len_1": 66.7552, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3063, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 62300 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2147, "doc_norm": 1.4962, "encoder_q-embeddings": 1863.5292, "encoder_q-layer.0": 1404.6775, "encoder_q-layer.1": 1433.7124, "encoder_q-layer.10": 1222.6865, "encoder_q-layer.11": 2620.7529, "encoder_q-layer.2": 1501.8065, "encoder_q-layer.3": 1495.6311, "encoder_q-layer.4": 1649.4762, "encoder_q-layer.5": 1643.7604, "encoder_q-layer.6": 1615.4962, "encoder_q-layer.7": 1886.8806, "encoder_q-layer.8": 1688.4178, "encoder_q-layer.9": 1218.3755, "epoch": 0.61, "inbatch_neg_score": 0.4199, "inbatch_pos_score": 1.0986, "learning_rate": 2.088888888888889e-05, "loss": 3.2147, "norm_diff": 0.0408, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2527.1922, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4199, "query_norm": 1.4554, "queue_k_norm": 1.495, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9907, "sent_len_1": 66.8622, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4338, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 62400 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2117, "doc_norm": 1.4964, "encoder_q-embeddings": 1284.9346, "encoder_q-layer.0": 888.4434, "encoder_q-layer.1": 973.6556, "encoder_q-layer.10": 1151.1753, "encoder_q-layer.11": 2691.5386, "encoder_q-layer.2": 1115.3248, "encoder_q-layer.3": 1163.0298, "encoder_q-layer.4": 1317.2156, "encoder_q-layer.5": 1408.1738, "encoder_q-layer.6": 1404.9594, "encoder_q-layer.7": 1536.6235, "encoder_q-layer.8": 1497.9653, "encoder_q-layer.9": 1264.4813, "epoch": 0.61, "inbatch_neg_score": 0.4215, "inbatch_pos_score": 1.0889, "learning_rate": 2.0833333333333336e-05, "loss": 3.2117, "norm_diff": 0.0543, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2155.1555, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4214, "query_norm": 1.4422, "queue_k_norm": 1.4961, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9142, "sent_len_1": 66.6321, "sent_max_len_0": 128.0, "sent_max_len_1": 188.815, "stdk": 0.0488, "stdq": 0.0455, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 62500 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1974, "doc_norm": 1.5024, "encoder_q-embeddings": 972.1968, "encoder_q-layer.0": 604.8071, "encoder_q-layer.1": 626.9871, "encoder_q-layer.10": 1267.0361, "encoder_q-layer.11": 2852.6484, "encoder_q-layer.2": 698.5737, "encoder_q-layer.3": 747.9437, "encoder_q-layer.4": 821.0933, "encoder_q-layer.5": 857.2014, "encoder_q-layer.6": 975.0208, "encoder_q-layer.7": 1155.129, "encoder_q-layer.8": 1382.7961, "encoder_q-layer.9": 1276.4304, "epoch": 0.61, "inbatch_neg_score": 0.4251, "inbatch_pos_score": 1.1191, "learning_rate": 2.077777777777778e-05, "loss": 3.1974, "norm_diff": 0.043, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1860.4602, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.425, "query_norm": 1.4594, "queue_k_norm": 1.4965, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7652, "sent_len_1": 66.78, "sent_max_len_0": 128.0, "sent_max_len_1": 189.15, "stdk": 0.0491, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 62600 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.2333, "doc_norm": 1.5016, "encoder_q-embeddings": 2287.4858, "encoder_q-layer.0": 1551.5148, "encoder_q-layer.1": 1715.923, "encoder_q-layer.10": 1175.1272, "encoder_q-layer.11": 2816.4749, "encoder_q-layer.2": 2148.8792, "encoder_q-layer.3": 2292.4656, "encoder_q-layer.4": 2581.2595, "encoder_q-layer.5": 2500.4285, "encoder_q-layer.6": 2404.1299, "encoder_q-layer.7": 2555.3555, "encoder_q-layer.8": 2204.3013, "encoder_q-layer.9": 1307.9495, "epoch": 0.61, "inbatch_neg_score": 0.4303, "inbatch_pos_score": 1.0791, "learning_rate": 2.0722222222222224e-05, "loss": 3.2333, "norm_diff": 0.0718, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3331.9063, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4304, "query_norm": 1.4298, "queue_k_norm": 1.4982, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8321, "sent_len_1": 66.7063, "sent_max_len_0": 128.0, "sent_max_len_1": 188.44, "stdk": 0.049, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 62700 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.2345, "doc_norm": 1.4982, "encoder_q-embeddings": 1729.9048, "encoder_q-layer.0": 1197.7823, "encoder_q-layer.1": 1493.4011, "encoder_q-layer.10": 1212.3589, "encoder_q-layer.11": 2664.459, "encoder_q-layer.2": 1738.3627, "encoder_q-layer.3": 1985.0439, "encoder_q-layer.4": 2204.0933, "encoder_q-layer.5": 2195.3096, "encoder_q-layer.6": 1984.2692, "encoder_q-layer.7": 1922.6267, "encoder_q-layer.8": 1577.6254, "encoder_q-layer.9": 1205.1179, "epoch": 0.61, "inbatch_neg_score": 0.4344, "inbatch_pos_score": 1.124, "learning_rate": 2.0666666666666666e-05, "loss": 3.2345, "norm_diff": 0.0416, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2757.0295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4341, "query_norm": 1.4566, "queue_k_norm": 1.4996, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9678, "sent_len_1": 66.958, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8975, "stdk": 0.0488, "stdq": 0.0457, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 62800 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.2015, "doc_norm": 1.4999, "encoder_q-embeddings": 1326.7324, "encoder_q-layer.0": 859.2421, "encoder_q-layer.1": 964.8327, "encoder_q-layer.10": 1247.1624, "encoder_q-layer.11": 2750.4072, "encoder_q-layer.2": 1058.7524, "encoder_q-layer.3": 1097.8202, "encoder_q-layer.4": 1165.1726, "encoder_q-layer.5": 1359.0452, "encoder_q-layer.6": 1264.8317, "encoder_q-layer.7": 1442.1046, "encoder_q-layer.8": 1473.5322, "encoder_q-layer.9": 1300.3636, "epoch": 0.61, "inbatch_neg_score": 0.4384, "inbatch_pos_score": 1.1416, "learning_rate": 2.0611111111111112e-05, "loss": 3.2015, "norm_diff": 0.0223, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2147.8098, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4375, "query_norm": 1.4776, "queue_k_norm": 1.5002, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7547, "sent_len_1": 66.724, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5112, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 62900 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.2297, "doc_norm": 1.5033, "encoder_q-embeddings": 1064.373, "encoder_q-layer.0": 705.0458, "encoder_q-layer.1": 765.2467, "encoder_q-layer.10": 1254.6095, "encoder_q-layer.11": 2679.2847, "encoder_q-layer.2": 927.4606, "encoder_q-layer.3": 998.3497, "encoder_q-layer.4": 1073.304, "encoder_q-layer.5": 1054.4171, "encoder_q-layer.6": 1096.2102, "encoder_q-layer.7": 1212.5695, "encoder_q-layer.8": 1386.8589, "encoder_q-layer.9": 1246.0183, "epoch": 0.62, "inbatch_neg_score": 0.4431, "inbatch_pos_score": 1.1172, "learning_rate": 2.0555555555555555e-05, "loss": 3.2297, "norm_diff": 0.0397, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1931.7619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4417, "query_norm": 1.4636, "queue_k_norm": 1.5005, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7349, "sent_len_1": 66.5433, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0263, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 63000 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.2204, "doc_norm": 1.5114, "encoder_q-embeddings": 2261.2258, "encoder_q-layer.0": 1650.2686, "encoder_q-layer.1": 2124.5676, "encoder_q-layer.10": 1344.9431, "encoder_q-layer.11": 2778.3564, "encoder_q-layer.2": 2336.3079, "encoder_q-layer.3": 2567.873, "encoder_q-layer.4": 2779.667, "encoder_q-layer.5": 3090.8225, "encoder_q-layer.6": 2814.8445, "encoder_q-layer.7": 2709.7156, "encoder_q-layer.8": 1807.3397, "encoder_q-layer.9": 1322.5043, "epoch": 0.62, "inbatch_neg_score": 0.4418, "inbatch_pos_score": 1.1045, "learning_rate": 2.05e-05, "loss": 3.2204, "norm_diff": 0.0604, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3481.8541, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4409, "query_norm": 1.451, "queue_k_norm": 1.5015, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1385, "sent_len_1": 67.0171, "sent_max_len_0": 128.0, "sent_max_len_1": 191.015, "stdk": 0.0493, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 63100 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1959, "doc_norm": 1.5041, "encoder_q-embeddings": 480.6876, "encoder_q-layer.0": 303.5064, "encoder_q-layer.1": 322.6266, "encoder_q-layer.10": 692.1949, "encoder_q-layer.11": 1428.5521, "encoder_q-layer.2": 361.5616, "encoder_q-layer.3": 370.0077, "encoder_q-layer.4": 409.437, "encoder_q-layer.5": 424.8144, "encoder_q-layer.6": 491.5599, "encoder_q-layer.7": 580.7375, "encoder_q-layer.8": 679.5379, "encoder_q-layer.9": 623.8641, "epoch": 0.62, "inbatch_neg_score": 0.4447, "inbatch_pos_score": 1.1084, "learning_rate": 2.0444444444444446e-05, "loss": 3.1959, "norm_diff": 0.0485, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 942.4431, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4456, "query_norm": 1.4556, "queue_k_norm": 1.5028, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8059, "sent_len_1": 66.7886, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4025, "stdk": 0.049, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 63200 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.2132, "doc_norm": 1.5063, "encoder_q-embeddings": 523.9493, "encoder_q-layer.0": 327.3377, "encoder_q-layer.1": 351.7188, "encoder_q-layer.10": 580.0222, "encoder_q-layer.11": 1315.1227, "encoder_q-layer.2": 390.2125, "encoder_q-layer.3": 397.5548, "encoder_q-layer.4": 423.2117, "encoder_q-layer.5": 443.4863, "encoder_q-layer.6": 504.9615, "encoder_q-layer.7": 547.2618, "encoder_q-layer.8": 658.8247, "encoder_q-layer.9": 589.1041, "epoch": 0.62, "inbatch_neg_score": 0.4488, "inbatch_pos_score": 1.1523, "learning_rate": 2.0388888888888892e-05, "loss": 3.2132, "norm_diff": 0.025, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 908.8394, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4497, "query_norm": 1.4813, "queue_k_norm": 1.5029, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9335, "sent_len_1": 66.9307, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2525, "stdk": 0.0491, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 63300 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.2201, "doc_norm": 1.5096, "encoder_q-embeddings": 843.6217, "encoder_q-layer.0": 566.1784, "encoder_q-layer.1": 656.5072, "encoder_q-layer.10": 639.5671, "encoder_q-layer.11": 1327.6091, "encoder_q-layer.2": 768.0009, "encoder_q-layer.3": 773.4526, "encoder_q-layer.4": 863.4234, "encoder_q-layer.5": 887.1375, "encoder_q-layer.6": 894.7137, "encoder_q-layer.7": 887.4065, "encoder_q-layer.8": 866.2281, "encoder_q-layer.9": 625.6284, "epoch": 0.62, "inbatch_neg_score": 0.4545, "inbatch_pos_score": 1.168, "learning_rate": 2.0333333333333334e-05, "loss": 3.2201, "norm_diff": 0.027, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1258.7335, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4548, "query_norm": 1.4826, "queue_k_norm": 1.5035, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9733, "sent_len_1": 66.6381, "sent_max_len_0": 128.0, "sent_max_len_1": 189.455, "stdk": 0.0491, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 63400 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.2069, "doc_norm": 1.4964, "encoder_q-embeddings": 556.9018, "encoder_q-layer.0": 350.787, "encoder_q-layer.1": 368.4693, "encoder_q-layer.10": 561.0217, "encoder_q-layer.11": 1307.9109, "encoder_q-layer.2": 411.245, "encoder_q-layer.3": 442.6608, "encoder_q-layer.4": 488.1336, "encoder_q-layer.5": 491.3095, "encoder_q-layer.6": 532.9182, "encoder_q-layer.7": 616.727, "encoder_q-layer.8": 656.4955, "encoder_q-layer.9": 575.3683, "epoch": 0.62, "inbatch_neg_score": 0.4567, "inbatch_pos_score": 1.0928, "learning_rate": 2.027777777777778e-05, "loss": 3.2069, "norm_diff": 0.0586, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 941.7969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4556, "query_norm": 1.4378, "queue_k_norm": 1.5052, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8882, "sent_len_1": 66.7186, "sent_max_len_0": 128.0, "sent_max_len_1": 188.875, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 63500 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.1986, "doc_norm": 1.5064, "encoder_q-embeddings": 911.6083, "encoder_q-layer.0": 640.2222, "encoder_q-layer.1": 677.6674, "encoder_q-layer.10": 667.524, "encoder_q-layer.11": 1528.8448, "encoder_q-layer.2": 771.4194, "encoder_q-layer.3": 814.3874, "encoder_q-layer.4": 885.0862, "encoder_q-layer.5": 902.5883, "encoder_q-layer.6": 1075.2175, "encoder_q-layer.7": 1099.9139, "encoder_q-layer.8": 964.2654, "encoder_q-layer.9": 690.4788, "epoch": 0.62, "inbatch_neg_score": 0.4563, "inbatch_pos_score": 1.0977, "learning_rate": 2.0222222222222222e-05, "loss": 3.1986, "norm_diff": 0.0585, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1393.8876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4561, "query_norm": 1.4479, "queue_k_norm": 1.5064, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8718, "sent_len_1": 66.7826, "sent_max_len_0": 128.0, "sent_max_len_1": 191.3462, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 63600 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.2007, "doc_norm": 1.5052, "encoder_q-embeddings": 674.227, "encoder_q-layer.0": 422.6924, "encoder_q-layer.1": 476.8215, "encoder_q-layer.10": 589.3983, "encoder_q-layer.11": 1401.9501, "encoder_q-layer.2": 541.6241, "encoder_q-layer.3": 576.86, "encoder_q-layer.4": 657.3491, "encoder_q-layer.5": 653.478, "encoder_q-layer.6": 703.026, "encoder_q-layer.7": 835.7457, "encoder_q-layer.8": 793.9247, "encoder_q-layer.9": 635.5317, "epoch": 0.62, "inbatch_neg_score": 0.4565, "inbatch_pos_score": 1.1562, "learning_rate": 2.0166666666666668e-05, "loss": 3.2007, "norm_diff": 0.0401, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1092.3994, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4558, "query_norm": 1.4652, "queue_k_norm": 1.5067, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8207, "sent_len_1": 66.6387, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0037, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 63700 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.2089, "doc_norm": 1.5083, "encoder_q-embeddings": 600.0281, "encoder_q-layer.0": 394.0482, "encoder_q-layer.1": 419.2438, "encoder_q-layer.10": 609.1521, "encoder_q-layer.11": 1391.829, "encoder_q-layer.2": 473.6771, "encoder_q-layer.3": 522.102, "encoder_q-layer.4": 546.5981, "encoder_q-layer.5": 578.3675, "encoder_q-layer.6": 620.463, "encoder_q-layer.7": 681.126, "encoder_q-layer.8": 765.634, "encoder_q-layer.9": 634.3867, "epoch": 0.62, "inbatch_neg_score": 0.4613, "inbatch_pos_score": 1.1074, "learning_rate": 2.011111111111111e-05, "loss": 3.2089, "norm_diff": 0.0584, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1029.0174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4614, "query_norm": 1.4499, "queue_k_norm": 1.5084, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.6669, "sent_len_1": 66.5641, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9963, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 63800 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 3.211, "doc_norm": 1.5094, "encoder_q-embeddings": 711.7576, "encoder_q-layer.0": 488.8303, "encoder_q-layer.1": 564.585, "encoder_q-layer.10": 609.0189, "encoder_q-layer.11": 1321.2111, "encoder_q-layer.2": 672.1862, "encoder_q-layer.3": 758.0822, "encoder_q-layer.4": 834.1042, "encoder_q-layer.5": 816.5254, "encoder_q-layer.6": 838.6954, "encoder_q-layer.7": 898.8941, "encoder_q-layer.8": 930.4572, "encoder_q-layer.9": 690.8707, "epoch": 0.62, "inbatch_neg_score": 0.4624, "inbatch_pos_score": 1.1748, "learning_rate": 2.0055555555555556e-05, "loss": 3.211, "norm_diff": 0.0476, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1205.3439, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4622, "query_norm": 1.4618, "queue_k_norm": 1.5061, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0139, "sent_len_1": 66.7167, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0387, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 63900 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.212, "doc_norm": 1.5077, "encoder_q-embeddings": 1269.094, "encoder_q-layer.0": 858.0757, "encoder_q-layer.1": 927.2702, "encoder_q-layer.10": 664.4972, "encoder_q-layer.11": 1392.3123, "encoder_q-layer.2": 1104.6333, "encoder_q-layer.3": 1166.6978, "encoder_q-layer.4": 1207.4374, "encoder_q-layer.5": 1229.6377, "encoder_q-layer.6": 1181.5341, "encoder_q-layer.7": 1171.1167, "encoder_q-layer.8": 923.1675, "encoder_q-layer.9": 661.6976, "epoch": 0.62, "inbatch_neg_score": 0.4686, "inbatch_pos_score": 1.1387, "learning_rate": 2e-05, "loss": 3.212, "norm_diff": 0.0485, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1645.4711, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4675, "query_norm": 1.4592, "queue_k_norm": 1.5109, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.158, "sent_len_1": 67.0109, "sent_max_len_0": 128.0, "sent_max_len_1": 189.71, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 64000 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.224, "doc_norm": 1.5097, "encoder_q-embeddings": 771.7272, "encoder_q-layer.0": 543.55, "encoder_q-layer.1": 656.2346, "encoder_q-layer.10": 688.826, "encoder_q-layer.11": 1427.542, "encoder_q-layer.2": 827.6564, "encoder_q-layer.3": 879.9137, "encoder_q-layer.4": 931.7577, "encoder_q-layer.5": 877.5795, "encoder_q-layer.6": 847.3301, "encoder_q-layer.7": 865.303, "encoder_q-layer.8": 809.3295, "encoder_q-layer.9": 657.1165, "epoch": 0.63, "inbatch_neg_score": 0.4671, "inbatch_pos_score": 1.1328, "learning_rate": 1.9944444444444447e-05, "loss": 3.224, "norm_diff": 0.0547, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1302.0413, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4666, "query_norm": 1.455, "queue_k_norm": 1.5103, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9727, "sent_len_1": 66.7153, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8125, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 64100 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 3.2053, "doc_norm": 1.5191, "encoder_q-embeddings": 1944.0, "encoder_q-layer.0": 1337.2644, "encoder_q-layer.1": 1520.7407, "encoder_q-layer.10": 723.5491, "encoder_q-layer.11": 1433.5845, "encoder_q-layer.2": 1743.6276, "encoder_q-layer.3": 1823.7217, "encoder_q-layer.4": 1807.4767, "encoder_q-layer.5": 1755.0422, "encoder_q-layer.6": 1698.0742, "encoder_q-layer.7": 2009.9667, "encoder_q-layer.8": 1675.8463, "encoder_q-layer.9": 855.4949, "epoch": 0.63, "inbatch_neg_score": 0.4673, "inbatch_pos_score": 1.1699, "learning_rate": 1.988888888888889e-05, "loss": 3.2053, "norm_diff": 0.052, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2436.5761, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4668, "query_norm": 1.4671, "queue_k_norm": 1.5116, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7739, "sent_len_1": 66.6704, "sent_max_len_0": 128.0, "sent_max_len_1": 186.2337, "stdk": 0.0493, "stdq": 0.046, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 64200 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.2089, "doc_norm": 1.5104, "encoder_q-embeddings": 1257.9249, "encoder_q-layer.0": 882.6318, "encoder_q-layer.1": 960.139, "encoder_q-layer.10": 633.6817, "encoder_q-layer.11": 1405.3759, "encoder_q-layer.2": 1145.9072, "encoder_q-layer.3": 1210.4412, "encoder_q-layer.4": 1365.4729, "encoder_q-layer.5": 1385.9846, "encoder_q-layer.6": 1297.0131, "encoder_q-layer.7": 1589.5129, "encoder_q-layer.8": 1214.0079, "encoder_q-layer.9": 766.0934, "epoch": 0.63, "inbatch_neg_score": 0.4675, "inbatch_pos_score": 1.0859, "learning_rate": 1.9833333333333335e-05, "loss": 3.2089, "norm_diff": 0.0729, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1779.7827, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4663, "query_norm": 1.4375, "queue_k_norm": 1.5107, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8741, "sent_len_1": 66.7565, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0662, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 64300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2032, "doc_norm": 1.5109, "encoder_q-embeddings": 550.6813, "encoder_q-layer.0": 361.2469, "encoder_q-layer.1": 392.1038, "encoder_q-layer.10": 629.7963, "encoder_q-layer.11": 1381.0402, "encoder_q-layer.2": 450.0116, "encoder_q-layer.3": 489.3995, "encoder_q-layer.4": 536.3806, "encoder_q-layer.5": 566.444, "encoder_q-layer.6": 633.9871, "encoder_q-layer.7": 712.8771, "encoder_q-layer.8": 798.7063, "encoder_q-layer.9": 660.7139, "epoch": 0.63, "inbatch_neg_score": 0.4707, "inbatch_pos_score": 1.1309, "learning_rate": 1.9777777777777778e-05, "loss": 3.2032, "norm_diff": 0.0615, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1003.9123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4705, "query_norm": 1.4493, "queue_k_norm": 1.5112, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9581, "sent_len_1": 66.8672, "sent_max_len_0": 128.0, "sent_max_len_1": 191.365, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 64400 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.203, "doc_norm": 1.5096, "encoder_q-embeddings": 1212.8392, "encoder_q-layer.0": 895.9315, "encoder_q-layer.1": 1070.6514, "encoder_q-layer.10": 617.8488, "encoder_q-layer.11": 1364.6188, "encoder_q-layer.2": 1239.0651, "encoder_q-layer.3": 1251.7141, "encoder_q-layer.4": 1319.8737, "encoder_q-layer.5": 1438.1315, "encoder_q-layer.6": 1318.3118, "encoder_q-layer.7": 1294.7728, "encoder_q-layer.8": 1062.6543, "encoder_q-layer.9": 642.1147, "epoch": 0.63, "inbatch_neg_score": 0.4585, "inbatch_pos_score": 1.1094, "learning_rate": 1.9722222222222224e-05, "loss": 3.203, "norm_diff": 0.0537, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1738.5892, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4587, "query_norm": 1.4559, "queue_k_norm": 1.5118, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.85, "sent_len_1": 66.6649, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2363, "stdk": 0.0488, "stdq": 0.0459, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 64500 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.187, "doc_norm": 1.5124, "encoder_q-embeddings": 593.7913, "encoder_q-layer.0": 399.5384, "encoder_q-layer.1": 434.3231, "encoder_q-layer.10": 656.9315, "encoder_q-layer.11": 1479.5264, "encoder_q-layer.2": 493.2334, "encoder_q-layer.3": 540.7809, "encoder_q-layer.4": 618.0341, "encoder_q-layer.5": 653.454, "encoder_q-layer.6": 715.1303, "encoder_q-layer.7": 744.5489, "encoder_q-layer.8": 801.8606, "encoder_q-layer.9": 667.8266, "epoch": 0.63, "inbatch_neg_score": 0.4573, "inbatch_pos_score": 1.1035, "learning_rate": 1.9666666666666666e-05, "loss": 3.187, "norm_diff": 0.072, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1095.4615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4585, "query_norm": 1.4404, "queue_k_norm": 1.5128, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9137, "sent_len_1": 66.5645, "sent_max_len_0": 128.0, "sent_max_len_1": 187.3825, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 64600 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.2008, "doc_norm": 1.5149, "encoder_q-embeddings": 567.1237, "encoder_q-layer.0": 381.5062, "encoder_q-layer.1": 435.4225, "encoder_q-layer.10": 652.3917, "encoder_q-layer.11": 1310.0652, "encoder_q-layer.2": 528.6222, "encoder_q-layer.3": 551.6496, "encoder_q-layer.4": 582.7476, "encoder_q-layer.5": 625.6315, "encoder_q-layer.6": 640.0785, "encoder_q-layer.7": 682.5546, "encoder_q-layer.8": 753.3686, "encoder_q-layer.9": 601.1358, "epoch": 0.63, "inbatch_neg_score": 0.4639, "inbatch_pos_score": 1.1396, "learning_rate": 1.9611111111111115e-05, "loss": 3.2008, "norm_diff": 0.0684, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1017.4339, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4631, "query_norm": 1.4464, "queue_k_norm": 1.5104, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0073, "sent_len_1": 66.764, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4963, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 64700 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.2055, "doc_norm": 1.517, "encoder_q-embeddings": 487.0407, "encoder_q-layer.0": 316.7354, "encoder_q-layer.1": 324.119, "encoder_q-layer.10": 580.6937, "encoder_q-layer.11": 1329.3837, "encoder_q-layer.2": 367.2446, "encoder_q-layer.3": 398.5673, "encoder_q-layer.4": 424.7337, "encoder_q-layer.5": 435.5526, "encoder_q-layer.6": 509.0636, "encoder_q-layer.7": 647.3821, "encoder_q-layer.8": 697.4836, "encoder_q-layer.9": 602.6177, "epoch": 0.63, "inbatch_neg_score": 0.4584, "inbatch_pos_score": 1.1494, "learning_rate": 1.9555555555555557e-05, "loss": 3.2055, "norm_diff": 0.0512, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 922.9053, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4592, "query_norm": 1.4659, "queue_k_norm": 1.5133, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9706, "sent_len_1": 66.8002, "sent_max_len_0": 128.0, "sent_max_len_1": 187.445, "stdk": 0.0491, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 64800 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.1913, "doc_norm": 1.5149, "encoder_q-embeddings": 808.1993, "encoder_q-layer.0": 535.0325, "encoder_q-layer.1": 574.3803, "encoder_q-layer.10": 614.5933, "encoder_q-layer.11": 1334.4319, "encoder_q-layer.2": 634.9188, "encoder_q-layer.3": 683.2802, "encoder_q-layer.4": 694.9596, "encoder_q-layer.5": 705.1801, "encoder_q-layer.6": 703.7319, "encoder_q-layer.7": 767.8749, "encoder_q-layer.8": 798.6143, "encoder_q-layer.9": 625.6681, "epoch": 0.63, "inbatch_neg_score": 0.4587, "inbatch_pos_score": 1.126, "learning_rate": 1.9500000000000003e-05, "loss": 3.1913, "norm_diff": 0.0641, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1145.8072, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4607, "query_norm": 1.4509, "queue_k_norm": 1.5122, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8277, "sent_len_1": 66.711, "sent_max_len_0": 128.0, "sent_max_len_1": 189.385, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 64900 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2264, "doc_norm": 1.5144, "encoder_q-embeddings": 2018.0134, "encoder_q-layer.0": 1277.7849, "encoder_q-layer.1": 1403.9498, "encoder_q-layer.10": 622.3005, "encoder_q-layer.11": 1358.184, "encoder_q-layer.2": 1753.6144, "encoder_q-layer.3": 1818.0968, "encoder_q-layer.4": 1963.3871, "encoder_q-layer.5": 1638.2202, "encoder_q-layer.6": 1883.2385, "encoder_q-layer.7": 1607.8263, "encoder_q-layer.8": 1160.9039, "encoder_q-layer.9": 649.7268, "epoch": 0.63, "inbatch_neg_score": 0.4583, "inbatch_pos_score": 1.123, "learning_rate": 1.9444444444444445e-05, "loss": 3.2264, "norm_diff": 0.0741, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2292.0188, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.459, "query_norm": 1.4403, "queue_k_norm": 1.5131, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0258, "sent_len_1": 66.7768, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5425, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 65000 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 3.1921, "doc_norm": 1.5118, "encoder_q-embeddings": 455.7672, "encoder_q-layer.0": 298.7204, "encoder_q-layer.1": 314.0677, "encoder_q-layer.10": 564.5554, "encoder_q-layer.11": 1242.8503, "encoder_q-layer.2": 350.0052, "encoder_q-layer.3": 368.6246, "encoder_q-layer.4": 402.666, "encoder_q-layer.5": 418.4878, "encoder_q-layer.6": 482.6115, "encoder_q-layer.7": 558.2763, "encoder_q-layer.8": 661.2522, "encoder_q-layer.9": 584.7864, "epoch": 0.64, "inbatch_neg_score": 0.4591, "inbatch_pos_score": 1.1738, "learning_rate": 1.938888888888889e-05, "loss": 3.1921, "norm_diff": 0.0459, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 862.6619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4592, "query_norm": 1.4658, "queue_k_norm": 1.5132, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9041, "sent_len_1": 66.8826, "sent_max_len_0": 128.0, "sent_max_len_1": 189.895, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 65100 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2211, "doc_norm": 1.5096, "encoder_q-embeddings": 1086.205, "encoder_q-layer.0": 701.2986, "encoder_q-layer.1": 748.4805, "encoder_q-layer.10": 1249.3127, "encoder_q-layer.11": 2762.9712, "encoder_q-layer.2": 834.8705, "encoder_q-layer.3": 878.8942, "encoder_q-layer.4": 897.5848, "encoder_q-layer.5": 950.7848, "encoder_q-layer.6": 1083.2183, "encoder_q-layer.7": 1269.5105, "encoder_q-layer.8": 1397.3561, "encoder_q-layer.9": 1241.2644, "epoch": 0.64, "inbatch_neg_score": 0.4623, "inbatch_pos_score": 1.1348, "learning_rate": 1.9333333333333333e-05, "loss": 3.2211, "norm_diff": 0.0363, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1939.515, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4622, "query_norm": 1.4733, "queue_k_norm": 1.5144, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9738, "sent_len_1": 66.8409, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9288, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 65200 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.2288, "doc_norm": 1.5191, "encoder_q-embeddings": 1118.6464, "encoder_q-layer.0": 741.4786, "encoder_q-layer.1": 739.3354, "encoder_q-layer.10": 1206.8665, "encoder_q-layer.11": 2817.3152, "encoder_q-layer.2": 848.1968, "encoder_q-layer.3": 881.0345, "encoder_q-layer.4": 951.6905, "encoder_q-layer.5": 1005.3029, "encoder_q-layer.6": 1086.5719, "encoder_q-layer.7": 1279.7164, "encoder_q-layer.8": 1498.7537, "encoder_q-layer.9": 1255.3851, "epoch": 0.64, "inbatch_neg_score": 0.4615, "inbatch_pos_score": 1.1025, "learning_rate": 1.927777777777778e-05, "loss": 3.2288, "norm_diff": 0.067, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2004.2184, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4617, "query_norm": 1.4521, "queue_k_norm": 1.5141, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8397, "sent_len_1": 66.6238, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0712, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 65300 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.1966, "doc_norm": 1.5147, "encoder_q-embeddings": 1020.0449, "encoder_q-layer.0": 676.9633, "encoder_q-layer.1": 701.8994, "encoder_q-layer.10": 1187.7896, "encoder_q-layer.11": 2600.6587, "encoder_q-layer.2": 751.5262, "encoder_q-layer.3": 800.4385, "encoder_q-layer.4": 837.0302, "encoder_q-layer.5": 877.6196, "encoder_q-layer.6": 988.828, "encoder_q-layer.7": 1171.6836, "encoder_q-layer.8": 1348.8466, "encoder_q-layer.9": 1222.1935, "epoch": 0.64, "inbatch_neg_score": 0.4592, "inbatch_pos_score": 1.127, "learning_rate": 1.922222222222222e-05, "loss": 3.1966, "norm_diff": 0.0501, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1820.9512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4583, "query_norm": 1.4645, "queue_k_norm": 1.5138, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.6336, "sent_len_1": 66.912, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3313, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 65400 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.2329, "doc_norm": 1.5122, "encoder_q-embeddings": 1339.879, "encoder_q-layer.0": 967.7212, "encoder_q-layer.1": 1020.1147, "encoder_q-layer.10": 1268.7947, "encoder_q-layer.11": 2592.2009, "encoder_q-layer.2": 1147.6774, "encoder_q-layer.3": 1139.8834, "encoder_q-layer.4": 1217.2393, "encoder_q-layer.5": 1258.157, "encoder_q-layer.6": 1203.0472, "encoder_q-layer.7": 1516.632, "encoder_q-layer.8": 1521.3065, "encoder_q-layer.9": 1323.3409, "epoch": 0.64, "inbatch_neg_score": 0.4606, "inbatch_pos_score": 1.1494, "learning_rate": 1.9166666666666667e-05, "loss": 3.2329, "norm_diff": 0.0583, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2120.8892, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4609, "query_norm": 1.4539, "queue_k_norm": 1.513, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8226, "sent_len_1": 66.488, "sent_max_len_0": 128.0, "sent_max_len_1": 191.125, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 65500 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.1932, "doc_norm": 1.5138, "encoder_q-embeddings": 1292.3354, "encoder_q-layer.0": 870.5707, "encoder_q-layer.1": 999.8556, "encoder_q-layer.10": 1254.1738, "encoder_q-layer.11": 2659.9363, "encoder_q-layer.2": 1099.4171, "encoder_q-layer.3": 1072.0564, "encoder_q-layer.4": 1130.0287, "encoder_q-layer.5": 1153.483, "encoder_q-layer.6": 1173.7854, "encoder_q-layer.7": 1299.562, "encoder_q-layer.8": 1387.2288, "encoder_q-layer.9": 1158.787, "epoch": 0.64, "inbatch_neg_score": 0.462, "inbatch_pos_score": 1.1406, "learning_rate": 1.9111111111111113e-05, "loss": 3.1932, "norm_diff": 0.0533, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2055.8034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4614, "query_norm": 1.4605, "queue_k_norm": 1.5132, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9809, "sent_len_1": 66.6673, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6887, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 65600 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.1919, "doc_norm": 1.5138, "encoder_q-embeddings": 1038.4542, "encoder_q-layer.0": 680.0045, "encoder_q-layer.1": 719.8704, "encoder_q-layer.10": 1164.9222, "encoder_q-layer.11": 2636.7117, "encoder_q-layer.2": 795.0626, "encoder_q-layer.3": 805.3928, "encoder_q-layer.4": 875.8926, "encoder_q-layer.5": 864.9722, "encoder_q-layer.6": 1008.7755, "encoder_q-layer.7": 1178.8743, "encoder_q-layer.8": 1324.0789, "encoder_q-layer.9": 1226.0549, "epoch": 0.64, "inbatch_neg_score": 0.4655, "inbatch_pos_score": 1.1807, "learning_rate": 1.905555555555556e-05, "loss": 3.1919, "norm_diff": 0.0379, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1832.82, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4663, "query_norm": 1.4759, "queue_k_norm": 1.5164, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0796, "sent_len_1": 67.0481, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8988, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 65700 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.191, "doc_norm": 1.5197, "encoder_q-embeddings": 1053.7256, "encoder_q-layer.0": 696.8502, "encoder_q-layer.1": 748.8439, "encoder_q-layer.10": 1172.1399, "encoder_q-layer.11": 2630.1079, "encoder_q-layer.2": 829.3132, "encoder_q-layer.3": 911.3969, "encoder_q-layer.4": 954.4534, "encoder_q-layer.5": 989.6992, "encoder_q-layer.6": 1115.7574, "encoder_q-layer.7": 1229.374, "encoder_q-layer.8": 1357.3462, "encoder_q-layer.9": 1178.4222, "epoch": 0.64, "inbatch_neg_score": 0.4648, "inbatch_pos_score": 1.1211, "learning_rate": 1.9e-05, "loss": 3.191, "norm_diff": 0.0654, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1872.2253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4651, "query_norm": 1.4544, "queue_k_norm": 1.5157, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8972, "sent_len_1": 66.6606, "sent_max_len_0": 128.0, "sent_max_len_1": 188.37, "stdk": 0.0492, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 65800 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.2096, "doc_norm": 1.5133, "encoder_q-embeddings": 1259.8729, "encoder_q-layer.0": 866.8186, "encoder_q-layer.1": 980.8087, "encoder_q-layer.10": 1197.1774, "encoder_q-layer.11": 2756.1487, "encoder_q-layer.2": 1156.9851, "encoder_q-layer.3": 1193.9724, "encoder_q-layer.4": 1240.1199, "encoder_q-layer.5": 1273.9299, "encoder_q-layer.6": 1434.4025, "encoder_q-layer.7": 1562.8062, "encoder_q-layer.8": 1552.8109, "encoder_q-layer.9": 1285.5798, "epoch": 0.64, "inbatch_neg_score": 0.4663, "inbatch_pos_score": 1.1494, "learning_rate": 1.8944444444444447e-05, "loss": 3.2096, "norm_diff": 0.0277, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2175.7082, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4675, "query_norm": 1.4856, "queue_k_norm": 1.5143, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9268, "sent_len_1": 66.6605, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7587, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 65900 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1955, "doc_norm": 1.5114, "encoder_q-embeddings": 1156.0897, "encoder_q-layer.0": 753.3243, "encoder_q-layer.1": 820.4401, "encoder_q-layer.10": 1363.2491, "encoder_q-layer.11": 2828.3379, "encoder_q-layer.2": 933.7761, "encoder_q-layer.3": 1011.9638, "encoder_q-layer.4": 1085.7136, "encoder_q-layer.5": 1119.6705, "encoder_q-layer.6": 1221.006, "encoder_q-layer.7": 1402.0835, "encoder_q-layer.8": 1547.3834, "encoder_q-layer.9": 1347.7157, "epoch": 0.64, "inbatch_neg_score": 0.4672, "inbatch_pos_score": 1.1504, "learning_rate": 1.888888888888889e-05, "loss": 3.1955, "norm_diff": 0.0367, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2069.1406, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4668, "query_norm": 1.4747, "queue_k_norm": 1.5145, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0198, "sent_len_1": 66.697, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7925, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 66000 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2427, "doc_norm": 1.5167, "encoder_q-embeddings": 1266.3644, "encoder_q-layer.0": 814.5183, "encoder_q-layer.1": 898.7255, "encoder_q-layer.10": 1141.4359, "encoder_q-layer.11": 2633.9773, "encoder_q-layer.2": 1046.2938, "encoder_q-layer.3": 1077.5872, "encoder_q-layer.4": 1131.343, "encoder_q-layer.5": 1196.8231, "encoder_q-layer.6": 1391.1794, "encoder_q-layer.7": 1652.8013, "encoder_q-layer.8": 1891.2009, "encoder_q-layer.9": 1503.0392, "epoch": 0.65, "inbatch_neg_score": 0.4727, "inbatch_pos_score": 1.1348, "learning_rate": 1.8833333333333335e-05, "loss": 3.2427, "norm_diff": 0.0659, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2163.0003, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4722, "query_norm": 1.4508, "queue_k_norm": 1.5155, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0807, "sent_len_1": 66.5189, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6813, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 66100 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.2101, "doc_norm": 1.5163, "encoder_q-embeddings": 1055.0765, "encoder_q-layer.0": 706.0272, "encoder_q-layer.1": 783.0759, "encoder_q-layer.10": 1225.1342, "encoder_q-layer.11": 2851.8794, "encoder_q-layer.2": 870.0491, "encoder_q-layer.3": 936.9124, "encoder_q-layer.4": 1006.8162, "encoder_q-layer.5": 1025.9822, "encoder_q-layer.6": 1182.9404, "encoder_q-layer.7": 1355.1207, "encoder_q-layer.8": 1568.2595, "encoder_q-layer.9": 1302.6631, "epoch": 0.65, "inbatch_neg_score": 0.4724, "inbatch_pos_score": 1.1562, "learning_rate": 1.8777777777777777e-05, "loss": 3.2101, "norm_diff": 0.0489, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2023.875, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4729, "query_norm": 1.4673, "queue_k_norm": 1.5159, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0451, "sent_len_1": 66.7758, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7788, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 66200 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2215, "doc_norm": 1.5193, "encoder_q-embeddings": 1572.7737, "encoder_q-layer.0": 1167.8043, "encoder_q-layer.1": 1255.9601, "encoder_q-layer.10": 1249.7006, "encoder_q-layer.11": 2794.7954, "encoder_q-layer.2": 1478.1169, "encoder_q-layer.3": 1541.8165, "encoder_q-layer.4": 1729.3955, "encoder_q-layer.5": 1772.5271, "encoder_q-layer.6": 1576.5217, "encoder_q-layer.7": 1864.0337, "encoder_q-layer.8": 1755.8661, "encoder_q-layer.9": 1380.7043, "epoch": 0.65, "inbatch_neg_score": 0.4801, "inbatch_pos_score": 1.1348, "learning_rate": 1.8722222222222223e-05, "loss": 3.2215, "norm_diff": 0.0507, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2532.2, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4797, "query_norm": 1.4686, "queue_k_norm": 1.516, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9878, "sent_len_1": 66.553, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2525, "stdk": 0.0491, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 66300 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.1962, "doc_norm": 1.5175, "encoder_q-embeddings": 992.4595, "encoder_q-layer.0": 657.1902, "encoder_q-layer.1": 687.545, "encoder_q-layer.10": 1264.3606, "encoder_q-layer.11": 2854.7883, "encoder_q-layer.2": 760.746, "encoder_q-layer.3": 810.6346, "encoder_q-layer.4": 850.6559, "encoder_q-layer.5": 891.6509, "encoder_q-layer.6": 1021.1628, "encoder_q-layer.7": 1241.6879, "encoder_q-layer.8": 1394.3967, "encoder_q-layer.9": 1260.8405, "epoch": 0.65, "inbatch_neg_score": 0.4816, "inbatch_pos_score": 1.1396, "learning_rate": 1.866666666666667e-05, "loss": 3.1962, "norm_diff": 0.0592, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1912.8885, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4819, "query_norm": 1.4583, "queue_k_norm": 1.5169, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9594, "sent_len_1": 66.8159, "sent_max_len_0": 128.0, "sent_max_len_1": 188.155, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 66400 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.1999, "doc_norm": 1.5157, "encoder_q-embeddings": 1248.8297, "encoder_q-layer.0": 861.6307, "encoder_q-layer.1": 860.9256, "encoder_q-layer.10": 1275.7094, "encoder_q-layer.11": 2742.6914, "encoder_q-layer.2": 954.6053, "encoder_q-layer.3": 1040.0419, "encoder_q-layer.4": 1097.6724, "encoder_q-layer.5": 1113.3016, "encoder_q-layer.6": 1225.957, "encoder_q-layer.7": 1462.163, "encoder_q-layer.8": 1482.375, "encoder_q-layer.9": 1269.1078, "epoch": 0.65, "inbatch_neg_score": 0.4823, "inbatch_pos_score": 1.1826, "learning_rate": 1.861111111111111e-05, "loss": 3.1999, "norm_diff": 0.0447, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2082.7485, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4832, "query_norm": 1.471, "queue_k_norm": 1.516, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9221, "sent_len_1": 66.5961, "sent_max_len_0": 128.0, "sent_max_len_1": 188.685, "stdk": 0.0489, "stdq": 0.046, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 66500 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.2256, "doc_norm": 1.5171, "encoder_q-embeddings": 1321.7635, "encoder_q-layer.0": 894.8356, "encoder_q-layer.1": 987.1892, "encoder_q-layer.10": 1138.4751, "encoder_q-layer.11": 2558.7668, "encoder_q-layer.2": 1117.0123, "encoder_q-layer.3": 1202.5038, "encoder_q-layer.4": 1250.2643, "encoder_q-layer.5": 1289.3866, "encoder_q-layer.6": 1425.1698, "encoder_q-layer.7": 1434.7356, "encoder_q-layer.8": 1434.5081, "encoder_q-layer.9": 1155.6309, "epoch": 0.65, "inbatch_neg_score": 0.4813, "inbatch_pos_score": 1.1348, "learning_rate": 1.8555555555555557e-05, "loss": 3.2256, "norm_diff": 0.07, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2094.8336, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4822, "query_norm": 1.4471, "queue_k_norm": 1.5188, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7597, "sent_len_1": 66.8082, "sent_max_len_0": 128.0, "sent_max_len_1": 192.1, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 66600 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2012, "doc_norm": 1.5177, "encoder_q-embeddings": 2316.5149, "encoder_q-layer.0": 1561.311, "encoder_q-layer.1": 1710.1243, "encoder_q-layer.10": 1170.449, "encoder_q-layer.11": 2718.3623, "encoder_q-layer.2": 1943.188, "encoder_q-layer.3": 2014.5714, "encoder_q-layer.4": 2157.4951, "encoder_q-layer.5": 2399.5161, "encoder_q-layer.6": 2314.009, "encoder_q-layer.7": 2493.8472, "encoder_q-layer.8": 2547.9565, "encoder_q-layer.9": 1490.8112, "epoch": 0.65, "inbatch_neg_score": 0.4843, "inbatch_pos_score": 1.1621, "learning_rate": 1.85e-05, "loss": 3.2012, "norm_diff": 0.0665, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3246.517, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4844, "query_norm": 1.4512, "queue_k_norm": 1.5202, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8128, "sent_len_1": 66.7817, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4525, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 66700 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 3.2211, "doc_norm": 1.5199, "encoder_q-embeddings": 1428.8146, "encoder_q-layer.0": 989.7008, "encoder_q-layer.1": 1081.5602, "encoder_q-layer.10": 1270.5989, "encoder_q-layer.11": 2749.4551, "encoder_q-layer.2": 1239.8781, "encoder_q-layer.3": 1356.0129, "encoder_q-layer.4": 1469.5216, "encoder_q-layer.5": 1496.98, "encoder_q-layer.6": 1573.2417, "encoder_q-layer.7": 1903.303, "encoder_q-layer.8": 1800.1536, "encoder_q-layer.9": 1278.26, "epoch": 0.65, "inbatch_neg_score": 0.4863, "inbatch_pos_score": 1.1855, "learning_rate": 1.8444444444444445e-05, "loss": 3.2211, "norm_diff": 0.0563, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2386.505, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4861, "query_norm": 1.4637, "queue_k_norm": 1.5174, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7789, "sent_len_1": 66.6603, "sent_max_len_0": 128.0, "sent_max_len_1": 191.4162, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 66800 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1956, "doc_norm": 1.5099, "encoder_q-embeddings": 3822.5632, "encoder_q-layer.0": 2637.7986, "encoder_q-layer.1": 2691.1316, "encoder_q-layer.10": 1196.2104, "encoder_q-layer.11": 2809.7473, "encoder_q-layer.2": 2682.9868, "encoder_q-layer.3": 2521.876, "encoder_q-layer.4": 2379.7815, "encoder_q-layer.5": 2331.3298, "encoder_q-layer.6": 2068.3506, "encoder_q-layer.7": 1741.7122, "encoder_q-layer.8": 1489.7711, "encoder_q-layer.9": 1209.8713, "epoch": 0.65, "inbatch_neg_score": 0.4884, "inbatch_pos_score": 1.1641, "learning_rate": 1.838888888888889e-05, "loss": 3.1956, "norm_diff": 0.0374, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3763.0106, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4883, "query_norm": 1.4725, "queue_k_norm": 1.5208, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9903, "sent_len_1": 66.7764, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2812, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 66900 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1778, "doc_norm": 1.5203, "encoder_q-embeddings": 1338.9401, "encoder_q-layer.0": 841.085, "encoder_q-layer.1": 959.1176, "encoder_q-layer.10": 1502.5173, "encoder_q-layer.11": 3035.0972, "encoder_q-layer.2": 1057.7815, "encoder_q-layer.3": 1145.7677, "encoder_q-layer.4": 1134.692, "encoder_q-layer.5": 1160.4739, "encoder_q-layer.6": 1236.6545, "encoder_q-layer.7": 1422.3202, "encoder_q-layer.8": 1512.2552, "encoder_q-layer.9": 1347.3927, "epoch": 0.65, "inbatch_neg_score": 0.4892, "inbatch_pos_score": 1.167, "learning_rate": 1.8333333333333333e-05, "loss": 3.1778, "norm_diff": 0.0462, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2181.5251, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4883, "query_norm": 1.4741, "queue_k_norm": 1.5215, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1984, "sent_len_1": 66.701, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6387, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 67000 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.2197, "doc_norm": 1.5223, "encoder_q-embeddings": 1064.3405, "encoder_q-layer.0": 718.1384, "encoder_q-layer.1": 755.067, "encoder_q-layer.10": 1207.463, "encoder_q-layer.11": 2568.2585, "encoder_q-layer.2": 862.363, "encoder_q-layer.3": 949.7911, "encoder_q-layer.4": 1019.1638, "encoder_q-layer.5": 1047.7592, "encoder_q-layer.6": 1205.2594, "encoder_q-layer.7": 1403.2936, "encoder_q-layer.8": 1417.9128, "encoder_q-layer.9": 1201.0784, "epoch": 0.66, "inbatch_neg_score": 0.4888, "inbatch_pos_score": 1.1318, "learning_rate": 1.827777777777778e-05, "loss": 3.2197, "norm_diff": 0.0745, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1902.9124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4878, "query_norm": 1.4478, "queue_k_norm": 1.5217, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9276, "sent_len_1": 66.89, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1637, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 67100 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.1745, "doc_norm": 1.525, "encoder_q-embeddings": 1977.5953, "encoder_q-layer.0": 1313.5333, "encoder_q-layer.1": 1446.9448, "encoder_q-layer.10": 2220.4412, "encoder_q-layer.11": 5102.1475, "encoder_q-layer.2": 1612.3713, "encoder_q-layer.3": 1737.1284, "encoder_q-layer.4": 1805.67, "encoder_q-layer.5": 1949.3031, "encoder_q-layer.6": 2192.5417, "encoder_q-layer.7": 2550.4897, "encoder_q-layer.8": 2996.1558, "encoder_q-layer.9": 2455.6638, "epoch": 0.66, "inbatch_neg_score": 0.4872, "inbatch_pos_score": 1.168, "learning_rate": 1.8222222222222224e-05, "loss": 3.1745, "norm_diff": 0.0583, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3663.749, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4878, "query_norm": 1.4667, "queue_k_norm": 1.5211, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0573, "sent_len_1": 67.0753, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7225, "stdk": 0.0491, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 67200 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.208, "doc_norm": 1.5211, "encoder_q-embeddings": 2695.3662, "encoder_q-layer.0": 1828.6154, "encoder_q-layer.1": 2081.3638, "encoder_q-layer.10": 2488.3694, "encoder_q-layer.11": 5690.0156, "encoder_q-layer.2": 2353.2563, "encoder_q-layer.3": 2545.2271, "encoder_q-layer.4": 2634.636, "encoder_q-layer.5": 2602.564, "encoder_q-layer.6": 2985.6328, "encoder_q-layer.7": 3173.1541, "encoder_q-layer.8": 3316.5251, "encoder_q-layer.9": 2723.6062, "epoch": 0.66, "inbatch_neg_score": 0.4867, "inbatch_pos_score": 1.1689, "learning_rate": 1.8166666666666667e-05, "loss": 3.208, "norm_diff": 0.0505, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4472.3721, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4866, "query_norm": 1.4707, "queue_k_norm": 1.5209, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.922, "sent_len_1": 66.8213, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8413, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 67300 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2181, "doc_norm": 1.5184, "encoder_q-embeddings": 7531.5083, "encoder_q-layer.0": 5070.645, "encoder_q-layer.1": 5679.938, "encoder_q-layer.10": 2510.8306, "encoder_q-layer.11": 5366.2495, "encoder_q-layer.2": 5955.084, "encoder_q-layer.3": 6232.4814, "encoder_q-layer.4": 6146.2441, "encoder_q-layer.5": 4783.5112, "encoder_q-layer.6": 4827.7769, "encoder_q-layer.7": 5608.2124, "encoder_q-layer.8": 4741.1802, "encoder_q-layer.9": 3037.9404, "epoch": 0.66, "inbatch_neg_score": 0.4864, "inbatch_pos_score": 1.165, "learning_rate": 1.8111111111111112e-05, "loss": 3.2181, "norm_diff": 0.0624, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8250.8761, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4868, "query_norm": 1.456, "queue_k_norm": 1.52, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0575, "sent_len_1": 66.8381, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2175, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 67400 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.1868, "doc_norm": 1.5146, "encoder_q-embeddings": 1043.674, "encoder_q-layer.0": 702.5586, "encoder_q-layer.1": 773.145, "encoder_q-layer.10": 1178.4487, "encoder_q-layer.11": 2655.771, "encoder_q-layer.2": 859.5906, "encoder_q-layer.3": 923.7656, "encoder_q-layer.4": 1046.6565, "encoder_q-layer.5": 1088.6423, "encoder_q-layer.6": 1197.8267, "encoder_q-layer.7": 1334.7672, "encoder_q-layer.8": 1451.6708, "encoder_q-layer.9": 1208.9156, "epoch": 0.66, "inbatch_neg_score": 0.4875, "inbatch_pos_score": 1.1797, "learning_rate": 1.8055555555555555e-05, "loss": 3.1868, "norm_diff": 0.0503, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1922.3962, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4883, "query_norm": 1.4644, "queue_k_norm": 1.5214, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8113, "sent_len_1": 66.9035, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8388, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 67500 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.1805, "doc_norm": 1.5245, "encoder_q-embeddings": 1310.7788, "encoder_q-layer.0": 858.6702, "encoder_q-layer.1": 973.6287, "encoder_q-layer.10": 1175.6765, "encoder_q-layer.11": 2671.4602, "encoder_q-layer.2": 1150.9014, "encoder_q-layer.3": 1127.5564, "encoder_q-layer.4": 1189.6876, "encoder_q-layer.5": 1213.7334, "encoder_q-layer.6": 1187.0231, "encoder_q-layer.7": 1288.7046, "encoder_q-layer.8": 1435.9858, "encoder_q-layer.9": 1262.1703, "epoch": 0.66, "inbatch_neg_score": 0.4892, "inbatch_pos_score": 1.1416, "learning_rate": 1.8e-05, "loss": 3.1805, "norm_diff": 0.0707, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2080.1081, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4897, "query_norm": 1.4538, "queue_k_norm": 1.5208, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7952, "sent_len_1": 66.5417, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0825, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 67600 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.1626, "doc_norm": 1.5193, "encoder_q-embeddings": 1048.6641, "encoder_q-layer.0": 681.0511, "encoder_q-layer.1": 735.0033, "encoder_q-layer.10": 1235.0002, "encoder_q-layer.11": 2729.7825, "encoder_q-layer.2": 842.5024, "encoder_q-layer.3": 862.2263, "encoder_q-layer.4": 919.9046, "encoder_q-layer.5": 949.9571, "encoder_q-layer.6": 1079.3254, "encoder_q-layer.7": 1316.5399, "encoder_q-layer.8": 1430.3328, "encoder_q-layer.9": 1253.7642, "epoch": 0.66, "inbatch_neg_score": 0.4894, "inbatch_pos_score": 1.1504, "learning_rate": 1.7944444444444443e-05, "loss": 3.1626, "norm_diff": 0.06, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1920.8833, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.49, "query_norm": 1.4593, "queue_k_norm": 1.5226, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8669, "sent_len_1": 66.828, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6975, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 67700 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.2043, "doc_norm": 1.5283, "encoder_q-embeddings": 7820.229, "encoder_q-layer.0": 5392.0093, "encoder_q-layer.1": 6152.2246, "encoder_q-layer.10": 1325.6246, "encoder_q-layer.11": 2770.4961, "encoder_q-layer.2": 6872.645, "encoder_q-layer.3": 8224.7021, "encoder_q-layer.4": 9767.6113, "encoder_q-layer.5": 10353.9561, "encoder_q-layer.6": 10802.168, "encoder_q-layer.7": 11023.6807, "encoder_q-layer.8": 7869.9146, "encoder_q-layer.9": 1911.5994, "epoch": 0.66, "inbatch_neg_score": 0.4904, "inbatch_pos_score": 1.1562, "learning_rate": 1.788888888888889e-05, "loss": 3.2043, "norm_diff": 0.0569, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11425.395, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4907, "query_norm": 1.4715, "queue_k_norm": 1.5238, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9145, "sent_len_1": 66.7561, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6275, "stdk": 0.0492, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 67800 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.2292, "doc_norm": 1.5269, "encoder_q-embeddings": 1073.1238, "encoder_q-layer.0": 681.0153, "encoder_q-layer.1": 721.3771, "encoder_q-layer.10": 1199.608, "encoder_q-layer.11": 2705.636, "encoder_q-layer.2": 813.5032, "encoder_q-layer.3": 843.981, "encoder_q-layer.4": 910.0774, "encoder_q-layer.5": 973.1188, "encoder_q-layer.6": 1146.8329, "encoder_q-layer.7": 1319.5917, "encoder_q-layer.8": 1510.2059, "encoder_q-layer.9": 1310.8134, "epoch": 0.66, "inbatch_neg_score": 0.4882, "inbatch_pos_score": 1.1504, "learning_rate": 1.7833333333333334e-05, "loss": 3.2292, "norm_diff": 0.0674, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1940.6058, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4893, "query_norm": 1.4594, "queue_k_norm": 1.5236, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0403, "sent_len_1": 66.8463, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5213, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 67900 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2251, "doc_norm": 1.5244, "encoder_q-embeddings": 11545.7627, "encoder_q-layer.0": 8340.6953, "encoder_q-layer.1": 8057.3979, "encoder_q-layer.10": 1166.6963, "encoder_q-layer.11": 2525.6101, "encoder_q-layer.2": 9826.0244, "encoder_q-layer.3": 10166.0996, "encoder_q-layer.4": 10042.833, "encoder_q-layer.5": 11535.5117, "encoder_q-layer.6": 9478.0732, "encoder_q-layer.7": 6499.2412, "encoder_q-layer.8": 4688.373, "encoder_q-layer.9": 1761.1381, "epoch": 0.66, "inbatch_neg_score": 0.4904, "inbatch_pos_score": 1.1689, "learning_rate": 1.777777777777778e-05, "loss": 3.2251, "norm_diff": 0.0603, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12367.9378, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4897, "query_norm": 1.4641, "queue_k_norm": 1.5211, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8749, "sent_len_1": 66.7007, "sent_max_len_0": 128.0, "sent_max_len_1": 190.345, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 68000 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2085, "doc_norm": 1.5208, "encoder_q-embeddings": 1274.7114, "encoder_q-layer.0": 849.0226, "encoder_q-layer.1": 952.6102, "encoder_q-layer.10": 1261.4025, "encoder_q-layer.11": 2856.0095, "encoder_q-layer.2": 1091.8851, "encoder_q-layer.3": 1175.0961, "encoder_q-layer.4": 1335.9082, "encoder_q-layer.5": 1324.2708, "encoder_q-layer.6": 1464.1993, "encoder_q-layer.7": 1576.0826, "encoder_q-layer.8": 1644.9454, "encoder_q-layer.9": 1281.9733, "epoch": 0.66, "inbatch_neg_score": 0.4904, "inbatch_pos_score": 1.125, "learning_rate": 1.7722222222222222e-05, "loss": 3.2085, "norm_diff": 0.0722, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2218.3406, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4902, "query_norm": 1.4486, "queue_k_norm": 1.5246, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0656, "sent_len_1": 66.8031, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7738, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 68100 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 3.1899, "doc_norm": 1.5326, "encoder_q-embeddings": 1371.7784, "encoder_q-layer.0": 880.1191, "encoder_q-layer.1": 1040.6588, "encoder_q-layer.10": 1248.3231, "encoder_q-layer.11": 2727.3687, "encoder_q-layer.2": 1173.6083, "encoder_q-layer.3": 1418.5712, "encoder_q-layer.4": 1424.6377, "encoder_q-layer.5": 1465.9413, "encoder_q-layer.6": 1540.0216, "encoder_q-layer.7": 1534.8785, "encoder_q-layer.8": 1493.0798, "encoder_q-layer.9": 1231.464, "epoch": 0.67, "inbatch_neg_score": 0.4953, "inbatch_pos_score": 1.1992, "learning_rate": 1.7666666666666668e-05, "loss": 3.1899, "norm_diff": 0.0671, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2224.4909, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4963, "query_norm": 1.4655, "queue_k_norm": 1.5244, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1192, "sent_len_1": 66.8282, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6538, "stdk": 0.0493, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 68200 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.1973, "doc_norm": 1.5263, "encoder_q-embeddings": 1249.9409, "encoder_q-layer.0": 842.6589, "encoder_q-layer.1": 1006.8672, "encoder_q-layer.10": 1152.9519, "encoder_q-layer.11": 2538.8555, "encoder_q-layer.2": 1305.3854, "encoder_q-layer.3": 1383.4764, "encoder_q-layer.4": 1330.645, "encoder_q-layer.5": 1186.66, "encoder_q-layer.6": 1365.9683, "encoder_q-layer.7": 1453.8704, "encoder_q-layer.8": 1414.6389, "encoder_q-layer.9": 1214.7428, "epoch": 0.67, "inbatch_neg_score": 0.499, "inbatch_pos_score": 1.1973, "learning_rate": 1.761111111111111e-05, "loss": 3.1973, "norm_diff": 0.0434, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2111.081, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4993, "query_norm": 1.4829, "queue_k_norm": 1.5238, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0545, "sent_len_1": 66.7266, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3212, "stdk": 0.0491, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 68300 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.2053, "doc_norm": 1.5199, "encoder_q-embeddings": 943.1119, "encoder_q-layer.0": 608.234, "encoder_q-layer.1": 643.598, "encoder_q-layer.10": 1196.2977, "encoder_q-layer.11": 2632.3943, "encoder_q-layer.2": 710.9733, "encoder_q-layer.3": 766.9286, "encoder_q-layer.4": 845.1102, "encoder_q-layer.5": 899.4209, "encoder_q-layer.6": 1054.3529, "encoder_q-layer.7": 1171.8704, "encoder_q-layer.8": 1382.4926, "encoder_q-layer.9": 1216.3771, "epoch": 0.67, "inbatch_neg_score": 0.5007, "inbatch_pos_score": 1.1748, "learning_rate": 1.7555555555555556e-05, "loss": 3.2053, "norm_diff": 0.051, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1831.6731, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5, "query_norm": 1.4688, "queue_k_norm": 1.525, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9818, "sent_len_1": 66.6791, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6175, "stdk": 0.0487, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 68400 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.1983, "doc_norm": 1.5269, "encoder_q-embeddings": 8864.1426, "encoder_q-layer.0": 5931.0259, "encoder_q-layer.1": 6544.5781, "encoder_q-layer.10": 1318.2808, "encoder_q-layer.11": 2950.8213, "encoder_q-layer.2": 7724.0801, "encoder_q-layer.3": 8310.6914, "encoder_q-layer.4": 9652.8682, "encoder_q-layer.5": 10946.9561, "encoder_q-layer.6": 11348.334, "encoder_q-layer.7": 10398.1504, "encoder_q-layer.8": 6834.3618, "encoder_q-layer.9": 2022.1581, "epoch": 0.67, "inbatch_neg_score": 0.505, "inbatch_pos_score": 1.1914, "learning_rate": 1.75e-05, "loss": 3.1983, "norm_diff": 0.0471, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11600.2654, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5039, "query_norm": 1.4798, "queue_k_norm": 1.5252, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9837, "sent_len_1": 66.7062, "sent_max_len_0": 128.0, "sent_max_len_1": 185.6087, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 68500 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.2171, "doc_norm": 1.5337, "encoder_q-embeddings": 1002.3615, "encoder_q-layer.0": 659.6469, "encoder_q-layer.1": 698.1865, "encoder_q-layer.10": 1297.0663, "encoder_q-layer.11": 2786.0684, "encoder_q-layer.2": 794.8015, "encoder_q-layer.3": 860.4907, "encoder_q-layer.4": 928.982, "encoder_q-layer.5": 965.394, "encoder_q-layer.6": 1052.8906, "encoder_q-layer.7": 1222.5747, "encoder_q-layer.8": 1436.2412, "encoder_q-layer.9": 1314.4457, "epoch": 0.67, "inbatch_neg_score": 0.5033, "inbatch_pos_score": 1.1885, "learning_rate": 1.7444444444444448e-05, "loss": 3.2171, "norm_diff": 0.0629, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1903.6103, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5039, "query_norm": 1.4708, "queue_k_norm": 1.5251, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8307, "sent_len_1": 66.6683, "sent_max_len_0": 128.0, "sent_max_len_1": 186.545, "stdk": 0.0493, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 68600 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.1719, "doc_norm": 1.5251, "encoder_q-embeddings": 1027.3361, "encoder_q-layer.0": 662.5479, "encoder_q-layer.1": 698.8363, "encoder_q-layer.10": 1215.6469, "encoder_q-layer.11": 2827.7136, "encoder_q-layer.2": 813.6992, "encoder_q-layer.3": 841.1902, "encoder_q-layer.4": 925.2224, "encoder_q-layer.5": 950.0133, "encoder_q-layer.6": 1121.2323, "encoder_q-layer.7": 1293.2194, "encoder_q-layer.8": 1452.264, "encoder_q-layer.9": 1271.0665, "epoch": 0.67, "inbatch_neg_score": 0.506, "inbatch_pos_score": 1.1504, "learning_rate": 1.738888888888889e-05, "loss": 3.1719, "norm_diff": 0.0525, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1964.817, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5063, "query_norm": 1.4726, "queue_k_norm": 1.5256, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8819, "sent_len_1": 66.7434, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0863, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 68700 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.1721, "doc_norm": 1.5273, "encoder_q-embeddings": 1192.0872, "encoder_q-layer.0": 789.7538, "encoder_q-layer.1": 884.5311, "encoder_q-layer.10": 1150.0106, "encoder_q-layer.11": 2653.0049, "encoder_q-layer.2": 997.0336, "encoder_q-layer.3": 1078.4352, "encoder_q-layer.4": 1147.7887, "encoder_q-layer.5": 1237.3641, "encoder_q-layer.6": 1395.551, "encoder_q-layer.7": 1593.4426, "encoder_q-layer.8": 1762.6409, "encoder_q-layer.9": 1180.917, "epoch": 0.67, "inbatch_neg_score": 0.5113, "inbatch_pos_score": 1.1816, "learning_rate": 1.7333333333333336e-05, "loss": 3.1721, "norm_diff": 0.0477, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2122.4622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5112, "query_norm": 1.4796, "queue_k_norm": 1.5266, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8802, "sent_len_1": 66.7103, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6138, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 68800 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.1526, "doc_norm": 1.5216, "encoder_q-embeddings": 1042.5646, "encoder_q-layer.0": 674.2281, "encoder_q-layer.1": 714.0071, "encoder_q-layer.10": 1234.6494, "encoder_q-layer.11": 2611.9165, "encoder_q-layer.2": 818.027, "encoder_q-layer.3": 871.0714, "encoder_q-layer.4": 972.5583, "encoder_q-layer.5": 999.8356, "encoder_q-layer.6": 1152.4741, "encoder_q-layer.7": 1268.1172, "encoder_q-layer.8": 1442.5642, "encoder_q-layer.9": 1296.2848, "epoch": 0.67, "inbatch_neg_score": 0.5135, "inbatch_pos_score": 1.1758, "learning_rate": 1.7277777777777778e-05, "loss": 3.1526, "norm_diff": 0.0419, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1891.9781, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5132, "query_norm": 1.4797, "queue_k_norm": 1.5276, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9197, "sent_len_1": 66.8487, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2713, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 68900 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.1886, "doc_norm": 1.5246, "encoder_q-embeddings": 910.4836, "encoder_q-layer.0": 593.6825, "encoder_q-layer.1": 631.8937, "encoder_q-layer.10": 1175.0714, "encoder_q-layer.11": 2531.6089, "encoder_q-layer.2": 724.4766, "encoder_q-layer.3": 732.6523, "encoder_q-layer.4": 770.4797, "encoder_q-layer.5": 815.086, "encoder_q-layer.6": 993.97, "encoder_q-layer.7": 1197.702, "encoder_q-layer.8": 1356.6261, "encoder_q-layer.9": 1209.7686, "epoch": 0.67, "inbatch_neg_score": 0.5147, "inbatch_pos_score": 1.2197, "learning_rate": 1.7222222222222224e-05, "loss": 3.1886, "norm_diff": 0.0254, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1758.5526, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5137, "query_norm": 1.4991, "queue_k_norm": 1.5266, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1288, "sent_len_1": 66.6093, "sent_max_len_0": 128.0, "sent_max_len_1": 189.865, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 69000 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.204, "doc_norm": 1.5249, "encoder_q-embeddings": 2508.3132, "encoder_q-layer.0": 1695.6292, "encoder_q-layer.1": 1855.4183, "encoder_q-layer.10": 1257.9952, "encoder_q-layer.11": 2678.6606, "encoder_q-layer.2": 2158.4463, "encoder_q-layer.3": 2355.9932, "encoder_q-layer.4": 2813.8875, "encoder_q-layer.5": 3183.4236, "encoder_q-layer.6": 3355.6299, "encoder_q-layer.7": 3653.6331, "encoder_q-layer.8": 2581.7874, "encoder_q-layer.9": 1430.7206, "epoch": 0.67, "inbatch_neg_score": 0.5165, "inbatch_pos_score": 1.165, "learning_rate": 1.7166666666666666e-05, "loss": 3.204, "norm_diff": 0.0447, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3738.4383, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5161, "query_norm": 1.4802, "queue_k_norm": 1.5266, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9629, "sent_len_1": 66.8261, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1475, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 69100 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.1934, "doc_norm": 1.5278, "encoder_q-embeddings": 899.2848, "encoder_q-layer.0": 605.0774, "encoder_q-layer.1": 627.5343, "encoder_q-layer.10": 1210.9093, "encoder_q-layer.11": 2694.4802, "encoder_q-layer.2": 735.9399, "encoder_q-layer.3": 787.6987, "encoder_q-layer.4": 814.3285, "encoder_q-layer.5": 874.9877, "encoder_q-layer.6": 990.7902, "encoder_q-layer.7": 1214.6355, "encoder_q-layer.8": 1395.687, "encoder_q-layer.9": 1194.7007, "epoch": 0.68, "inbatch_neg_score": 0.5166, "inbatch_pos_score": 1.2148, "learning_rate": 1.7111111111111112e-05, "loss": 3.1934, "norm_diff": 0.0332, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1835.34, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5161, "query_norm": 1.4946, "queue_k_norm": 1.5271, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1926, "sent_len_1": 66.9804, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4437, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 69200 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.214, "doc_norm": 1.5235, "encoder_q-embeddings": 503.0397, "encoder_q-layer.0": 327.3791, "encoder_q-layer.1": 356.7862, "encoder_q-layer.10": 694.6742, "encoder_q-layer.11": 1456.4427, "encoder_q-layer.2": 401.3344, "encoder_q-layer.3": 425.9633, "encoder_q-layer.4": 444.0031, "encoder_q-layer.5": 452.0669, "encoder_q-layer.6": 541.3746, "encoder_q-layer.7": 617.7626, "encoder_q-layer.8": 687.8088, "encoder_q-layer.9": 623.4868, "epoch": 0.68, "inbatch_neg_score": 0.5223, "inbatch_pos_score": 1.1611, "learning_rate": 1.7055555555555554e-05, "loss": 3.214, "norm_diff": 0.0581, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 981.0727, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5229, "query_norm": 1.4653, "queue_k_norm": 1.5296, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.894, "sent_len_1": 66.5787, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8562, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 69300 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1995, "doc_norm": 1.5346, "encoder_q-embeddings": 474.9575, "encoder_q-layer.0": 319.4057, "encoder_q-layer.1": 333.4863, "encoder_q-layer.10": 588.4682, "encoder_q-layer.11": 1374.3879, "encoder_q-layer.2": 375.0822, "encoder_q-layer.3": 396.8225, "encoder_q-layer.4": 431.2452, "encoder_q-layer.5": 448.2468, "encoder_q-layer.6": 506.0506, "encoder_q-layer.7": 573.7466, "encoder_q-layer.8": 664.8459, "encoder_q-layer.9": 593.0918, "epoch": 0.68, "inbatch_neg_score": 0.5234, "inbatch_pos_score": 1.2217, "learning_rate": 1.7000000000000003e-05, "loss": 3.1995, "norm_diff": 0.0465, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 928.6183, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5234, "query_norm": 1.4881, "queue_k_norm": 1.5294, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7643, "sent_len_1": 66.4279, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4875, "stdk": 0.0491, "stdq": 0.046, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 69400 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.2027, "doc_norm": 1.5206, "encoder_q-embeddings": 500.6015, "encoder_q-layer.0": 331.5562, "encoder_q-layer.1": 346.3316, "encoder_q-layer.10": 645.4955, "encoder_q-layer.11": 1459.1542, "encoder_q-layer.2": 388.9737, "encoder_q-layer.3": 409.1449, "encoder_q-layer.4": 456.3963, "encoder_q-layer.5": 478.7166, "encoder_q-layer.6": 555.383, "encoder_q-layer.7": 668.4579, "encoder_q-layer.8": 745.601, "encoder_q-layer.9": 630.4598, "epoch": 0.68, "inbatch_neg_score": 0.5239, "inbatch_pos_score": 1.166, "learning_rate": 1.6944444444444446e-05, "loss": 3.2027, "norm_diff": 0.0634, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 978.7598, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5234, "query_norm": 1.4572, "queue_k_norm": 1.5292, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.947, "sent_len_1": 66.674, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4525, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 69500 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.187, "doc_norm": 1.534, "encoder_q-embeddings": 957.8161, "encoder_q-layer.0": 635.778, "encoder_q-layer.1": 737.5353, "encoder_q-layer.10": 669.6371, "encoder_q-layer.11": 1444.5139, "encoder_q-layer.2": 851.9166, "encoder_q-layer.3": 954.955, "encoder_q-layer.4": 1048.824, "encoder_q-layer.5": 1265.3629, "encoder_q-layer.6": 1153.6866, "encoder_q-layer.7": 1164.2694, "encoder_q-layer.8": 1065.024, "encoder_q-layer.9": 655.3311, "epoch": 0.68, "inbatch_neg_score": 0.5248, "inbatch_pos_score": 1.2227, "learning_rate": 1.688888888888889e-05, "loss": 3.187, "norm_diff": 0.0475, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1507.6127, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5239, "query_norm": 1.4865, "queue_k_norm": 1.5313, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9703, "sent_len_1": 66.5988, "sent_max_len_0": 128.0, "sent_max_len_1": 190.885, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 69600 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2032, "doc_norm": 1.5275, "encoder_q-embeddings": 736.4041, "encoder_q-layer.0": 485.0319, "encoder_q-layer.1": 562.1609, "encoder_q-layer.10": 625.3538, "encoder_q-layer.11": 1394.5526, "encoder_q-layer.2": 649.6409, "encoder_q-layer.3": 690.0334, "encoder_q-layer.4": 740.6035, "encoder_q-layer.5": 797.5995, "encoder_q-layer.6": 807.9946, "encoder_q-layer.7": 811.5798, "encoder_q-layer.8": 821.8193, "encoder_q-layer.9": 641.6797, "epoch": 0.68, "inbatch_neg_score": 0.5257, "inbatch_pos_score": 1.1729, "learning_rate": 1.6833333333333334e-05, "loss": 3.2032, "norm_diff": 0.0652, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1190.7113, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5259, "query_norm": 1.4623, "queue_k_norm": 1.5317, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9209, "sent_len_1": 66.925, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4462, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 69700 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.2133, "doc_norm": 1.5329, "encoder_q-embeddings": 911.0137, "encoder_q-layer.0": 607.2457, "encoder_q-layer.1": 725.8464, "encoder_q-layer.10": 645.4068, "encoder_q-layer.11": 1389.1228, "encoder_q-layer.2": 832.4066, "encoder_q-layer.3": 885.9224, "encoder_q-layer.4": 935.8718, "encoder_q-layer.5": 968.6967, "encoder_q-layer.6": 963.5327, "encoder_q-layer.7": 844.6526, "encoder_q-layer.8": 801.3793, "encoder_q-layer.9": 642.5611, "epoch": 0.68, "inbatch_neg_score": 0.527, "inbatch_pos_score": 1.1855, "learning_rate": 1.677777777777778e-05, "loss": 3.2133, "norm_diff": 0.0583, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1319.0473, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5269, "query_norm": 1.4746, "queue_k_norm": 1.5325, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0478, "sent_len_1": 66.9045, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9412, "stdk": 0.049, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 69800 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.2094, "doc_norm": 1.527, "encoder_q-embeddings": 645.037, "encoder_q-layer.0": 450.5551, "encoder_q-layer.1": 525.4329, "encoder_q-layer.10": 634.7334, "encoder_q-layer.11": 1472.4832, "encoder_q-layer.2": 629.398, "encoder_q-layer.3": 661.5403, "encoder_q-layer.4": 712.6177, "encoder_q-layer.5": 731.8926, "encoder_q-layer.6": 699.7377, "encoder_q-layer.7": 782.0722, "encoder_q-layer.8": 781.5135, "encoder_q-layer.9": 620.0786, "epoch": 0.68, "inbatch_neg_score": 0.5311, "inbatch_pos_score": 1.1543, "learning_rate": 1.6722222222222222e-05, "loss": 3.2094, "norm_diff": 0.0682, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1145.0715, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5298, "query_norm": 1.4588, "queue_k_norm": 1.5306, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9129, "sent_len_1": 66.537, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5938, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 69900 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1962, "doc_norm": 1.5293, "encoder_q-embeddings": 926.1125, "encoder_q-layer.0": 668.567, "encoder_q-layer.1": 737.9895, "encoder_q-layer.10": 619.5527, "encoder_q-layer.11": 1431.3833, "encoder_q-layer.2": 911.4559, "encoder_q-layer.3": 1058.8562, "encoder_q-layer.4": 1242.4004, "encoder_q-layer.5": 1168.8914, "encoder_q-layer.6": 1363.1268, "encoder_q-layer.7": 1389.6298, "encoder_q-layer.8": 921.5632, "encoder_q-layer.9": 686.6312, "epoch": 0.68, "inbatch_neg_score": 0.5307, "inbatch_pos_score": 1.2324, "learning_rate": 1.6666666666666667e-05, "loss": 3.1962, "norm_diff": 0.0277, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1596.3432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5293, "query_norm": 1.5017, "queue_k_norm": 1.5329, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8683, "sent_len_1": 66.8016, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8862, "stdk": 0.0488, "stdq": 0.0468, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70000 }, { "dev_runtime": 38.5045, "dev_samples_per_second": 1.662, "dev_steps_per_second": 0.026, "epoch": 0.68, "step": 70000, "test_accuracy": 94.59228515625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.31344741582870483, "test_doc_norm": 1.5325968265533447, "test_inbatch_neg_score": 0.9010056853294373, "test_inbatch_pos_score": 1.8864071369171143, "test_loss": 0.31344741582870483, "test_loss_align": 1.0511610507965088, "test_loss_unif": 3.477783679962158, "test_loss_unif_q@queue": 3.477783679962158, "test_norm_diff": 0.03206353634595871, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5119207501411438, "test_query_norm": 1.5646604299545288, "test_queue_k_norm": 1.5326721668243408, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04340749233961105, "test_stdq": 0.043892648071050644, "test_stdqueue_k": 0.04903004691004753, "test_stdqueue_q": 0.0 }, { "dev_runtime": 38.5045, "dev_samples_per_second": 1.662, "dev_steps_per_second": 0.026, "epoch": 0.68, "eval_beir-arguana_ndcg@10": 0.35943, "eval_beir-arguana_recall@10": 0.6138, "eval_beir-arguana_recall@100": 0.90967, "eval_beir-arguana_recall@20": 0.75036, "eval_beir-avg_ndcg@10": 0.36724750000000006, "eval_beir-avg_recall@10": 0.43349650000000006, "eval_beir-avg_recall@100": 0.6165515833333333, "eval_beir-avg_recall@20": 0.49311358333333344, "eval_beir-cqadupstack_ndcg@10": 0.26323500000000005, "eval_beir-cqadupstack_recall@10": 0.353265, "eval_beir-cqadupstack_recall@100": 0.5810358333333333, "eval_beir-cqadupstack_recall@20": 0.41737583333333333, "eval_beir-fiqa_ndcg@10": 0.20825, "eval_beir-fiqa_recall@10": 0.26453, "eval_beir-fiqa_recall@100": 0.52102, "eval_beir-fiqa_recall@20": 0.32776, "eval_beir-nfcorpus_ndcg@10": 0.29474, "eval_beir-nfcorpus_recall@10": 0.14505, "eval_beir-nfcorpus_recall@100": 0.27328, "eval_beir-nfcorpus_recall@20": 0.17854, "eval_beir-nq_ndcg@10": 0.26901, "eval_beir-nq_recall@10": 0.44457, "eval_beir-nq_recall@100": 0.78498, "eval_beir-nq_recall@20": 0.5624, "eval_beir-quora_ndcg@10": 0.82261, "eval_beir-quora_recall@10": 0.9152, "eval_beir-quora_recall@100": 0.98481, "eval_beir-quora_recall@20": 0.9495, "eval_beir-scidocs_ndcg@10": 0.14481, "eval_beir-scidocs_recall@10": 0.15133, "eval_beir-scidocs_recall@100": 0.35128, "eval_beir-scidocs_recall@20": 0.20582, "eval_beir-scifact_ndcg@10": 0.61668, "eval_beir-scifact_recall@10": 0.75483, "eval_beir-scifact_recall@100": 0.92422, "eval_beir-scifact_recall@20": 0.82567, "eval_beir-trec-covid_ndcg@10": 0.51145, "eval_beir-trec-covid_recall@10": 0.556, "eval_beir-trec-covid_recall@100": 0.404, "eval_beir-trec-covid_recall@20": 0.503, "eval_beir-webis-touche2020_ndcg@10": 0.18226, "eval_beir-webis-touche2020_recall@10": 0.13639, "eval_beir-webis-touche2020_recall@100": 0.43122, "eval_beir-webis-touche2020_recall@20": 0.21071, "eval_senteval-avg_sts": 0.7291967840801405, "eval_senteval-sickr_spearman": 0.6983638574985077, "eval_senteval-stsb_spearman": 0.7600297106617734, "step": 70000, "test_accuracy": 94.59228515625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.31344741582870483, "test_doc_norm": 1.5325968265533447, "test_inbatch_neg_score": 0.9010056853294373, "test_inbatch_pos_score": 1.8864071369171143, "test_loss": 0.31344741582870483, "test_loss_align": 1.0511610507965088, "test_loss_unif": 3.477783679962158, "test_loss_unif_q@queue": 3.477783679962158, "test_norm_diff": 0.03206353634595871, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5119207501411438, "test_query_norm": 1.5646604299545288, "test_queue_k_norm": 1.5326721668243408, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04340749233961105, "test_stdq": 0.043892648071050644, "test_stdqueue_k": 0.04903004691004753, "test_stdqueue_q": 0.0 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.2004, "doc_norm": 1.5316, "encoder_q-embeddings": 515.7128, "encoder_q-layer.0": 316.4032, "encoder_q-layer.1": 340.3427, "encoder_q-layer.10": 630.8476, "encoder_q-layer.11": 1408.4774, "encoder_q-layer.2": 391.0073, "encoder_q-layer.3": 421.0928, "encoder_q-layer.4": 476.8719, "encoder_q-layer.5": 488.3036, "encoder_q-layer.6": 560.9354, "encoder_q-layer.7": 676.2326, "encoder_q-layer.8": 735.8165, "encoder_q-layer.9": 667.2932, "epoch": 0.68, "inbatch_neg_score": 0.5279, "inbatch_pos_score": 1.2344, "learning_rate": 1.661111111111111e-05, "loss": 3.2004, "norm_diff": 0.0491, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 988.976, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5283, "query_norm": 1.4825, "queue_k_norm": 1.5351, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8184, "sent_len_1": 66.7368, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0112, "stdk": 0.0489, "stdq": 0.046, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 70100 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.1836, "doc_norm": 1.5292, "encoder_q-embeddings": 753.8774, "encoder_q-layer.0": 544.5876, "encoder_q-layer.1": 594.0212, "encoder_q-layer.10": 768.0432, "encoder_q-layer.11": 1553.374, "encoder_q-layer.2": 651.5072, "encoder_q-layer.3": 670.739, "encoder_q-layer.4": 729.2429, "encoder_q-layer.5": 723.2966, "encoder_q-layer.6": 802.7032, "encoder_q-layer.7": 893.1661, "encoder_q-layer.8": 891.2643, "encoder_q-layer.9": 679.1931, "epoch": 0.69, "inbatch_neg_score": 0.5323, "inbatch_pos_score": 1.1562, "learning_rate": 1.655555555555556e-05, "loss": 3.1836, "norm_diff": 0.0632, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1240.5823, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5303, "query_norm": 1.466, "queue_k_norm": 1.5333, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0653, "sent_len_1": 66.5055, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1925, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70200 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1986, "doc_norm": 1.5372, "encoder_q-embeddings": 2087.2517, "encoder_q-layer.0": 1472.8663, "encoder_q-layer.1": 1765.2465, "encoder_q-layer.10": 662.0661, "encoder_q-layer.11": 1416.5288, "encoder_q-layer.2": 1935.0385, "encoder_q-layer.3": 1905.8081, "encoder_q-layer.4": 1767.7029, "encoder_q-layer.5": 1724.814, "encoder_q-layer.6": 1685.4497, "encoder_q-layer.7": 1508.4396, "encoder_q-layer.8": 1086.5974, "encoder_q-layer.9": 698.4948, "epoch": 0.69, "inbatch_neg_score": 0.5315, "inbatch_pos_score": 1.209, "learning_rate": 1.65e-05, "loss": 3.1986, "norm_diff": 0.0705, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2377.9269, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5312, "query_norm": 1.4667, "queue_k_norm": 1.5358, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0075, "sent_len_1": 66.7621, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7587, "stdk": 0.0491, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 70300 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.198, "doc_norm": 1.5301, "encoder_q-embeddings": 602.7047, "encoder_q-layer.0": 416.8262, "encoder_q-layer.1": 463.1029, "encoder_q-layer.10": 637.6121, "encoder_q-layer.11": 1413.7876, "encoder_q-layer.2": 570.2717, "encoder_q-layer.3": 598.4441, "encoder_q-layer.4": 636.6793, "encoder_q-layer.5": 657.051, "encoder_q-layer.6": 694.2979, "encoder_q-layer.7": 741.4088, "encoder_q-layer.8": 746.6188, "encoder_q-layer.9": 657.2516, "epoch": 0.69, "inbatch_neg_score": 0.529, "inbatch_pos_score": 1.166, "learning_rate": 1.6444444444444447e-05, "loss": 3.198, "norm_diff": 0.0722, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1085.3205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5283, "query_norm": 1.4579, "queue_k_norm": 1.535, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0845, "sent_len_1": 66.9094, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5725, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 70400 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.201, "doc_norm": 1.5434, "encoder_q-embeddings": 484.9706, "encoder_q-layer.0": 324.8301, "encoder_q-layer.1": 341.9171, "encoder_q-layer.10": 622.782, "encoder_q-layer.11": 1391.5027, "encoder_q-layer.2": 389.8105, "encoder_q-layer.3": 404.7368, "encoder_q-layer.4": 426.1296, "encoder_q-layer.5": 487.1759, "encoder_q-layer.6": 520.5556, "encoder_q-layer.7": 590.7158, "encoder_q-layer.8": 677.4696, "encoder_q-layer.9": 602.5097, "epoch": 0.69, "inbatch_neg_score": 0.5314, "inbatch_pos_score": 1.2002, "learning_rate": 1.638888888888889e-05, "loss": 3.201, "norm_diff": 0.0669, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 944.3497, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5317, "query_norm": 1.4765, "queue_k_norm": 1.534, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0884, "sent_len_1": 66.872, "sent_max_len_0": 128.0, "sent_max_len_1": 189.39, "stdk": 0.0493, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70500 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 3.1956, "doc_norm": 1.5373, "encoder_q-embeddings": 590.171, "encoder_q-layer.0": 431.7177, "encoder_q-layer.1": 462.8841, "encoder_q-layer.10": 633.7925, "encoder_q-layer.11": 1346.0123, "encoder_q-layer.2": 559.2192, "encoder_q-layer.3": 647.4349, "encoder_q-layer.4": 762.725, "encoder_q-layer.5": 775.5848, "encoder_q-layer.6": 843.2466, "encoder_q-layer.7": 881.8356, "encoder_q-layer.8": 992.3448, "encoder_q-layer.9": 702.7228, "epoch": 0.69, "inbatch_neg_score": 0.5334, "inbatch_pos_score": 1.2471, "learning_rate": 1.6333333333333335e-05, "loss": 3.1956, "norm_diff": 0.0453, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1178.0942, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5322, "query_norm": 1.4921, "queue_k_norm": 1.5354, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.2054, "sent_len_1": 66.8945, "sent_max_len_0": 128.0, "sent_max_len_1": 190.995, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70600 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1921, "doc_norm": 1.5376, "encoder_q-embeddings": 779.2072, "encoder_q-layer.0": 515.3904, "encoder_q-layer.1": 607.6839, "encoder_q-layer.10": 640.9584, "encoder_q-layer.11": 1374.0524, "encoder_q-layer.2": 730.933, "encoder_q-layer.3": 816.5665, "encoder_q-layer.4": 935.2493, "encoder_q-layer.5": 937.9548, "encoder_q-layer.6": 1015.7632, "encoder_q-layer.7": 868.7376, "encoder_q-layer.8": 777.7844, "encoder_q-layer.9": 608.5214, "epoch": 0.69, "inbatch_neg_score": 0.533, "inbatch_pos_score": 1.2207, "learning_rate": 1.6277777777777777e-05, "loss": 3.1921, "norm_diff": 0.0619, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1249.0474, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5332, "query_norm": 1.4756, "queue_k_norm": 1.5357, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1486, "sent_len_1": 66.7209, "sent_max_len_0": 128.0, "sent_max_len_1": 189.27, "stdk": 0.049, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70700 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1811, "doc_norm": 1.5353, "encoder_q-embeddings": 482.6019, "encoder_q-layer.0": 315.6527, "encoder_q-layer.1": 339.0034, "encoder_q-layer.10": 567.5803, "encoder_q-layer.11": 1327.6178, "encoder_q-layer.2": 388.6182, "encoder_q-layer.3": 405.1554, "encoder_q-layer.4": 430.3468, "encoder_q-layer.5": 461.8609, "encoder_q-layer.6": 497.9529, "encoder_q-layer.7": 552.6882, "encoder_q-layer.8": 650.6743, "encoder_q-layer.9": 584.5176, "epoch": 0.69, "inbatch_neg_score": 0.5339, "inbatch_pos_score": 1.2471, "learning_rate": 1.6222222222222223e-05, "loss": 3.1811, "norm_diff": 0.0459, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 914.7534, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5342, "query_norm": 1.4894, "queue_k_norm": 1.5366, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0927, "sent_len_1": 66.6019, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1012, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 70800 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 3.1984, "doc_norm": 1.5394, "encoder_q-embeddings": 1905.5875, "encoder_q-layer.0": 1358.2888, "encoder_q-layer.1": 1617.8768, "encoder_q-layer.10": 615.5238, "encoder_q-layer.11": 1362.2583, "encoder_q-layer.2": 2080.8726, "encoder_q-layer.3": 2170.4014, "encoder_q-layer.4": 2459.9766, "encoder_q-layer.5": 2346.6816, "encoder_q-layer.6": 2742.5527, "encoder_q-layer.7": 2934.394, "encoder_q-layer.8": 2525.5825, "encoder_q-layer.9": 1101.1082, "epoch": 0.69, "inbatch_neg_score": 0.5317, "inbatch_pos_score": 1.2227, "learning_rate": 1.6166666666666665e-05, "loss": 3.1984, "norm_diff": 0.0625, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3128.8665, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5322, "query_norm": 1.4769, "queue_k_norm": 1.5356, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8584, "sent_len_1": 66.6066, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7688, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 70900 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1872, "doc_norm": 1.5316, "encoder_q-embeddings": 983.7383, "encoder_q-layer.0": 634.2096, "encoder_q-layer.1": 712.2656, "encoder_q-layer.10": 613.8813, "encoder_q-layer.11": 1377.4264, "encoder_q-layer.2": 826.2786, "encoder_q-layer.3": 766.6396, "encoder_q-layer.4": 821.8364, "encoder_q-layer.5": 828.7028, "encoder_q-layer.6": 811.2592, "encoder_q-layer.7": 824.8439, "encoder_q-layer.8": 860.9696, "encoder_q-layer.9": 644.8098, "epoch": 0.69, "inbatch_neg_score": 0.5338, "inbatch_pos_score": 1.2236, "learning_rate": 1.6111111111111115e-05, "loss": 3.1872, "norm_diff": 0.04, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1274.5047, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5337, "query_norm": 1.4916, "queue_k_norm": 1.5366, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9825, "sent_len_1": 66.6328, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6687, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 71000 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1744, "doc_norm": 1.5417, "encoder_q-embeddings": 529.0616, "encoder_q-layer.0": 347.1833, "encoder_q-layer.1": 378.8297, "encoder_q-layer.10": 578.5715, "encoder_q-layer.11": 1315.9069, "encoder_q-layer.2": 424.1558, "encoder_q-layer.3": 438.1785, "encoder_q-layer.4": 503.0419, "encoder_q-layer.5": 498.591, "encoder_q-layer.6": 539.8322, "encoder_q-layer.7": 613.2227, "encoder_q-layer.8": 670.2361, "encoder_q-layer.9": 614.5303, "epoch": 0.69, "inbatch_neg_score": 0.5341, "inbatch_pos_score": 1.2578, "learning_rate": 1.6055555555555557e-05, "loss": 3.1744, "norm_diff": 0.0386, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 925.3141, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5342, "query_norm": 1.5031, "queue_k_norm": 1.5388, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1864, "sent_len_1": 66.9593, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1575, "stdk": 0.0491, "stdq": 0.0467, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 71100 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.1854, "doc_norm": 1.5364, "encoder_q-embeddings": 948.5603, "encoder_q-layer.0": 637.6948, "encoder_q-layer.1": 697.204, "encoder_q-layer.10": 600.7193, "encoder_q-layer.11": 1381.3829, "encoder_q-layer.2": 807.0599, "encoder_q-layer.3": 833.1315, "encoder_q-layer.4": 818.1985, "encoder_q-layer.5": 784.3917, "encoder_q-layer.6": 832.0074, "encoder_q-layer.7": 837.0083, "encoder_q-layer.8": 881.6041, "encoder_q-layer.9": 638.1913, "epoch": 0.7, "inbatch_neg_score": 0.5323, "inbatch_pos_score": 1.2061, "learning_rate": 1.6000000000000003e-05, "loss": 3.1854, "norm_diff": 0.0726, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1278.6828, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5327, "query_norm": 1.4638, "queue_k_norm": 1.5377, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7364, "sent_len_1": 66.5152, "sent_max_len_0": 128.0, "sent_max_len_1": 191.325, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 71200 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1879, "doc_norm": 1.5438, "encoder_q-embeddings": 965.7256, "encoder_q-layer.0": 639.279, "encoder_q-layer.1": 690.4075, "encoder_q-layer.10": 1144.7428, "encoder_q-layer.11": 2476.3455, "encoder_q-layer.2": 769.8522, "encoder_q-layer.3": 833.4374, "encoder_q-layer.4": 894.1732, "encoder_q-layer.5": 877.6177, "encoder_q-layer.6": 999.1593, "encoder_q-layer.7": 1206.4763, "encoder_q-layer.8": 1383.5122, "encoder_q-layer.9": 1161.969, "epoch": 0.7, "inbatch_neg_score": 0.5353, "inbatch_pos_score": 1.2451, "learning_rate": 1.5944444444444445e-05, "loss": 3.1879, "norm_diff": 0.0546, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1760.3388, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5352, "query_norm": 1.4892, "queue_k_norm": 1.5374, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9131, "sent_len_1": 66.8312, "sent_max_len_0": 128.0, "sent_max_len_1": 191.305, "stdk": 0.0492, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 71300 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2038, "doc_norm": 1.5429, "encoder_q-embeddings": 1230.0942, "encoder_q-layer.0": 846.2477, "encoder_q-layer.1": 920.123, "encoder_q-layer.10": 1285.4615, "encoder_q-layer.11": 2907.335, "encoder_q-layer.2": 1009.33, "encoder_q-layer.3": 1061.0415, "encoder_q-layer.4": 1117.7844, "encoder_q-layer.5": 1154.4823, "encoder_q-layer.6": 1236.4139, "encoder_q-layer.7": 1381.965, "encoder_q-layer.8": 1449.3081, "encoder_q-layer.9": 1248.3583, "epoch": 0.7, "inbatch_neg_score": 0.5381, "inbatch_pos_score": 1.2188, "learning_rate": 1.588888888888889e-05, "loss": 3.2038, "norm_diff": 0.0436, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2130.7931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5381, "query_norm": 1.4993, "queue_k_norm": 1.5401, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.777, "sent_len_1": 66.6317, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3113, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 71400 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1672, "doc_norm": 1.5398, "encoder_q-embeddings": 1472.0751, "encoder_q-layer.0": 1082.6639, "encoder_q-layer.1": 1186.0702, "encoder_q-layer.10": 1159.8062, "encoder_q-layer.11": 2612.5283, "encoder_q-layer.2": 1289.652, "encoder_q-layer.3": 1372.4067, "encoder_q-layer.4": 1460.2654, "encoder_q-layer.5": 1465.7311, "encoder_q-layer.6": 1572.6449, "encoder_q-layer.7": 1735.0814, "encoder_q-layer.8": 1647.5488, "encoder_q-layer.9": 1189.8567, "epoch": 0.7, "inbatch_neg_score": 0.5372, "inbatch_pos_score": 1.2041, "learning_rate": 1.5833333333333333e-05, "loss": 3.1672, "norm_diff": 0.0662, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2326.7663, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5371, "query_norm": 1.4736, "queue_k_norm": 1.5407, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0597, "sent_len_1": 66.8194, "sent_max_len_0": 128.0, "sent_max_len_1": 188.64, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 71500 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 3.1634, "doc_norm": 1.5363, "encoder_q-embeddings": 932.7405, "encoder_q-layer.0": 601.7432, "encoder_q-layer.1": 632.7126, "encoder_q-layer.10": 1143.2515, "encoder_q-layer.11": 2442.6924, "encoder_q-layer.2": 722.0427, "encoder_q-layer.3": 749.0936, "encoder_q-layer.4": 802.7853, "encoder_q-layer.5": 851.2307, "encoder_q-layer.6": 969.6721, "encoder_q-layer.7": 1127.7356, "encoder_q-layer.8": 1316.3347, "encoder_q-layer.9": 1173.0963, "epoch": 0.7, "inbatch_neg_score": 0.5357, "inbatch_pos_score": 1.25, "learning_rate": 1.577777777777778e-05, "loss": 3.1634, "norm_diff": 0.0355, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1716.2884, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5347, "query_norm": 1.501, "queue_k_norm": 1.5397, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9166, "sent_len_1": 66.6555, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2212, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 71600 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.1794, "doc_norm": 1.5418, "encoder_q-embeddings": 1068.6145, "encoder_q-layer.0": 712.7808, "encoder_q-layer.1": 796.6774, "encoder_q-layer.10": 1298.5408, "encoder_q-layer.11": 2676.0078, "encoder_q-layer.2": 922.3505, "encoder_q-layer.3": 941.6613, "encoder_q-layer.4": 1009.9976, "encoder_q-layer.5": 1065.5171, "encoder_q-layer.6": 1115.4706, "encoder_q-layer.7": 1266.5148, "encoder_q-layer.8": 1414.9316, "encoder_q-layer.9": 1237.6165, "epoch": 0.7, "inbatch_neg_score": 0.5356, "inbatch_pos_score": 1.2324, "learning_rate": 1.5722222222222225e-05, "loss": 3.1794, "norm_diff": 0.0455, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1941.8542, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5361, "query_norm": 1.4962, "queue_k_norm": 1.5394, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.2844, "sent_len_1": 66.8745, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9762, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 71700 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.184, "doc_norm": 1.5333, "encoder_q-embeddings": 1026.6343, "encoder_q-layer.0": 658.447, "encoder_q-layer.1": 693.7869, "encoder_q-layer.10": 1191.3671, "encoder_q-layer.11": 2671.0923, "encoder_q-layer.2": 786.9291, "encoder_q-layer.3": 832.7916, "encoder_q-layer.4": 883.1344, "encoder_q-layer.5": 938.7551, "encoder_q-layer.6": 1018.2356, "encoder_q-layer.7": 1178.0758, "encoder_q-layer.8": 1322.8929, "encoder_q-layer.9": 1215.6926, "epoch": 0.7, "inbatch_neg_score": 0.5361, "inbatch_pos_score": 1.2285, "learning_rate": 1.5666666666666667e-05, "loss": 3.184, "norm_diff": 0.0349, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1856.7294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5356, "query_norm": 1.4984, "queue_k_norm": 1.5407, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8913, "sent_len_1": 66.8694, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9225, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 71800 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1745, "doc_norm": 1.5375, "encoder_q-embeddings": 1465.3086, "encoder_q-layer.0": 973.243, "encoder_q-layer.1": 1014.2963, "encoder_q-layer.10": 1291.4965, "encoder_q-layer.11": 2827.3767, "encoder_q-layer.2": 1180.2833, "encoder_q-layer.3": 1230.1434, "encoder_q-layer.4": 1249.8862, "encoder_q-layer.5": 1250.7499, "encoder_q-layer.6": 1341.8885, "encoder_q-layer.7": 1406.4265, "encoder_q-layer.8": 1354.6614, "encoder_q-layer.9": 1216.0846, "epoch": 0.7, "inbatch_neg_score": 0.5343, "inbatch_pos_score": 1.21, "learning_rate": 1.5611111111111113e-05, "loss": 3.1745, "norm_diff": 0.0586, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2207.5096, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5347, "query_norm": 1.4788, "queue_k_norm": 1.5395, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0393, "sent_len_1": 66.773, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8537, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 71900 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.195, "doc_norm": 1.5381, "encoder_q-embeddings": 1199.856, "encoder_q-layer.0": 803.529, "encoder_q-layer.1": 926.9163, "encoder_q-layer.10": 1224.8711, "encoder_q-layer.11": 2793.5403, "encoder_q-layer.2": 1055.5001, "encoder_q-layer.3": 1070.9636, "encoder_q-layer.4": 1096.0175, "encoder_q-layer.5": 1137.4442, "encoder_q-layer.6": 1260.6923, "encoder_q-layer.7": 1365.7776, "encoder_q-layer.8": 1600.8005, "encoder_q-layer.9": 1326.332, "epoch": 0.7, "inbatch_neg_score": 0.5357, "inbatch_pos_score": 1.168, "learning_rate": 1.5555555555555555e-05, "loss": 3.195, "norm_diff": 0.0672, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2088.2895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5356, "query_norm": 1.4709, "queue_k_norm": 1.5397, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1721, "sent_len_1": 67.0348, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4963, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 72000 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.2046, "doc_norm": 1.5439, "encoder_q-embeddings": 963.538, "encoder_q-layer.0": 624.4863, "encoder_q-layer.1": 638.0428, "encoder_q-layer.10": 1135.1042, "encoder_q-layer.11": 2671.5867, "encoder_q-layer.2": 710.6894, "encoder_q-layer.3": 751.0983, "encoder_q-layer.4": 801.1094, "encoder_q-layer.5": 879.6038, "encoder_q-layer.6": 959.3598, "encoder_q-layer.7": 1136.3459, "encoder_q-layer.8": 1297.0698, "encoder_q-layer.9": 1174.1433, "epoch": 0.7, "inbatch_neg_score": 0.5377, "inbatch_pos_score": 1.21, "learning_rate": 1.55e-05, "loss": 3.2046, "norm_diff": 0.0623, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1822.1386, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5376, "query_norm": 1.4816, "queue_k_norm": 1.5398, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.6019, "sent_len_1": 66.5596, "sent_max_len_0": 128.0, "sent_max_len_1": 190.405, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 72100 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1852, "doc_norm": 1.5405, "encoder_q-embeddings": 1020.0333, "encoder_q-layer.0": 679.1085, "encoder_q-layer.1": 703.8406, "encoder_q-layer.10": 1291.4701, "encoder_q-layer.11": 2686.6746, "encoder_q-layer.2": 763.9773, "encoder_q-layer.3": 829.0822, "encoder_q-layer.4": 838.7519, "encoder_q-layer.5": 897.1908, "encoder_q-layer.6": 1046.0139, "encoder_q-layer.7": 1153.1458, "encoder_q-layer.8": 1313.1279, "encoder_q-layer.9": 1235.2695, "epoch": 0.7, "inbatch_neg_score": 0.5417, "inbatch_pos_score": 1.2314, "learning_rate": 1.5444444444444446e-05, "loss": 3.1852, "norm_diff": 0.0421, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1858.986, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.542, "query_norm": 1.4983, "queue_k_norm": 1.5389, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9828, "sent_len_1": 66.6382, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1287, "stdk": 0.049, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 72200 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.1914, "doc_norm": 1.5408, "encoder_q-embeddings": 1782.5144, "encoder_q-layer.0": 1231.6986, "encoder_q-layer.1": 1430.2997, "encoder_q-layer.10": 1239.2374, "encoder_q-layer.11": 2794.6853, "encoder_q-layer.2": 1710.9081, "encoder_q-layer.3": 1857.2864, "encoder_q-layer.4": 1912.1774, "encoder_q-layer.5": 1713.2714, "encoder_q-layer.6": 1629.1248, "encoder_q-layer.7": 1530.9055, "encoder_q-layer.8": 1483.1257, "encoder_q-layer.9": 1230.3801, "epoch": 0.71, "inbatch_neg_score": 0.548, "inbatch_pos_score": 1.2266, "learning_rate": 1.538888888888889e-05, "loss": 3.1914, "norm_diff": 0.0527, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2577.6464, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5474, "query_norm": 1.488, "queue_k_norm": 1.5399, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0431, "sent_len_1": 66.6913, "sent_max_len_0": 128.0, "sent_max_len_1": 189.165, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 72300 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1735, "doc_norm": 1.5434, "encoder_q-embeddings": 1057.2794, "encoder_q-layer.0": 691.0752, "encoder_q-layer.1": 759.4733, "encoder_q-layer.10": 1326.1072, "encoder_q-layer.11": 2938.0374, "encoder_q-layer.2": 833.6299, "encoder_q-layer.3": 893.9981, "encoder_q-layer.4": 1011.2407, "encoder_q-layer.5": 1033.0869, "encoder_q-layer.6": 1146.9874, "encoder_q-layer.7": 1267.506, "encoder_q-layer.8": 1457.3942, "encoder_q-layer.9": 1284.1876, "epoch": 0.71, "inbatch_neg_score": 0.5515, "inbatch_pos_score": 1.2314, "learning_rate": 1.5333333333333334e-05, "loss": 3.1735, "norm_diff": 0.0415, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2040.0857, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5513, "query_norm": 1.5019, "queue_k_norm": 1.5396, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.725, "sent_len_1": 66.544, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9325, "stdk": 0.0491, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 72400 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1455, "doc_norm": 1.5367, "encoder_q-embeddings": 956.7857, "encoder_q-layer.0": 633.1376, "encoder_q-layer.1": 670.0874, "encoder_q-layer.10": 1200.874, "encoder_q-layer.11": 2664.6772, "encoder_q-layer.2": 768.853, "encoder_q-layer.3": 833.6395, "encoder_q-layer.4": 856.0808, "encoder_q-layer.5": 901.9776, "encoder_q-layer.6": 1063.5857, "encoder_q-layer.7": 1229.8795, "encoder_q-layer.8": 1332.2993, "encoder_q-layer.9": 1248.9725, "epoch": 0.71, "inbatch_neg_score": 0.5552, "inbatch_pos_score": 1.2441, "learning_rate": 1.527777777777778e-05, "loss": 3.1455, "norm_diff": 0.0286, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1835.1272, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5537, "query_norm": 1.5082, "queue_k_norm": 1.5422, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9084, "sent_len_1": 67.0096, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1425, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 72500 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1753, "doc_norm": 1.5413, "encoder_q-embeddings": 1366.1344, "encoder_q-layer.0": 897.4189, "encoder_q-layer.1": 1075.3844, "encoder_q-layer.10": 1400.9963, "encoder_q-layer.11": 2938.1736, "encoder_q-layer.2": 1327.1598, "encoder_q-layer.3": 1451.5048, "encoder_q-layer.4": 1668.7462, "encoder_q-layer.5": 1618.936, "encoder_q-layer.6": 1633.665, "encoder_q-layer.7": 1768.9148, "encoder_q-layer.8": 1680.6855, "encoder_q-layer.9": 1404.2271, "epoch": 0.71, "inbatch_neg_score": 0.5547, "inbatch_pos_score": 1.25, "learning_rate": 1.5222222222222224e-05, "loss": 3.1753, "norm_diff": 0.0354, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2447.2976, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5547, "query_norm": 1.506, "queue_k_norm": 1.542, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0173, "sent_len_1": 66.9014, "sent_max_len_0": 128.0, "sent_max_len_1": 190.795, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 72600 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 3.1559, "doc_norm": 1.5443, "encoder_q-embeddings": 1382.0852, "encoder_q-layer.0": 939.1196, "encoder_q-layer.1": 1137.7272, "encoder_q-layer.10": 1333.1063, "encoder_q-layer.11": 2741.1362, "encoder_q-layer.2": 1524.5566, "encoder_q-layer.3": 1738.2177, "encoder_q-layer.4": 1823.5594, "encoder_q-layer.5": 2199.7002, "encoder_q-layer.6": 2265.1729, "encoder_q-layer.7": 2492.9666, "encoder_q-layer.8": 1905.3916, "encoder_q-layer.9": 1399.0282, "epoch": 0.71, "inbatch_neg_score": 0.5571, "inbatch_pos_score": 1.2949, "learning_rate": 1.5166666666666668e-05, "loss": 3.1559, "norm_diff": 0.0386, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2696.5216, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5571, "query_norm": 1.5057, "queue_k_norm": 1.5424, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1229, "sent_len_1": 66.8711, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7912, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 72700 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1819, "doc_norm": 1.5321, "encoder_q-embeddings": 980.1714, "encoder_q-layer.0": 625.0706, "encoder_q-layer.1": 667.2089, "encoder_q-layer.10": 1319.1206, "encoder_q-layer.11": 3006.425, "encoder_q-layer.2": 754.1843, "encoder_q-layer.3": 788.7116, "encoder_q-layer.4": 851.3588, "encoder_q-layer.5": 860.2919, "encoder_q-layer.6": 1016.0336, "encoder_q-layer.7": 1230.1238, "encoder_q-layer.8": 1446.6981, "encoder_q-layer.9": 1284.7523, "epoch": 0.71, "inbatch_neg_score": 0.5581, "inbatch_pos_score": 1.2354, "learning_rate": 1.5111111111111112e-05, "loss": 3.1819, "norm_diff": 0.0353, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1915.6816, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5581, "query_norm": 1.4968, "queue_k_norm": 1.5426, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9767, "sent_len_1": 66.5692, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4313, "stdk": 0.0486, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 72800 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.1783, "doc_norm": 1.5412, "encoder_q-embeddings": 2256.7512, "encoder_q-layer.0": 1585.0378, "encoder_q-layer.1": 1908.9926, "encoder_q-layer.10": 1322.6052, "encoder_q-layer.11": 2923.0339, "encoder_q-layer.2": 2538.4495, "encoder_q-layer.3": 2642.4526, "encoder_q-layer.4": 2745.3403, "encoder_q-layer.5": 2429.6995, "encoder_q-layer.6": 2260.0715, "encoder_q-layer.7": 2308.7078, "encoder_q-layer.8": 1907.8826, "encoder_q-layer.9": 1301.8066, "epoch": 0.71, "inbatch_neg_score": 0.5588, "inbatch_pos_score": 1.2021, "learning_rate": 1.5055555555555556e-05, "loss": 3.1783, "norm_diff": 0.0608, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3374.6156, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5581, "query_norm": 1.4804, "queue_k_norm": 1.5438, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8843, "sent_len_1": 66.8672, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2862, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 72900 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.2022, "doc_norm": 1.5465, "encoder_q-embeddings": 1776.5383, "encoder_q-layer.0": 1266.0554, "encoder_q-layer.1": 1305.1581, "encoder_q-layer.10": 1312.3555, "encoder_q-layer.11": 2767.3096, "encoder_q-layer.2": 1568.8806, "encoder_q-layer.3": 1632.4921, "encoder_q-layer.4": 1744.3792, "encoder_q-layer.5": 1841.4377, "encoder_q-layer.6": 2040.5695, "encoder_q-layer.7": 1948.2245, "encoder_q-layer.8": 1877.6686, "encoder_q-layer.9": 1432.355, "epoch": 0.71, "inbatch_neg_score": 0.5564, "inbatch_pos_score": 1.2578, "learning_rate": 1.5e-05, "loss": 3.2022, "norm_diff": 0.0485, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2704.854, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5566, "query_norm": 1.4979, "queue_k_norm": 1.5436, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8898, "sent_len_1": 66.8137, "sent_max_len_0": 128.0, "sent_max_len_1": 188.96, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 73000 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1779, "doc_norm": 1.5503, "encoder_q-embeddings": 1320.5981, "encoder_q-layer.0": 889.962, "encoder_q-layer.1": 1072.604, "encoder_q-layer.10": 1201.1991, "encoder_q-layer.11": 2713.6729, "encoder_q-layer.2": 1251.2932, "encoder_q-layer.3": 1383.3165, "encoder_q-layer.4": 1532.2915, "encoder_q-layer.5": 1609.4688, "encoder_q-layer.6": 1776.6464, "encoder_q-layer.7": 1679.5671, "encoder_q-layer.8": 1540.7937, "encoder_q-layer.9": 1220.278, "epoch": 0.71, "inbatch_neg_score": 0.56, "inbatch_pos_score": 1.2617, "learning_rate": 1.4944444444444444e-05, "loss": 3.1779, "norm_diff": 0.0601, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2323.6512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5596, "query_norm": 1.4902, "queue_k_norm": 1.5448, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9164, "sent_len_1": 66.7928, "sent_max_len_0": 128.0, "sent_max_len_1": 192.6475, "stdk": 0.0492, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 73100 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.1727, "doc_norm": 1.5453, "encoder_q-embeddings": 1105.4956, "encoder_q-layer.0": 744.0076, "encoder_q-layer.1": 807.7679, "encoder_q-layer.10": 1526.9027, "encoder_q-layer.11": 3126.978, "encoder_q-layer.2": 914.2983, "encoder_q-layer.3": 958.4001, "encoder_q-layer.4": 1053.8214, "encoder_q-layer.5": 1141.5128, "encoder_q-layer.6": 1201.1501, "encoder_q-layer.7": 1379.9718, "encoder_q-layer.8": 1585.1754, "encoder_q-layer.9": 1428.0548, "epoch": 0.71, "inbatch_neg_score": 0.5596, "inbatch_pos_score": 1.2461, "learning_rate": 1.4888888888888888e-05, "loss": 3.1727, "norm_diff": 0.0692, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2154.9034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5605, "query_norm": 1.4761, "queue_k_norm": 1.542, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8558, "sent_len_1": 66.5763, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5513, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 73200 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.186, "doc_norm": 1.5456, "encoder_q-embeddings": 4775.9546, "encoder_q-layer.0": 3182.3481, "encoder_q-layer.1": 3373.8032, "encoder_q-layer.10": 2708.7681, "encoder_q-layer.11": 5716.1406, "encoder_q-layer.2": 3718.3706, "encoder_q-layer.3": 3984.1104, "encoder_q-layer.4": 4623.1948, "encoder_q-layer.5": 4692.0396, "encoder_q-layer.6": 4481.0938, "encoder_q-layer.7": 5305.48, "encoder_q-layer.8": 4883.3623, "encoder_q-layer.9": 2854.0037, "epoch": 0.72, "inbatch_neg_score": 0.5601, "inbatch_pos_score": 1.25, "learning_rate": 1.4833333333333336e-05, "loss": 3.186, "norm_diff": 0.0601, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6497.3825, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5596, "query_norm": 1.4855, "queue_k_norm": 1.5458, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9192, "sent_len_1": 66.7742, "sent_max_len_0": 128.0, "sent_max_len_1": 187.1188, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 73300 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.1479, "doc_norm": 1.5503, "encoder_q-embeddings": 2135.3315, "encoder_q-layer.0": 1393.318, "encoder_q-layer.1": 1564.1238, "encoder_q-layer.10": 2588.0894, "encoder_q-layer.11": 5638.6821, "encoder_q-layer.2": 1788.6798, "encoder_q-layer.3": 1910.8817, "encoder_q-layer.4": 2114.97, "encoder_q-layer.5": 2129.5303, "encoder_q-layer.6": 2323.5862, "encoder_q-layer.7": 2724.7512, "encoder_q-layer.8": 2914.6274, "encoder_q-layer.9": 2613.0667, "epoch": 0.72, "inbatch_neg_score": 0.5606, "inbatch_pos_score": 1.25, "learning_rate": 1.477777777777778e-05, "loss": 3.1479, "norm_diff": 0.0655, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4078.4348, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5605, "query_norm": 1.4848, "queue_k_norm": 1.5473, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.271, "sent_len_1": 66.9835, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3225, "stdk": 0.0492, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 73400 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 3.1767, "doc_norm": 1.5478, "encoder_q-embeddings": 2879.199, "encoder_q-layer.0": 1976.3889, "encoder_q-layer.1": 2309.5571, "encoder_q-layer.10": 2542.9678, "encoder_q-layer.11": 5406.4795, "encoder_q-layer.2": 2609.2537, "encoder_q-layer.3": 2722.1729, "encoder_q-layer.4": 3044.2354, "encoder_q-layer.5": 3183.6204, "encoder_q-layer.6": 3554.7119, "encoder_q-layer.7": 4151.7007, "encoder_q-layer.8": 3868.1228, "encoder_q-layer.9": 2644.3853, "epoch": 0.72, "inbatch_neg_score": 0.5606, "inbatch_pos_score": 1.2549, "learning_rate": 1.4722222222222224e-05, "loss": 3.1767, "norm_diff": 0.0644, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4946.6773, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5601, "query_norm": 1.4834, "queue_k_norm": 1.5486, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0967, "sent_len_1": 67.3128, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5687, "stdk": 0.049, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 73500 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1736, "doc_norm": 1.5508, "encoder_q-embeddings": 2637.3401, "encoder_q-layer.0": 1782.7277, "encoder_q-layer.1": 1913.7959, "encoder_q-layer.10": 2413.231, "encoder_q-layer.11": 5667.415, "encoder_q-layer.2": 2187.2498, "encoder_q-layer.3": 2260.1665, "encoder_q-layer.4": 2320.5464, "encoder_q-layer.5": 2527.384, "encoder_q-layer.6": 2613.2351, "encoder_q-layer.7": 2477.113, "encoder_q-layer.8": 2779.9819, "encoder_q-layer.9": 2473.6982, "epoch": 0.72, "inbatch_neg_score": 0.5631, "inbatch_pos_score": 1.2441, "learning_rate": 1.4666666666666668e-05, "loss": 3.1736, "norm_diff": 0.0525, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4267.7951, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.563, "query_norm": 1.4984, "queue_k_norm": 1.5462, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1046, "sent_len_1": 66.8505, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4338, "stdk": 0.0492, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 73600 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1873, "doc_norm": 1.5476, "encoder_q-embeddings": 9180.7822, "encoder_q-layer.0": 6834.5381, "encoder_q-layer.1": 9043.832, "encoder_q-layer.10": 2564.552, "encoder_q-layer.11": 5620.5962, "encoder_q-layer.2": 10681.5117, "encoder_q-layer.3": 10543.3027, "encoder_q-layer.4": 12531.3721, "encoder_q-layer.5": 12283.7656, "encoder_q-layer.6": 10125.8516, "encoder_q-layer.7": 6991.7002, "encoder_q-layer.8": 3606.9109, "encoder_q-layer.9": 2682.0381, "epoch": 0.72, "inbatch_neg_score": 0.5619, "inbatch_pos_score": 1.2529, "learning_rate": 1.4611111111111112e-05, "loss": 3.1873, "norm_diff": 0.0574, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12673.5548, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.562, "query_norm": 1.4902, "queue_k_norm": 1.5481, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.857, "sent_len_1": 66.7291, "sent_max_len_0": 128.0, "sent_max_len_1": 186.5062, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 73700 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1902, "doc_norm": 1.5461, "encoder_q-embeddings": 2945.0659, "encoder_q-layer.0": 1978.5757, "encoder_q-layer.1": 2218.1934, "encoder_q-layer.10": 2394.0613, "encoder_q-layer.11": 5491.7256, "encoder_q-layer.2": 2589.1882, "encoder_q-layer.3": 2821.7188, "encoder_q-layer.4": 3016.8611, "encoder_q-layer.5": 3632.8818, "encoder_q-layer.6": 3682.0234, "encoder_q-layer.7": 3763.4468, "encoder_q-layer.8": 3811.658, "encoder_q-layer.9": 2692.0144, "epoch": 0.72, "inbatch_neg_score": 0.5665, "inbatch_pos_score": 1.2256, "learning_rate": 1.4555555555555556e-05, "loss": 3.1902, "norm_diff": 0.0687, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4933.289, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5659, "query_norm": 1.4774, "queue_k_norm": 1.5482, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9966, "sent_len_1": 66.843, "sent_max_len_0": 128.0, "sent_max_len_1": 190.64, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 73800 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.154, "doc_norm": 1.5449, "encoder_q-embeddings": 1963.1678, "encoder_q-layer.0": 1311.3064, "encoder_q-layer.1": 1438.8348, "encoder_q-layer.10": 2547.9529, "encoder_q-layer.11": 5531.4458, "encoder_q-layer.2": 1706.9778, "encoder_q-layer.3": 1828.1493, "encoder_q-layer.4": 1976.5031, "encoder_q-layer.5": 2008.2637, "encoder_q-layer.6": 2200.2373, "encoder_q-layer.7": 2480.9993, "encoder_q-layer.8": 2916.833, "encoder_q-layer.9": 2450.3875, "epoch": 0.72, "inbatch_neg_score": 0.5692, "inbatch_pos_score": 1.2715, "learning_rate": 1.45e-05, "loss": 3.154, "norm_diff": 0.0325, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3831.5839, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5693, "query_norm": 1.5125, "queue_k_norm": 1.547, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1228, "sent_len_1": 66.5413, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9062, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 73900 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.165, "doc_norm": 1.551, "encoder_q-embeddings": 2045.7683, "encoder_q-layer.0": 1314.6774, "encoder_q-layer.1": 1404.4771, "encoder_q-layer.10": 2649.1448, "encoder_q-layer.11": 5719.7603, "encoder_q-layer.2": 1557.0284, "encoder_q-layer.3": 1630.3934, "encoder_q-layer.4": 1790.9945, "encoder_q-layer.5": 1846.1086, "encoder_q-layer.6": 2091.6504, "encoder_q-layer.7": 2533.0791, "encoder_q-layer.8": 2895.127, "encoder_q-layer.9": 2581.6055, "epoch": 0.72, "inbatch_neg_score": 0.5676, "inbatch_pos_score": 1.25, "learning_rate": 1.4444444444444444e-05, "loss": 3.165, "norm_diff": 0.0632, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3960.5231, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5674, "query_norm": 1.4877, "queue_k_norm": 1.5466, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8725, "sent_len_1": 66.7215, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6838, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 74000 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.1812, "doc_norm": 1.5499, "encoder_q-embeddings": 2057.2131, "encoder_q-layer.0": 1374.24, "encoder_q-layer.1": 1568.3915, "encoder_q-layer.10": 2744.8362, "encoder_q-layer.11": 5662.5532, "encoder_q-layer.2": 1720.4698, "encoder_q-layer.3": 1759.5288, "encoder_q-layer.4": 1868.1562, "encoder_q-layer.5": 2005.7847, "encoder_q-layer.6": 2241.0059, "encoder_q-layer.7": 2547.6477, "encoder_q-layer.8": 2844.1973, "encoder_q-layer.9": 2661.7791, "epoch": 0.72, "inbatch_neg_score": 0.5707, "inbatch_pos_score": 1.252, "learning_rate": 1.438888888888889e-05, "loss": 3.1812, "norm_diff": 0.0653, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3956.1758, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5713, "query_norm": 1.4846, "queue_k_norm": 1.5484, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9154, "sent_len_1": 66.8857, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7775, "stdk": 0.0491, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 74100 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.181, "doc_norm": 1.5441, "encoder_q-embeddings": 2132.2742, "encoder_q-layer.0": 1445.2349, "encoder_q-layer.1": 1569.752, "encoder_q-layer.10": 2605.0398, "encoder_q-layer.11": 5749.0854, "encoder_q-layer.2": 1793.3547, "encoder_q-layer.3": 1937.5059, "encoder_q-layer.4": 2240.4143, "encoder_q-layer.5": 2297.1946, "encoder_q-layer.6": 2547.5247, "encoder_q-layer.7": 2891.6091, "encoder_q-layer.8": 3394.3682, "encoder_q-layer.9": 2703.5581, "epoch": 0.72, "inbatch_neg_score": 0.5738, "inbatch_pos_score": 1.2354, "learning_rate": 1.4333333333333334e-05, "loss": 3.181, "norm_diff": 0.056, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4215.2319, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5742, "query_norm": 1.4882, "queue_k_norm": 1.5484, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0381, "sent_len_1": 66.7226, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6025, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 74200 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.1971, "doc_norm": 1.5513, "encoder_q-embeddings": 2507.4348, "encoder_q-layer.0": 1695.3728, "encoder_q-layer.1": 1903.7732, "encoder_q-layer.10": 2624.97, "encoder_q-layer.11": 5785.106, "encoder_q-layer.2": 2099.7253, "encoder_q-layer.3": 2184.3726, "encoder_q-layer.4": 2494.7854, "encoder_q-layer.5": 2576.0596, "encoder_q-layer.6": 2649.0005, "encoder_q-layer.7": 2879.0752, "encoder_q-layer.8": 3265.0156, "encoder_q-layer.9": 2762.2158, "epoch": 0.73, "inbatch_neg_score": 0.5766, "inbatch_pos_score": 1.2432, "learning_rate": 1.427777777777778e-05, "loss": 3.1971, "norm_diff": 0.0408, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4346.5225, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5762, "query_norm": 1.5105, "queue_k_norm": 1.5484, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8849, "sent_len_1": 66.6012, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5188, "stdk": 0.049, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 74300 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.17, "doc_norm": 1.5462, "encoder_q-embeddings": 2837.8813, "encoder_q-layer.0": 1806.2917, "encoder_q-layer.1": 2120.8926, "encoder_q-layer.10": 2584.8296, "encoder_q-layer.11": 5673.3184, "encoder_q-layer.2": 2476.1406, "encoder_q-layer.3": 2699.0688, "encoder_q-layer.4": 3133.8174, "encoder_q-layer.5": 3288.8823, "encoder_q-layer.6": 3508.032, "encoder_q-layer.7": 3580.135, "encoder_q-layer.8": 3341.1057, "encoder_q-layer.9": 2868.8608, "epoch": 0.73, "inbatch_neg_score": 0.5744, "inbatch_pos_score": 1.2822, "learning_rate": 1.4222222222222224e-05, "loss": 3.17, "norm_diff": 0.0153, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4754.1507, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5747, "query_norm": 1.531, "queue_k_norm": 1.5508, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.902, "sent_len_1": 66.6944, "sent_max_len_0": 128.0, "sent_max_len_1": 190.34, "stdk": 0.0488, "stdq": 0.0472, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 74400 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.1695, "doc_norm": 1.5415, "encoder_q-embeddings": 1935.1073, "encoder_q-layer.0": 1276.0426, "encoder_q-layer.1": 1338.9579, "encoder_q-layer.10": 2692.7441, "encoder_q-layer.11": 6060.7949, "encoder_q-layer.2": 1545.8142, "encoder_q-layer.3": 1607.6183, "encoder_q-layer.4": 1723.0992, "encoder_q-layer.5": 1825.5347, "encoder_q-layer.6": 2018.031, "encoder_q-layer.7": 2319.1597, "encoder_q-layer.8": 2848.0645, "encoder_q-layer.9": 2534.0303, "epoch": 0.73, "inbatch_neg_score": 0.5801, "inbatch_pos_score": 1.2451, "learning_rate": 1.4166666666666668e-05, "loss": 3.1695, "norm_diff": 0.0409, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3929.3242, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5811, "query_norm": 1.5006, "queue_k_norm": 1.5514, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7414, "sent_len_1": 66.5487, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1375, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 74500 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.1697, "doc_norm": 1.5583, "encoder_q-embeddings": 2496.3596, "encoder_q-layer.0": 1666.329, "encoder_q-layer.1": 1819.6046, "encoder_q-layer.10": 2319.0237, "encoder_q-layer.11": 5455.604, "encoder_q-layer.2": 2069.7515, "encoder_q-layer.3": 2318.2341, "encoder_q-layer.4": 2474.989, "encoder_q-layer.5": 2584.1257, "encoder_q-layer.6": 3024.9583, "encoder_q-layer.7": 3048.3445, "encoder_q-layer.8": 3196.4854, "encoder_q-layer.9": 2606.4011, "epoch": 0.73, "inbatch_neg_score": 0.5861, "inbatch_pos_score": 1.2402, "learning_rate": 1.4111111111111112e-05, "loss": 3.1697, "norm_diff": 0.0618, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4321.0793, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5845, "query_norm": 1.4964, "queue_k_norm": 1.5533, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0889, "sent_len_1": 66.9753, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2488, "stdk": 0.0493, "stdq": 0.0455, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 74600 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.1931, "doc_norm": 1.5499, "encoder_q-embeddings": 1985.882, "encoder_q-layer.0": 1307.681, "encoder_q-layer.1": 1392.5277, "encoder_q-layer.10": 2680.355, "encoder_q-layer.11": 5641.0732, "encoder_q-layer.2": 1575.7742, "encoder_q-layer.3": 1573.6864, "encoder_q-layer.4": 1702.7737, "encoder_q-layer.5": 1771.4996, "encoder_q-layer.6": 2004.7928, "encoder_q-layer.7": 2315.2092, "encoder_q-layer.8": 2916.8472, "encoder_q-layer.9": 2619.9878, "epoch": 0.73, "inbatch_neg_score": 0.5833, "inbatch_pos_score": 1.2666, "learning_rate": 1.4055555555555556e-05, "loss": 3.1931, "norm_diff": 0.0333, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3866.5448, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.583, "query_norm": 1.5166, "queue_k_norm": 1.5514, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7371, "sent_len_1": 66.6456, "sent_max_len_0": 128.0, "sent_max_len_1": 191.69, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 74700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1592, "doc_norm": 1.551, "encoder_q-embeddings": 2239.324, "encoder_q-layer.0": 1556.635, "encoder_q-layer.1": 1636.0442, "encoder_q-layer.10": 2552.6133, "encoder_q-layer.11": 5657.3203, "encoder_q-layer.2": 1904.9974, "encoder_q-layer.3": 2028.551, "encoder_q-layer.4": 2148.2825, "encoder_q-layer.5": 2350.7375, "encoder_q-layer.6": 2316.5293, "encoder_q-layer.7": 2761.5635, "encoder_q-layer.8": 3012.98, "encoder_q-layer.9": 2530.4187, "epoch": 0.73, "inbatch_neg_score": 0.588, "inbatch_pos_score": 1.2598, "learning_rate": 1.4000000000000001e-05, "loss": 3.1592, "norm_diff": 0.0511, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4113.5701, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5884, "query_norm": 1.4999, "queue_k_norm": 1.5532, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0023, "sent_len_1": 66.6952, "sent_max_len_0": 128.0, "sent_max_len_1": 187.695, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 74800 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 3.1696, "doc_norm": 1.5549, "encoder_q-embeddings": 2368.4841, "encoder_q-layer.0": 1494.657, "encoder_q-layer.1": 1646.26, "encoder_q-layer.10": 2376.613, "encoder_q-layer.11": 5147.5762, "encoder_q-layer.2": 1847.7101, "encoder_q-layer.3": 1933.8927, "encoder_q-layer.4": 2074.6189, "encoder_q-layer.5": 2270.6841, "encoder_q-layer.6": 2390.7878, "encoder_q-layer.7": 2545.4421, "encoder_q-layer.8": 2874.4338, "encoder_q-layer.9": 2361.8599, "epoch": 0.73, "inbatch_neg_score": 0.5893, "inbatch_pos_score": 1.3066, "learning_rate": 1.3944444444444446e-05, "loss": 3.1696, "norm_diff": 0.037, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3877.1805, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5894, "query_norm": 1.5179, "queue_k_norm": 1.5527, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0135, "sent_len_1": 66.6425, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9162, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 74900 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1903, "doc_norm": 1.5533, "encoder_q-embeddings": 5004.8408, "encoder_q-layer.0": 3453.0413, "encoder_q-layer.1": 4017.7893, "encoder_q-layer.10": 2403.5061, "encoder_q-layer.11": 5478.8193, "encoder_q-layer.2": 4905.5273, "encoder_q-layer.3": 4915.8271, "encoder_q-layer.4": 5839.5869, "encoder_q-layer.5": 6254.4795, "encoder_q-layer.6": 6969.1807, "encoder_q-layer.7": 6610.6826, "encoder_q-layer.8": 5360.8354, "encoder_q-layer.9": 2952.3828, "epoch": 0.73, "inbatch_neg_score": 0.5919, "inbatch_pos_score": 1.2812, "learning_rate": 1.388888888888889e-05, "loss": 3.1903, "norm_diff": 0.04, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7687.6038, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5923, "query_norm": 1.5133, "queue_k_norm": 1.5527, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8629, "sent_len_1": 66.6422, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5575, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 75000 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.17, "doc_norm": 1.5572, "encoder_q-embeddings": 2147.8879, "encoder_q-layer.0": 1399.3508, "encoder_q-layer.1": 1461.99, "encoder_q-layer.10": 2391.553, "encoder_q-layer.11": 5612.5649, "encoder_q-layer.2": 1660.1729, "encoder_q-layer.3": 1776.1384, "encoder_q-layer.4": 1902.4763, "encoder_q-layer.5": 2016.4128, "encoder_q-layer.6": 2282.6772, "encoder_q-layer.7": 2511.8677, "encoder_q-layer.8": 2949.3242, "encoder_q-layer.9": 2417.5254, "epoch": 0.73, "inbatch_neg_score": 0.5964, "inbatch_pos_score": 1.252, "learning_rate": 1.3833333333333334e-05, "loss": 3.17, "norm_diff": 0.0583, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3925.9457, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5962, "query_norm": 1.4989, "queue_k_norm": 1.5547, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0272, "sent_len_1": 66.7479, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6863, "stdk": 0.0491, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 75100 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.1739, "doc_norm": 1.5618, "encoder_q-embeddings": 3140.9307, "encoder_q-layer.0": 2115.4121, "encoder_q-layer.1": 2236.687, "encoder_q-layer.10": 2631.1016, "encoder_q-layer.11": 5286.0659, "encoder_q-layer.2": 2830.2698, "encoder_q-layer.3": 2934.0173, "encoder_q-layer.4": 3073.6458, "encoder_q-layer.5": 2904.4629, "encoder_q-layer.6": 3136.1614, "encoder_q-layer.7": 3234.2322, "encoder_q-layer.8": 3343.9675, "encoder_q-layer.9": 2486.5, "epoch": 0.73, "inbatch_neg_score": 0.5986, "inbatch_pos_score": 1.292, "learning_rate": 1.3777777777777778e-05, "loss": 3.1739, "norm_diff": 0.0437, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4732.9826, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5981, "query_norm": 1.518, "queue_k_norm": 1.5565, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0229, "sent_len_1": 66.8028, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2425, "stdk": 0.0493, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 75200 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1727, "doc_norm": 1.5585, "encoder_q-embeddings": 8200.0557, "encoder_q-layer.0": 5321.5244, "encoder_q-layer.1": 6126.0962, "encoder_q-layer.10": 4967.2534, "encoder_q-layer.11": 11056.999, "encoder_q-layer.2": 7834.8369, "encoder_q-layer.3": 8595.9062, "encoder_q-layer.4": 9220.5566, "encoder_q-layer.5": 9783.0049, "encoder_q-layer.6": 9581.6934, "encoder_q-layer.7": 9118.8604, "encoder_q-layer.8": 7900.5327, "encoder_q-layer.9": 5249.5459, "epoch": 0.74, "inbatch_neg_score": 0.599, "inbatch_pos_score": 1.2852, "learning_rate": 1.3722222222222222e-05, "loss": 3.1727, "norm_diff": 0.0479, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12299.9379, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5986, "query_norm": 1.5105, "queue_k_norm": 1.5559, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9723, "sent_len_1": 66.6506, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6662, "stdk": 0.0491, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 75300 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.1696, "doc_norm": 1.5537, "encoder_q-embeddings": 7141.873, "encoder_q-layer.0": 5027.9131, "encoder_q-layer.1": 6302.9639, "encoder_q-layer.10": 5169.4668, "encoder_q-layer.11": 11498.4814, "encoder_q-layer.2": 7170.8823, "encoder_q-layer.3": 7928.3135, "encoder_q-layer.4": 9416.3799, "encoder_q-layer.5": 11293.3242, "encoder_q-layer.6": 12675.4258, "encoder_q-layer.7": 11556.8008, "encoder_q-layer.8": 10069.3203, "encoder_q-layer.9": 6133.7568, "epoch": 0.74, "inbatch_neg_score": 0.6044, "inbatch_pos_score": 1.2559, "learning_rate": 1.3666666666666666e-05, "loss": 3.1696, "norm_diff": 0.0635, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13186.9737, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6045, "query_norm": 1.4901, "queue_k_norm": 1.5567, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8775, "sent_len_1": 66.6525, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1775, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 75400 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1824, "doc_norm": 1.5591, "encoder_q-embeddings": 2161.3477, "encoder_q-layer.0": 1406.6367, "encoder_q-layer.1": 1513.7592, "encoder_q-layer.10": 2438.5886, "encoder_q-layer.11": 5440.3457, "encoder_q-layer.2": 1725.8319, "encoder_q-layer.3": 1827.8616, "encoder_q-layer.4": 1977.649, "encoder_q-layer.5": 2059.0391, "encoder_q-layer.6": 2393.4873, "encoder_q-layer.7": 2644.2095, "encoder_q-layer.8": 2926.1238, "encoder_q-layer.9": 2574.6382, "epoch": 0.74, "inbatch_neg_score": 0.6037, "inbatch_pos_score": 1.293, "learning_rate": 1.3611111111111111e-05, "loss": 3.1824, "norm_diff": 0.0567, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4000.8086, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6035, "query_norm": 1.5024, "queue_k_norm": 1.5565, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0105, "sent_len_1": 66.7862, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4888, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 75500 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.1771, "doc_norm": 1.5598, "encoder_q-embeddings": 1857.0653, "encoder_q-layer.0": 1215.1851, "encoder_q-layer.1": 1275.8252, "encoder_q-layer.10": 2572.4373, "encoder_q-layer.11": 5751.29, "encoder_q-layer.2": 1458.463, "encoder_q-layer.3": 1598.4443, "encoder_q-layer.4": 1671.8458, "encoder_q-layer.5": 1858.9637, "encoder_q-layer.6": 2148.8347, "encoder_q-layer.7": 2624.7588, "encoder_q-layer.8": 2912.4653, "encoder_q-layer.9": 2556.8767, "epoch": 0.74, "inbatch_neg_score": 0.6091, "inbatch_pos_score": 1.291, "learning_rate": 1.3555555555555557e-05, "loss": 3.1771, "norm_diff": 0.0568, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3868.6348, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6089, "query_norm": 1.5029, "queue_k_norm": 1.5581, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8549, "sent_len_1": 66.786, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7312, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 75600 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1713, "doc_norm": 1.5585, "encoder_q-embeddings": 1465.4961, "encoder_q-layer.0": 950.424, "encoder_q-layer.1": 1143.38, "encoder_q-layer.10": 1334.3623, "encoder_q-layer.11": 2896.6162, "encoder_q-layer.2": 1433.5925, "encoder_q-layer.3": 1625.8075, "encoder_q-layer.4": 1970.6665, "encoder_q-layer.5": 2094.7869, "encoder_q-layer.6": 2106.1877, "encoder_q-layer.7": 2097.6926, "encoder_q-layer.8": 1815.2234, "encoder_q-layer.9": 1356.8777, "epoch": 0.74, "inbatch_neg_score": 0.6101, "inbatch_pos_score": 1.291, "learning_rate": 1.3500000000000001e-05, "loss": 3.1713, "norm_diff": 0.0589, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2711.3901, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6104, "query_norm": 1.4996, "queue_k_norm": 1.5562, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8917, "sent_len_1": 66.6638, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3775, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 75700 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1596, "doc_norm": 1.5591, "encoder_q-embeddings": 1024.3262, "encoder_q-layer.0": 665.9628, "encoder_q-layer.1": 750.1851, "encoder_q-layer.10": 1229.4347, "encoder_q-layer.11": 2781.9602, "encoder_q-layer.2": 868.2911, "encoder_q-layer.3": 929.0332, "encoder_q-layer.4": 972.0706, "encoder_q-layer.5": 1005.922, "encoder_q-layer.6": 1183.7092, "encoder_q-layer.7": 1261.6667, "encoder_q-layer.8": 1404.2211, "encoder_q-layer.9": 1231.803, "epoch": 0.74, "inbatch_neg_score": 0.6125, "inbatch_pos_score": 1.2998, "learning_rate": 1.3444444444444445e-05, "loss": 3.1596, "norm_diff": 0.043, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1979.1145, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6123, "query_norm": 1.516, "queue_k_norm": 1.5589, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0991, "sent_len_1": 66.7472, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0725, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 75800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1873, "doc_norm": 1.5561, "encoder_q-embeddings": 1263.7653, "encoder_q-layer.0": 836.9243, "encoder_q-layer.1": 946.2392, "encoder_q-layer.10": 1189.983, "encoder_q-layer.11": 2791.854, "encoder_q-layer.2": 1149.2076, "encoder_q-layer.3": 1228.8594, "encoder_q-layer.4": 1352.5887, "encoder_q-layer.5": 1363.0895, "encoder_q-layer.6": 1334.312, "encoder_q-layer.7": 1469.1436, "encoder_q-layer.8": 1539.1296, "encoder_q-layer.9": 1294.8751, "epoch": 0.74, "inbatch_neg_score": 0.6157, "inbatch_pos_score": 1.2773, "learning_rate": 1.338888888888889e-05, "loss": 3.1873, "norm_diff": 0.0539, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2203.7267, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6152, "query_norm": 1.5022, "queue_k_norm": 1.5598, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0633, "sent_len_1": 66.6232, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2775, "stdk": 0.0488, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 75900 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.1594, "doc_norm": 1.5561, "encoder_q-embeddings": 1208.7668, "encoder_q-layer.0": 823.3502, "encoder_q-layer.1": 924.2937, "encoder_q-layer.10": 1246.4893, "encoder_q-layer.11": 2757.6956, "encoder_q-layer.2": 1116.6431, "encoder_q-layer.3": 1149.9817, "encoder_q-layer.4": 1174.375, "encoder_q-layer.5": 1299.7758, "encoder_q-layer.6": 1390.3964, "encoder_q-layer.7": 1361.884, "encoder_q-layer.8": 1393.5601, "encoder_q-layer.9": 1195.6016, "epoch": 0.74, "inbatch_neg_score": 0.6189, "inbatch_pos_score": 1.2842, "learning_rate": 1.3333333333333333e-05, "loss": 3.1594, "norm_diff": 0.0583, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2128.0162, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6191, "query_norm": 1.4979, "queue_k_norm": 1.5617, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1851, "sent_len_1": 66.8962, "sent_max_len_0": 128.0, "sent_max_len_1": 189.95, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 76000 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.1434, "doc_norm": 1.555, "encoder_q-embeddings": 979.9028, "encoder_q-layer.0": 615.1132, "encoder_q-layer.1": 628.2957, "encoder_q-layer.10": 1432.2999, "encoder_q-layer.11": 2948.2053, "encoder_q-layer.2": 698.894, "encoder_q-layer.3": 736.2956, "encoder_q-layer.4": 781.2615, "encoder_q-layer.5": 810.0349, "encoder_q-layer.6": 954.7894, "encoder_q-layer.7": 1190.7322, "encoder_q-layer.8": 1446.9783, "encoder_q-layer.9": 1310.5179, "epoch": 0.74, "inbatch_neg_score": 0.6199, "inbatch_pos_score": 1.3086, "learning_rate": 1.3277777777777777e-05, "loss": 3.1434, "norm_diff": 0.0379, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1943.1603, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6201, "query_norm": 1.5171, "queue_k_norm": 1.5606, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9196, "sent_len_1": 66.8198, "sent_max_len_0": 128.0, "sent_max_len_1": 189.745, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 76100 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 3.1844, "doc_norm": 1.5602, "encoder_q-embeddings": 1052.3665, "encoder_q-layer.0": 684.2986, "encoder_q-layer.1": 736.3815, "encoder_q-layer.10": 1171.1365, "encoder_q-layer.11": 2734.2947, "encoder_q-layer.2": 840.4086, "encoder_q-layer.3": 888.8661, "encoder_q-layer.4": 967.0641, "encoder_q-layer.5": 980.3438, "encoder_q-layer.6": 1131.8843, "encoder_q-layer.7": 1283.1742, "encoder_q-layer.8": 1431.8995, "encoder_q-layer.9": 1268.5126, "epoch": 0.74, "inbatch_neg_score": 0.6199, "inbatch_pos_score": 1.3193, "learning_rate": 1.3222222222222221e-05, "loss": 3.1844, "norm_diff": 0.0479, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1929.2667, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6206, "query_norm": 1.5123, "queue_k_norm": 1.5616, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0538, "sent_len_1": 66.9808, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1375, "stdk": 0.0489, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 76200 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.1664, "doc_norm": 1.5579, "encoder_q-embeddings": 1019.8066, "encoder_q-layer.0": 670.029, "encoder_q-layer.1": 742.8884, "encoder_q-layer.10": 1163.4054, "encoder_q-layer.11": 2801.9749, "encoder_q-layer.2": 866.127, "encoder_q-layer.3": 898.3571, "encoder_q-layer.4": 982.1168, "encoder_q-layer.5": 1004.651, "encoder_q-layer.6": 1091.8055, "encoder_q-layer.7": 1276.4609, "encoder_q-layer.8": 1358.309, "encoder_q-layer.9": 1179.4246, "epoch": 0.74, "inbatch_neg_score": 0.6264, "inbatch_pos_score": 1.2725, "learning_rate": 1.3166666666666665e-05, "loss": 3.1664, "norm_diff": 0.0691, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1935.3609, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.626, "query_norm": 1.4888, "queue_k_norm": 1.5625, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0156, "sent_len_1": 66.5257, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2488, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 76300 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1997, "doc_norm": 1.5667, "encoder_q-embeddings": 1561.6692, "encoder_q-layer.0": 987.5603, "encoder_q-layer.1": 1127.7711, "encoder_q-layer.10": 1187.0997, "encoder_q-layer.11": 2831.792, "encoder_q-layer.2": 1293.6871, "encoder_q-layer.3": 1468.5544, "encoder_q-layer.4": 1614.1093, "encoder_q-layer.5": 1710.3893, "encoder_q-layer.6": 1564.3428, "encoder_q-layer.7": 1715.9398, "encoder_q-layer.8": 1661.1283, "encoder_q-layer.9": 1246.7933, "epoch": 0.75, "inbatch_neg_score": 0.6287, "inbatch_pos_score": 1.3086, "learning_rate": 1.3111111111111113e-05, "loss": 3.1997, "norm_diff": 0.0563, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2467.1987, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6289, "query_norm": 1.5104, "queue_k_norm": 1.5624, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1011, "sent_len_1": 66.8078, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8787, "stdk": 0.0491, "stdq": 0.0457, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 76400 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1786, "doc_norm": 1.5662, "encoder_q-embeddings": 998.1996, "encoder_q-layer.0": 663.5818, "encoder_q-layer.1": 728.3683, "encoder_q-layer.10": 1279.9879, "encoder_q-layer.11": 2878.512, "encoder_q-layer.2": 815.5669, "encoder_q-layer.3": 850.9406, "encoder_q-layer.4": 921.0896, "encoder_q-layer.5": 968.4423, "encoder_q-layer.6": 1082.563, "encoder_q-layer.7": 1295.6964, "encoder_q-layer.8": 1483.65, "encoder_q-layer.9": 1258.7258, "epoch": 0.75, "inbatch_neg_score": 0.6364, "inbatch_pos_score": 1.3271, "learning_rate": 1.3055555555555557e-05, "loss": 3.1786, "norm_diff": 0.0398, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1982.0136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6362, "query_norm": 1.5264, "queue_k_norm": 1.5645, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0673, "sent_len_1": 66.8818, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9275, "stdk": 0.0491, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 76500 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.1451, "doc_norm": 1.5626, "encoder_q-embeddings": 2521.4482, "encoder_q-layer.0": 1716.4009, "encoder_q-layer.1": 2008.2189, "encoder_q-layer.10": 1225.6606, "encoder_q-layer.11": 2662.0867, "encoder_q-layer.2": 2477.1814, "encoder_q-layer.3": 2657.448, "encoder_q-layer.4": 2895.3589, "encoder_q-layer.5": 2873.5422, "encoder_q-layer.6": 3298.5574, "encoder_q-layer.7": 3642.1726, "encoder_q-layer.8": 3657.6707, "encoder_q-layer.9": 2391.9124, "epoch": 0.75, "inbatch_neg_score": 0.6358, "inbatch_pos_score": 1.3457, "learning_rate": 1.3000000000000001e-05, "loss": 3.1451, "norm_diff": 0.0457, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4140.6427, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6353, "query_norm": 1.5169, "queue_k_norm": 1.5653, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.932, "sent_len_1": 66.8922, "sent_max_len_0": 128.0, "sent_max_len_1": 188.945, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 76600 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.182, "doc_norm": 1.5658, "encoder_q-embeddings": 1079.988, "encoder_q-layer.0": 761.9317, "encoder_q-layer.1": 779.9326, "encoder_q-layer.10": 1307.7113, "encoder_q-layer.11": 2982.3381, "encoder_q-layer.2": 864.5114, "encoder_q-layer.3": 897.7054, "encoder_q-layer.4": 976.7544, "encoder_q-layer.5": 1018.8286, "encoder_q-layer.6": 1080.2323, "encoder_q-layer.7": 1252.153, "encoder_q-layer.8": 1494.0441, "encoder_q-layer.9": 1280.1099, "epoch": 0.75, "inbatch_neg_score": 0.6409, "inbatch_pos_score": 1.292, "learning_rate": 1.2944444444444445e-05, "loss": 3.182, "norm_diff": 0.0659, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2040.1818, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6416, "query_norm": 1.4998, "queue_k_norm": 1.5668, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0921, "sent_len_1": 66.8219, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4462, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 76700 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1544, "doc_norm": 1.568, "encoder_q-embeddings": 953.4173, "encoder_q-layer.0": 614.189, "encoder_q-layer.1": 637.1108, "encoder_q-layer.10": 1246.2124, "encoder_q-layer.11": 2731.4817, "encoder_q-layer.2": 697.0402, "encoder_q-layer.3": 702.1325, "encoder_q-layer.4": 774.4581, "encoder_q-layer.5": 809.3976, "encoder_q-layer.6": 959.8362, "encoder_q-layer.7": 1175.7621, "encoder_q-layer.8": 1374.2388, "encoder_q-layer.9": 1276.4059, "epoch": 0.75, "inbatch_neg_score": 0.6431, "inbatch_pos_score": 1.332, "learning_rate": 1.2888888888888889e-05, "loss": 3.1544, "norm_diff": 0.0425, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1837.9876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6426, "query_norm": 1.5255, "queue_k_norm": 1.5656, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.7801, "sent_len_1": 66.5179, "sent_max_len_0": 128.0, "sent_max_len_1": 186.53, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 76800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.1514, "doc_norm": 1.5722, "encoder_q-embeddings": 920.1546, "encoder_q-layer.0": 636.2035, "encoder_q-layer.1": 671.1888, "encoder_q-layer.10": 1251.2987, "encoder_q-layer.11": 2801.0696, "encoder_q-layer.2": 750.7462, "encoder_q-layer.3": 800.4999, "encoder_q-layer.4": 864.861, "encoder_q-layer.5": 939.7134, "encoder_q-layer.6": 1051.2018, "encoder_q-layer.7": 1208.6002, "encoder_q-layer.8": 1386.0709, "encoder_q-layer.9": 1232.5308, "epoch": 0.75, "inbatch_neg_score": 0.6418, "inbatch_pos_score": 1.3076, "learning_rate": 1.2833333333333333e-05, "loss": 3.1514, "norm_diff": 0.0731, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1926.1918, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6411, "query_norm": 1.4991, "queue_k_norm": 1.5683, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0248, "sent_len_1": 66.8433, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9737, "stdk": 0.0492, "stdq": 0.045, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 76900 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.1544, "doc_norm": 1.5702, "encoder_q-embeddings": 978.2535, "encoder_q-layer.0": 665.0071, "encoder_q-layer.1": 728.5564, "encoder_q-layer.10": 1245.0553, "encoder_q-layer.11": 2656.2292, "encoder_q-layer.2": 824.3855, "encoder_q-layer.3": 902.9703, "encoder_q-layer.4": 990.199, "encoder_q-layer.5": 1053.0598, "encoder_q-layer.6": 1177.7483, "encoder_q-layer.7": 1301.2131, "encoder_q-layer.8": 1324.5337, "encoder_q-layer.9": 1125.8601, "epoch": 0.75, "inbatch_neg_score": 0.6434, "inbatch_pos_score": 1.3467, "learning_rate": 1.2777777777777777e-05, "loss": 3.1544, "norm_diff": 0.0566, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1942.416, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6431, "query_norm": 1.5136, "queue_k_norm": 1.569, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8247, "sent_len_1": 66.7542, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4575, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 77000 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.194, "doc_norm": 1.5681, "encoder_q-embeddings": 1028.9396, "encoder_q-layer.0": 659.2719, "encoder_q-layer.1": 710.2027, "encoder_q-layer.10": 1280.4664, "encoder_q-layer.11": 2707.8152, "encoder_q-layer.2": 815.1221, "encoder_q-layer.3": 912.9161, "encoder_q-layer.4": 974.3571, "encoder_q-layer.5": 1008.4022, "encoder_q-layer.6": 1139.8645, "encoder_q-layer.7": 1377.3804, "encoder_q-layer.8": 1621.3558, "encoder_q-layer.9": 1290.194, "epoch": 0.75, "inbatch_neg_score": 0.6412, "inbatch_pos_score": 1.3584, "learning_rate": 1.2722222222222221e-05, "loss": 3.194, "norm_diff": 0.0501, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1999.3117, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6426, "query_norm": 1.518, "queue_k_norm": 1.57, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0393, "sent_len_1": 66.7324, "sent_max_len_0": 128.0, "sent_max_len_1": 188.13, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 77100 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.1651, "doc_norm": 1.5754, "encoder_q-embeddings": 1763.7333, "encoder_q-layer.0": 1290.5435, "encoder_q-layer.1": 1551.7937, "encoder_q-layer.10": 1191.5386, "encoder_q-layer.11": 2762.1438, "encoder_q-layer.2": 1937.2554, "encoder_q-layer.3": 1981.0013, "encoder_q-layer.4": 2084.343, "encoder_q-layer.5": 2057.5503, "encoder_q-layer.6": 1967.8538, "encoder_q-layer.7": 1950.6313, "encoder_q-layer.8": 1891.0977, "encoder_q-layer.9": 1305.879, "epoch": 0.75, "inbatch_neg_score": 0.6443, "inbatch_pos_score": 1.3467, "learning_rate": 1.2666666666666668e-05, "loss": 3.1651, "norm_diff": 0.0566, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2853.2264, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.645, "query_norm": 1.5188, "queue_k_norm": 1.5683, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.968, "sent_len_1": 66.6408, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2887, "stdk": 0.0492, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 77200 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1585, "doc_norm": 1.5727, "encoder_q-embeddings": 951.5279, "encoder_q-layer.0": 643.2775, "encoder_q-layer.1": 674.7125, "encoder_q-layer.10": 1274.2588, "encoder_q-layer.11": 2827.6821, "encoder_q-layer.2": 748.3366, "encoder_q-layer.3": 770.0674, "encoder_q-layer.4": 816.3086, "encoder_q-layer.5": 872.8497, "encoder_q-layer.6": 968.0383, "encoder_q-layer.7": 1153.6809, "encoder_q-layer.8": 1302.3416, "encoder_q-layer.9": 1193.8947, "epoch": 0.75, "inbatch_neg_score": 0.6454, "inbatch_pos_score": 1.3096, "learning_rate": 1.2611111111111113e-05, "loss": 3.1585, "norm_diff": 0.0675, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1901.5899, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.645, "query_norm": 1.5052, "queue_k_norm": 1.5714, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9241, "sent_len_1": 66.8445, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4875, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 77300 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1616, "doc_norm": 1.564, "encoder_q-embeddings": 1947.1647, "encoder_q-layer.0": 1396.7216, "encoder_q-layer.1": 1495.7775, "encoder_q-layer.10": 1378.4695, "encoder_q-layer.11": 2915.2759, "encoder_q-layer.2": 1728.9567, "encoder_q-layer.3": 1682.5164, "encoder_q-layer.4": 1681.6162, "encoder_q-layer.5": 1637.8372, "encoder_q-layer.6": 1658.2618, "encoder_q-layer.7": 1751.0413, "encoder_q-layer.8": 1850.1465, "encoder_q-layer.9": 1401.1697, "epoch": 0.76, "inbatch_neg_score": 0.6453, "inbatch_pos_score": 1.3232, "learning_rate": 1.2555555555555557e-05, "loss": 3.1616, "norm_diff": 0.0534, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2706.5116, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6465, "query_norm": 1.5107, "queue_k_norm": 1.5711, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9462, "sent_len_1": 66.7849, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2438, "stdk": 0.0487, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 77400 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1303, "doc_norm": 1.5744, "encoder_q-embeddings": 992.5045, "encoder_q-layer.0": 648.1384, "encoder_q-layer.1": 705.2957, "encoder_q-layer.10": 1253.2083, "encoder_q-layer.11": 2636.2209, "encoder_q-layer.2": 803.4568, "encoder_q-layer.3": 829.6835, "encoder_q-layer.4": 897.5078, "encoder_q-layer.5": 966.9257, "encoder_q-layer.6": 1074.9622, "encoder_q-layer.7": 1197.9172, "encoder_q-layer.8": 1352.994, "encoder_q-layer.9": 1171.5554, "epoch": 0.76, "inbatch_neg_score": 0.6474, "inbatch_pos_score": 1.333, "learning_rate": 1.25e-05, "loss": 3.1303, "norm_diff": 0.0556, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1861.8785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6475, "query_norm": 1.5188, "queue_k_norm": 1.5702, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8754, "sent_len_1": 66.8516, "sent_max_len_0": 128.0, "sent_max_len_1": 192.355, "stdk": 0.0491, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 77500 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.1622, "doc_norm": 1.5727, "encoder_q-embeddings": 1128.3977, "encoder_q-layer.0": 743.1681, "encoder_q-layer.1": 820.5738, "encoder_q-layer.10": 1285.3599, "encoder_q-layer.11": 2791.366, "encoder_q-layer.2": 902.5099, "encoder_q-layer.3": 966.1047, "encoder_q-layer.4": 1054.7731, "encoder_q-layer.5": 1127.0139, "encoder_q-layer.6": 1197.5981, "encoder_q-layer.7": 1330.8566, "encoder_q-layer.8": 1467.4712, "encoder_q-layer.9": 1211.824, "epoch": 0.76, "inbatch_neg_score": 0.6466, "inbatch_pos_score": 1.3359, "learning_rate": 1.2444444444444445e-05, "loss": 3.1622, "norm_diff": 0.0543, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2042.7284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6475, "query_norm": 1.5184, "queue_k_norm": 1.573, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.86, "sent_len_1": 67.0601, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2537, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 77600 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.1993, "doc_norm": 1.5724, "encoder_q-embeddings": 2983.4451, "encoder_q-layer.0": 1969.2716, "encoder_q-layer.1": 2097.3706, "encoder_q-layer.10": 2454.803, "encoder_q-layer.11": 5525.9941, "encoder_q-layer.2": 2427.1655, "encoder_q-layer.3": 2479.3674, "encoder_q-layer.4": 2591.0503, "encoder_q-layer.5": 2572.616, "encoder_q-layer.6": 2590.6553, "encoder_q-layer.7": 2685.2559, "encoder_q-layer.8": 2825.4033, "encoder_q-layer.9": 2398.1396, "epoch": 0.76, "inbatch_neg_score": 0.6481, "inbatch_pos_score": 1.3164, "learning_rate": 1.238888888888889e-05, "loss": 3.1993, "norm_diff": 0.0661, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4443.6066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6489, "query_norm": 1.5064, "queue_k_norm": 1.571, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.1373, "sent_len_1": 66.6674, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7463, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 77700 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1545, "doc_norm": 1.5742, "encoder_q-embeddings": 2905.5227, "encoder_q-layer.0": 1858.0099, "encoder_q-layer.1": 2136.9873, "encoder_q-layer.10": 2727.5796, "encoder_q-layer.11": 5765.6577, "encoder_q-layer.2": 2453.8853, "encoder_q-layer.3": 2376.6465, "encoder_q-layer.4": 2489.5022, "encoder_q-layer.5": 2440.0747, "encoder_q-layer.6": 2643.6157, "encoder_q-layer.7": 3013.2292, "encoder_q-layer.8": 3129.5364, "encoder_q-layer.9": 2597.2, "epoch": 0.76, "inbatch_neg_score": 0.6512, "inbatch_pos_score": 1.332, "learning_rate": 1.2333333333333334e-05, "loss": 3.1545, "norm_diff": 0.0483, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4546.5951, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6514, "query_norm": 1.5258, "queue_k_norm": 1.5733, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0055, "sent_len_1": 66.6709, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1525, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 77800 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1658, "doc_norm": 1.5761, "encoder_q-embeddings": 2336.7141, "encoder_q-layer.0": 1639.288, "encoder_q-layer.1": 1768.2217, "encoder_q-layer.10": 2591.6689, "encoder_q-layer.11": 5784.9277, "encoder_q-layer.2": 2088.874, "encoder_q-layer.3": 2149.7478, "encoder_q-layer.4": 2287.0979, "encoder_q-layer.5": 2361.519, "encoder_q-layer.6": 2598.3044, "encoder_q-layer.7": 2906.2161, "encoder_q-layer.8": 3163.4297, "encoder_q-layer.9": 2445.865, "epoch": 0.76, "inbatch_neg_score": 0.6536, "inbatch_pos_score": 1.3203, "learning_rate": 1.2277777777777778e-05, "loss": 3.1658, "norm_diff": 0.0626, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4278.7935, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6528, "query_norm": 1.5135, "queue_k_norm": 1.5744, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0081, "sent_len_1": 66.628, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7463, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 77900 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.1611, "doc_norm": 1.572, "encoder_q-embeddings": 2287.3555, "encoder_q-layer.0": 1506.9319, "encoder_q-layer.1": 1710.493, "encoder_q-layer.10": 2425.9717, "encoder_q-layer.11": 5566.875, "encoder_q-layer.2": 2039.054, "encoder_q-layer.3": 2100.9792, "encoder_q-layer.4": 2250.8562, "encoder_q-layer.5": 2275.2554, "encoder_q-layer.6": 2489.3774, "encoder_q-layer.7": 2685.1077, "encoder_q-layer.8": 2994.9541, "encoder_q-layer.9": 2495.335, "epoch": 0.76, "inbatch_neg_score": 0.653, "inbatch_pos_score": 1.3252, "learning_rate": 1.2222222222222222e-05, "loss": 3.1611, "norm_diff": 0.0537, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4168.7797, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6528, "query_norm": 1.5184, "queue_k_norm": 1.5759, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0234, "sent_len_1": 66.8237, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4338, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 78000 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.1382, "doc_norm": 1.5787, "encoder_q-embeddings": 2228.3474, "encoder_q-layer.0": 1515.8003, "encoder_q-layer.1": 1756.9882, "encoder_q-layer.10": 2447.3245, "encoder_q-layer.11": 5217.5049, "encoder_q-layer.2": 2045.1913, "encoder_q-layer.3": 2120.0552, "encoder_q-layer.4": 2320.0547, "encoder_q-layer.5": 2623.645, "encoder_q-layer.6": 3019.3906, "encoder_q-layer.7": 3560.823, "encoder_q-layer.8": 3676.324, "encoder_q-layer.9": 2557.2544, "epoch": 0.76, "inbatch_neg_score": 0.6552, "inbatch_pos_score": 1.3516, "learning_rate": 1.2166666666666668e-05, "loss": 3.1382, "norm_diff": 0.0565, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4311.9979, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6538, "query_norm": 1.5222, "queue_k_norm": 1.5757, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9969, "sent_len_1": 66.7135, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6275, "stdk": 0.0491, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78100 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.1851, "doc_norm": 1.5771, "encoder_q-embeddings": 2001.933, "encoder_q-layer.0": 1379.3608, "encoder_q-layer.1": 1509.239, "encoder_q-layer.10": 2569.6267, "encoder_q-layer.11": 5762.7783, "encoder_q-layer.2": 1702.9415, "encoder_q-layer.3": 1790.7684, "encoder_q-layer.4": 2021.3563, "encoder_q-layer.5": 2070.2349, "encoder_q-layer.6": 2319.6672, "encoder_q-layer.7": 2593.9941, "encoder_q-layer.8": 2971.6345, "encoder_q-layer.9": 2667.7646, "epoch": 0.76, "inbatch_neg_score": 0.6507, "inbatch_pos_score": 1.3281, "learning_rate": 1.2111111111111112e-05, "loss": 3.1851, "norm_diff": 0.0628, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4009.6781, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6504, "query_norm": 1.5143, "queue_k_norm": 1.5753, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9702, "sent_len_1": 66.7414, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5325, "stdk": 0.0491, "stdq": 0.0457, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78200 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.1756, "doc_norm": 1.5745, "encoder_q-embeddings": 2377.8452, "encoder_q-layer.0": 1617.1125, "encoder_q-layer.1": 1780.6348, "encoder_q-layer.10": 2662.5571, "encoder_q-layer.11": 6093.249, "encoder_q-layer.2": 2113.3198, "encoder_q-layer.3": 2149.0605, "encoder_q-layer.4": 2332.6001, "encoder_q-layer.5": 2528.7229, "encoder_q-layer.6": 2742.5955, "encoder_q-layer.7": 3055.9192, "encoder_q-layer.8": 3592.4653, "encoder_q-layer.9": 2853.9546, "epoch": 0.76, "inbatch_neg_score": 0.6531, "inbatch_pos_score": 1.3145, "learning_rate": 1.2055555555555556e-05, "loss": 3.1756, "norm_diff": 0.061, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4510.3225, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6523, "query_norm": 1.5134, "queue_k_norm": 1.5751, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.2194, "sent_len_1": 66.7451, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5475, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1533, "doc_norm": 1.5779, "encoder_q-embeddings": 2259.5396, "encoder_q-layer.0": 1844.7709, "encoder_q-layer.1": 1958.3999, "encoder_q-layer.10": 1224.0638, "encoder_q-layer.11": 2880.1111, "encoder_q-layer.2": 2337.1819, "encoder_q-layer.3": 2105.0728, "encoder_q-layer.4": 2572.5725, "encoder_q-layer.5": 2348.8123, "encoder_q-layer.6": 1711.4771, "encoder_q-layer.7": 1675.2809, "encoder_q-layer.8": 1565.0474, "encoder_q-layer.9": 1215.0515, "epoch": 0.77, "inbatch_neg_score": 0.653, "inbatch_pos_score": 1.3203, "learning_rate": 1.2e-05, "loss": 3.1533, "norm_diff": 0.0733, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3107.1675, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6538, "query_norm": 1.5047, "queue_k_norm": 1.5767, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9691, "sent_len_1": 66.774, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6188, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78400 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.153, "doc_norm": 1.5662, "encoder_q-embeddings": 2033.8506, "encoder_q-layer.0": 1383.6649, "encoder_q-layer.1": 1498.9988, "encoder_q-layer.10": 1268.4584, "encoder_q-layer.11": 2806.7173, "encoder_q-layer.2": 1806.3229, "encoder_q-layer.3": 1978.803, "encoder_q-layer.4": 2223.6487, "encoder_q-layer.5": 2357.6406, "encoder_q-layer.6": 2209.5381, "encoder_q-layer.7": 2322.3784, "encoder_q-layer.8": 1981.8619, "encoder_q-layer.9": 1349.6571, "epoch": 0.77, "inbatch_neg_score": 0.6534, "inbatch_pos_score": 1.3242, "learning_rate": 1.1944444444444446e-05, "loss": 3.153, "norm_diff": 0.0571, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2993.7936, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6533, "query_norm": 1.5091, "queue_k_norm": 1.5771, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.962, "sent_len_1": 66.9877, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5925, "stdk": 0.0486, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78500 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.1786, "doc_norm": 1.5793, "encoder_q-embeddings": 1125.6294, "encoder_q-layer.0": 758.6716, "encoder_q-layer.1": 776.98, "encoder_q-layer.10": 1252.7545, "encoder_q-layer.11": 2922.0474, "encoder_q-layer.2": 849.4283, "encoder_q-layer.3": 885.3347, "encoder_q-layer.4": 953.5569, "encoder_q-layer.5": 952.3735, "encoder_q-layer.6": 1081.7649, "encoder_q-layer.7": 1230.0795, "encoder_q-layer.8": 1405.9008, "encoder_q-layer.9": 1227.3896, "epoch": 0.77, "inbatch_neg_score": 0.6531, "inbatch_pos_score": 1.2988, "learning_rate": 1.188888888888889e-05, "loss": 3.1786, "norm_diff": 0.0735, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2029.0651, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6533, "query_norm": 1.5058, "queue_k_norm": 1.5775, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9991, "sent_len_1": 66.9956, "sent_max_len_0": 128.0, "sent_max_len_1": 190.595, "stdk": 0.0491, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78600 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.1424, "doc_norm": 1.5825, "encoder_q-embeddings": 962.7748, "encoder_q-layer.0": 634.7428, "encoder_q-layer.1": 682.8058, "encoder_q-layer.10": 1264.923, "encoder_q-layer.11": 2713.3135, "encoder_q-layer.2": 794.4268, "encoder_q-layer.3": 802.4898, "encoder_q-layer.4": 876.6925, "encoder_q-layer.5": 974.6038, "encoder_q-layer.6": 1051.3634, "encoder_q-layer.7": 1261.2944, "encoder_q-layer.8": 1409.4144, "encoder_q-layer.9": 1226.7225, "epoch": 0.77, "inbatch_neg_score": 0.6533, "inbatch_pos_score": 1.3506, "learning_rate": 1.1833333333333334e-05, "loss": 3.1424, "norm_diff": 0.0634, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1923.3459, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6543, "query_norm": 1.5192, "queue_k_norm": 1.5764, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.2499, "sent_len_1": 66.8894, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3738, "stdk": 0.0492, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78700 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 3.1652, "doc_norm": 1.5769, "encoder_q-embeddings": 972.7644, "encoder_q-layer.0": 629.7687, "encoder_q-layer.1": 680.2618, "encoder_q-layer.10": 1366.9229, "encoder_q-layer.11": 2709.4756, "encoder_q-layer.2": 765.659, "encoder_q-layer.3": 798.9073, "encoder_q-layer.4": 843.3757, "encoder_q-layer.5": 895.2687, "encoder_q-layer.6": 971.7783, "encoder_q-layer.7": 1137.8065, "encoder_q-layer.8": 1402.8965, "encoder_q-layer.9": 1221.605, "epoch": 0.77, "inbatch_neg_score": 0.6527, "inbatch_pos_score": 1.3555, "learning_rate": 1.1777777777777778e-05, "loss": 3.1652, "norm_diff": 0.0548, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1880.6348, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6533, "query_norm": 1.5221, "queue_k_norm": 1.5782, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9389, "sent_len_1": 66.7439, "sent_max_len_0": 128.0, "sent_max_len_1": 192.14, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78800 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.1705, "doc_norm": 1.5787, "encoder_q-embeddings": 1423.2039, "encoder_q-layer.0": 936.6588, "encoder_q-layer.1": 994.168, "encoder_q-layer.10": 1256.6857, "encoder_q-layer.11": 2751.1064, "encoder_q-layer.2": 1168.0264, "encoder_q-layer.3": 1271.1741, "encoder_q-layer.4": 1406.915, "encoder_q-layer.5": 1445.5547, "encoder_q-layer.6": 1475.8647, "encoder_q-layer.7": 1676.7598, "encoder_q-layer.8": 1656.8119, "encoder_q-layer.9": 1295.1346, "epoch": 0.77, "inbatch_neg_score": 0.6532, "inbatch_pos_score": 1.3154, "learning_rate": 1.1722222222222224e-05, "loss": 3.1705, "norm_diff": 0.0744, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2291.6989, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6533, "query_norm": 1.5043, "queue_k_norm": 1.5777, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8341, "sent_len_1": 66.5852, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9787, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 78900 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.1668, "doc_norm": 1.5835, "encoder_q-embeddings": 952.2517, "encoder_q-layer.0": 617.5647, "encoder_q-layer.1": 671.2285, "encoder_q-layer.10": 1265.5879, "encoder_q-layer.11": 2834.821, "encoder_q-layer.2": 754.6312, "encoder_q-layer.3": 788.4008, "encoder_q-layer.4": 863.3146, "encoder_q-layer.5": 947.9061, "encoder_q-layer.6": 1006.546, "encoder_q-layer.7": 1192.5332, "encoder_q-layer.8": 1387.7178, "encoder_q-layer.9": 1192.4296, "epoch": 0.77, "inbatch_neg_score": 0.6547, "inbatch_pos_score": 1.3516, "learning_rate": 1.1666666666666668e-05, "loss": 3.1668, "norm_diff": 0.0652, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1902.4495, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6543, "query_norm": 1.5183, "queue_k_norm": 1.578, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.028, "sent_len_1": 66.5997, "sent_max_len_0": 128.0, "sent_max_len_1": 186.2125, "stdk": 0.0492, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79000 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.1574, "doc_norm": 1.576, "encoder_q-embeddings": 975.2648, "encoder_q-layer.0": 650.8516, "encoder_q-layer.1": 697.6703, "encoder_q-layer.10": 1216.4587, "encoder_q-layer.11": 2697.6394, "encoder_q-layer.2": 810.0733, "encoder_q-layer.3": 823.8498, "encoder_q-layer.4": 868.7519, "encoder_q-layer.5": 927.0549, "encoder_q-layer.6": 1078.176, "encoder_q-layer.7": 1149.4763, "encoder_q-layer.8": 1398.3998, "encoder_q-layer.9": 1173.3846, "epoch": 0.77, "inbatch_neg_score": 0.6583, "inbatch_pos_score": 1.3496, "learning_rate": 1.1611111111111112e-05, "loss": 3.1574, "norm_diff": 0.0571, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1881.8266, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6577, "query_norm": 1.5189, "queue_k_norm": 1.5791, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7122, "sent_len_1": 66.8566, "sent_max_len_0": 128.0, "sent_max_len_1": 191.8938, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79100 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 3.1579, "doc_norm": 1.5784, "encoder_q-embeddings": 1129.6061, "encoder_q-layer.0": 744.3054, "encoder_q-layer.1": 832.7578, "encoder_q-layer.10": 1200.6946, "encoder_q-layer.11": 2716.4832, "encoder_q-layer.2": 924.7032, "encoder_q-layer.3": 1016.14, "encoder_q-layer.4": 1055.583, "encoder_q-layer.5": 1099.9939, "encoder_q-layer.6": 1220.917, "encoder_q-layer.7": 1373.1429, "encoder_q-layer.8": 1482.0388, "encoder_q-layer.9": 1173.1982, "epoch": 0.77, "inbatch_neg_score": 0.6608, "inbatch_pos_score": 1.3721, "learning_rate": 1.1555555555555556e-05, "loss": 3.1579, "norm_diff": 0.0549, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2008.2147, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6597, "query_norm": 1.5235, "queue_k_norm": 1.5801, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0631, "sent_len_1": 66.9734, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7862, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 79200 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 3.1681, "doc_norm": 1.583, "encoder_q-embeddings": 1155.6843, "encoder_q-layer.0": 798.1595, "encoder_q-layer.1": 932.6233, "encoder_q-layer.10": 1117.0315, "encoder_q-layer.11": 2581.0459, "encoder_q-layer.2": 1136.2098, "encoder_q-layer.3": 1279.2827, "encoder_q-layer.4": 1351.9166, "encoder_q-layer.5": 1292.7146, "encoder_q-layer.6": 1325.3082, "encoder_q-layer.7": 1462.5278, "encoder_q-layer.8": 1590.1484, "encoder_q-layer.9": 1191.401, "epoch": 0.77, "inbatch_neg_score": 0.6597, "inbatch_pos_score": 1.3838, "learning_rate": 1.1500000000000002e-05, "loss": 3.1681, "norm_diff": 0.0473, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2113.711, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6597, "query_norm": 1.5357, "queue_k_norm": 1.5788, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9448, "sent_len_1": 66.834, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4025, "stdk": 0.0492, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79300 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.1408, "doc_norm": 1.5853, "encoder_q-embeddings": 955.3694, "encoder_q-layer.0": 638.131, "encoder_q-layer.1": 680.5255, "encoder_q-layer.10": 1324.7545, "encoder_q-layer.11": 2815.7415, "encoder_q-layer.2": 744.665, "encoder_q-layer.3": 763.1681, "encoder_q-layer.4": 801.1353, "encoder_q-layer.5": 854.6644, "encoder_q-layer.6": 968.6303, "encoder_q-layer.7": 1143.1102, "encoder_q-layer.8": 1345.4923, "encoder_q-layer.9": 1256.8074, "epoch": 0.78, "inbatch_neg_score": 0.6637, "inbatch_pos_score": 1.3809, "learning_rate": 1.1444444444444446e-05, "loss": 3.1408, "norm_diff": 0.0366, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1860.985, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6641, "query_norm": 1.5488, "queue_k_norm": 1.5795, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7901, "sent_len_1": 66.5234, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2887, "stdk": 0.0493, "stdq": 0.047, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79400 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.1659, "doc_norm": 1.579, "encoder_q-embeddings": 1047.1289, "encoder_q-layer.0": 713.1416, "encoder_q-layer.1": 779.677, "encoder_q-layer.10": 1286.0146, "encoder_q-layer.11": 2886.283, "encoder_q-layer.2": 852.8483, "encoder_q-layer.3": 903.6511, "encoder_q-layer.4": 987.2844, "encoder_q-layer.5": 1077.0369, "encoder_q-layer.6": 1181.5812, "encoder_q-layer.7": 1298.2281, "encoder_q-layer.8": 1417.9137, "encoder_q-layer.9": 1245.6595, "epoch": 0.78, "inbatch_neg_score": 0.6644, "inbatch_pos_score": 1.3604, "learning_rate": 1.138888888888889e-05, "loss": 3.1659, "norm_diff": 0.0445, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2022.3239, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6646, "query_norm": 1.5345, "queue_k_norm": 1.5801, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7916, "sent_len_1": 66.9398, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5775, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79500 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1724, "doc_norm": 1.5827, "encoder_q-embeddings": 2171.8479, "encoder_q-layer.0": 1513.0347, "encoder_q-layer.1": 1585.1149, "encoder_q-layer.10": 1322.5426, "encoder_q-layer.11": 2852.1719, "encoder_q-layer.2": 1895.6962, "encoder_q-layer.3": 2071.7053, "encoder_q-layer.4": 2267.1003, "encoder_q-layer.5": 2159.1406, "encoder_q-layer.6": 2504.1887, "encoder_q-layer.7": 2368.9309, "encoder_q-layer.8": 2252.1953, "encoder_q-layer.9": 1335.7159, "epoch": 0.78, "inbatch_neg_score": 0.665, "inbatch_pos_score": 1.3525, "learning_rate": 1.1333333333333334e-05, "loss": 3.1724, "norm_diff": 0.0544, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3080.1219, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.665, "query_norm": 1.5283, "queue_k_norm": 1.5806, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.024, "sent_len_1": 66.8762, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2988, "stdk": 0.0491, "stdq": 0.0461, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 79600 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1303, "doc_norm": 1.5842, "encoder_q-embeddings": 2251.7649, "encoder_q-layer.0": 1578.4786, "encoder_q-layer.1": 1832.4518, "encoder_q-layer.10": 1334.3176, "encoder_q-layer.11": 2831.446, "encoder_q-layer.2": 2236.5737, "encoder_q-layer.3": 2564.0015, "encoder_q-layer.4": 2474.7388, "encoder_q-layer.5": 2923.1765, "encoder_q-layer.6": 2639.1714, "encoder_q-layer.7": 3012.0156, "encoder_q-layer.8": 2562.2314, "encoder_q-layer.9": 1386.5248, "epoch": 0.78, "inbatch_neg_score": 0.6694, "inbatch_pos_score": 1.3682, "learning_rate": 1.127777777777778e-05, "loss": 3.1303, "norm_diff": 0.0616, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3572.2336, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6694, "query_norm": 1.5225, "queue_k_norm": 1.5802, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8234, "sent_len_1": 66.79, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4762, "stdk": 0.0492, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 79700 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.1697, "doc_norm": 1.5786, "encoder_q-embeddings": 1025.8494, "encoder_q-layer.0": 679.7411, "encoder_q-layer.1": 726.0505, "encoder_q-layer.10": 1528.2931, "encoder_q-layer.11": 3024.332, "encoder_q-layer.2": 831.5852, "encoder_q-layer.3": 886.8042, "encoder_q-layer.4": 937.981, "encoder_q-layer.5": 1013.5902, "encoder_q-layer.6": 1133.1135, "encoder_q-layer.7": 1288.5684, "encoder_q-layer.8": 1514.683, "encoder_q-layer.9": 1359.4454, "epoch": 0.78, "inbatch_neg_score": 0.6724, "inbatch_pos_score": 1.3408, "learning_rate": 1.1222222222222224e-05, "loss": 3.1697, "norm_diff": 0.062, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2060.977, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6719, "query_norm": 1.5166, "queue_k_norm": 1.5816, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.905, "sent_len_1": 66.9644, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1175, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 79800 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.1798, "doc_norm": 1.5762, "encoder_q-embeddings": 1015.8392, "encoder_q-layer.0": 675.3879, "encoder_q-layer.1": 712.3515, "encoder_q-layer.10": 1249.215, "encoder_q-layer.11": 2937.3428, "encoder_q-layer.2": 807.3736, "encoder_q-layer.3": 838.2741, "encoder_q-layer.4": 902.4457, "encoder_q-layer.5": 944.3777, "encoder_q-layer.6": 1083.7146, "encoder_q-layer.7": 1245.6196, "encoder_q-layer.8": 1426.897, "encoder_q-layer.9": 1256.4763, "epoch": 0.78, "inbatch_neg_score": 0.6717, "inbatch_pos_score": 1.3184, "learning_rate": 1.1166666666666668e-05, "loss": 3.1798, "norm_diff": 0.0754, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2018.2152, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6719, "query_norm": 1.5009, "queue_k_norm": 1.5791, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7898, "sent_len_1": 66.5531, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2925, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 79900 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.1805, "doc_norm": 1.5786, "encoder_q-embeddings": 1144.4414, "encoder_q-layer.0": 808.533, "encoder_q-layer.1": 873.9615, "encoder_q-layer.10": 1305.3239, "encoder_q-layer.11": 2968.7556, "encoder_q-layer.2": 1039.2085, "encoder_q-layer.3": 1029.0969, "encoder_q-layer.4": 949.4698, "encoder_q-layer.5": 890.2075, "encoder_q-layer.6": 992.3794, "encoder_q-layer.7": 1150.4061, "encoder_q-layer.8": 1380.6515, "encoder_q-layer.9": 1270.5737, "epoch": 0.78, "inbatch_neg_score": 0.6737, "inbatch_pos_score": 1.3691, "learning_rate": 1.1111111111111112e-05, "loss": 3.1805, "norm_diff": 0.0613, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2031.3872, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6743, "query_norm": 1.5173, "queue_k_norm": 1.5815, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8406, "sent_len_1": 66.7115, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1788, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 80000 }, { "dev_runtime": 37.2903, "dev_samples_per_second": 1.716, "dev_steps_per_second": 0.027, "epoch": 0.78, "step": 80000, "test_accuracy": 94.88525390625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3015986382961273, "test_doc_norm": 1.5760226249694824, "test_inbatch_neg_score": 1.0465819835662842, "test_inbatch_pos_score": 2.0188961029052734, "test_loss": 0.3015986382961273, "test_loss_align": 1.0064592361450195, "test_loss_unif": 3.147366523742676, "test_loss_unif_q@queue": 3.147366523742676, "test_norm_diff": 0.016963131725788116, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.659213662147522, "test_query_norm": 1.5925878286361694, "test_queue_k_norm": 1.5812711715698242, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0432203933596611, "test_stdq": 0.0432535745203495, "test_stdqueue_k": 0.04909998178482056, "test_stdqueue_q": 0.0 }, { "dev_runtime": 37.2903, "dev_samples_per_second": 1.716, "dev_steps_per_second": 0.027, "epoch": 0.78, "eval_beir-arguana_ndcg@10": 0.38533, "eval_beir-arguana_recall@10": 0.65789, "eval_beir-arguana_recall@100": 0.93954, "eval_beir-arguana_recall@20": 0.78805, "eval_beir-avg_ndcg@10": 0.37835558333333336, "eval_beir-avg_recall@10": 0.44511025000000004, "eval_beir-avg_recall@100": 0.6245525, "eval_beir-avg_recall@20": 0.5054496666666667, "eval_beir-cqadupstack_ndcg@10": 0.26795583333333334, "eval_beir-cqadupstack_recall@10": 0.3588925, "eval_beir-cqadupstack_recall@100": 0.5835049999999999, "eval_beir-cqadupstack_recall@20": 0.4238666666666666, "eval_beir-fiqa_ndcg@10": 0.22794, "eval_beir-fiqa_recall@10": 0.2852, "eval_beir-fiqa_recall@100": 0.52992, "eval_beir-fiqa_recall@20": 0.33986, "eval_beir-nfcorpus_ndcg@10": 0.29796, "eval_beir-nfcorpus_recall@10": 0.14283, "eval_beir-nfcorpus_recall@100": 0.2767, "eval_beir-nfcorpus_recall@20": 0.1788, "eval_beir-nq_ndcg@10": 0.2705, "eval_beir-nq_recall@10": 0.44535, "eval_beir-nq_recall@100": 0.78764, "eval_beir-nq_recall@20": 0.5612, "eval_beir-quora_ndcg@10": 0.82223, "eval_beir-quora_recall@10": 0.91454, "eval_beir-quora_recall@100": 0.9852, "eval_beir-quora_recall@20": 0.94913, "eval_beir-scidocs_ndcg@10": 0.14503, "eval_beir-scidocs_recall@10": 0.15228, "eval_beir-scidocs_recall@100": 0.35082, "eval_beir-scidocs_recall@20": 0.20648, "eval_beir-scifact_ndcg@10": 0.63083, "eval_beir-scifact_recall@10": 0.77, "eval_beir-scifact_recall@100": 0.92322, "eval_beir-scifact_recall@20": 0.83344, "eval_beir-trec-covid_ndcg@10": 0.54308, "eval_beir-trec-covid_recall@10": 0.582, "eval_beir-trec-covid_recall@100": 0.4304, "eval_beir-trec-covid_recall@20": 0.56, "eval_beir-webis-touche2020_ndcg@10": 0.1927, "eval_beir-webis-touche2020_recall@10": 0.14212, "eval_beir-webis-touche2020_recall@100": 0.43858, "eval_beir-webis-touche2020_recall@20": 0.21367, "eval_senteval-avg_sts": 0.7211243300477561, "eval_senteval-sickr_spearman": 0.6918598219594686, "eval_senteval-stsb_spearman": 0.7503888381360436, "step": 80000, "test_accuracy": 94.88525390625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3015986382961273, "test_doc_norm": 1.5760226249694824, "test_inbatch_neg_score": 1.0465819835662842, "test_inbatch_pos_score": 2.0188961029052734, "test_loss": 0.3015986382961273, "test_loss_align": 1.0064592361450195, "test_loss_unif": 3.147366523742676, "test_loss_unif_q@queue": 3.147366523742676, "test_norm_diff": 0.016963131725788116, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.659213662147522, "test_query_norm": 1.5925878286361694, "test_queue_k_norm": 1.5812711715698242, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0432203933596611, "test_stdq": 0.0432535745203495, "test_stdqueue_k": 0.04909998178482056, "test_stdqueue_q": 0.0 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.1638, "doc_norm": 1.5895, "encoder_q-embeddings": 1400.7617, "encoder_q-layer.0": 965.9867, "encoder_q-layer.1": 1060.2559, "encoder_q-layer.10": 1268.0103, "encoder_q-layer.11": 3028.1399, "encoder_q-layer.2": 1186.0958, "encoder_q-layer.3": 1221.2538, "encoder_q-layer.4": 1319.1344, "encoder_q-layer.5": 1304.0043, "encoder_q-layer.6": 1450.1825, "encoder_q-layer.7": 1595.1479, "encoder_q-layer.8": 1694.5428, "encoder_q-layer.9": 1360.759, "epoch": 0.78, "inbatch_neg_score": 0.6772, "inbatch_pos_score": 1.3379, "learning_rate": 1.1055555555555556e-05, "loss": 3.1638, "norm_diff": 0.0795, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2333.945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6777, "query_norm": 1.51, "queue_k_norm": 1.5824, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0083, "sent_len_1": 66.8374, "sent_max_len_0": 128.0, "sent_max_len_1": 192.565, "stdk": 0.0493, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 80100 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.1626, "doc_norm": 1.5862, "encoder_q-embeddings": 1579.9305, "encoder_q-layer.0": 1089.163, "encoder_q-layer.1": 1149.2412, "encoder_q-layer.10": 1365.926, "encoder_q-layer.11": 2868.8459, "encoder_q-layer.2": 1424.2704, "encoder_q-layer.3": 1536.2289, "encoder_q-layer.4": 1569.3265, "encoder_q-layer.5": 1670.3801, "encoder_q-layer.6": 1677.0023, "encoder_q-layer.7": 1742.2429, "encoder_q-layer.8": 1710.124, "encoder_q-layer.9": 1277.5892, "epoch": 0.78, "inbatch_neg_score": 0.6764, "inbatch_pos_score": 1.3779, "learning_rate": 1.1000000000000001e-05, "loss": 3.1626, "norm_diff": 0.0587, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2531.037, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6772, "query_norm": 1.5275, "queue_k_norm": 1.5844, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7941, "sent_len_1": 66.6855, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4275, "stdk": 0.0492, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 80200 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1625, "doc_norm": 1.5842, "encoder_q-embeddings": 1257.8727, "encoder_q-layer.0": 812.7773, "encoder_q-layer.1": 906.0044, "encoder_q-layer.10": 1325.7552, "encoder_q-layer.11": 2915.1455, "encoder_q-layer.2": 1067.7902, "encoder_q-layer.3": 1122.0974, "encoder_q-layer.4": 1229.8771, "encoder_q-layer.5": 1304.3699, "encoder_q-layer.6": 1313.2994, "encoder_q-layer.7": 1402.3756, "encoder_q-layer.8": 1438.7482, "encoder_q-layer.9": 1269.2555, "epoch": 0.78, "inbatch_neg_score": 0.6785, "inbatch_pos_score": 1.3496, "learning_rate": 1.0944444444444445e-05, "loss": 3.1625, "norm_diff": 0.0648, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2190.6802, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6777, "query_norm": 1.5195, "queue_k_norm": 1.584, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7864, "sent_len_1": 66.8181, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7763, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 80300 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.1595, "doc_norm": 1.5873, "encoder_q-embeddings": 2538.7866, "encoder_q-layer.0": 1708.1127, "encoder_q-layer.1": 1870.1849, "encoder_q-layer.10": 2477.9517, "encoder_q-layer.11": 5465.9282, "encoder_q-layer.2": 2243.8794, "encoder_q-layer.3": 2302.5898, "encoder_q-layer.4": 2429.0613, "encoder_q-layer.5": 2450.9224, "encoder_q-layer.6": 2766.3201, "encoder_q-layer.7": 3468.6184, "encoder_q-layer.8": 3345.9448, "encoder_q-layer.9": 2658.407, "epoch": 0.78, "inbatch_neg_score": 0.6764, "inbatch_pos_score": 1.4004, "learning_rate": 1.088888888888889e-05, "loss": 3.1595, "norm_diff": 0.0581, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4426.4098, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6772, "query_norm": 1.5292, "queue_k_norm": 1.5831, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8657, "sent_len_1": 66.8246, "sent_max_len_0": 128.0, "sent_max_len_1": 189.865, "stdk": 0.0492, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 80400 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1359, "doc_norm": 1.5847, "encoder_q-embeddings": 2665.5244, "encoder_q-layer.0": 1916.9335, "encoder_q-layer.1": 2065.2722, "encoder_q-layer.10": 2590.1963, "encoder_q-layer.11": 5689.1021, "encoder_q-layer.2": 2379.0696, "encoder_q-layer.3": 2439.3516, "encoder_q-layer.4": 2764.4619, "encoder_q-layer.5": 2701.4604, "encoder_q-layer.6": 2945.0581, "encoder_q-layer.7": 3222.8892, "encoder_q-layer.8": 3276.0549, "encoder_q-layer.9": 2524.1277, "epoch": 0.79, "inbatch_neg_score": 0.6804, "inbatch_pos_score": 1.3408, "learning_rate": 1.0833333333333334e-05, "loss": 3.1359, "norm_diff": 0.0717, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4559.3355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6792, "query_norm": 1.513, "queue_k_norm": 1.5833, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.1389, "sent_len_1": 66.7879, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7413, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 80500 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.1759, "doc_norm": 1.5859, "encoder_q-embeddings": 1959.6002, "encoder_q-layer.0": 1287.25, "encoder_q-layer.1": 1343.1055, "encoder_q-layer.10": 2281.1526, "encoder_q-layer.11": 5452.895, "encoder_q-layer.2": 1538.5356, "encoder_q-layer.3": 1572.1865, "encoder_q-layer.4": 1645.0712, "encoder_q-layer.5": 1781.1532, "encoder_q-layer.6": 1979.3684, "encoder_q-layer.7": 2277.436, "encoder_q-layer.8": 2620.5503, "encoder_q-layer.9": 2329.1213, "epoch": 0.79, "inbatch_neg_score": 0.6807, "inbatch_pos_score": 1.3496, "learning_rate": 1.0777777777777778e-05, "loss": 3.1759, "norm_diff": 0.0759, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3711.6782, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6807, "query_norm": 1.51, "queue_k_norm": 1.5846, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9242, "sent_len_1": 66.8275, "sent_max_len_0": 128.0, "sent_max_len_1": 190.365, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 80600 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1578, "doc_norm": 1.5806, "encoder_q-embeddings": 2157.3584, "encoder_q-layer.0": 1473.7998, "encoder_q-layer.1": 1619.2266, "encoder_q-layer.10": 2698.6846, "encoder_q-layer.11": 5936.2935, "encoder_q-layer.2": 1908.2031, "encoder_q-layer.3": 1942.3086, "encoder_q-layer.4": 2097.6775, "encoder_q-layer.5": 2202.9539, "encoder_q-layer.6": 2520.8799, "encoder_q-layer.7": 2714.8232, "encoder_q-layer.8": 2988.5583, "encoder_q-layer.9": 2462.5679, "epoch": 0.79, "inbatch_neg_score": 0.6832, "inbatch_pos_score": 1.3457, "learning_rate": 1.0722222222222222e-05, "loss": 3.1578, "norm_diff": 0.0549, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4185.7014, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6821, "query_norm": 1.5257, "queue_k_norm": 1.5846, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8776, "sent_len_1": 66.6474, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8212, "stdk": 0.0488, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 80700 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1511, "doc_norm": 1.5886, "encoder_q-embeddings": 2006.6925, "encoder_q-layer.0": 1311.2059, "encoder_q-layer.1": 1428.442, "encoder_q-layer.10": 2353.3542, "encoder_q-layer.11": 5332.9766, "encoder_q-layer.2": 1646.4973, "encoder_q-layer.3": 1790.9276, "encoder_q-layer.4": 1922.8077, "encoder_q-layer.5": 2116.7239, "encoder_q-layer.6": 2365.6714, "encoder_q-layer.7": 2633.0508, "encoder_q-layer.8": 2760.7378, "encoder_q-layer.9": 2444.658, "epoch": 0.79, "inbatch_neg_score": 0.6831, "inbatch_pos_score": 1.3887, "learning_rate": 1.0666666666666667e-05, "loss": 3.1511, "norm_diff": 0.0556, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3825.693, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6831, "query_norm": 1.533, "queue_k_norm": 1.5863, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0671, "sent_len_1": 66.8788, "sent_max_len_0": 128.0, "sent_max_len_1": 191.7325, "stdk": 0.0492, "stdq": 0.0461, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 80800 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 3.168, "doc_norm": 1.5853, "encoder_q-embeddings": 1126.8324, "encoder_q-layer.0": 738.218, "encoder_q-layer.1": 832.9293, "encoder_q-layer.10": 1143.1877, "encoder_q-layer.11": 2756.5835, "encoder_q-layer.2": 963.8879, "encoder_q-layer.3": 1089.4331, "encoder_q-layer.4": 1213.651, "encoder_q-layer.5": 1338.7495, "encoder_q-layer.6": 1418.1177, "encoder_q-layer.7": 1489.3083, "encoder_q-layer.8": 1536.4492, "encoder_q-layer.9": 1213.5121, "epoch": 0.79, "inbatch_neg_score": 0.6835, "inbatch_pos_score": 1.3916, "learning_rate": 1.0611111111111111e-05, "loss": 3.168, "norm_diff": 0.0504, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2132.4717, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6836, "query_norm": 1.5349, "queue_k_norm": 1.5887, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9637, "sent_len_1": 66.9363, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2262, "stdk": 0.049, "stdq": 0.0463, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 80900 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1584, "doc_norm": 1.5895, "encoder_q-embeddings": 1695.5626, "encoder_q-layer.0": 1172.1503, "encoder_q-layer.1": 1283.7329, "encoder_q-layer.10": 1244.2246, "encoder_q-layer.11": 2868.2112, "encoder_q-layer.2": 1506.9012, "encoder_q-layer.3": 1554.045, "encoder_q-layer.4": 1634.9988, "encoder_q-layer.5": 1789.9016, "encoder_q-layer.6": 1928.594, "encoder_q-layer.7": 1865.0463, "encoder_q-layer.8": 1792.4965, "encoder_q-layer.9": 1297.8828, "epoch": 0.79, "inbatch_neg_score": 0.6844, "inbatch_pos_score": 1.3594, "learning_rate": 1.0555555555555555e-05, "loss": 3.1584, "norm_diff": 0.0661, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2565.7209, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6846, "query_norm": 1.5234, "queue_k_norm": 1.5873, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7889, "sent_len_1": 66.6344, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4575, "stdk": 0.0492, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 81000 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1767, "doc_norm": 1.5892, "encoder_q-embeddings": 1221.5774, "encoder_q-layer.0": 798.8763, "encoder_q-layer.1": 867.3085, "encoder_q-layer.10": 1344.6053, "encoder_q-layer.11": 3007.4163, "encoder_q-layer.2": 1062.2803, "encoder_q-layer.3": 1071.825, "encoder_q-layer.4": 1109.8955, "encoder_q-layer.5": 1114.2322, "encoder_q-layer.6": 1293.1104, "encoder_q-layer.7": 1556.6875, "encoder_q-layer.8": 1646.8997, "encoder_q-layer.9": 1351.084, "epoch": 0.79, "inbatch_neg_score": 0.6818, "inbatch_pos_score": 1.3643, "learning_rate": 1.05e-05, "loss": 3.1767, "norm_diff": 0.0774, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2214.0435, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6816, "query_norm": 1.5118, "queue_k_norm": 1.5871, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8857, "sent_len_1": 66.9106, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3475, "stdk": 0.0491, "stdq": 0.0453, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81100 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.1633, "doc_norm": 1.5826, "encoder_q-embeddings": 960.2942, "encoder_q-layer.0": 646.8708, "encoder_q-layer.1": 708.2198, "encoder_q-layer.10": 1157.2889, "encoder_q-layer.11": 2682.0654, "encoder_q-layer.2": 805.8773, "encoder_q-layer.3": 853.3541, "encoder_q-layer.4": 923.1658, "encoder_q-layer.5": 971.7415, "encoder_q-layer.6": 1066.8857, "encoder_q-layer.7": 1204.6906, "encoder_q-layer.8": 1324.5514, "encoder_q-layer.9": 1154.5015, "epoch": 0.79, "inbatch_neg_score": 0.6821, "inbatch_pos_score": 1.3535, "learning_rate": 1.0444444444444445e-05, "loss": 3.1633, "norm_diff": 0.0648, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1861.4283, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6826, "query_norm": 1.5178, "queue_k_norm": 1.5872, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.877, "sent_len_1": 66.6393, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3212, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81200 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.1684, "doc_norm": 1.5892, "encoder_q-embeddings": 2554.8455, "encoder_q-layer.0": 1642.9718, "encoder_q-layer.1": 1928.7162, "encoder_q-layer.10": 1342.5637, "encoder_q-layer.11": 2877.9834, "encoder_q-layer.2": 2391.6687, "encoder_q-layer.3": 2783.4021, "encoder_q-layer.4": 3140.7356, "encoder_q-layer.5": 3858.5183, "encoder_q-layer.6": 4359.4097, "encoder_q-layer.7": 4691.5229, "encoder_q-layer.8": 4806.1841, "encoder_q-layer.9": 2729.178, "epoch": 0.79, "inbatch_neg_score": 0.6851, "inbatch_pos_score": 1.3623, "learning_rate": 1.038888888888889e-05, "loss": 3.1684, "norm_diff": 0.071, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4917.5387, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6846, "query_norm": 1.5182, "queue_k_norm": 1.5873, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8911, "sent_len_1": 66.6048, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7125, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81300 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1353, "doc_norm": 1.5877, "encoder_q-embeddings": 1023.3998, "encoder_q-layer.0": 677.4228, "encoder_q-layer.1": 674.5443, "encoder_q-layer.10": 1269.4103, "encoder_q-layer.11": 2769.9185, "encoder_q-layer.2": 802.902, "encoder_q-layer.3": 827.1428, "encoder_q-layer.4": 851.5632, "encoder_q-layer.5": 862.9307, "encoder_q-layer.6": 1009.8301, "encoder_q-layer.7": 1155.5106, "encoder_q-layer.8": 1336.1256, "encoder_q-layer.9": 1216.6882, "epoch": 0.79, "inbatch_neg_score": 0.6833, "inbatch_pos_score": 1.3711, "learning_rate": 1.0333333333333333e-05, "loss": 3.1353, "norm_diff": 0.0729, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1905.3619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6831, "query_norm": 1.5148, "queue_k_norm": 1.5871, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0704, "sent_len_1": 66.7664, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1375, "stdk": 0.0491, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81400 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1558, "doc_norm": 1.5859, "encoder_q-embeddings": 1153.7661, "encoder_q-layer.0": 749.3093, "encoder_q-layer.1": 833.0377, "encoder_q-layer.10": 1177.8032, "encoder_q-layer.11": 2727.425, "encoder_q-layer.2": 981.3732, "encoder_q-layer.3": 1038.7552, "encoder_q-layer.4": 1124.0024, "encoder_q-layer.5": 1220.3698, "encoder_q-layer.6": 1434.7595, "encoder_q-layer.7": 1539.281, "encoder_q-layer.8": 1606.5076, "encoder_q-layer.9": 1269.3333, "epoch": 0.8, "inbatch_neg_score": 0.6844, "inbatch_pos_score": 1.3887, "learning_rate": 1.0277777777777777e-05, "loss": 3.1558, "norm_diff": 0.057, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2120.155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6855, "query_norm": 1.5289, "queue_k_norm": 1.5875, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.5732, "sent_len_1": 66.6563, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7788, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81500 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1533, "doc_norm": 1.5827, "encoder_q-embeddings": 1037.3945, "encoder_q-layer.0": 647.414, "encoder_q-layer.1": 664.1012, "encoder_q-layer.10": 1189.2025, "encoder_q-layer.11": 2872.0857, "encoder_q-layer.2": 732.4588, "encoder_q-layer.3": 772.7777, "encoder_q-layer.4": 834.499, "encoder_q-layer.5": 864.7521, "encoder_q-layer.6": 984.3871, "encoder_q-layer.7": 1160.3185, "encoder_q-layer.8": 1472.6077, "encoder_q-layer.9": 1217.6105, "epoch": 0.8, "inbatch_neg_score": 0.6889, "inbatch_pos_score": 1.374, "learning_rate": 1.0222222222222223e-05, "loss": 3.1533, "norm_diff": 0.0469, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1932.2574, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6885, "query_norm": 1.5358, "queue_k_norm": 1.5878, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8039, "sent_len_1": 66.8151, "sent_max_len_0": 128.0, "sent_max_len_1": 190.22, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81600 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1453, "doc_norm": 1.5811, "encoder_q-embeddings": 1234.7152, "encoder_q-layer.0": 868.3758, "encoder_q-layer.1": 1008.8305, "encoder_q-layer.10": 1280.7366, "encoder_q-layer.11": 2805.1125, "encoder_q-layer.2": 1146.34, "encoder_q-layer.3": 1275.0625, "encoder_q-layer.4": 1425.4331, "encoder_q-layer.5": 1474.6484, "encoder_q-layer.6": 1461.6285, "encoder_q-layer.7": 1477.2516, "encoder_q-layer.8": 1549.2378, "encoder_q-layer.9": 1278.2766, "epoch": 0.8, "inbatch_neg_score": 0.6893, "inbatch_pos_score": 1.3604, "learning_rate": 1.0166666666666667e-05, "loss": 3.1453, "norm_diff": 0.0542, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2204.983, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.689, "query_norm": 1.5268, "queue_k_norm": 1.5879, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0632, "sent_len_1": 66.8188, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8475, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81700 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.1348, "doc_norm": 1.59, "encoder_q-embeddings": 1587.7563, "encoder_q-layer.0": 1099.8848, "encoder_q-layer.1": 1305.6074, "encoder_q-layer.10": 1239.2166, "encoder_q-layer.11": 2888.23, "encoder_q-layer.2": 1468.6234, "encoder_q-layer.3": 1648.178, "encoder_q-layer.4": 1768.4263, "encoder_q-layer.5": 1842.5452, "encoder_q-layer.6": 1965.9202, "encoder_q-layer.7": 2106.22, "encoder_q-layer.8": 1916.136, "encoder_q-layer.9": 1286.8361, "epoch": 0.8, "inbatch_neg_score": 0.6911, "inbatch_pos_score": 1.3496, "learning_rate": 1.0111111111111111e-05, "loss": 3.1348, "norm_diff": 0.0771, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2666.0876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6914, "query_norm": 1.5129, "queue_k_norm": 1.5878, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0924, "sent_len_1": 66.8254, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3413, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81800 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.1647, "doc_norm": 1.5934, "encoder_q-embeddings": 2469.5005, "encoder_q-layer.0": 1852.484, "encoder_q-layer.1": 2165.3694, "encoder_q-layer.10": 1399.9178, "encoder_q-layer.11": 3009.7151, "encoder_q-layer.2": 2776.6545, "encoder_q-layer.3": 2972.8269, "encoder_q-layer.4": 3202.5774, "encoder_q-layer.5": 3408.9404, "encoder_q-layer.6": 3816.6567, "encoder_q-layer.7": 3641.8855, "encoder_q-layer.8": 2414.3347, "encoder_q-layer.9": 1399.2362, "epoch": 0.8, "inbatch_neg_score": 0.6909, "inbatch_pos_score": 1.415, "learning_rate": 1.0055555555555555e-05, "loss": 3.1647, "norm_diff": 0.0509, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4157.7581, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6909, "query_norm": 1.5426, "queue_k_norm": 1.5889, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.208, "sent_len_1": 66.9368, "sent_max_len_0": 128.0, "sent_max_len_1": 189.055, "stdk": 0.0492, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 81900 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1462, "doc_norm": 1.5813, "encoder_q-embeddings": 1050.3915, "encoder_q-layer.0": 700.4301, "encoder_q-layer.1": 762.6502, "encoder_q-layer.10": 1327.6494, "encoder_q-layer.11": 2786.0969, "encoder_q-layer.2": 835.535, "encoder_q-layer.3": 913.1284, "encoder_q-layer.4": 1008.1145, "encoder_q-layer.5": 1077.8026, "encoder_q-layer.6": 1172.1234, "encoder_q-layer.7": 1319.7034, "encoder_q-layer.8": 1420.2125, "encoder_q-layer.9": 1233.0776, "epoch": 0.8, "inbatch_neg_score": 0.6929, "inbatch_pos_score": 1.3984, "learning_rate": 1e-05, "loss": 3.1462, "norm_diff": 0.0504, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1995.3183, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6914, "query_norm": 1.5308, "queue_k_norm": 1.5896, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.951, "sent_len_1": 66.9386, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7113, "stdk": 0.0487, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82000 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 3.1719, "doc_norm": 1.6012, "encoder_q-embeddings": 862.9777, "encoder_q-layer.0": 555.4623, "encoder_q-layer.1": 577.1544, "encoder_q-layer.10": 1191.1334, "encoder_q-layer.11": 2572.7344, "encoder_q-layer.2": 642.679, "encoder_q-layer.3": 703.2296, "encoder_q-layer.4": 733.4401, "encoder_q-layer.5": 763.1398, "encoder_q-layer.6": 891.8499, "encoder_q-layer.7": 1069.9352, "encoder_q-layer.8": 1234.7516, "encoder_q-layer.9": 1163.613, "epoch": 0.8, "inbatch_neg_score": 0.6912, "inbatch_pos_score": 1.4053, "learning_rate": 9.944444444444445e-06, "loss": 3.1719, "norm_diff": 0.0687, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1735.5737, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6909, "query_norm": 1.5325, "queue_k_norm": 1.5888, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9605, "sent_len_1": 66.765, "sent_max_len_0": 128.0, "sent_max_len_1": 192.8562, "stdk": 0.0495, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82100 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 3.1378, "doc_norm": 1.5927, "encoder_q-embeddings": 1050.9352, "encoder_q-layer.0": 682.3998, "encoder_q-layer.1": 711.1276, "encoder_q-layer.10": 1229.5808, "encoder_q-layer.11": 2710.4517, "encoder_q-layer.2": 819.4929, "encoder_q-layer.3": 838.0463, "encoder_q-layer.4": 909.5482, "encoder_q-layer.5": 933.9225, "encoder_q-layer.6": 1048.6969, "encoder_q-layer.7": 1211.6991, "encoder_q-layer.8": 1345.8118, "encoder_q-layer.9": 1184.2097, "epoch": 0.8, "inbatch_neg_score": 0.6934, "inbatch_pos_score": 1.4131, "learning_rate": 9.888888888888889e-06, "loss": 3.1378, "norm_diff": 0.0455, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1894.4208, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6929, "query_norm": 1.5472, "queue_k_norm": 1.5898, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1597, "sent_len_1": 66.8207, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4613, "stdk": 0.0492, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82200 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.1638, "doc_norm": 1.5916, "encoder_q-embeddings": 1091.7739, "encoder_q-layer.0": 734.6206, "encoder_q-layer.1": 842.9809, "encoder_q-layer.10": 1242.4427, "encoder_q-layer.11": 2683.3745, "encoder_q-layer.2": 1044.2399, "encoder_q-layer.3": 1158.0393, "encoder_q-layer.4": 1235.2672, "encoder_q-layer.5": 1329.2577, "encoder_q-layer.6": 1502.4653, "encoder_q-layer.7": 1620.3137, "encoder_q-layer.8": 1674.936, "encoder_q-layer.9": 1248.8359, "epoch": 0.8, "inbatch_neg_score": 0.6965, "inbatch_pos_score": 1.3662, "learning_rate": 9.833333333333333e-06, "loss": 3.1638, "norm_diff": 0.0817, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2137.7964, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6963, "query_norm": 1.5099, "queue_k_norm": 1.5903, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.972, "sent_len_1": 66.7428, "sent_max_len_0": 128.0, "sent_max_len_1": 189.02, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82300 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.1339, "doc_norm": 1.5938, "encoder_q-embeddings": 1192.9722, "encoder_q-layer.0": 774.4518, "encoder_q-layer.1": 818.5733, "encoder_q-layer.10": 1310.6936, "encoder_q-layer.11": 2798.0615, "encoder_q-layer.2": 969.2155, "encoder_q-layer.3": 1093.8545, "encoder_q-layer.4": 1194.8337, "encoder_q-layer.5": 1199.5719, "encoder_q-layer.6": 1360.5826, "encoder_q-layer.7": 1455.6652, "encoder_q-layer.8": 1515.9982, "encoder_q-layer.9": 1287.9614, "epoch": 0.8, "inbatch_neg_score": 0.6976, "inbatch_pos_score": 1.375, "learning_rate": 9.777777777777779e-06, "loss": 3.1339, "norm_diff": 0.0675, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2135.397, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6978, "query_norm": 1.5263, "queue_k_norm": 1.5904, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8079, "sent_len_1": 66.7187, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9888, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82400 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 3.1589, "doc_norm": 1.596, "encoder_q-embeddings": 1631.6688, "encoder_q-layer.0": 1020.1454, "encoder_q-layer.1": 1084.6709, "encoder_q-layer.10": 1208.3673, "encoder_q-layer.11": 2635.2107, "encoder_q-layer.2": 1232.4893, "encoder_q-layer.3": 1286.8735, "encoder_q-layer.4": 1337.0674, "encoder_q-layer.5": 1511.1357, "encoder_q-layer.6": 1400.3566, "encoder_q-layer.7": 1426.9537, "encoder_q-layer.8": 1483.6189, "encoder_q-layer.9": 1168.9686, "epoch": 0.81, "inbatch_neg_score": 0.7, "inbatch_pos_score": 1.4072, "learning_rate": 9.722222222222223e-06, "loss": 3.1589, "norm_diff": 0.0669, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2267.2417, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6997, "query_norm": 1.5291, "queue_k_norm": 1.5929, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9664, "sent_len_1": 66.7294, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7413, "stdk": 0.0492, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 82500 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.1492, "doc_norm": 1.5896, "encoder_q-embeddings": 1208.4355, "encoder_q-layer.0": 824.8386, "encoder_q-layer.1": 932.1114, "encoder_q-layer.10": 1160.9698, "encoder_q-layer.11": 2679.335, "encoder_q-layer.2": 1134.3196, "encoder_q-layer.3": 1210.6642, "encoder_q-layer.4": 1343.5011, "encoder_q-layer.5": 1395.3096, "encoder_q-layer.6": 1492.6533, "encoder_q-layer.7": 1525.5842, "encoder_q-layer.8": 1557.5972, "encoder_q-layer.9": 1283.2218, "epoch": 0.81, "inbatch_neg_score": 0.7009, "inbatch_pos_score": 1.3691, "learning_rate": 9.666666666666667e-06, "loss": 3.1492, "norm_diff": 0.0634, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2190.6605, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7017, "query_norm": 1.5262, "queue_k_norm": 1.5919, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7419, "sent_len_1": 66.7727, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5437, "stdk": 0.049, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82600 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1473, "doc_norm": 1.5889, "encoder_q-embeddings": 428.7136, "encoder_q-layer.0": 287.7683, "encoder_q-layer.1": 301.2601, "encoder_q-layer.10": 639.2629, "encoder_q-layer.11": 1430.0751, "encoder_q-layer.2": 329.322, "encoder_q-layer.3": 351.7398, "encoder_q-layer.4": 370.6867, "encoder_q-layer.5": 407.2479, "encoder_q-layer.6": 511.6207, "encoder_q-layer.7": 575.3125, "encoder_q-layer.8": 677.9387, "encoder_q-layer.9": 638.6132, "epoch": 0.81, "inbatch_neg_score": 0.7018, "inbatch_pos_score": 1.3721, "learning_rate": 9.61111111111111e-06, "loss": 3.1473, "norm_diff": 0.0638, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 943.6993, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7026, "query_norm": 1.5251, "queue_k_norm": 1.5921, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9716, "sent_len_1": 66.6937, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8988, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82700 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1459, "doc_norm": 1.5994, "encoder_q-embeddings": 496.5228, "encoder_q-layer.0": 328.4564, "encoder_q-layer.1": 350.1421, "encoder_q-layer.10": 569.9866, "encoder_q-layer.11": 1358.2792, "encoder_q-layer.2": 411.1279, "encoder_q-layer.3": 432.5877, "encoder_q-layer.4": 466.9399, "encoder_q-layer.5": 479.3528, "encoder_q-layer.6": 547.3011, "encoder_q-layer.7": 614.1255, "encoder_q-layer.8": 691.1078, "encoder_q-layer.9": 595.005, "epoch": 0.81, "inbatch_neg_score": 0.7027, "inbatch_pos_score": 1.3906, "learning_rate": 9.555555555555556e-06, "loss": 3.1459, "norm_diff": 0.0577, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 963.147, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7036, "query_norm": 1.5418, "queue_k_norm": 1.5923, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9439, "sent_len_1": 67.0518, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2663, "stdk": 0.0494, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 82800 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1808, "doc_norm": 1.5896, "encoder_q-embeddings": 861.5897, "encoder_q-layer.0": 585.9266, "encoder_q-layer.1": 650.5472, "encoder_q-layer.10": 618.6367, "encoder_q-layer.11": 1385.608, "encoder_q-layer.2": 751.0944, "encoder_q-layer.3": 757.1058, "encoder_q-layer.4": 762.2744, "encoder_q-layer.5": 804.0931, "encoder_q-layer.6": 769.2767, "encoder_q-layer.7": 799.6569, "encoder_q-layer.8": 860.8244, "encoder_q-layer.9": 649.0984, "epoch": 0.81, "inbatch_neg_score": 0.7063, "inbatch_pos_score": 1.4121, "learning_rate": 9.5e-06, "loss": 3.1808, "norm_diff": 0.0358, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1255.712, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7051, "query_norm": 1.5537, "queue_k_norm": 1.5947, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9745, "sent_len_1": 66.8681, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8288, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 82900 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.144, "doc_norm": 1.6011, "encoder_q-embeddings": 1088.8429, "encoder_q-layer.0": 744.1373, "encoder_q-layer.1": 842.8552, "encoder_q-layer.10": 583.097, "encoder_q-layer.11": 1354.1475, "encoder_q-layer.2": 963.0492, "encoder_q-layer.3": 1052.5409, "encoder_q-layer.4": 1207.8446, "encoder_q-layer.5": 1134.4492, "encoder_q-layer.6": 1375.9495, "encoder_q-layer.7": 1636.5223, "encoder_q-layer.8": 1367.4836, "encoder_q-layer.9": 637.2008, "epoch": 0.81, "inbatch_neg_score": 0.709, "inbatch_pos_score": 1.4092, "learning_rate": 9.444444444444445e-06, "loss": 3.144, "norm_diff": 0.0567, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1677.174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7085, "query_norm": 1.5445, "queue_k_norm": 1.5939, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0475, "sent_len_1": 66.9843, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1775, "stdk": 0.0494, "stdq": 0.0462, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 83000 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.1238, "doc_norm": 1.5977, "encoder_q-embeddings": 513.5504, "encoder_q-layer.0": 329.8863, "encoder_q-layer.1": 354.5135, "encoder_q-layer.10": 630.1382, "encoder_q-layer.11": 1452.3663, "encoder_q-layer.2": 411.706, "encoder_q-layer.3": 438.4115, "encoder_q-layer.4": 476.0347, "encoder_q-layer.5": 513.4276, "encoder_q-layer.6": 551.3489, "encoder_q-layer.7": 644.8112, "encoder_q-layer.8": 732.5329, "encoder_q-layer.9": 626.4625, "epoch": 0.81, "inbatch_neg_score": 0.7094, "inbatch_pos_score": 1.4131, "learning_rate": 9.388888888888889e-06, "loss": 3.1238, "norm_diff": 0.0579, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1003.3023, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.71, "query_norm": 1.5399, "queue_k_norm": 1.5956, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0884, "sent_len_1": 66.8956, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2775, "stdk": 0.0492, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 83100 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1764, "doc_norm": 1.5971, "encoder_q-embeddings": 500.8457, "encoder_q-layer.0": 324.3179, "encoder_q-layer.1": 350.8561, "encoder_q-layer.10": 592.8919, "encoder_q-layer.11": 1401.2787, "encoder_q-layer.2": 391.9362, "encoder_q-layer.3": 415.4826, "encoder_q-layer.4": 445.6059, "encoder_q-layer.5": 482.9255, "encoder_q-layer.6": 544.5093, "encoder_q-layer.7": 598.9939, "encoder_q-layer.8": 705.1648, "encoder_q-layer.9": 604.4468, "epoch": 0.81, "inbatch_neg_score": 0.7132, "inbatch_pos_score": 1.4092, "learning_rate": 9.333333333333334e-06, "loss": 3.1764, "norm_diff": 0.057, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 964.8494, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7124, "query_norm": 1.54, "queue_k_norm": 1.5943, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0326, "sent_len_1": 66.8134, "sent_max_len_0": 128.0, "sent_max_len_1": 188.905, "stdk": 0.0491, "stdq": 0.046, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 83200 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.1453, "doc_norm": 1.5964, "encoder_q-embeddings": 636.4559, "encoder_q-layer.0": 447.3067, "encoder_q-layer.1": 487.0229, "encoder_q-layer.10": 629.3448, "encoder_q-layer.11": 1431.16, "encoder_q-layer.2": 572.957, "encoder_q-layer.3": 568.6093, "encoder_q-layer.4": 625.3925, "encoder_q-layer.5": 674.2769, "encoder_q-layer.6": 724.1157, "encoder_q-layer.7": 784.3571, "encoder_q-layer.8": 822.6544, "encoder_q-layer.9": 642.6747, "epoch": 0.81, "inbatch_neg_score": 0.7123, "inbatch_pos_score": 1.4082, "learning_rate": 9.277777777777778e-06, "loss": 3.1453, "norm_diff": 0.0508, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1124.2812, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7119, "query_norm": 1.5456, "queue_k_norm": 1.595, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.994, "sent_len_1": 66.822, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0825, "stdk": 0.0491, "stdq": 0.0462, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 83300 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1295, "doc_norm": 1.5989, "encoder_q-embeddings": 842.6699, "encoder_q-layer.0": 589.929, "encoder_q-layer.1": 746.4144, "encoder_q-layer.10": 666.9658, "encoder_q-layer.11": 1443.7682, "encoder_q-layer.2": 754.1427, "encoder_q-layer.3": 784.9452, "encoder_q-layer.4": 750.2047, "encoder_q-layer.5": 871.0732, "encoder_q-layer.6": 792.4036, "encoder_q-layer.7": 841.3145, "encoder_q-layer.8": 897.6783, "encoder_q-layer.9": 746.673, "epoch": 0.81, "inbatch_neg_score": 0.7183, "inbatch_pos_score": 1.4033, "learning_rate": 9.222222222222222e-06, "loss": 3.1295, "norm_diff": 0.0533, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1306.0358, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7168, "query_norm": 1.5457, "queue_k_norm": 1.5977, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.103, "sent_len_1": 66.6731, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7525, "stdk": 0.0492, "stdq": 0.0461, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 83400 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.179, "doc_norm": 1.6005, "encoder_q-embeddings": 866.5565, "encoder_q-layer.0": 574.7142, "encoder_q-layer.1": 615.9963, "encoder_q-layer.10": 638.2462, "encoder_q-layer.11": 1377.2192, "encoder_q-layer.2": 745.267, "encoder_q-layer.3": 833.1776, "encoder_q-layer.4": 910.8803, "encoder_q-layer.5": 985.2453, "encoder_q-layer.6": 988.4469, "encoder_q-layer.7": 1027.1809, "encoder_q-layer.8": 862.3759, "encoder_q-layer.9": 627.1058, "epoch": 0.82, "inbatch_neg_score": 0.7161, "inbatch_pos_score": 1.4297, "learning_rate": 9.166666666666666e-06, "loss": 3.179, "norm_diff": 0.0486, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1340.1969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7158, "query_norm": 1.5519, "queue_k_norm": 1.5972, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9635, "sent_len_1": 66.8313, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9875, "stdk": 0.0492, "stdq": 0.0465, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 83500 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.1366, "doc_norm": 1.5946, "encoder_q-embeddings": 1077.759, "encoder_q-layer.0": 768.4734, "encoder_q-layer.1": 832.3279, "encoder_q-layer.10": 644.4451, "encoder_q-layer.11": 1434.908, "encoder_q-layer.2": 1011.7556, "encoder_q-layer.3": 1125.4667, "encoder_q-layer.4": 1298.5347, "encoder_q-layer.5": 1429.8214, "encoder_q-layer.6": 1349.2075, "encoder_q-layer.7": 1464.7662, "encoder_q-layer.8": 1109.6293, "encoder_q-layer.9": 683.6385, "epoch": 0.82, "inbatch_neg_score": 0.7167, "inbatch_pos_score": 1.418, "learning_rate": 9.111111111111112e-06, "loss": 3.1366, "norm_diff": 0.0523, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1727.4957, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7163, "query_norm": 1.5423, "queue_k_norm": 1.5985, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9223, "sent_len_1": 66.866, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6362, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 83600 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 3.164, "doc_norm": 1.6012, "encoder_q-embeddings": 607.5798, "encoder_q-layer.0": 413.6207, "encoder_q-layer.1": 451.1064, "encoder_q-layer.10": 644.0535, "encoder_q-layer.11": 1360.5404, "encoder_q-layer.2": 538.9041, "encoder_q-layer.3": 606.4691, "encoder_q-layer.4": 646.8145, "encoder_q-layer.5": 677.4081, "encoder_q-layer.6": 703.9799, "encoder_q-layer.7": 769.2367, "encoder_q-layer.8": 838.3353, "encoder_q-layer.9": 634.0035, "epoch": 0.82, "inbatch_neg_score": 0.7172, "inbatch_pos_score": 1.4385, "learning_rate": 9.055555555555556e-06, "loss": 3.164, "norm_diff": 0.0521, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1103.3993, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7168, "query_norm": 1.5492, "queue_k_norm": 1.598, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.2937, "sent_len_1": 66.7254, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5387, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 83700 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.139, "doc_norm": 1.6002, "encoder_q-embeddings": 584.3364, "encoder_q-layer.0": 368.6953, "encoder_q-layer.1": 400.4036, "encoder_q-layer.10": 602.1673, "encoder_q-layer.11": 1444.5146, "encoder_q-layer.2": 452.7757, "encoder_q-layer.3": 483.8344, "encoder_q-layer.4": 507.1364, "encoder_q-layer.5": 544.5378, "encoder_q-layer.6": 565.6555, "encoder_q-layer.7": 653.4116, "encoder_q-layer.8": 708.0535, "encoder_q-layer.9": 598.1591, "epoch": 0.82, "inbatch_neg_score": 0.72, "inbatch_pos_score": 1.3887, "learning_rate": 9e-06, "loss": 3.139, "norm_diff": 0.0686, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1025.6236, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7197, "query_norm": 1.5317, "queue_k_norm": 1.5976, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9398, "sent_len_1": 66.7185, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6337, "stdk": 0.0492, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 83800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1456, "doc_norm": 1.5985, "encoder_q-embeddings": 529.8552, "encoder_q-layer.0": 362.4557, "encoder_q-layer.1": 405.6829, "encoder_q-layer.10": 603.1805, "encoder_q-layer.11": 1366.8771, "encoder_q-layer.2": 454.6916, "encoder_q-layer.3": 537.0372, "encoder_q-layer.4": 539.2597, "encoder_q-layer.5": 541.2477, "encoder_q-layer.6": 562.0154, "encoder_q-layer.7": 632.7623, "encoder_q-layer.8": 672.1741, "encoder_q-layer.9": 612.2368, "epoch": 0.82, "inbatch_neg_score": 0.7186, "inbatch_pos_score": 1.3838, "learning_rate": 8.944444444444444e-06, "loss": 3.1456, "norm_diff": 0.071, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1003.5439, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7197, "query_norm": 1.5275, "queue_k_norm": 1.597, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9981, "sent_len_1": 66.4758, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4112, "stdk": 0.0491, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 83900 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.1624, "doc_norm": 1.5966, "encoder_q-embeddings": 687.4369, "encoder_q-layer.0": 452.4724, "encoder_q-layer.1": 501.2323, "encoder_q-layer.10": 615.1482, "encoder_q-layer.11": 1521.6288, "encoder_q-layer.2": 564.5645, "encoder_q-layer.3": 584.6169, "encoder_q-layer.4": 659.0112, "encoder_q-layer.5": 674.6292, "encoder_q-layer.6": 687.08, "encoder_q-layer.7": 742.1185, "encoder_q-layer.8": 775.5295, "encoder_q-layer.9": 649.0464, "epoch": 0.82, "inbatch_neg_score": 0.7211, "inbatch_pos_score": 1.3955, "learning_rate": 8.88888888888889e-06, "loss": 3.1624, "norm_diff": 0.0658, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1164.1368, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7217, "query_norm": 1.5308, "queue_k_norm": 1.5979, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1195, "sent_len_1": 66.7199, "sent_max_len_0": 128.0, "sent_max_len_1": 190.96, "stdk": 0.049, "stdq": 0.0455, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 84000 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 3.1419, "doc_norm": 1.6016, "encoder_q-embeddings": 3731.8335, "encoder_q-layer.0": 2702.2905, "encoder_q-layer.1": 3414.4497, "encoder_q-layer.10": 603.6252, "encoder_q-layer.11": 1463.6638, "encoder_q-layer.2": 4198.4043, "encoder_q-layer.3": 4348.207, "encoder_q-layer.4": 5152.7202, "encoder_q-layer.5": 4850.0732, "encoder_q-layer.6": 4826.2427, "encoder_q-layer.7": 4564.3564, "encoder_q-layer.8": 2613.7126, "encoder_q-layer.9": 927.2697, "epoch": 0.82, "inbatch_neg_score": 0.7209, "inbatch_pos_score": 1.4141, "learning_rate": 8.833333333333334e-06, "loss": 3.1419, "norm_diff": 0.0713, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5495.3062, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7217, "query_norm": 1.5304, "queue_k_norm": 1.5972, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9666, "sent_len_1": 66.9023, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4225, "stdk": 0.0492, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 84100 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1489, "doc_norm": 1.5979, "encoder_q-embeddings": 681.487, "encoder_q-layer.0": 453.9416, "encoder_q-layer.1": 523.4345, "encoder_q-layer.10": 658.0157, "encoder_q-layer.11": 1447.5847, "encoder_q-layer.2": 637.6132, "encoder_q-layer.3": 705.1503, "encoder_q-layer.4": 783.4854, "encoder_q-layer.5": 846.2351, "encoder_q-layer.6": 843.8357, "encoder_q-layer.7": 870.6875, "encoder_q-layer.8": 841.5008, "encoder_q-layer.9": 659.7213, "epoch": 0.82, "inbatch_neg_score": 0.7238, "inbatch_pos_score": 1.4111, "learning_rate": 8.777777777777778e-06, "loss": 3.1489, "norm_diff": 0.0535, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1218.3007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7231, "query_norm": 1.5444, "queue_k_norm": 1.5986, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0287, "sent_len_1": 66.7097, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7413, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 84200 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.153, "doc_norm": 1.5956, "encoder_q-embeddings": 490.7633, "encoder_q-layer.0": 317.4752, "encoder_q-layer.1": 340.8247, "encoder_q-layer.10": 719.5267, "encoder_q-layer.11": 1579.9561, "encoder_q-layer.2": 378.8933, "encoder_q-layer.3": 403.4693, "encoder_q-layer.4": 424.0768, "encoder_q-layer.5": 440.6971, "encoder_q-layer.6": 502.9122, "encoder_q-layer.7": 589.1382, "encoder_q-layer.8": 721.7319, "encoder_q-layer.9": 685.2681, "epoch": 0.82, "inbatch_neg_score": 0.7229, "inbatch_pos_score": 1.3877, "learning_rate": 8.722222222222224e-06, "loss": 3.153, "norm_diff": 0.0588, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1029.8083, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7222, "query_norm": 1.5367, "queue_k_norm": 1.5989, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9428, "sent_len_1": 66.6355, "sent_max_len_0": 128.0, "sent_max_len_1": 187.4412, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 84300 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 3.1415, "doc_norm": 1.6035, "encoder_q-embeddings": 691.9155, "encoder_q-layer.0": 480.5439, "encoder_q-layer.1": 559.4642, "encoder_q-layer.10": 621.3391, "encoder_q-layer.11": 1385.2007, "encoder_q-layer.2": 662.6675, "encoder_q-layer.3": 768.387, "encoder_q-layer.4": 809.3804, "encoder_q-layer.5": 795.728, "encoder_q-layer.6": 797.606, "encoder_q-layer.7": 792.3213, "encoder_q-layer.8": 722.0623, "encoder_q-layer.9": 605.1194, "epoch": 0.82, "inbatch_neg_score": 0.724, "inbatch_pos_score": 1.4521, "learning_rate": 8.666666666666668e-06, "loss": 3.1415, "norm_diff": 0.054, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1189.4502, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7246, "query_norm": 1.5495, "queue_k_norm": 1.5982, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1075, "sent_len_1": 66.9742, "sent_max_len_0": 128.0, "sent_max_len_1": 191.21, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 84400 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.1544, "doc_norm": 1.6024, "encoder_q-embeddings": 3253.1201, "encoder_q-layer.0": 2198.7556, "encoder_q-layer.1": 2634.019, "encoder_q-layer.10": 633.363, "encoder_q-layer.11": 1461.712, "encoder_q-layer.2": 2952.6055, "encoder_q-layer.3": 3175.4258, "encoder_q-layer.4": 3372.7039, "encoder_q-layer.5": 3328.1389, "encoder_q-layer.6": 3411.1968, "encoder_q-layer.7": 3061.262, "encoder_q-layer.8": 2372.1958, "encoder_q-layer.9": 800.1552, "epoch": 0.82, "inbatch_neg_score": 0.7264, "inbatch_pos_score": 1.3789, "learning_rate": 8.611111111111112e-06, "loss": 3.1544, "norm_diff": 0.0755, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4076.0032, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7266, "query_norm": 1.5269, "queue_k_norm": 1.6005, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0041, "sent_len_1": 67.0007, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7887, "stdk": 0.0492, "stdq": 0.0453, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 84500 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 3.1551, "doc_norm": 1.5968, "encoder_q-embeddings": 556.0569, "encoder_q-layer.0": 384.8043, "encoder_q-layer.1": 430.8293, "encoder_q-layer.10": 593.0539, "encoder_q-layer.11": 1371.9709, "encoder_q-layer.2": 482.5587, "encoder_q-layer.3": 532.9375, "encoder_q-layer.4": 522.3454, "encoder_q-layer.5": 552.2042, "encoder_q-layer.6": 644.8157, "encoder_q-layer.7": 706.3272, "encoder_q-layer.8": 737.3195, "encoder_q-layer.9": 608.5025, "epoch": 0.83, "inbatch_neg_score": 0.7253, "inbatch_pos_score": 1.4307, "learning_rate": 8.555555555555556e-06, "loss": 3.1551, "norm_diff": 0.0589, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1025.2035, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7251, "query_norm": 1.5379, "queue_k_norm": 1.6015, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.046, "sent_len_1": 66.6881, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4825, "stdk": 0.0489, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 84600 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.1563, "doc_norm": 1.6039, "encoder_q-embeddings": 1543.8729, "encoder_q-layer.0": 1028.4294, "encoder_q-layer.1": 1231.637, "encoder_q-layer.10": 1314.9498, "encoder_q-layer.11": 2858.948, "encoder_q-layer.2": 1227.1608, "encoder_q-layer.3": 1206.3489, "encoder_q-layer.4": 1231.4808, "encoder_q-layer.5": 1246.3386, "encoder_q-layer.6": 1305.6313, "encoder_q-layer.7": 1459.3333, "encoder_q-layer.8": 1533.0945, "encoder_q-layer.9": 1288.6835, "epoch": 0.83, "inbatch_neg_score": 0.7256, "inbatch_pos_score": 1.4141, "learning_rate": 8.500000000000002e-06, "loss": 3.1563, "norm_diff": 0.0542, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2304.7666, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7251, "query_norm": 1.5497, "queue_k_norm": 1.6011, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0792, "sent_len_1": 66.8078, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2475, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 84700 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.131, "doc_norm": 1.6018, "encoder_q-embeddings": 4574.9707, "encoder_q-layer.0": 3546.7019, "encoder_q-layer.1": 3964.5254, "encoder_q-layer.10": 1361.9261, "encoder_q-layer.11": 2838.9646, "encoder_q-layer.2": 5080.2549, "encoder_q-layer.3": 5959.5107, "encoder_q-layer.4": 6148.957, "encoder_q-layer.5": 6843.6494, "encoder_q-layer.6": 7309.6299, "encoder_q-layer.7": 6801.7412, "encoder_q-layer.8": 4453.23, "encoder_q-layer.9": 2061.4341, "epoch": 0.83, "inbatch_neg_score": 0.7253, "inbatch_pos_score": 1.3672, "learning_rate": 8.444444444444446e-06, "loss": 3.131, "norm_diff": 0.0791, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7492.1397, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7251, "query_norm": 1.5227, "queue_k_norm": 1.6017, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8694, "sent_len_1": 66.5674, "sent_max_len_0": 128.0, "sent_max_len_1": 187.5925, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 84800 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.1282, "doc_norm": 1.5967, "encoder_q-embeddings": 1048.3694, "encoder_q-layer.0": 695.0166, "encoder_q-layer.1": 764.4494, "encoder_q-layer.10": 1357.9594, "encoder_q-layer.11": 2991.2676, "encoder_q-layer.2": 842.9973, "encoder_q-layer.3": 911.1052, "encoder_q-layer.4": 943.0338, "encoder_q-layer.5": 1012.1803, "encoder_q-layer.6": 1116.3276, "encoder_q-layer.7": 1312.1914, "encoder_q-layer.8": 1422.7733, "encoder_q-layer.9": 1260.2322, "epoch": 0.83, "inbatch_neg_score": 0.7246, "inbatch_pos_score": 1.3965, "learning_rate": 8.38888888888889e-06, "loss": 3.1282, "norm_diff": 0.0598, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2056.5306, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7241, "query_norm": 1.5369, "queue_k_norm": 1.5989, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9999, "sent_len_1": 66.7752, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3738, "stdk": 0.0489, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 84900 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.1382, "doc_norm": 1.602, "encoder_q-embeddings": 1000.2576, "encoder_q-layer.0": 679.7639, "encoder_q-layer.1": 776.4047, "encoder_q-layer.10": 1191.5157, "encoder_q-layer.11": 2829.4346, "encoder_q-layer.2": 953.0549, "encoder_q-layer.3": 976.9055, "encoder_q-layer.4": 1086.0452, "encoder_q-layer.5": 1072.3759, "encoder_q-layer.6": 1189.3647, "encoder_q-layer.7": 1365.1505, "encoder_q-layer.8": 1545.2611, "encoder_q-layer.9": 1232.9398, "epoch": 0.83, "inbatch_neg_score": 0.7248, "inbatch_pos_score": 1.4258, "learning_rate": 8.333333333333334e-06, "loss": 3.1382, "norm_diff": 0.0686, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2046.3802, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7251, "query_norm": 1.5334, "queue_k_norm": 1.6017, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.002, "sent_len_1": 66.784, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9725, "stdk": 0.0491, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 85000 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1567, "doc_norm": 1.5988, "encoder_q-embeddings": 3162.4412, "encoder_q-layer.0": 2152.3279, "encoder_q-layer.1": 2349.9121, "encoder_q-layer.10": 1247.0021, "encoder_q-layer.11": 2890.3755, "encoder_q-layer.2": 2775.7966, "encoder_q-layer.3": 2781.542, "encoder_q-layer.4": 2976.0833, "encoder_q-layer.5": 3188.4819, "encoder_q-layer.6": 2511.6277, "encoder_q-layer.7": 2752.2085, "encoder_q-layer.8": 2360.4963, "encoder_q-layer.9": 1362.9579, "epoch": 0.83, "inbatch_neg_score": 0.7265, "inbatch_pos_score": 1.4121, "learning_rate": 8.27777777777778e-06, "loss": 3.1567, "norm_diff": 0.0699, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3988.1197, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7271, "query_norm": 1.5289, "queue_k_norm": 1.601, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9367, "sent_len_1": 66.8346, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5137, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85100 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1397, "doc_norm": 1.6028, "encoder_q-embeddings": 889.1048, "encoder_q-layer.0": 582.3555, "encoder_q-layer.1": 620.5801, "encoder_q-layer.10": 1275.09, "encoder_q-layer.11": 2905.2439, "encoder_q-layer.2": 726.3112, "encoder_q-layer.3": 767.9339, "encoder_q-layer.4": 802.9529, "encoder_q-layer.5": 845.6888, "encoder_q-layer.6": 1012.9293, "encoder_q-layer.7": 1177.4303, "encoder_q-layer.8": 1408.6107, "encoder_q-layer.9": 1269.1923, "epoch": 0.83, "inbatch_neg_score": 0.7273, "inbatch_pos_score": 1.4287, "learning_rate": 8.222222222222223e-06, "loss": 3.1397, "norm_diff": 0.056, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1918.7955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7275, "query_norm": 1.5468, "queue_k_norm": 1.6014, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9942, "sent_len_1": 66.8168, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6438, "stdk": 0.0491, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85200 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1592, "doc_norm": 1.5969, "encoder_q-embeddings": 1285.7178, "encoder_q-layer.0": 818.681, "encoder_q-layer.1": 865.8196, "encoder_q-layer.10": 1246.8296, "encoder_q-layer.11": 2745.4724, "encoder_q-layer.2": 977.0714, "encoder_q-layer.3": 1034.3497, "encoder_q-layer.4": 1130.7074, "encoder_q-layer.5": 1208.6165, "encoder_q-layer.6": 1169.6394, "encoder_q-layer.7": 1234.1489, "encoder_q-layer.8": 1359.6877, "encoder_q-layer.9": 1200.1471, "epoch": 0.83, "inbatch_neg_score": 0.7276, "inbatch_pos_score": 1.417, "learning_rate": 8.166666666666668e-06, "loss": 3.1592, "norm_diff": 0.0584, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2067.1677, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.728, "query_norm": 1.5385, "queue_k_norm": 1.6024, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9571, "sent_len_1": 66.6823, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1413, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 85300 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1585, "doc_norm": 1.5986, "encoder_q-embeddings": 1364.6509, "encoder_q-layer.0": 898.8739, "encoder_q-layer.1": 970.1165, "encoder_q-layer.10": 1196.5603, "encoder_q-layer.11": 2751.717, "encoder_q-layer.2": 1025.3386, "encoder_q-layer.3": 1137.9042, "encoder_q-layer.4": 1291.9949, "encoder_q-layer.5": 1297.7759, "encoder_q-layer.6": 1276.6163, "encoder_q-layer.7": 1374.432, "encoder_q-layer.8": 1469.2733, "encoder_q-layer.9": 1227.4169, "epoch": 0.83, "inbatch_neg_score": 0.7313, "inbatch_pos_score": 1.4072, "learning_rate": 8.111111111111112e-06, "loss": 3.1585, "norm_diff": 0.0726, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2166.1527, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7305, "query_norm": 1.526, "queue_k_norm": 1.6017, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9035, "sent_len_1": 66.7035, "sent_max_len_0": 128.0, "sent_max_len_1": 188.795, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85400 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.1515, "doc_norm": 1.6036, "encoder_q-embeddings": 2079.8562, "encoder_q-layer.0": 1334.6339, "encoder_q-layer.1": 1551.1418, "encoder_q-layer.10": 1339.9177, "encoder_q-layer.11": 3042.4883, "encoder_q-layer.2": 1814.6854, "encoder_q-layer.3": 2048.2815, "encoder_q-layer.4": 2345.9646, "encoder_q-layer.5": 2693.9785, "encoder_q-layer.6": 3010.7546, "encoder_q-layer.7": 3302.9888, "encoder_q-layer.8": 2729.5837, "encoder_q-layer.9": 1526.1007, "epoch": 0.83, "inbatch_neg_score": 0.7309, "inbatch_pos_score": 1.4014, "learning_rate": 8.055555555555557e-06, "loss": 3.1515, "norm_diff": 0.0741, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3521.0177, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7319, "query_norm": 1.5295, "queue_k_norm": 1.6022, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9643, "sent_len_1": 66.799, "sent_max_len_0": 128.0, "sent_max_len_1": 187.4338, "stdk": 0.0491, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85500 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 3.1468, "doc_norm": 1.6094, "encoder_q-embeddings": 4573.3838, "encoder_q-layer.0": 3278.594, "encoder_q-layer.1": 3942.1924, "encoder_q-layer.10": 1353.2748, "encoder_q-layer.11": 2823.8647, "encoder_q-layer.2": 5481.6953, "encoder_q-layer.3": 6413.5415, "encoder_q-layer.4": 6130.5049, "encoder_q-layer.5": 5167.4219, "encoder_q-layer.6": 3894.2197, "encoder_q-layer.7": 3471.3511, "encoder_q-layer.8": 2912.1863, "encoder_q-layer.9": 1394.115, "epoch": 0.84, "inbatch_neg_score": 0.732, "inbatch_pos_score": 1.4248, "learning_rate": 8.000000000000001e-06, "loss": 3.1468, "norm_diff": 0.0694, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6411.4491, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7319, "query_norm": 1.54, "queue_k_norm": 1.6033, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8493, "sent_len_1": 66.9353, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4975, "stdk": 0.0493, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 85600 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.1372, "doc_norm": 1.6069, "encoder_q-embeddings": 904.0558, "encoder_q-layer.0": 568.121, "encoder_q-layer.1": 593.3531, "encoder_q-layer.10": 1178.7076, "encoder_q-layer.11": 2774.7124, "encoder_q-layer.2": 688.4644, "encoder_q-layer.3": 736.2881, "encoder_q-layer.4": 772.3284, "encoder_q-layer.5": 815.9851, "encoder_q-layer.6": 946.6337, "encoder_q-layer.7": 1118.9174, "encoder_q-layer.8": 1313.9186, "encoder_q-layer.9": 1167.4999, "epoch": 0.84, "inbatch_neg_score": 0.7341, "inbatch_pos_score": 1.4141, "learning_rate": 7.944444444444445e-06, "loss": 3.1372, "norm_diff": 0.0697, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1871.3746, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7339, "query_norm": 1.5372, "queue_k_norm": 1.6034, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9562, "sent_len_1": 66.7299, "sent_max_len_0": 128.0, "sent_max_len_1": 189.66, "stdk": 0.0492, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 85700 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1416, "doc_norm": 1.5957, "encoder_q-embeddings": 990.4097, "encoder_q-layer.0": 649.7525, "encoder_q-layer.1": 677.3521, "encoder_q-layer.10": 1219.6522, "encoder_q-layer.11": 2874.073, "encoder_q-layer.2": 785.6522, "encoder_q-layer.3": 807.7136, "encoder_q-layer.4": 927.0027, "encoder_q-layer.5": 971.7631, "encoder_q-layer.6": 1128.814, "encoder_q-layer.7": 1328.9222, "encoder_q-layer.8": 1391.2168, "encoder_q-layer.9": 1248.129, "epoch": 0.84, "inbatch_neg_score": 0.737, "inbatch_pos_score": 1.3955, "learning_rate": 7.88888888888889e-06, "loss": 3.1416, "norm_diff": 0.0705, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1976.9629, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7349, "query_norm": 1.5253, "queue_k_norm": 1.6039, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.2476, "sent_len_1": 66.8576, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0125, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 85800 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.146, "doc_norm": 1.6049, "encoder_q-embeddings": 1252.8145, "encoder_q-layer.0": 799.9335, "encoder_q-layer.1": 857.5286, "encoder_q-layer.10": 1288.0626, "encoder_q-layer.11": 2983.6035, "encoder_q-layer.2": 937.4642, "encoder_q-layer.3": 985.9155, "encoder_q-layer.4": 1121.1606, "encoder_q-layer.5": 1193.4377, "encoder_q-layer.6": 1313.0557, "encoder_q-layer.7": 1419.0109, "encoder_q-layer.8": 1521.5708, "encoder_q-layer.9": 1278.7117, "epoch": 0.84, "inbatch_neg_score": 0.7318, "inbatch_pos_score": 1.3916, "learning_rate": 7.833333333333333e-06, "loss": 3.146, "norm_diff": 0.0758, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2191.1509, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7324, "query_norm": 1.5291, "queue_k_norm": 1.6017, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7924, "sent_len_1": 66.5727, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8088, "stdk": 0.0491, "stdq": 0.0454, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 85900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1046, "doc_norm": 1.6007, "encoder_q-embeddings": 981.351, "encoder_q-layer.0": 633.0244, "encoder_q-layer.1": 674.9125, "encoder_q-layer.10": 1354.5758, "encoder_q-layer.11": 2913.9653, "encoder_q-layer.2": 765.5729, "encoder_q-layer.3": 834.6436, "encoder_q-layer.4": 882.183, "encoder_q-layer.5": 897.3538, "encoder_q-layer.6": 1051.8455, "encoder_q-layer.7": 1161.2206, "encoder_q-layer.8": 1411.8285, "encoder_q-layer.9": 1236.9182, "epoch": 0.84, "inbatch_neg_score": 0.7329, "inbatch_pos_score": 1.4258, "learning_rate": 7.777777777777777e-06, "loss": 3.1046, "norm_diff": 0.0615, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1992.8727, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7334, "query_norm": 1.5392, "queue_k_norm": 1.6056, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9925, "sent_len_1": 66.7666, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6287, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 86000 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.1393, "doc_norm": 1.6073, "encoder_q-embeddings": 1935.1195, "encoder_q-layer.0": 1400.1554, "encoder_q-layer.1": 1413.1868, "encoder_q-layer.10": 1166.6813, "encoder_q-layer.11": 2772.5769, "encoder_q-layer.2": 1591.1808, "encoder_q-layer.3": 1695.1853, "encoder_q-layer.4": 1889.0079, "encoder_q-layer.5": 1934.7917, "encoder_q-layer.6": 2065.9685, "encoder_q-layer.7": 2448.9919, "encoder_q-layer.8": 2395.1282, "encoder_q-layer.9": 1683.3589, "epoch": 0.84, "inbatch_neg_score": 0.733, "inbatch_pos_score": 1.4219, "learning_rate": 7.722222222222223e-06, "loss": 3.1393, "norm_diff": 0.0665, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3035.562, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7329, "query_norm": 1.5408, "queue_k_norm": 1.6028, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.1061, "sent_len_1": 66.7861, "sent_max_len_0": 128.0, "sent_max_len_1": 187.4375, "stdk": 0.0492, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 86100 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.1442, "doc_norm": 1.6042, "encoder_q-embeddings": 7605.2085, "encoder_q-layer.0": 4997.3813, "encoder_q-layer.1": 5407.3901, "encoder_q-layer.10": 1403.3583, "encoder_q-layer.11": 3044.7278, "encoder_q-layer.2": 6623.8223, "encoder_q-layer.3": 6839.3921, "encoder_q-layer.4": 7684.2559, "encoder_q-layer.5": 7646.2261, "encoder_q-layer.6": 7084.8569, "encoder_q-layer.7": 6477.8804, "encoder_q-layer.8": 5013.4902, "encoder_q-layer.9": 1746.9332, "epoch": 0.84, "inbatch_neg_score": 0.7316, "inbatch_pos_score": 1.375, "learning_rate": 7.666666666666667e-06, "loss": 3.1442, "norm_diff": 0.0734, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9196.6719, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7314, "query_norm": 1.5309, "queue_k_norm": 1.6035, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0736, "sent_len_1": 66.6819, "sent_max_len_0": 128.0, "sent_max_len_1": 189.41, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 86200 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1354, "doc_norm": 1.601, "encoder_q-embeddings": 590.8639, "encoder_q-layer.0": 387.4693, "encoder_q-layer.1": 433.4784, "encoder_q-layer.10": 585.4716, "encoder_q-layer.11": 1379.1672, "encoder_q-layer.2": 498.3334, "encoder_q-layer.3": 533.4316, "encoder_q-layer.4": 577.3383, "encoder_q-layer.5": 623.726, "encoder_q-layer.6": 655.6407, "encoder_q-layer.7": 753.723, "encoder_q-layer.8": 750.6927, "encoder_q-layer.9": 616.2831, "epoch": 0.84, "inbatch_neg_score": 0.7336, "inbatch_pos_score": 1.4336, "learning_rate": 7.611111111111112e-06, "loss": 3.1354, "norm_diff": 0.0469, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1060.9955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7334, "query_norm": 1.5541, "queue_k_norm": 1.6046, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0191, "sent_len_1": 66.6135, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5613, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 86300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1295, "doc_norm": 1.5977, "encoder_q-embeddings": 509.8725, "encoder_q-layer.0": 338.8928, "encoder_q-layer.1": 361.049, "encoder_q-layer.10": 612.2377, "encoder_q-layer.11": 1426.6755, "encoder_q-layer.2": 400.5798, "encoder_q-layer.3": 412.2474, "encoder_q-layer.4": 463.5526, "encoder_q-layer.5": 468.6347, "encoder_q-layer.6": 569.4904, "encoder_q-layer.7": 629.0486, "encoder_q-layer.8": 709.0264, "encoder_q-layer.9": 633.4814, "epoch": 0.84, "inbatch_neg_score": 0.7355, "inbatch_pos_score": 1.4053, "learning_rate": 7.555555555555556e-06, "loss": 3.1295, "norm_diff": 0.0578, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 986.0093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7339, "query_norm": 1.5399, "queue_k_norm": 1.6045, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1531, "sent_len_1": 66.6727, "sent_max_len_0": 128.0, "sent_max_len_1": 190.49, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 86400 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1357, "doc_norm": 1.6043, "encoder_q-embeddings": 805.4828, "encoder_q-layer.0": 555.5412, "encoder_q-layer.1": 631.5233, "encoder_q-layer.10": 676.4863, "encoder_q-layer.11": 1441.1118, "encoder_q-layer.2": 717.6765, "encoder_q-layer.3": 735.3716, "encoder_q-layer.4": 758.8245, "encoder_q-layer.5": 664.8735, "encoder_q-layer.6": 661.0291, "encoder_q-layer.7": 714.8104, "encoder_q-layer.8": 815.4595, "encoder_q-layer.9": 667.0645, "epoch": 0.84, "inbatch_neg_score": 0.7334, "inbatch_pos_score": 1.4062, "learning_rate": 7.5e-06, "loss": 3.1357, "norm_diff": 0.071, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1214.6327, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7329, "query_norm": 1.5334, "queue_k_norm": 1.6068, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0352, "sent_len_1": 66.7954, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7788, "stdk": 0.049, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 86500 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1535, "doc_norm": 1.6114, "encoder_q-embeddings": 489.2008, "encoder_q-layer.0": 311.397, "encoder_q-layer.1": 331.0533, "encoder_q-layer.10": 718.8423, "encoder_q-layer.11": 1579.0376, "encoder_q-layer.2": 381.2522, "encoder_q-layer.3": 399.7148, "encoder_q-layer.4": 425.2156, "encoder_q-layer.5": 457.174, "encoder_q-layer.6": 517.2087, "encoder_q-layer.7": 639.0128, "encoder_q-layer.8": 724.2213, "encoder_q-layer.9": 674.0844, "epoch": 0.85, "inbatch_neg_score": 0.7349, "inbatch_pos_score": 1.418, "learning_rate": 7.444444444444444e-06, "loss": 3.1535, "norm_diff": 0.0728, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1040.1821, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7349, "query_norm": 1.5385, "queue_k_norm": 1.6045, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.822, "sent_len_1": 66.6401, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2262, "stdk": 0.0493, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 86600 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.1599, "doc_norm": 1.6019, "encoder_q-embeddings": 1172.1667, "encoder_q-layer.0": 845.6879, "encoder_q-layer.1": 898.5078, "encoder_q-layer.10": 701.1465, "encoder_q-layer.11": 1480.2014, "encoder_q-layer.2": 1000.3802, "encoder_q-layer.3": 957.5507, "encoder_q-layer.4": 903.1422, "encoder_q-layer.5": 717.8761, "encoder_q-layer.6": 784.0015, "encoder_q-layer.7": 842.14, "encoder_q-layer.8": 1002.8482, "encoder_q-layer.9": 753.6838, "epoch": 0.85, "inbatch_neg_score": 0.7361, "inbatch_pos_score": 1.3945, "learning_rate": 7.38888888888889e-06, "loss": 3.1599, "norm_diff": 0.063, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1481.346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7354, "query_norm": 1.5388, "queue_k_norm": 1.6055, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.05, "sent_len_1": 66.9407, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3963, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 86700 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.1255, "doc_norm": 1.6102, "encoder_q-embeddings": 1150.7521, "encoder_q-layer.0": 829.0579, "encoder_q-layer.1": 1123.0554, "encoder_q-layer.10": 648.4327, "encoder_q-layer.11": 1456.2189, "encoder_q-layer.2": 1261.7789, "encoder_q-layer.3": 1400.9304, "encoder_q-layer.4": 1429.0868, "encoder_q-layer.5": 1311.3599, "encoder_q-layer.6": 1349.9885, "encoder_q-layer.7": 1128.6432, "encoder_q-layer.8": 981.3893, "encoder_q-layer.9": 665.5588, "epoch": 0.85, "inbatch_neg_score": 0.7377, "inbatch_pos_score": 1.4277, "learning_rate": 7.333333333333334e-06, "loss": 3.1255, "norm_diff": 0.0767, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1768.8606, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7373, "query_norm": 1.5335, "queue_k_norm": 1.6065, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9824, "sent_len_1": 66.7072, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6175, "stdk": 0.0493, "stdq": 0.0455, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 86800 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 3.1291, "doc_norm": 1.6133, "encoder_q-embeddings": 508.1518, "encoder_q-layer.0": 334.6613, "encoder_q-layer.1": 351.3994, "encoder_q-layer.10": 616.761, "encoder_q-layer.11": 1414.5791, "encoder_q-layer.2": 396.5141, "encoder_q-layer.3": 407.5815, "encoder_q-layer.4": 440.9627, "encoder_q-layer.5": 445.119, "encoder_q-layer.6": 502.4234, "encoder_q-layer.7": 579.3381, "encoder_q-layer.8": 671.99, "encoder_q-layer.9": 611.6899, "epoch": 0.85, "inbatch_neg_score": 0.7347, "inbatch_pos_score": 1.459, "learning_rate": 7.277777777777778e-06, "loss": 3.1291, "norm_diff": 0.0686, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 972.9452, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7358, "query_norm": 1.5447, "queue_k_norm": 1.6072, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.2136, "sent_len_1": 66.9535, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3013, "stdk": 0.0494, "stdq": 0.0461, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 86900 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.1552, "doc_norm": 1.6033, "encoder_q-embeddings": 680.1686, "encoder_q-layer.0": 447.6671, "encoder_q-layer.1": 484.7368, "encoder_q-layer.10": 675.102, "encoder_q-layer.11": 1481.6161, "encoder_q-layer.2": 564.5352, "encoder_q-layer.3": 604.0179, "encoder_q-layer.4": 696.072, "encoder_q-layer.5": 695.9638, "encoder_q-layer.6": 782.225, "encoder_q-layer.7": 838.2415, "encoder_q-layer.8": 849.0966, "encoder_q-layer.9": 633.645, "epoch": 0.85, "inbatch_neg_score": 0.7381, "inbatch_pos_score": 1.3896, "learning_rate": 7.222222222222222e-06, "loss": 3.1552, "norm_diff": 0.0845, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1176.6001, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7368, "query_norm": 1.5189, "queue_k_norm": 1.606, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1587, "sent_len_1": 67.0213, "sent_max_len_0": 128.0, "sent_max_len_1": 190.47, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 87000 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.152, "doc_norm": 1.6075, "encoder_q-embeddings": 801.3821, "encoder_q-layer.0": 546.8835, "encoder_q-layer.1": 674.1807, "encoder_q-layer.10": 619.283, "encoder_q-layer.11": 1441.53, "encoder_q-layer.2": 742.942, "encoder_q-layer.3": 864.0368, "encoder_q-layer.4": 1026.2726, "encoder_q-layer.5": 1014.359, "encoder_q-layer.6": 1019.268, "encoder_q-layer.7": 897.3403, "encoder_q-layer.8": 804.9567, "encoder_q-layer.9": 641.6282, "epoch": 0.85, "inbatch_neg_score": 0.7352, "inbatch_pos_score": 1.4141, "learning_rate": 7.166666666666667e-06, "loss": 3.152, "norm_diff": 0.0731, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1336.9772, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7358, "query_norm": 1.5344, "queue_k_norm": 1.6068, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9336, "sent_len_1": 66.7137, "sent_max_len_0": 128.0, "sent_max_len_1": 188.68, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 87100 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.1303, "doc_norm": 1.608, "encoder_q-embeddings": 1520.2295, "encoder_q-layer.0": 1055.1656, "encoder_q-layer.1": 1116.0465, "encoder_q-layer.10": 637.6906, "encoder_q-layer.11": 1426.7794, "encoder_q-layer.2": 1360.5637, "encoder_q-layer.3": 1366.1183, "encoder_q-layer.4": 1508.4449, "encoder_q-layer.5": 1618.8055, "encoder_q-layer.6": 1601.4653, "encoder_q-layer.7": 1480.2795, "encoder_q-layer.8": 1054.395, "encoder_q-layer.9": 633.6291, "epoch": 0.85, "inbatch_neg_score": 0.7382, "inbatch_pos_score": 1.4512, "learning_rate": 7.111111111111112e-06, "loss": 3.1303, "norm_diff": 0.0655, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2016.3317, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7383, "query_norm": 1.5425, "queue_k_norm": 1.6043, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9163, "sent_len_1": 66.7894, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6188, "stdk": 0.0491, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 87200 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.1396, "doc_norm": 1.6032, "encoder_q-embeddings": 495.5591, "encoder_q-layer.0": 339.437, "encoder_q-layer.1": 366.5645, "encoder_q-layer.10": 651.4262, "encoder_q-layer.11": 1530.5125, "encoder_q-layer.2": 403.4842, "encoder_q-layer.3": 398.283, "encoder_q-layer.4": 413.7554, "encoder_q-layer.5": 420.3734, "encoder_q-layer.6": 486.7055, "encoder_q-layer.7": 592.6597, "encoder_q-layer.8": 716.5886, "encoder_q-layer.9": 630.8024, "epoch": 0.85, "inbatch_neg_score": 0.7401, "inbatch_pos_score": 1.4033, "learning_rate": 7.055555555555556e-06, "loss": 3.1396, "norm_diff": 0.0649, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1019.4036, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7397, "query_norm": 1.5383, "queue_k_norm": 1.6076, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.945, "sent_len_1": 66.6747, "sent_max_len_0": 128.0, "sent_max_len_1": 189.31, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 87300 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1409, "doc_norm": 1.6047, "encoder_q-embeddings": 580.7345, "encoder_q-layer.0": 394.1264, "encoder_q-layer.1": 448.5361, "encoder_q-layer.10": 672.0183, "encoder_q-layer.11": 1507.646, "encoder_q-layer.2": 604.5476, "encoder_q-layer.3": 666.7718, "encoder_q-layer.4": 740.227, "encoder_q-layer.5": 715.7291, "encoder_q-layer.6": 750.6434, "encoder_q-layer.7": 801.6014, "encoder_q-layer.8": 858.2729, "encoder_q-layer.9": 703.2618, "epoch": 0.85, "inbatch_neg_score": 0.7397, "inbatch_pos_score": 1.4238, "learning_rate": 7.000000000000001e-06, "loss": 3.1409, "norm_diff": 0.0595, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1189.7611, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7397, "query_norm": 1.5451, "queue_k_norm": 1.6081, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8833, "sent_len_1": 66.7482, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3775, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 87400 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1435, "doc_norm": 1.6147, "encoder_q-embeddings": 605.9847, "encoder_q-layer.0": 397.5941, "encoder_q-layer.1": 456.6804, "encoder_q-layer.10": 678.9271, "encoder_q-layer.11": 1561.4218, "encoder_q-layer.2": 530.9473, "encoder_q-layer.3": 552.7935, "encoder_q-layer.4": 587.2938, "encoder_q-layer.5": 606.896, "encoder_q-layer.6": 665.7611, "encoder_q-layer.7": 759.6003, "encoder_q-layer.8": 783.7995, "encoder_q-layer.9": 641.7421, "epoch": 0.85, "inbatch_neg_score": 0.7389, "inbatch_pos_score": 1.3906, "learning_rate": 6.944444444444445e-06, "loss": 3.1435, "norm_diff": 0.097, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1146.372, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7397, "query_norm": 1.5177, "queue_k_norm": 1.6091, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8881, "sent_len_1": 66.6261, "sent_max_len_0": 128.0, "sent_max_len_1": 191.28, "stdk": 0.0495, "stdq": 0.0449, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 87500 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1658, "doc_norm": 1.6033, "encoder_q-embeddings": 3105.9534, "encoder_q-layer.0": 2035.3473, "encoder_q-layer.1": 2554.6204, "encoder_q-layer.10": 630.8448, "encoder_q-layer.11": 1465.2303, "encoder_q-layer.2": 3193.5984, "encoder_q-layer.3": 3526.5605, "encoder_q-layer.4": 4581.519, "encoder_q-layer.5": 5148.3018, "encoder_q-layer.6": 5012.873, "encoder_q-layer.7": 4627.4595, "encoder_q-layer.8": 3371.6694, "encoder_q-layer.9": 1178.3876, "epoch": 0.86, "inbatch_neg_score": 0.741, "inbatch_pos_score": 1.4297, "learning_rate": 6.888888888888889e-06, "loss": 3.1658, "norm_diff": 0.0586, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5119.7419, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7402, "query_norm": 1.5446, "queue_k_norm": 1.6072, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0547, "sent_len_1": 66.6269, "sent_max_len_0": 128.0, "sent_max_len_1": 190.26, "stdk": 0.0489, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 87600 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 3.1437, "doc_norm": 1.608, "encoder_q-embeddings": 471.7063, "encoder_q-layer.0": 328.3268, "encoder_q-layer.1": 351.9655, "encoder_q-layer.10": 596.0638, "encoder_q-layer.11": 1397.3508, "encoder_q-layer.2": 404.6376, "encoder_q-layer.3": 437.3637, "encoder_q-layer.4": 463.6908, "encoder_q-layer.5": 485.0525, "encoder_q-layer.6": 530.1406, "encoder_q-layer.7": 610.8171, "encoder_q-layer.8": 666.4631, "encoder_q-layer.9": 580.4933, "epoch": 0.86, "inbatch_neg_score": 0.7429, "inbatch_pos_score": 1.4512, "learning_rate": 6.833333333333333e-06, "loss": 3.1437, "norm_diff": 0.072, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 969.7448, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7417, "query_norm": 1.536, "queue_k_norm": 1.6081, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7977, "sent_len_1": 66.6602, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2375, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 87700 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.1277, "doc_norm": 1.6077, "encoder_q-embeddings": 579.3748, "encoder_q-layer.0": 404.4576, "encoder_q-layer.1": 427.475, "encoder_q-layer.10": 643.4861, "encoder_q-layer.11": 1508.744, "encoder_q-layer.2": 481.0397, "encoder_q-layer.3": 499.3064, "encoder_q-layer.4": 550.4728, "encoder_q-layer.5": 557.6143, "encoder_q-layer.6": 635.332, "encoder_q-layer.7": 699.4645, "encoder_q-layer.8": 757.9754, "encoder_q-layer.9": 629.8373, "epoch": 0.86, "inbatch_neg_score": 0.7463, "inbatch_pos_score": 1.4189, "learning_rate": 6.777777777777779e-06, "loss": 3.1277, "norm_diff": 0.0812, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1091.4426, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7456, "query_norm": 1.5265, "queue_k_norm": 1.6084, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.065, "sent_len_1": 66.7875, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0575, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 87800 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.1199, "doc_norm": 1.6046, "encoder_q-embeddings": 472.5318, "encoder_q-layer.0": 325.5838, "encoder_q-layer.1": 353.7538, "encoder_q-layer.10": 621.599, "encoder_q-layer.11": 1456.1063, "encoder_q-layer.2": 402.6558, "encoder_q-layer.3": 432.1302, "encoder_q-layer.4": 470.6508, "encoder_q-layer.5": 470.5816, "encoder_q-layer.6": 564.9692, "encoder_q-layer.7": 650.0301, "encoder_q-layer.8": 718.6084, "encoder_q-layer.9": 595.4985, "epoch": 0.86, "inbatch_neg_score": 0.7441, "inbatch_pos_score": 1.4092, "learning_rate": 6.722222222222223e-06, "loss": 3.1199, "norm_diff": 0.069, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 988.6872, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7441, "query_norm": 1.5357, "queue_k_norm": 1.6081, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.975, "sent_len_1": 66.7711, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1113, "stdk": 0.049, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 87900 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1257, "doc_norm": 1.6078, "encoder_q-embeddings": 878.1038, "encoder_q-layer.0": 630.6515, "encoder_q-layer.1": 695.8009, "encoder_q-layer.10": 683.9221, "encoder_q-layer.11": 1508.5696, "encoder_q-layer.2": 836.981, "encoder_q-layer.3": 930.632, "encoder_q-layer.4": 1109.8821, "encoder_q-layer.5": 1006.9022, "encoder_q-layer.6": 1138.4294, "encoder_q-layer.7": 1249.5524, "encoder_q-layer.8": 1090.1233, "encoder_q-layer.9": 656.6712, "epoch": 0.86, "inbatch_neg_score": 0.7462, "inbatch_pos_score": 1.4141, "learning_rate": 6.666666666666667e-06, "loss": 3.1257, "norm_diff": 0.0773, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1528.3578, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7466, "query_norm": 1.5305, "queue_k_norm": 1.6073, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.6635, "sent_len_1": 66.6499, "sent_max_len_0": 128.0, "sent_max_len_1": 187.995, "stdk": 0.0491, "stdq": 0.0453, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 88000 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.1602, "doc_norm": 1.608, "encoder_q-embeddings": 470.7149, "encoder_q-layer.0": 311.1802, "encoder_q-layer.1": 340.7619, "encoder_q-layer.10": 608.9062, "encoder_q-layer.11": 1399.2073, "encoder_q-layer.2": 384.3034, "encoder_q-layer.3": 398.1575, "encoder_q-layer.4": 435.1395, "encoder_q-layer.5": 434.7696, "encoder_q-layer.6": 510.9488, "encoder_q-layer.7": 582.2281, "encoder_q-layer.8": 681.0874, "encoder_q-layer.9": 594.2287, "epoch": 0.86, "inbatch_neg_score": 0.7455, "inbatch_pos_score": 1.4443, "learning_rate": 6.611111111111111e-06, "loss": 3.1602, "norm_diff": 0.0657, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 960.214, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7451, "query_norm": 1.5422, "queue_k_norm": 1.6076, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8163, "sent_len_1": 66.5858, "sent_max_len_0": 128.0, "sent_max_len_1": 188.485, "stdk": 0.0491, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 88100 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.1618, "doc_norm": 1.6065, "encoder_q-embeddings": 474.5019, "encoder_q-layer.0": 311.8459, "encoder_q-layer.1": 327.2066, "encoder_q-layer.10": 718.2875, "encoder_q-layer.11": 1500.6125, "encoder_q-layer.2": 368.0543, "encoder_q-layer.3": 388.2578, "encoder_q-layer.4": 414.6167, "encoder_q-layer.5": 445.9731, "encoder_q-layer.6": 518.7112, "encoder_q-layer.7": 593.9631, "encoder_q-layer.8": 685.4482, "encoder_q-layer.9": 629.9381, "epoch": 0.86, "inbatch_neg_score": 0.7456, "inbatch_pos_score": 1.4277, "learning_rate": 6.555555555555556e-06, "loss": 3.1618, "norm_diff": 0.063, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 993.1077, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7456, "query_norm": 1.5434, "queue_k_norm": 1.6089, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.7974, "sent_len_1": 66.7207, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7575, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 88200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.1107, "doc_norm": 1.6066, "encoder_q-embeddings": 1079.6151, "encoder_q-layer.0": 732.6586, "encoder_q-layer.1": 814.6108, "encoder_q-layer.10": 1432.5499, "encoder_q-layer.11": 3075.9556, "encoder_q-layer.2": 948.8271, "encoder_q-layer.3": 1054.2454, "encoder_q-layer.4": 1193.5258, "encoder_q-layer.5": 1226.4131, "encoder_q-layer.6": 1396.0271, "encoder_q-layer.7": 1684.2476, "encoder_q-layer.8": 1575.0068, "encoder_q-layer.9": 1300.2169, "epoch": 0.86, "inbatch_neg_score": 0.7465, "inbatch_pos_score": 1.4033, "learning_rate": 6.5000000000000004e-06, "loss": 3.1107, "norm_diff": 0.0669, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2244.2018, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7471, "query_norm": 1.5396, "queue_k_norm": 1.6078, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.6599, "sent_len_1": 66.6981, "sent_max_len_0": 128.0, "sent_max_len_1": 189.695, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 88300 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 3.1499, "doc_norm": 1.6073, "encoder_q-embeddings": 2044.0859, "encoder_q-layer.0": 1411.9917, "encoder_q-layer.1": 1934.3342, "encoder_q-layer.10": 1219.4608, "encoder_q-layer.11": 2774.4194, "encoder_q-layer.2": 2247.5806, "encoder_q-layer.3": 2541.4116, "encoder_q-layer.4": 3261.8398, "encoder_q-layer.5": 3284.9001, "encoder_q-layer.6": 3668.0396, "encoder_q-layer.7": 3634.0305, "encoder_q-layer.8": 2882.9429, "encoder_q-layer.9": 1416.0897, "epoch": 0.86, "inbatch_neg_score": 0.7494, "inbatch_pos_score": 1.4785, "learning_rate": 6.4444444444444445e-06, "loss": 3.1499, "norm_diff": 0.0598, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3835.1516, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.748, "query_norm": 1.5475, "queue_k_norm": 1.6096, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9113, "sent_len_1": 66.7798, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2425, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 88400 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.1258, "doc_norm": 1.6076, "encoder_q-embeddings": 1540.4893, "encoder_q-layer.0": 1118.3387, "encoder_q-layer.1": 1160.9492, "encoder_q-layer.10": 1252.0814, "encoder_q-layer.11": 3020.0864, "encoder_q-layer.2": 1372.4899, "encoder_q-layer.3": 1386.7609, "encoder_q-layer.4": 1390.2485, "encoder_q-layer.5": 1463.2375, "encoder_q-layer.6": 1560.5322, "encoder_q-layer.7": 1552.5322, "encoder_q-layer.8": 1507.5658, "encoder_q-layer.9": 1283.0413, "epoch": 0.86, "inbatch_neg_score": 0.7501, "inbatch_pos_score": 1.418, "learning_rate": 6.3888888888888885e-06, "loss": 3.1258, "norm_diff": 0.0644, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2444.2945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7505, "query_norm": 1.5432, "queue_k_norm": 1.6103, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9589, "sent_len_1": 66.9719, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4412, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 88500 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.1597, "doc_norm": 1.6049, "encoder_q-embeddings": 1081.8311, "encoder_q-layer.0": 730.4937, "encoder_q-layer.1": 784.7132, "encoder_q-layer.10": 1295.6649, "encoder_q-layer.11": 3039.2781, "encoder_q-layer.2": 906.4891, "encoder_q-layer.3": 970.6371, "encoder_q-layer.4": 1040.3593, "encoder_q-layer.5": 1106.0332, "encoder_q-layer.6": 1177.3824, "encoder_q-layer.7": 1361.3596, "encoder_q-layer.8": 1433.6631, "encoder_q-layer.9": 1261.8667, "epoch": 0.87, "inbatch_neg_score": 0.7504, "inbatch_pos_score": 1.4102, "learning_rate": 6.333333333333334e-06, "loss": 3.1597, "norm_diff": 0.0781, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2119.8818, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.75, "query_norm": 1.5268, "queue_k_norm": 1.609, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.2243, "sent_len_1": 66.8779, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0362, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 88600 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.1371, "doc_norm": 1.6082, "encoder_q-embeddings": 1365.8535, "encoder_q-layer.0": 905.0873, "encoder_q-layer.1": 1073.5514, "encoder_q-layer.10": 1305.0287, "encoder_q-layer.11": 3105.4036, "encoder_q-layer.2": 1208.705, "encoder_q-layer.3": 1373.917, "encoder_q-layer.4": 1468.8809, "encoder_q-layer.5": 1501.7742, "encoder_q-layer.6": 1550.1819, "encoder_q-layer.7": 1593.5337, "encoder_q-layer.8": 1471.7058, "encoder_q-layer.9": 1283.6974, "epoch": 0.87, "inbatch_neg_score": 0.7521, "inbatch_pos_score": 1.4111, "learning_rate": 6.277777777777778e-06, "loss": 3.1371, "norm_diff": 0.0786, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2380.9035, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.751, "query_norm": 1.5296, "queue_k_norm": 1.6089, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0195, "sent_len_1": 66.9049, "sent_max_len_0": 128.0, "sent_max_len_1": 192.7688, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 88700 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1441, "doc_norm": 1.61, "encoder_q-embeddings": 1104.2734, "encoder_q-layer.0": 744.5689, "encoder_q-layer.1": 822.3639, "encoder_q-layer.10": 1298.8982, "encoder_q-layer.11": 2921.2544, "encoder_q-layer.2": 957.4181, "encoder_q-layer.3": 997.5686, "encoder_q-layer.4": 1065.5387, "encoder_q-layer.5": 1131.7975, "encoder_q-layer.6": 1227.0737, "encoder_q-layer.7": 1348.6681, "encoder_q-layer.8": 1389.7007, "encoder_q-layer.9": 1210.75, "epoch": 0.87, "inbatch_neg_score": 0.7521, "inbatch_pos_score": 1.4609, "learning_rate": 6.222222222222222e-06, "loss": 3.1441, "norm_diff": 0.0683, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2116.7333, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7515, "query_norm": 1.5417, "queue_k_norm": 1.6098, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8401, "sent_len_1": 66.818, "sent_max_len_0": 128.0, "sent_max_len_1": 188.5387, "stdk": 0.0491, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 88800 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.1265, "doc_norm": 1.6107, "encoder_q-embeddings": 41690.2227, "encoder_q-layer.0": 27967.707, "encoder_q-layer.1": 34433.5703, "encoder_q-layer.10": 1592.0311, "encoder_q-layer.11": 3070.9932, "encoder_q-layer.2": 39765.957, "encoder_q-layer.3": 43374.375, "encoder_q-layer.4": 41726.3672, "encoder_q-layer.5": 40090.0664, "encoder_q-layer.6": 44450.375, "encoder_q-layer.7": 40365.793, "encoder_q-layer.8": 18821.3906, "encoder_q-layer.9": 4449.2349, "epoch": 0.87, "inbatch_neg_score": 0.7533, "inbatch_pos_score": 1.4385, "learning_rate": 6.166666666666667e-06, "loss": 3.1265, "norm_diff": 0.0533, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 51842.0938, "preclip_grad_norm_avg": 0.0005, "q@queue_neg_score": 0.7524, "query_norm": 1.5574, "queue_k_norm": 1.6091, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8371, "sent_len_1": 66.6373, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8613, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 88900 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1511, "doc_norm": 1.6167, "encoder_q-embeddings": 1102.7654, "encoder_q-layer.0": 729.3633, "encoder_q-layer.1": 824.5349, "encoder_q-layer.10": 1234.9189, "encoder_q-layer.11": 3014.4922, "encoder_q-layer.2": 911.8994, "encoder_q-layer.3": 1024.7397, "encoder_q-layer.4": 1108.9088, "encoder_q-layer.5": 1212.2922, "encoder_q-layer.6": 1246.8088, "encoder_q-layer.7": 1322.6542, "encoder_q-layer.8": 1428.0056, "encoder_q-layer.9": 1241.5682, "epoch": 0.87, "inbatch_neg_score": 0.7547, "inbatch_pos_score": 1.4355, "learning_rate": 6.111111111111111e-06, "loss": 3.1511, "norm_diff": 0.0707, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2131.6035, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7549, "query_norm": 1.5459, "queue_k_norm": 1.6092, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8164, "sent_len_1": 66.6215, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5412, "stdk": 0.0494, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89000 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1411, "doc_norm": 1.6141, "encoder_q-embeddings": 1219.2277, "encoder_q-layer.0": 828.1174, "encoder_q-layer.1": 946.3163, "encoder_q-layer.10": 1293.3447, "encoder_q-layer.11": 2780.7686, "encoder_q-layer.2": 1163.0309, "encoder_q-layer.3": 1257.6085, "encoder_q-layer.4": 1324.3906, "encoder_q-layer.5": 1387.0599, "encoder_q-layer.6": 1467.6108, "encoder_q-layer.7": 1640.7971, "encoder_q-layer.8": 1623.3533, "encoder_q-layer.9": 1269.2028, "epoch": 0.87, "inbatch_neg_score": 0.7555, "inbatch_pos_score": 1.4453, "learning_rate": 6.055555555555556e-06, "loss": 3.1411, "norm_diff": 0.0637, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2258.2792, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7549, "query_norm": 1.5504, "queue_k_norm": 1.6107, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9696, "sent_len_1": 66.8102, "sent_max_len_0": 128.0, "sent_max_len_1": 188.865, "stdk": 0.0492, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 89100 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.1549, "doc_norm": 1.6074, "encoder_q-embeddings": 829.7554, "encoder_q-layer.0": 558.7236, "encoder_q-layer.1": 595.0117, "encoder_q-layer.10": 1174.3693, "encoder_q-layer.11": 2799.1196, "encoder_q-layer.2": 663.5145, "encoder_q-layer.3": 700.9609, "encoder_q-layer.4": 717.1572, "encoder_q-layer.5": 756.9156, "encoder_q-layer.6": 881.7628, "encoder_q-layer.7": 1041.6198, "encoder_q-layer.8": 1250.2991, "encoder_q-layer.9": 1149.0341, "epoch": 0.87, "inbatch_neg_score": 0.7574, "inbatch_pos_score": 1.4541, "learning_rate": 6e-06, "loss": 3.1549, "norm_diff": 0.0654, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1807.1657, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7573, "query_norm": 1.542, "queue_k_norm": 1.6105, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8131, "sent_len_1": 66.7669, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6975, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 89200 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1517, "doc_norm": 1.6176, "encoder_q-embeddings": 1408.931, "encoder_q-layer.0": 965.8429, "encoder_q-layer.1": 1117.1442, "encoder_q-layer.10": 1356.0736, "encoder_q-layer.11": 3056.4814, "encoder_q-layer.2": 1300.9963, "encoder_q-layer.3": 1398.3779, "encoder_q-layer.4": 1382.3214, "encoder_q-layer.5": 1509.976, "encoder_q-layer.6": 1713.9302, "encoder_q-layer.7": 1826.7628, "encoder_q-layer.8": 1861.7556, "encoder_q-layer.9": 1518.3682, "epoch": 0.87, "inbatch_neg_score": 0.7578, "inbatch_pos_score": 1.459, "learning_rate": 5.944444444444445e-06, "loss": 3.1517, "norm_diff": 0.058, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2504.2066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7578, "query_norm": 1.5596, "queue_k_norm": 1.6122, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7234, "sent_len_1": 66.9343, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1662, "stdk": 0.0493, "stdq": 0.0464, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 89300 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1381, "doc_norm": 1.6202, "encoder_q-embeddings": 1034.99, "encoder_q-layer.0": 662.1221, "encoder_q-layer.1": 682.8, "encoder_q-layer.10": 1494.1919, "encoder_q-layer.11": 3231.0796, "encoder_q-layer.2": 750.0168, "encoder_q-layer.3": 802.4109, "encoder_q-layer.4": 865.1697, "encoder_q-layer.5": 914.944, "encoder_q-layer.6": 1043.3044, "encoder_q-layer.7": 1260.8402, "encoder_q-layer.8": 1526.3927, "encoder_q-layer.9": 1363.0908, "epoch": 0.87, "inbatch_neg_score": 0.7579, "inbatch_pos_score": 1.4414, "learning_rate": 5.888888888888889e-06, "loss": 3.1381, "norm_diff": 0.0759, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2094.7234, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7598, "query_norm": 1.5443, "queue_k_norm": 1.6114, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0439, "sent_len_1": 66.705, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9525, "stdk": 0.0495, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 89400 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.144, "doc_norm": 1.6152, "encoder_q-embeddings": 2905.2395, "encoder_q-layer.0": 2018.2566, "encoder_q-layer.1": 1936.736, "encoder_q-layer.10": 1434.5579, "encoder_q-layer.11": 3027.7832, "encoder_q-layer.2": 2318.3616, "encoder_q-layer.3": 2566.6243, "encoder_q-layer.4": 2812.4004, "encoder_q-layer.5": 2921.6021, "encoder_q-layer.6": 3395.3992, "encoder_q-layer.7": 3420.8196, "encoder_q-layer.8": 3588.8948, "encoder_q-layer.9": 2103.3892, "epoch": 0.87, "inbatch_neg_score": 0.7613, "inbatch_pos_score": 1.4346, "learning_rate": 5.833333333333334e-06, "loss": 3.144, "norm_diff": 0.0771, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4235.2395, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7607, "query_norm": 1.5381, "queue_k_norm": 1.6115, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0105, "sent_len_1": 66.7953, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3137, "stdk": 0.0492, "stdq": 0.0454, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 89500 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1475, "doc_norm": 1.6084, "encoder_q-embeddings": 971.1769, "encoder_q-layer.0": 648.593, "encoder_q-layer.1": 692.6425, "encoder_q-layer.10": 1310.0708, "encoder_q-layer.11": 2946.8091, "encoder_q-layer.2": 793.0953, "encoder_q-layer.3": 832.8814, "encoder_q-layer.4": 896.5701, "encoder_q-layer.5": 943.4767, "encoder_q-layer.6": 1028.0768, "encoder_q-layer.7": 1224.8424, "encoder_q-layer.8": 1372.3284, "encoder_q-layer.9": 1242.8688, "epoch": 0.87, "inbatch_neg_score": 0.7606, "inbatch_pos_score": 1.4219, "learning_rate": 5.777777777777778e-06, "loss": 3.1475, "norm_diff": 0.0698, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2013.1303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7607, "query_norm": 1.5387, "queue_k_norm": 1.6118, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0147, "sent_len_1": 66.7429, "sent_max_len_0": 128.0, "sent_max_len_1": 187.7175, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 89600 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1512, "doc_norm": 1.6169, "encoder_q-embeddings": 1269.8798, "encoder_q-layer.0": 883.5316, "encoder_q-layer.1": 1050.9283, "encoder_q-layer.10": 1274.8411, "encoder_q-layer.11": 2981.7571, "encoder_q-layer.2": 1219.8357, "encoder_q-layer.3": 1316.923, "encoder_q-layer.4": 1429.4182, "encoder_q-layer.5": 1447.7523, "encoder_q-layer.6": 1619.2318, "encoder_q-layer.7": 1729.8859, "encoder_q-layer.8": 1819.0326, "encoder_q-layer.9": 1446.3291, "epoch": 0.88, "inbatch_neg_score": 0.7615, "inbatch_pos_score": 1.4629, "learning_rate": 5.722222222222223e-06, "loss": 3.1512, "norm_diff": 0.0633, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2445.703, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7622, "query_norm": 1.5537, "queue_k_norm": 1.6123, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8664, "sent_len_1": 66.6214, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4512, "stdk": 0.0493, "stdq": 0.0461, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 89700 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1159, "doc_norm": 1.6139, "encoder_q-embeddings": 1041.223, "encoder_q-layer.0": 697.3228, "encoder_q-layer.1": 764.0959, "encoder_q-layer.10": 1532.272, "encoder_q-layer.11": 2986.0205, "encoder_q-layer.2": 882.8315, "encoder_q-layer.3": 935.0059, "encoder_q-layer.4": 1034.9604, "encoder_q-layer.5": 1083.231, "encoder_q-layer.6": 1280.6273, "encoder_q-layer.7": 1442.9371, "encoder_q-layer.8": 1523.8799, "encoder_q-layer.9": 1345.8608, "epoch": 0.88, "inbatch_neg_score": 0.7637, "inbatch_pos_score": 1.457, "learning_rate": 5.666666666666667e-06, "loss": 3.1159, "norm_diff": 0.0558, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2120.3707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7632, "query_norm": 1.5581, "queue_k_norm": 1.6139, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0539, "sent_len_1": 66.7329, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9375, "stdk": 0.0491, "stdq": 0.0462, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 89800 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.1381, "doc_norm": 1.6144, "encoder_q-embeddings": 1250.0187, "encoder_q-layer.0": 905.8939, "encoder_q-layer.1": 1032.6523, "encoder_q-layer.10": 1426.951, "encoder_q-layer.11": 3044.3823, "encoder_q-layer.2": 1178.998, "encoder_q-layer.3": 1192.6482, "encoder_q-layer.4": 1226.8818, "encoder_q-layer.5": 1214.5807, "encoder_q-layer.6": 1195.2158, "encoder_q-layer.7": 1344.2297, "encoder_q-layer.8": 1584.1763, "encoder_q-layer.9": 1369.239, "epoch": 0.88, "inbatch_neg_score": 0.7653, "inbatch_pos_score": 1.4199, "learning_rate": 5.611111111111112e-06, "loss": 3.1381, "norm_diff": 0.0674, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2275.8723, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7651, "query_norm": 1.547, "queue_k_norm": 1.6128, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7422, "sent_len_1": 66.6328, "sent_max_len_0": 128.0, "sent_max_len_1": 190.165, "stdk": 0.0491, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 89900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1456, "doc_norm": 1.6171, "encoder_q-embeddings": 1654.5474, "encoder_q-layer.0": 1218.7119, "encoder_q-layer.1": 1426.957, "encoder_q-layer.10": 1262.2678, "encoder_q-layer.11": 2977.5002, "encoder_q-layer.2": 1691.9941, "encoder_q-layer.3": 1745.3152, "encoder_q-layer.4": 1796.0414, "encoder_q-layer.5": 1819.9469, "encoder_q-layer.6": 1683.3077, "encoder_q-layer.7": 1578.0001, "encoder_q-layer.8": 1561.6638, "encoder_q-layer.9": 1280.0563, "epoch": 0.88, "inbatch_neg_score": 0.7653, "inbatch_pos_score": 1.4707, "learning_rate": 5.555555555555556e-06, "loss": 3.1456, "norm_diff": 0.06, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2647.0022, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7661, "query_norm": 1.5571, "queue_k_norm": 1.6163, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9086, "sent_len_1": 66.8065, "sent_max_len_0": 128.0, "sent_max_len_1": 190.9675, "stdk": 0.0493, "stdq": 0.0461, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 90000 }, { "dev_runtime": 37.6562, "dev_samples_per_second": 1.7, "dev_steps_per_second": 0.027, "epoch": 0.88, "step": 90000, "test_accuracy": 94.54345703125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3078896701335907, "test_doc_norm": 1.607322335243225, "test_inbatch_neg_score": 1.1371493339538574, "test_inbatch_pos_score": 2.1192989349365234, "test_loss": 0.3078896701335907, "test_loss_align": 0.9989704489707947, "test_loss_unif": 2.9055066108703613, "test_loss_unif_q@queue": 2.9055068492889404, "test_norm_diff": 0.015200823545455933, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.7489210367202759, "test_query_norm": 1.6220368146896362, "test_queue_k_norm": 1.6164119243621826, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04331135004758835, "test_stdq": 0.043481796979904175, "test_stdqueue_k": 0.049305226653814316, "test_stdqueue_q": 0.0 }, { "dev_runtime": 37.6562, "dev_samples_per_second": 1.7, "dev_steps_per_second": 0.027, "epoch": 0.88, "eval_beir-arguana_ndcg@10": 0.38415, "eval_beir-arguana_recall@10": 0.6522, "eval_beir-arguana_recall@100": 0.93457, "eval_beir-arguana_recall@20": 0.77881, "eval_beir-avg_ndcg@10": 0.37948008333333333, "eval_beir-avg_recall@10": 0.4468521666666666, "eval_beir-avg_recall@100": 0.6270814166666667, "eval_beir-avg_recall@20": 0.50763825, "eval_beir-cqadupstack_ndcg@10": 0.2709708333333333, "eval_beir-cqadupstack_recall@10": 0.3618116666666667, "eval_beir-cqadupstack_recall@100": 0.5872541666666666, "eval_beir-cqadupstack_recall@20": 0.4275625, "eval_beir-fiqa_ndcg@10": 0.22238, "eval_beir-fiqa_recall@10": 0.276, "eval_beir-fiqa_recall@100": 0.53444, "eval_beir-fiqa_recall@20": 0.34488, "eval_beir-nfcorpus_ndcg@10": 0.30283, "eval_beir-nfcorpus_recall@10": 0.1499, "eval_beir-nfcorpus_recall@100": 0.27555, "eval_beir-nfcorpus_recall@20": 0.1801, "eval_beir-nq_ndcg@10": 0.27722, "eval_beir-nq_recall@10": 0.45614, "eval_beir-nq_recall@100": 0.79473, "eval_beir-nq_recall@20": 0.57546, "eval_beir-quora_ndcg@10": 0.82672, "eval_beir-quora_recall@10": 0.91859, "eval_beir-quora_recall@100": 0.98615, "eval_beir-quora_recall@20": 0.95147, "eval_beir-scidocs_ndcg@10": 0.14694, "eval_beir-scidocs_recall@10": 0.15473, "eval_beir-scidocs_recall@100": 0.35287, "eval_beir-scidocs_recall@20": 0.21193, "eval_beir-scifact_ndcg@10": 0.63432, "eval_beir-scifact_recall@10": 0.78733, "eval_beir-scifact_recall@100": 0.92822, "eval_beir-scifact_recall@20": 0.83511, "eval_beir-trec-covid_ndcg@10": 0.53785, "eval_beir-trec-covid_recall@10": 0.57, "eval_beir-trec-covid_recall@100": 0.4336, "eval_beir-trec-covid_recall@20": 0.551, "eval_beir-webis-touche2020_ndcg@10": 0.19142, "eval_beir-webis-touche2020_recall@10": 0.14182, "eval_beir-webis-touche2020_recall@100": 0.44343, "eval_beir-webis-touche2020_recall@20": 0.22006, "eval_senteval-avg_sts": 0.7251337697300739, "eval_senteval-sickr_spearman": 0.6928772647812322, "eval_senteval-stsb_spearman": 0.7573902746789156, "step": 90000, "test_accuracy": 94.54345703125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3078896701335907, "test_doc_norm": 1.607322335243225, "test_inbatch_neg_score": 1.1371493339538574, "test_inbatch_pos_score": 2.1192989349365234, "test_loss": 0.3078896701335907, "test_loss_align": 0.9989704489707947, "test_loss_unif": 2.9055066108703613, "test_loss_unif_q@queue": 2.9055068492889404, "test_norm_diff": 0.015200823545455933, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.7489210367202759, "test_query_norm": 1.6220368146896362, "test_queue_k_norm": 1.6164119243621826, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04331135004758835, "test_stdq": 0.043481796979904175, "test_stdqueue_k": 0.049305226653814316, "test_stdqueue_q": 0.0 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.1306, "doc_norm": 1.6161, "encoder_q-embeddings": 2474.6719, "encoder_q-layer.0": 1676.6241, "encoder_q-layer.1": 1931.1167, "encoder_q-layer.10": 1321.4515, "encoder_q-layer.11": 3125.4355, "encoder_q-layer.2": 2330.282, "encoder_q-layer.3": 2577.4187, "encoder_q-layer.4": 2735.4917, "encoder_q-layer.5": 2743.9937, "encoder_q-layer.6": 2655.9907, "encoder_q-layer.7": 2615.9817, "encoder_q-layer.8": 2792.179, "encoder_q-layer.9": 1873.6127, "epoch": 0.88, "inbatch_neg_score": 0.7686, "inbatch_pos_score": 1.4199, "learning_rate": 5.500000000000001e-06, "loss": 3.1306, "norm_diff": 0.0731, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3700.5862, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7676, "query_norm": 1.543, "queue_k_norm": 1.6145, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8351, "sent_len_1": 66.5841, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6163, "stdk": 0.0492, "stdq": 0.0455, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 90100 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.1597, "doc_norm": 1.6189, "encoder_q-embeddings": 1085.6447, "encoder_q-layer.0": 706.3303, "encoder_q-layer.1": 797.4725, "encoder_q-layer.10": 1307.3092, "encoder_q-layer.11": 2850.9851, "encoder_q-layer.2": 905.741, "encoder_q-layer.3": 976.8034, "encoder_q-layer.4": 1014.8611, "encoder_q-layer.5": 1124.212, "encoder_q-layer.6": 1260.9174, "encoder_q-layer.7": 1407.193, "encoder_q-layer.8": 1458.8956, "encoder_q-layer.9": 1211.7335, "epoch": 0.88, "inbatch_neg_score": 0.7667, "inbatch_pos_score": 1.46, "learning_rate": 5.444444444444445e-06, "loss": 3.1597, "norm_diff": 0.0704, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2052.8781, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7666, "query_norm": 1.5485, "queue_k_norm": 1.614, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0948, "sent_len_1": 66.8016, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4625, "stdk": 0.0493, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 90200 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1257, "doc_norm": 1.6135, "encoder_q-embeddings": 1832.2013, "encoder_q-layer.0": 1234.4771, "encoder_q-layer.1": 1271.0392, "encoder_q-layer.10": 2511.8618, "encoder_q-layer.11": 5842.2686, "encoder_q-layer.2": 1448.0002, "encoder_q-layer.3": 1480.9048, "encoder_q-layer.4": 1609.2397, "encoder_q-layer.5": 1714.084, "encoder_q-layer.6": 1934.8002, "encoder_q-layer.7": 2212.3584, "encoder_q-layer.8": 2807.4512, "encoder_q-layer.9": 2451.7629, "epoch": 0.88, "inbatch_neg_score": 0.7699, "inbatch_pos_score": 1.4434, "learning_rate": 5.388888888888889e-06, "loss": 3.1257, "norm_diff": 0.0632, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3893.3861, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7695, "query_norm": 1.5503, "queue_k_norm": 1.6151, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8803, "sent_len_1": 66.7331, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5875, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 90300 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.1583, "doc_norm": 1.6141, "encoder_q-embeddings": 2155.6863, "encoder_q-layer.0": 1436.1663, "encoder_q-layer.1": 1538.2712, "encoder_q-layer.10": 2552.8826, "encoder_q-layer.11": 5752.5479, "encoder_q-layer.2": 1743.8815, "encoder_q-layer.3": 1851.6606, "encoder_q-layer.4": 1979.2593, "encoder_q-layer.5": 2121.4082, "encoder_q-layer.6": 2284.9827, "encoder_q-layer.7": 2525.093, "encoder_q-layer.8": 2734.0586, "encoder_q-layer.9": 2416.3416, "epoch": 0.88, "inbatch_neg_score": 0.7701, "inbatch_pos_score": 1.4854, "learning_rate": 5.333333333333334e-06, "loss": 3.1583, "norm_diff": 0.0545, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4035.8426, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7695, "query_norm": 1.5596, "queue_k_norm": 1.6153, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1055, "sent_len_1": 66.8693, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0538, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 90400 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1466, "doc_norm": 1.6223, "encoder_q-embeddings": 5723.5454, "encoder_q-layer.0": 4043.6824, "encoder_q-layer.1": 4312.6855, "encoder_q-layer.10": 2478.1914, "encoder_q-layer.11": 5860.7354, "encoder_q-layer.2": 4849.5391, "encoder_q-layer.3": 4708.5063, "encoder_q-layer.4": 4896.5771, "encoder_q-layer.5": 5028.4321, "encoder_q-layer.6": 4276.5718, "encoder_q-layer.7": 4094.7104, "encoder_q-layer.8": 3546.2283, "encoder_q-layer.9": 2412.8381, "epoch": 0.88, "inbatch_neg_score": 0.7713, "inbatch_pos_score": 1.4639, "learning_rate": 5.277777777777778e-06, "loss": 3.1466, "norm_diff": 0.0687, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6884.9869, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.771, "query_norm": 1.5536, "queue_k_norm": 1.6159, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0198, "sent_len_1": 66.7664, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2237, "stdk": 0.0494, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 90500 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.1704, "doc_norm": 1.6199, "encoder_q-embeddings": 2908.2751, "encoder_q-layer.0": 1914.6709, "encoder_q-layer.1": 2212.1458, "encoder_q-layer.10": 2462.7739, "encoder_q-layer.11": 5710.9854, "encoder_q-layer.2": 2544.5913, "encoder_q-layer.3": 2751.8418, "encoder_q-layer.4": 3025.1614, "encoder_q-layer.5": 2956.1902, "encoder_q-layer.6": 3206.3508, "encoder_q-layer.7": 3290.9204, "encoder_q-layer.8": 3284.6006, "encoder_q-layer.9": 2439.3374, "epoch": 0.88, "inbatch_neg_score": 0.7733, "inbatch_pos_score": 1.4609, "learning_rate": 5.2222222222222226e-06, "loss": 3.1704, "norm_diff": 0.0812, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4826.6592, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7729, "query_norm": 1.5386, "queue_k_norm": 1.6179, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1467, "sent_len_1": 66.7441, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0575, "stdk": 0.0493, "stdq": 0.0453, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 90600 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.1327, "doc_norm": 1.6223, "encoder_q-embeddings": 2065.4966, "encoder_q-layer.0": 1409.8958, "encoder_q-layer.1": 1499.3433, "encoder_q-layer.10": 2461.9434, "encoder_q-layer.11": 5820.4233, "encoder_q-layer.2": 1780.1064, "encoder_q-layer.3": 1902.5603, "encoder_q-layer.4": 1966.05, "encoder_q-layer.5": 2055.5439, "encoder_q-layer.6": 2282.2839, "encoder_q-layer.7": 2589.7732, "encoder_q-layer.8": 2837.0588, "encoder_q-layer.9": 2446.5437, "epoch": 0.89, "inbatch_neg_score": 0.7729, "inbatch_pos_score": 1.4619, "learning_rate": 5.166666666666667e-06, "loss": 3.1327, "norm_diff": 0.0753, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4060.6259, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.772, "query_norm": 1.547, "queue_k_norm": 1.6159, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0577, "sent_len_1": 66.9125, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1175, "stdk": 0.0494, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 90700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1228, "doc_norm": 1.6168, "encoder_q-embeddings": 1995.9663, "encoder_q-layer.0": 1323.5575, "encoder_q-layer.1": 1371.2231, "encoder_q-layer.10": 2680.8953, "encoder_q-layer.11": 5721.3535, "encoder_q-layer.2": 1539.8492, "encoder_q-layer.3": 1635.3392, "encoder_q-layer.4": 1751.0859, "encoder_q-layer.5": 1828.8677, "encoder_q-layer.6": 2086.1147, "encoder_q-layer.7": 2521.9558, "encoder_q-layer.8": 2657.1045, "encoder_q-layer.9": 2470.7744, "epoch": 0.89, "inbatch_neg_score": 0.7751, "inbatch_pos_score": 1.4521, "learning_rate": 5.1111111111111115e-06, "loss": 3.1228, "norm_diff": 0.07, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3939.1895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7744, "query_norm": 1.5468, "queue_k_norm": 1.6177, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1393, "sent_len_1": 66.8646, "sent_max_len_0": 128.0, "sent_max_len_1": 192.6238, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 90800 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1337, "doc_norm": 1.61, "encoder_q-embeddings": 2218.2764, "encoder_q-layer.0": 1465.0325, "encoder_q-layer.1": 1630.7064, "encoder_q-layer.10": 2639.9194, "encoder_q-layer.11": 5888.957, "encoder_q-layer.2": 1819.0635, "encoder_q-layer.3": 2038.9138, "encoder_q-layer.4": 2335.5818, "encoder_q-layer.5": 2287.9797, "encoder_q-layer.6": 2552.4084, "encoder_q-layer.7": 2892.3948, "encoder_q-layer.8": 2842.8503, "encoder_q-layer.9": 2525.8157, "epoch": 0.89, "inbatch_neg_score": 0.7775, "inbatch_pos_score": 1.4336, "learning_rate": 5.0555555555555555e-06, "loss": 3.1337, "norm_diff": 0.0619, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4297.0332, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7764, "query_norm": 1.5481, "queue_k_norm": 1.6178, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.903, "sent_len_1": 67.021, "sent_max_len_0": 128.0, "sent_max_len_1": 190.8587, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 90900 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.1528, "doc_norm": 1.6167, "encoder_q-embeddings": 2010.4141, "encoder_q-layer.0": 1406.6013, "encoder_q-layer.1": 1504.3971, "encoder_q-layer.10": 2618.2209, "encoder_q-layer.11": 6197.2827, "encoder_q-layer.2": 1804.118, "encoder_q-layer.3": 1907.9352, "encoder_q-layer.4": 2100.4429, "encoder_q-layer.5": 2039.9465, "encoder_q-layer.6": 2303.9175, "encoder_q-layer.7": 2590.9717, "encoder_q-layer.8": 2779.9929, "encoder_q-layer.9": 2594.2358, "epoch": 0.89, "inbatch_neg_score": 0.7762, "inbatch_pos_score": 1.4453, "learning_rate": 5e-06, "loss": 3.1528, "norm_diff": 0.0656, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4272.8183, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7769, "query_norm": 1.5511, "queue_k_norm": 1.6168, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0071, "sent_len_1": 66.4738, "sent_max_len_0": 128.0, "sent_max_len_1": 189.7975, "stdk": 0.0491, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91000 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.1259, "doc_norm": 1.6149, "encoder_q-embeddings": 1082.7233, "encoder_q-layer.0": 740.4307, "encoder_q-layer.1": 833.2512, "encoder_q-layer.10": 1460.5959, "encoder_q-layer.11": 3040.0374, "encoder_q-layer.2": 949.7389, "encoder_q-layer.3": 1001.2498, "encoder_q-layer.4": 1095.3893, "encoder_q-layer.5": 1217.3209, "encoder_q-layer.6": 1394.2822, "encoder_q-layer.7": 1687.49, "encoder_q-layer.8": 1770.4647, "encoder_q-layer.9": 1327.8701, "epoch": 0.89, "inbatch_neg_score": 0.7785, "inbatch_pos_score": 1.4512, "learning_rate": 4.9444444444444444e-06, "loss": 3.1259, "norm_diff": 0.0675, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2264.5312, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7773, "query_norm": 1.5474, "queue_k_norm": 1.6166, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1216, "sent_len_1": 66.8488, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1875, "stdk": 0.049, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91100 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.1257, "doc_norm": 1.6166, "encoder_q-embeddings": 918.7981, "encoder_q-layer.0": 612.0817, "encoder_q-layer.1": 664.6818, "encoder_q-layer.10": 1259.2914, "encoder_q-layer.11": 3092.177, "encoder_q-layer.2": 723.8133, "encoder_q-layer.3": 755.3939, "encoder_q-layer.4": 805.8848, "encoder_q-layer.5": 844.566, "encoder_q-layer.6": 994.2491, "encoder_q-layer.7": 1127.2104, "encoder_q-layer.8": 1353.5579, "encoder_q-layer.9": 1237.8169, "epoch": 0.89, "inbatch_neg_score": 0.7807, "inbatch_pos_score": 1.4199, "learning_rate": 4.888888888888889e-06, "loss": 3.1257, "norm_diff": 0.0812, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2014.5535, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7803, "query_norm": 1.5353, "queue_k_norm": 1.6175, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1308, "sent_len_1": 66.7059, "sent_max_len_0": 128.0, "sent_max_len_1": 190.0175, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91200 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.1518, "doc_norm": 1.6161, "encoder_q-embeddings": 1018.4268, "encoder_q-layer.0": 658.7505, "encoder_q-layer.1": 689.6532, "encoder_q-layer.10": 1246.1846, "encoder_q-layer.11": 3056.9487, "encoder_q-layer.2": 746.248, "encoder_q-layer.3": 809.51, "encoder_q-layer.4": 855.9822, "encoder_q-layer.5": 892.1663, "encoder_q-layer.6": 1029.4948, "encoder_q-layer.7": 1222.3174, "encoder_q-layer.8": 1453.366, "encoder_q-layer.9": 1242.4265, "epoch": 0.89, "inbatch_neg_score": 0.7812, "inbatch_pos_score": 1.4434, "learning_rate": 4.833333333333333e-06, "loss": 3.1518, "norm_diff": 0.0699, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2061.9206, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7808, "query_norm": 1.5463, "queue_k_norm": 1.6178, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0983, "sent_len_1": 66.8235, "sent_max_len_0": 128.0, "sent_max_len_1": 191.6887, "stdk": 0.049, "stdq": 0.0455, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91300 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.1361, "doc_norm": 1.6185, "encoder_q-embeddings": 2391.1272, "encoder_q-layer.0": 1565.078, "encoder_q-layer.1": 1730.4836, "encoder_q-layer.10": 1333.6841, "encoder_q-layer.11": 3319.9734, "encoder_q-layer.2": 2025.8087, "encoder_q-layer.3": 2230.9026, "encoder_q-layer.4": 2473.8528, "encoder_q-layer.5": 2564.929, "encoder_q-layer.6": 2238.1052, "encoder_q-layer.7": 2312.2578, "encoder_q-layer.8": 1946.502, "encoder_q-layer.9": 1344.7091, "epoch": 0.89, "inbatch_neg_score": 0.7799, "inbatch_pos_score": 1.4443, "learning_rate": 4.777777777777778e-06, "loss": 3.1361, "norm_diff": 0.0771, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3361.4168, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7798, "query_norm": 1.5414, "queue_k_norm": 1.6194, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9493, "sent_len_1": 66.9286, "sent_max_len_0": 128.0, "sent_max_len_1": 188.55, "stdk": 0.0491, "stdq": 0.0453, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91400 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1167, "doc_norm": 1.623, "encoder_q-embeddings": 1370.5104, "encoder_q-layer.0": 886.6631, "encoder_q-layer.1": 1018.9932, "encoder_q-layer.10": 1567.5519, "encoder_q-layer.11": 3255.9453, "encoder_q-layer.2": 1113.9497, "encoder_q-layer.3": 1290.3982, "encoder_q-layer.4": 1396.6667, "encoder_q-layer.5": 1506.4825, "encoder_q-layer.6": 1727.2065, "encoder_q-layer.7": 1852.2405, "encoder_q-layer.8": 1671.7837, "encoder_q-layer.9": 1421.0969, "epoch": 0.89, "inbatch_neg_score": 0.7787, "inbatch_pos_score": 1.4414, "learning_rate": 4.722222222222222e-06, "loss": 3.1167, "norm_diff": 0.0722, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2504.6794, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7798, "query_norm": 1.5507, "queue_k_norm": 1.6173, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0967, "sent_len_1": 66.5291, "sent_max_len_0": 128.0, "sent_max_len_1": 188.4787, "stdk": 0.0493, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91500 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.151, "doc_norm": 1.6209, "encoder_q-embeddings": 1070.6423, "encoder_q-layer.0": 702.5678, "encoder_q-layer.1": 742.8641, "encoder_q-layer.10": 1329.2046, "encoder_q-layer.11": 3083.3459, "encoder_q-layer.2": 840.8929, "encoder_q-layer.3": 877.1118, "encoder_q-layer.4": 971.7667, "encoder_q-layer.5": 1029.7902, "encoder_q-layer.6": 1179.8107, "encoder_q-layer.7": 1237.5272, "encoder_q-layer.8": 1353.3536, "encoder_q-layer.9": 1233.2717, "epoch": 0.89, "inbatch_neg_score": 0.7816, "inbatch_pos_score": 1.4443, "learning_rate": 4.666666666666667e-06, "loss": 3.151, "norm_diff": 0.0816, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2084.523, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7808, "query_norm": 1.5393, "queue_k_norm": 1.6173, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1091, "sent_len_1": 66.9521, "sent_max_len_0": 128.0, "sent_max_len_1": 190.585, "stdk": 0.0492, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91600 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 3.1446, "doc_norm": 1.6141, "encoder_q-embeddings": 1142.4985, "encoder_q-layer.0": 776.1598, "encoder_q-layer.1": 816.4971, "encoder_q-layer.10": 1229.9894, "encoder_q-layer.11": 2754.127, "encoder_q-layer.2": 908.1188, "encoder_q-layer.3": 1014.8363, "encoder_q-layer.4": 1095.8186, "encoder_q-layer.5": 1127.4734, "encoder_q-layer.6": 1150.1523, "encoder_q-layer.7": 1345.2483, "encoder_q-layer.8": 1410.446, "encoder_q-layer.9": 1173.7563, "epoch": 0.9, "inbatch_neg_score": 0.783, "inbatch_pos_score": 1.4932, "learning_rate": 4.611111111111111e-06, "loss": 3.1446, "norm_diff": 0.0524, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2046.6213, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7822, "query_norm": 1.5617, "queue_k_norm": 1.6182, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.1134, "sent_len_1": 66.9602, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5037, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91700 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1605, "doc_norm": 1.6215, "encoder_q-embeddings": 1655.2108, "encoder_q-layer.0": 1197.9436, "encoder_q-layer.1": 1450.7717, "encoder_q-layer.10": 1248.8894, "encoder_q-layer.11": 2929.0046, "encoder_q-layer.2": 1808.9399, "encoder_q-layer.3": 1907.6146, "encoder_q-layer.4": 2213.0815, "encoder_q-layer.5": 2161.1528, "encoder_q-layer.6": 2232.353, "encoder_q-layer.7": 2225.4822, "encoder_q-layer.8": 1742.3076, "encoder_q-layer.9": 1319.7421, "epoch": 0.9, "inbatch_neg_score": 0.7823, "inbatch_pos_score": 1.4648, "learning_rate": 4.555555555555556e-06, "loss": 3.1605, "norm_diff": 0.0764, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2913.273, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7817, "query_norm": 1.5451, "queue_k_norm": 1.6193, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0574, "sent_len_1": 66.7293, "sent_max_len_0": 128.0, "sent_max_len_1": 187.4175, "stdk": 0.0492, "stdq": 0.0455, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91800 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.1332, "doc_norm": 1.6142, "encoder_q-embeddings": 1411.7457, "encoder_q-layer.0": 979.8738, "encoder_q-layer.1": 1054.0824, "encoder_q-layer.10": 1240.9027, "encoder_q-layer.11": 2940.8445, "encoder_q-layer.2": 1254.0383, "encoder_q-layer.3": 1327.2642, "encoder_q-layer.4": 1388.3171, "encoder_q-layer.5": 1389.9305, "encoder_q-layer.6": 1470.7432, "encoder_q-layer.7": 1474.6541, "encoder_q-layer.8": 1514.4585, "encoder_q-layer.9": 1228.8815, "epoch": 0.9, "inbatch_neg_score": 0.783, "inbatch_pos_score": 1.459, "learning_rate": 4.5e-06, "loss": 3.1332, "norm_diff": 0.0736, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2326.5752, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7832, "query_norm": 1.5406, "queue_k_norm": 1.6185, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0363, "sent_len_1": 66.6423, "sent_max_len_0": 128.0, "sent_max_len_1": 188.9087, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 91900 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1285, "doc_norm": 1.6181, "encoder_q-embeddings": 1182.4786, "encoder_q-layer.0": 842.1204, "encoder_q-layer.1": 915.4605, "encoder_q-layer.10": 1272.6244, "encoder_q-layer.11": 3007.4805, "encoder_q-layer.2": 1107.6012, "encoder_q-layer.3": 1141.6085, "encoder_q-layer.4": 1159.5773, "encoder_q-layer.5": 1267.5977, "encoder_q-layer.6": 1348.1115, "encoder_q-layer.7": 1507.0249, "encoder_q-layer.8": 1563.3118, "encoder_q-layer.9": 1276.2053, "epoch": 0.9, "inbatch_neg_score": 0.7806, "inbatch_pos_score": 1.4678, "learning_rate": 4.444444444444445e-06, "loss": 3.1285, "norm_diff": 0.0661, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2247.0159, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7817, "query_norm": 1.552, "queue_k_norm": 1.6184, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1792, "sent_len_1": 66.8932, "sent_max_len_0": 128.0, "sent_max_len_1": 189.9837, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92000 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1483, "doc_norm": 1.6081, "encoder_q-embeddings": 1163.5306, "encoder_q-layer.0": 763.8153, "encoder_q-layer.1": 830.0422, "encoder_q-layer.10": 1298.9288, "encoder_q-layer.11": 2997.0725, "encoder_q-layer.2": 910.5572, "encoder_q-layer.3": 1000.2888, "encoder_q-layer.4": 1121.353, "encoder_q-layer.5": 1151.4661, "encoder_q-layer.6": 1219.0945, "encoder_q-layer.7": 1429.4966, "encoder_q-layer.8": 1614.1975, "encoder_q-layer.9": 1277.6166, "epoch": 0.9, "inbatch_neg_score": 0.7842, "inbatch_pos_score": 1.4639, "learning_rate": 4.388888888888889e-06, "loss": 3.1483, "norm_diff": 0.0671, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2203.8285, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7842, "query_norm": 1.5411, "queue_k_norm": 1.6205, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8205, "sent_len_1": 66.8201, "sent_max_len_0": 128.0, "sent_max_len_1": 193.2725, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 92100 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.1321, "doc_norm": 1.6248, "encoder_q-embeddings": 1353.2745, "encoder_q-layer.0": 921.7433, "encoder_q-layer.1": 1030.1763, "encoder_q-layer.10": 1208.8793, "encoder_q-layer.11": 2962.8721, "encoder_q-layer.2": 1224.4006, "encoder_q-layer.3": 1408.1144, "encoder_q-layer.4": 1713.1532, "encoder_q-layer.5": 1796.2423, "encoder_q-layer.6": 1718.1333, "encoder_q-layer.7": 2033.1458, "encoder_q-layer.8": 1959.2148, "encoder_q-layer.9": 1343.3875, "epoch": 0.9, "inbatch_neg_score": 0.786, "inbatch_pos_score": 1.499, "learning_rate": 4.333333333333334e-06, "loss": 3.1321, "norm_diff": 0.0679, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2565.6656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7856, "query_norm": 1.5569, "queue_k_norm": 1.6185, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8371, "sent_len_1": 66.651, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3887, "stdk": 0.0493, "stdq": 0.046, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92200 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 3.1277, "doc_norm": 1.6227, "encoder_q-embeddings": 1291.1295, "encoder_q-layer.0": 932.9499, "encoder_q-layer.1": 1061.1166, "encoder_q-layer.10": 1210.5081, "encoder_q-layer.11": 2831.8218, "encoder_q-layer.2": 1232.515, "encoder_q-layer.3": 1282.8245, "encoder_q-layer.4": 1412.1436, "encoder_q-layer.5": 1556.5281, "encoder_q-layer.6": 1493.1489, "encoder_q-layer.7": 1570.2642, "encoder_q-layer.8": 1568.998, "encoder_q-layer.9": 1270.8473, "epoch": 0.9, "inbatch_neg_score": 0.7836, "inbatch_pos_score": 1.4844, "learning_rate": 4.277777777777778e-06, "loss": 3.1277, "norm_diff": 0.0676, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2298.4656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7837, "query_norm": 1.5551, "queue_k_norm": 1.6209, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7492, "sent_len_1": 66.5407, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1513, "stdk": 0.0492, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 92300 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 3.1266, "doc_norm": 1.6236, "encoder_q-embeddings": 1036.2245, "encoder_q-layer.0": 674.2542, "encoder_q-layer.1": 745.5438, "encoder_q-layer.10": 1273.1465, "encoder_q-layer.11": 2817.665, "encoder_q-layer.2": 878.1381, "encoder_q-layer.3": 998.0298, "encoder_q-layer.4": 1087.4553, "encoder_q-layer.5": 1127.1315, "encoder_q-layer.6": 1162.8188, "encoder_q-layer.7": 1291.4846, "encoder_q-layer.8": 1404.3505, "encoder_q-layer.9": 1235.2609, "epoch": 0.9, "inbatch_neg_score": 0.7844, "inbatch_pos_score": 1.4746, "learning_rate": 4.222222222222223e-06, "loss": 3.1266, "norm_diff": 0.0677, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2005.9712, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7852, "query_norm": 1.5559, "queue_k_norm": 1.6199, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9223, "sent_len_1": 66.785, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6788, "stdk": 0.0493, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 92400 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1265, "doc_norm": 1.6164, "encoder_q-embeddings": 1007.8522, "encoder_q-layer.0": 662.0234, "encoder_q-layer.1": 686.5673, "encoder_q-layer.10": 1345.323, "encoder_q-layer.11": 3035.3542, "encoder_q-layer.2": 770.7716, "encoder_q-layer.3": 788.0311, "encoder_q-layer.4": 839.1525, "encoder_q-layer.5": 886.7028, "encoder_q-layer.6": 1038.8591, "encoder_q-layer.7": 1283.1396, "encoder_q-layer.8": 1399.2843, "encoder_q-layer.9": 1259.689, "epoch": 0.9, "inbatch_neg_score": 0.7844, "inbatch_pos_score": 1.459, "learning_rate": 4.166666666666667e-06, "loss": 3.1265, "norm_diff": 0.0768, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2039.2101, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7852, "query_norm": 1.5397, "queue_k_norm": 1.6222, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9921, "sent_len_1": 66.724, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3887, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 92500 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.1215, "doc_norm": 1.6264, "encoder_q-embeddings": 1747.989, "encoder_q-layer.0": 1167.719, "encoder_q-layer.1": 1405.3662, "encoder_q-layer.10": 1240.0929, "encoder_q-layer.11": 2863.686, "encoder_q-layer.2": 1857.3412, "encoder_q-layer.3": 1988.991, "encoder_q-layer.4": 2085.2405, "encoder_q-layer.5": 2142.1973, "encoder_q-layer.6": 2079.8997, "encoder_q-layer.7": 2195.4072, "encoder_q-layer.8": 1794.6545, "encoder_q-layer.9": 1317.7954, "epoch": 0.9, "inbatch_neg_score": 0.7868, "inbatch_pos_score": 1.4961, "learning_rate": 4.111111111111112e-06, "loss": 3.1215, "norm_diff": 0.0761, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2876.8931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7876, "query_norm": 1.5503, "queue_k_norm": 1.6198, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0212, "sent_len_1": 66.9801, "sent_max_len_0": 128.0, "sent_max_len_1": 188.895, "stdk": 0.0494, "stdq": 0.0457, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92600 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1462, "doc_norm": 1.6248, "encoder_q-embeddings": 1318.671, "encoder_q-layer.0": 872.1504, "encoder_q-layer.1": 971.0966, "encoder_q-layer.10": 1242.6879, "encoder_q-layer.11": 3018.552, "encoder_q-layer.2": 1175.1174, "encoder_q-layer.3": 1248.729, "encoder_q-layer.4": 1312.5579, "encoder_q-layer.5": 1351.3217, "encoder_q-layer.6": 1461.5479, "encoder_q-layer.7": 1625.6824, "encoder_q-layer.8": 1676.9071, "encoder_q-layer.9": 1248.5825, "epoch": 0.91, "inbatch_neg_score": 0.7884, "inbatch_pos_score": 1.4648, "learning_rate": 4.055555555555556e-06, "loss": 3.1462, "norm_diff": 0.079, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2312.7251, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7876, "query_norm": 1.5458, "queue_k_norm": 1.6221, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.1184, "sent_len_1": 67.0419, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6413, "stdk": 0.0492, "stdq": 0.0454, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 92700 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 3.1192, "doc_norm": 1.6251, "encoder_q-embeddings": 2013.3914, "encoder_q-layer.0": 1395.4536, "encoder_q-layer.1": 1515.3904, "encoder_q-layer.10": 1191.2639, "encoder_q-layer.11": 2794.2681, "encoder_q-layer.2": 1699.7468, "encoder_q-layer.3": 1949.6741, "encoder_q-layer.4": 2087.3528, "encoder_q-layer.5": 2174.0317, "encoder_q-layer.6": 2191.5125, "encoder_q-layer.7": 1912.5828, "encoder_q-layer.8": 1716.593, "encoder_q-layer.9": 1276.7861, "epoch": 0.91, "inbatch_neg_score": 0.7867, "inbatch_pos_score": 1.5107, "learning_rate": 4.000000000000001e-06, "loss": 3.1192, "norm_diff": 0.0635, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2874.6399, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7876, "query_norm": 1.5616, "queue_k_norm": 1.6204, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9872, "sent_len_1": 66.7734, "sent_max_len_0": 128.0, "sent_max_len_1": 190.64, "stdk": 0.0493, "stdq": 0.0462, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 92800 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1336, "doc_norm": 1.6191, "encoder_q-embeddings": 910.9048, "encoder_q-layer.0": 595.2731, "encoder_q-layer.1": 616.0833, "encoder_q-layer.10": 1217.7056, "encoder_q-layer.11": 2762.5862, "encoder_q-layer.2": 695.5132, "encoder_q-layer.3": 730.767, "encoder_q-layer.4": 754.5601, "encoder_q-layer.5": 789.1526, "encoder_q-layer.6": 896.5803, "encoder_q-layer.7": 1036.9031, "encoder_q-layer.8": 1301.9276, "encoder_q-layer.9": 1207.2344, "epoch": 0.91, "inbatch_neg_score": 0.7882, "inbatch_pos_score": 1.4736, "learning_rate": 3.944444444444445e-06, "loss": 3.1336, "norm_diff": 0.0671, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1822.1877, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7876, "query_norm": 1.552, "queue_k_norm": 1.6209, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8469, "sent_len_1": 66.8491, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5938, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 92900 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.1424, "doc_norm": 1.6197, "encoder_q-embeddings": 1376.5316, "encoder_q-layer.0": 923.1931, "encoder_q-layer.1": 1103.8146, "encoder_q-layer.10": 1274.6606, "encoder_q-layer.11": 3058.8328, "encoder_q-layer.2": 1345.6653, "encoder_q-layer.3": 1461.6534, "encoder_q-layer.4": 1624.6903, "encoder_q-layer.5": 1571.7216, "encoder_q-layer.6": 1539.7407, "encoder_q-layer.7": 1691.1571, "encoder_q-layer.8": 1782.5784, "encoder_q-layer.9": 1348.8658, "epoch": 0.91, "inbatch_neg_score": 0.7884, "inbatch_pos_score": 1.4512, "learning_rate": 3.888888888888889e-06, "loss": 3.1424, "norm_diff": 0.0759, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2500.4172, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7876, "query_norm": 1.5437, "queue_k_norm": 1.6221, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9405, "sent_len_1": 66.8225, "sent_max_len_0": 128.0, "sent_max_len_1": 191.04, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 93000 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1435, "doc_norm": 1.616, "encoder_q-embeddings": 1168.6893, "encoder_q-layer.0": 779.5818, "encoder_q-layer.1": 861.9111, "encoder_q-layer.10": 1376.9758, "encoder_q-layer.11": 2956.4043, "encoder_q-layer.2": 988.1011, "encoder_q-layer.3": 1145.705, "encoder_q-layer.4": 1181.3917, "encoder_q-layer.5": 1230.3069, "encoder_q-layer.6": 1397.4772, "encoder_q-layer.7": 1564.8424, "encoder_q-layer.8": 1536.5613, "encoder_q-layer.9": 1281.6992, "epoch": 0.91, "inbatch_neg_score": 0.7868, "inbatch_pos_score": 1.4678, "learning_rate": 3.833333333333334e-06, "loss": 3.1435, "norm_diff": 0.062, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2194.7191, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7866, "query_norm": 1.554, "queue_k_norm": 1.6222, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9483, "sent_len_1": 66.5102, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7525, "stdk": 0.0488, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 93100 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.1245, "doc_norm": 1.6182, "encoder_q-embeddings": 1474.9506, "encoder_q-layer.0": 1065.7274, "encoder_q-layer.1": 1218.2291, "encoder_q-layer.10": 1260.8616, "encoder_q-layer.11": 2789.8259, "encoder_q-layer.2": 1363.5448, "encoder_q-layer.3": 1459.1699, "encoder_q-layer.4": 1571.5029, "encoder_q-layer.5": 1707.3986, "encoder_q-layer.6": 1638.3802, "encoder_q-layer.7": 1509.33, "encoder_q-layer.8": 1443.3059, "encoder_q-layer.9": 1229.1401, "epoch": 0.91, "inbatch_neg_score": 0.7883, "inbatch_pos_score": 1.501, "learning_rate": 3.777777777777778e-06, "loss": 3.1245, "norm_diff": 0.0612, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2377.2028, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7876, "query_norm": 1.557, "queue_k_norm": 1.6226, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1361, "sent_len_1": 66.8775, "sent_max_len_0": 128.0, "sent_max_len_1": 190.535, "stdk": 0.0489, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 93200 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1424, "doc_norm": 1.6219, "encoder_q-embeddings": 981.7151, "encoder_q-layer.0": 616.7434, "encoder_q-layer.1": 654.4413, "encoder_q-layer.10": 1228.7557, "encoder_q-layer.11": 2896.7676, "encoder_q-layer.2": 735.9194, "encoder_q-layer.3": 790.5229, "encoder_q-layer.4": 863.2541, "encoder_q-layer.5": 876.7355, "encoder_q-layer.6": 991.8033, "encoder_q-layer.7": 1214.2354, "encoder_q-layer.8": 1356.38, "encoder_q-layer.9": 1222.1074, "epoch": 0.91, "inbatch_neg_score": 0.7887, "inbatch_pos_score": 1.4727, "learning_rate": 3.722222222222222e-06, "loss": 3.1424, "norm_diff": 0.077, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1943.1084, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7881, "query_norm": 1.5449, "queue_k_norm": 1.6212, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8259, "sent_len_1": 66.6539, "sent_max_len_0": 128.0, "sent_max_len_1": 187.5163, "stdk": 0.0491, "stdq": 0.0454, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 93300 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.1026, "doc_norm": 1.6172, "encoder_q-embeddings": 1387.9846, "encoder_q-layer.0": 903.8665, "encoder_q-layer.1": 1019.2722, "encoder_q-layer.10": 1342.0372, "encoder_q-layer.11": 3175.6724, "encoder_q-layer.2": 1217.8506, "encoder_q-layer.3": 1426.8381, "encoder_q-layer.4": 1432.9938, "encoder_q-layer.5": 1538.9025, "encoder_q-layer.6": 1458.677, "encoder_q-layer.7": 1517.0863, "encoder_q-layer.8": 1545.7881, "encoder_q-layer.9": 1273.6444, "epoch": 0.91, "inbatch_neg_score": 0.7906, "inbatch_pos_score": 1.4424, "learning_rate": 3.666666666666667e-06, "loss": 3.1026, "norm_diff": 0.0672, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2420.8622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.79, "query_norm": 1.55, "queue_k_norm": 1.6237, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.2333, "sent_len_1": 66.8661, "sent_max_len_0": 128.0, "sent_max_len_1": 189.585, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 93400 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1497, "doc_norm": 1.6255, "encoder_q-embeddings": 1986.0638, "encoder_q-layer.0": 1289.3617, "encoder_q-layer.1": 1459.9329, "encoder_q-layer.10": 1303.1978, "encoder_q-layer.11": 2970.7102, "encoder_q-layer.2": 1670.0494, "encoder_q-layer.3": 1815.0896, "encoder_q-layer.4": 2029.5868, "encoder_q-layer.5": 2280.761, "encoder_q-layer.6": 2263.363, "encoder_q-layer.7": 2203.9207, "encoder_q-layer.8": 2171.1279, "encoder_q-layer.9": 1447.3342, "epoch": 0.91, "inbatch_neg_score": 0.7879, "inbatch_pos_score": 1.4688, "learning_rate": 3.611111111111111e-06, "loss": 3.1497, "norm_diff": 0.0701, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3023.7614, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7876, "query_norm": 1.5553, "queue_k_norm": 1.6221, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.6368, "sent_len_1": 66.598, "sent_max_len_0": 128.0, "sent_max_len_1": 187.6587, "stdk": 0.0492, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 93500 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.1376, "doc_norm": 1.6222, "encoder_q-embeddings": 1922.505, "encoder_q-layer.0": 1306.0651, "encoder_q-layer.1": 1470.2117, "encoder_q-layer.10": 1225.4946, "encoder_q-layer.11": 2835.5305, "encoder_q-layer.2": 1681.8344, "encoder_q-layer.3": 1673.7668, "encoder_q-layer.4": 1867.2692, "encoder_q-layer.5": 1863.4064, "encoder_q-layer.6": 2171.1902, "encoder_q-layer.7": 2084.6697, "encoder_q-layer.8": 1915.8844, "encoder_q-layer.9": 1261.3029, "epoch": 0.91, "inbatch_neg_score": 0.7893, "inbatch_pos_score": 1.5, "learning_rate": 3.555555555555556e-06, "loss": 3.1376, "norm_diff": 0.0583, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2821.4225, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7891, "query_norm": 1.5638, "queue_k_norm": 1.6232, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.2176, "sent_len_1": 66.991, "sent_max_len_0": 128.0, "sent_max_len_1": 190.545, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 93600 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.1332, "doc_norm": 1.6217, "encoder_q-embeddings": 1013.9414, "encoder_q-layer.0": 646.6998, "encoder_q-layer.1": 693.2679, "encoder_q-layer.10": 1250.9517, "encoder_q-layer.11": 2863.7568, "encoder_q-layer.2": 766.4033, "encoder_q-layer.3": 806.4169, "encoder_q-layer.4": 883.8264, "encoder_q-layer.5": 897.0276, "encoder_q-layer.6": 1064.9108, "encoder_q-layer.7": 1242.9199, "encoder_q-layer.8": 1436.54, "encoder_q-layer.9": 1272.1085, "epoch": 0.91, "inbatch_neg_score": 0.7902, "inbatch_pos_score": 1.4902, "learning_rate": 3.5000000000000004e-06, "loss": 3.1332, "norm_diff": 0.0686, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1977.2107, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7896, "query_norm": 1.5531, "queue_k_norm": 1.6243, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.975, "sent_len_1": 66.7922, "sent_max_len_0": 128.0, "sent_max_len_1": 188.2625, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 93700 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.1447, "doc_norm": 1.6247, "encoder_q-embeddings": 3184.3501, "encoder_q-layer.0": 2469.2822, "encoder_q-layer.1": 3071.7944, "encoder_q-layer.10": 1375.694, "encoder_q-layer.11": 3220.1296, "encoder_q-layer.2": 4136.791, "encoder_q-layer.3": 4343.2471, "encoder_q-layer.4": 4890.0605, "encoder_q-layer.5": 4905.6553, "encoder_q-layer.6": 4528.6499, "encoder_q-layer.7": 5106.0322, "encoder_q-layer.8": 4480.1699, "encoder_q-layer.9": 2559.3923, "epoch": 0.92, "inbatch_neg_score": 0.7906, "inbatch_pos_score": 1.4346, "learning_rate": 3.4444444444444444e-06, "loss": 3.1447, "norm_diff": 0.0846, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5826.9178, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.791, "query_norm": 1.54, "queue_k_norm": 1.6233, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9229, "sent_len_1": 66.7941, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1637, "stdk": 0.0492, "stdq": 0.0452, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 93800 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.1254, "doc_norm": 1.6212, "encoder_q-embeddings": 1012.6208, "encoder_q-layer.0": 649.0029, "encoder_q-layer.1": 689.2939, "encoder_q-layer.10": 1343.6239, "encoder_q-layer.11": 3266.2822, "encoder_q-layer.2": 796.5593, "encoder_q-layer.3": 825.7484, "encoder_q-layer.4": 906.986, "encoder_q-layer.5": 931.8576, "encoder_q-layer.6": 1088.5962, "encoder_q-layer.7": 1296.3296, "encoder_q-layer.8": 1511.5347, "encoder_q-layer.9": 1313.5409, "epoch": 0.92, "inbatch_neg_score": 0.7916, "inbatch_pos_score": 1.4316, "learning_rate": 3.3888888888888893e-06, "loss": 3.1254, "norm_diff": 0.0815, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2173.0458, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7915, "query_norm": 1.5398, "queue_k_norm": 1.6237, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0266, "sent_len_1": 66.8344, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1775, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 93900 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1498, "doc_norm": 1.6214, "encoder_q-embeddings": 15242.0293, "encoder_q-layer.0": 10864.2871, "encoder_q-layer.1": 12540.6006, "encoder_q-layer.10": 1371.4846, "encoder_q-layer.11": 3052.8528, "encoder_q-layer.2": 13971.8086, "encoder_q-layer.3": 16330.3389, "encoder_q-layer.4": 17514.1523, "encoder_q-layer.5": 22599.0742, "encoder_q-layer.6": 21939.2852, "encoder_q-layer.7": 21948.0039, "encoder_q-layer.8": 15483.4355, "encoder_q-layer.9": 3466.1262, "epoch": 0.92, "inbatch_neg_score": 0.7898, "inbatch_pos_score": 1.4668, "learning_rate": 3.3333333333333333e-06, "loss": 3.1498, "norm_diff": 0.0741, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 23188.2039, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.7905, "query_norm": 1.5473, "queue_k_norm": 1.6245, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9028, "sent_len_1": 66.7918, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7713, "stdk": 0.049, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 94000 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.111, "doc_norm": 1.6204, "encoder_q-embeddings": 1343.7412, "encoder_q-layer.0": 903.291, "encoder_q-layer.1": 1061.5906, "encoder_q-layer.10": 1337.8779, "encoder_q-layer.11": 2904.1794, "encoder_q-layer.2": 1211.3588, "encoder_q-layer.3": 1291.2751, "encoder_q-layer.4": 1432.9548, "encoder_q-layer.5": 1508.8173, "encoder_q-layer.6": 1705.5363, "encoder_q-layer.7": 1847.0104, "encoder_q-layer.8": 1807.7113, "encoder_q-layer.9": 1329.3501, "epoch": 0.92, "inbatch_neg_score": 0.7905, "inbatch_pos_score": 1.4502, "learning_rate": 3.277777777777778e-06, "loss": 3.111, "norm_diff": 0.0772, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2429.9482, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.79, "query_norm": 1.5433, "queue_k_norm": 1.6256, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9036, "sent_len_1": 67.0286, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4675, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 94100 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1068, "doc_norm": 1.6178, "encoder_q-embeddings": 536.3345, "encoder_q-layer.0": 339.3633, "encoder_q-layer.1": 361.5222, "encoder_q-layer.10": 638.2821, "encoder_q-layer.11": 1459.5365, "encoder_q-layer.2": 414.6568, "encoder_q-layer.3": 448.8432, "encoder_q-layer.4": 473.9182, "encoder_q-layer.5": 482.0149, "encoder_q-layer.6": 530.9382, "encoder_q-layer.7": 604.5976, "encoder_q-layer.8": 684.2759, "encoder_q-layer.9": 631.2968, "epoch": 0.92, "inbatch_neg_score": 0.7906, "inbatch_pos_score": 1.4727, "learning_rate": 3.2222222222222222e-06, "loss": 3.1068, "norm_diff": 0.0648, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1022.9094, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7905, "query_norm": 1.553, "queue_k_norm": 1.6249, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9456, "sent_len_1": 66.5997, "sent_max_len_0": 128.0, "sent_max_len_1": 188.075, "stdk": 0.0488, "stdq": 0.0458, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 94200 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1422, "doc_norm": 1.6238, "encoder_q-embeddings": 1355.9813, "encoder_q-layer.0": 912.7049, "encoder_q-layer.1": 1048.6243, "encoder_q-layer.10": 634.7236, "encoder_q-layer.11": 1458.949, "encoder_q-layer.2": 1226.2485, "encoder_q-layer.3": 1330.4906, "encoder_q-layer.4": 1485.8499, "encoder_q-layer.5": 1392.8276, "encoder_q-layer.6": 1238.6206, "encoder_q-layer.7": 1421.9384, "encoder_q-layer.8": 1110.166, "encoder_q-layer.9": 712.8215, "epoch": 0.92, "inbatch_neg_score": 0.7872, "inbatch_pos_score": 1.4658, "learning_rate": 3.166666666666667e-06, "loss": 3.1422, "norm_diff": 0.0752, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1844.333, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7886, "query_norm": 1.5485, "queue_k_norm": 1.6242, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8983, "sent_len_1": 66.8521, "sent_max_len_0": 128.0, "sent_max_len_1": 188.7138, "stdk": 0.0492, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 94300 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1493, "doc_norm": 1.6234, "encoder_q-embeddings": 444.0762, "encoder_q-layer.0": 302.3088, "encoder_q-layer.1": 312.144, "encoder_q-layer.10": 652.6085, "encoder_q-layer.11": 1481.9761, "encoder_q-layer.2": 346.6377, "encoder_q-layer.3": 368.5405, "encoder_q-layer.4": 392.3938, "encoder_q-layer.5": 411.3578, "encoder_q-layer.6": 488.4067, "encoder_q-layer.7": 580.5383, "encoder_q-layer.8": 690.2202, "encoder_q-layer.9": 639.8734, "epoch": 0.92, "inbatch_neg_score": 0.7886, "inbatch_pos_score": 1.4668, "learning_rate": 3.111111111111111e-06, "loss": 3.1493, "norm_diff": 0.0754, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 985.7969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7896, "query_norm": 1.548, "queue_k_norm": 1.624, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0211, "sent_len_1": 66.5483, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8887, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 94400 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1271, "doc_norm": 1.6216, "encoder_q-embeddings": 568.223, "encoder_q-layer.0": 381.9745, "encoder_q-layer.1": 413.6579, "encoder_q-layer.10": 636.0231, "encoder_q-layer.11": 1423.1652, "encoder_q-layer.2": 477.5397, "encoder_q-layer.3": 511.2477, "encoder_q-layer.4": 550.0472, "encoder_q-layer.5": 589.6891, "encoder_q-layer.6": 670.3159, "encoder_q-layer.7": 758.991, "encoder_q-layer.8": 814.9535, "encoder_q-layer.9": 681.3532, "epoch": 0.92, "inbatch_neg_score": 0.79, "inbatch_pos_score": 1.4922, "learning_rate": 3.0555555555555556e-06, "loss": 3.1271, "norm_diff": 0.0544, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1074.6529, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7896, "query_norm": 1.5673, "queue_k_norm": 1.6239, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0858, "sent_len_1": 66.6789, "sent_max_len_0": 128.0, "sent_max_len_1": 186.575, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 94500 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1279, "doc_norm": 1.6282, "encoder_q-embeddings": 490.946, "encoder_q-layer.0": 334.3576, "encoder_q-layer.1": 356.7077, "encoder_q-layer.10": 709.9531, "encoder_q-layer.11": 1519.8616, "encoder_q-layer.2": 393.6796, "encoder_q-layer.3": 412.8143, "encoder_q-layer.4": 445.3151, "encoder_q-layer.5": 462.7835, "encoder_q-layer.6": 506.2007, "encoder_q-layer.7": 597.9899, "encoder_q-layer.8": 691.6962, "encoder_q-layer.9": 625.018, "epoch": 0.92, "inbatch_neg_score": 0.7901, "inbatch_pos_score": 1.459, "learning_rate": 3e-06, "loss": 3.1279, "norm_diff": 0.0877, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1008.5379, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.791, "query_norm": 1.5405, "queue_k_norm": 1.6239, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.794, "sent_len_1": 66.6427, "sent_max_len_0": 128.0, "sent_max_len_1": 190.5863, "stdk": 0.0493, "stdq": 0.0453, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 94600 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.126, "doc_norm": 1.6271, "encoder_q-embeddings": 519.178, "encoder_q-layer.0": 341.6521, "encoder_q-layer.1": 367.6409, "encoder_q-layer.10": 635.8693, "encoder_q-layer.11": 1488.2173, "encoder_q-layer.2": 437.2106, "encoder_q-layer.3": 461.0767, "encoder_q-layer.4": 486.6225, "encoder_q-layer.5": 519.0058, "encoder_q-layer.6": 585.5477, "encoder_q-layer.7": 650.1013, "encoder_q-layer.8": 694.8998, "encoder_q-layer.9": 611.652, "epoch": 0.92, "inbatch_neg_score": 0.7903, "inbatch_pos_score": 1.4775, "learning_rate": 2.9444444444444445e-06, "loss": 3.126, "norm_diff": 0.0719, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1036.3615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7905, "query_norm": 1.5552, "queue_k_norm": 1.624, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9421, "sent_len_1": 66.6292, "sent_max_len_0": 128.0, "sent_max_len_1": 190.4875, "stdk": 0.0492, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 94700 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.1081, "doc_norm": 1.6279, "encoder_q-embeddings": 551.1251, "encoder_q-layer.0": 354.1718, "encoder_q-layer.1": 367.3044, "encoder_q-layer.10": 687.6435, "encoder_q-layer.11": 1524.2228, "encoder_q-layer.2": 425.2809, "encoder_q-layer.3": 437.1326, "encoder_q-layer.4": 468.0639, "encoder_q-layer.5": 497.2811, "encoder_q-layer.6": 544.1554, "encoder_q-layer.7": 622.7504, "encoder_q-layer.8": 688.8769, "encoder_q-layer.9": 621.6591, "epoch": 0.93, "inbatch_neg_score": 0.7913, "inbatch_pos_score": 1.4678, "learning_rate": 2.888888888888889e-06, "loss": 3.1081, "norm_diff": 0.0836, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1046.8554, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7915, "query_norm": 1.5443, "queue_k_norm": 1.6261, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9151, "sent_len_1": 66.5891, "sent_max_len_0": 128.0, "sent_max_len_1": 188.925, "stdk": 0.0492, "stdq": 0.0454, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 94800 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.1577, "doc_norm": 1.6208, "encoder_q-embeddings": 669.7737, "encoder_q-layer.0": 439.7318, "encoder_q-layer.1": 463.068, "encoder_q-layer.10": 621.6324, "encoder_q-layer.11": 1465.4304, "encoder_q-layer.2": 546.4568, "encoder_q-layer.3": 587.0074, "encoder_q-layer.4": 588.8514, "encoder_q-layer.5": 595.2225, "encoder_q-layer.6": 647.3148, "encoder_q-layer.7": 674.6348, "encoder_q-layer.8": 742.3953, "encoder_q-layer.9": 627.3733, "epoch": 0.93, "inbatch_neg_score": 0.7903, "inbatch_pos_score": 1.4834, "learning_rate": 2.8333333333333335e-06, "loss": 3.1577, "norm_diff": 0.0707, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1110.5988, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7905, "query_norm": 1.5501, "queue_k_norm": 1.6261, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8452, "sent_len_1": 66.8396, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1687, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 94900 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0899, "doc_norm": 1.6235, "encoder_q-embeddings": 635.0618, "encoder_q-layer.0": 422.7371, "encoder_q-layer.1": 469.0953, "encoder_q-layer.10": 660.4855, "encoder_q-layer.11": 1464.8101, "encoder_q-layer.2": 548.5352, "encoder_q-layer.3": 606.2968, "encoder_q-layer.4": 644.5683, "encoder_q-layer.5": 679.7496, "encoder_q-layer.6": 700.4047, "encoder_q-layer.7": 742.3655, "encoder_q-layer.8": 750.3468, "encoder_q-layer.9": 654.8229, "epoch": 0.93, "inbatch_neg_score": 0.7913, "inbatch_pos_score": 1.4814, "learning_rate": 2.777777777777778e-06, "loss": 3.0899, "norm_diff": 0.0674, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1129.362, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7915, "query_norm": 1.5562, "queue_k_norm": 1.624, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9659, "sent_len_1": 66.8603, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0137, "stdk": 0.0491, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95000 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1413, "doc_norm": 1.6222, "encoder_q-embeddings": 887.0527, "encoder_q-layer.0": 617.5422, "encoder_q-layer.1": 653.8419, "encoder_q-layer.10": 614.267, "encoder_q-layer.11": 1414.6835, "encoder_q-layer.2": 810.9359, "encoder_q-layer.3": 774.3558, "encoder_q-layer.4": 787.9551, "encoder_q-layer.5": 810.5151, "encoder_q-layer.6": 812.8145, "encoder_q-layer.7": 866.813, "encoder_q-layer.8": 826.743, "encoder_q-layer.9": 625.8743, "epoch": 0.93, "inbatch_neg_score": 0.7913, "inbatch_pos_score": 1.4766, "learning_rate": 2.7222222222222224e-06, "loss": 3.1413, "norm_diff": 0.0681, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1280.1927, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7905, "query_norm": 1.5541, "queue_k_norm": 1.6255, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9402, "sent_len_1": 66.5681, "sent_max_len_0": 128.0, "sent_max_len_1": 186.5725, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95100 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.1342, "doc_norm": 1.6305, "encoder_q-embeddings": 585.3311, "encoder_q-layer.0": 388.8509, "encoder_q-layer.1": 435.0337, "encoder_q-layer.10": 663.8154, "encoder_q-layer.11": 1468.7059, "encoder_q-layer.2": 487.2504, "encoder_q-layer.3": 510.9774, "encoder_q-layer.4": 533.7061, "encoder_q-layer.5": 583.6622, "encoder_q-layer.6": 620.4731, "encoder_q-layer.7": 693.0012, "encoder_q-layer.8": 759.7797, "encoder_q-layer.9": 635.1432, "epoch": 0.93, "inbatch_neg_score": 0.7899, "inbatch_pos_score": 1.4854, "learning_rate": 2.666666666666667e-06, "loss": 3.1342, "norm_diff": 0.0746, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1076.2069, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.791, "query_norm": 1.5558, "queue_k_norm": 1.6237, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1383, "sent_len_1": 66.9736, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6637, "stdk": 0.0494, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95200 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1341, "doc_norm": 1.6285, "encoder_q-embeddings": 538.3407, "encoder_q-layer.0": 365.214, "encoder_q-layer.1": 410.2883, "encoder_q-layer.10": 699.6381, "encoder_q-layer.11": 1536.7932, "encoder_q-layer.2": 476.0791, "encoder_q-layer.3": 519.0912, "encoder_q-layer.4": 616.8396, "encoder_q-layer.5": 666.0676, "encoder_q-layer.6": 848.8862, "encoder_q-layer.7": 988.8257, "encoder_q-layer.8": 1067.2605, "encoder_q-layer.9": 722.0337, "epoch": 0.93, "inbatch_neg_score": 0.7927, "inbatch_pos_score": 1.4756, "learning_rate": 2.6111111111111113e-06, "loss": 3.1341, "norm_diff": 0.0782, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1211.6145, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7925, "query_norm": 1.5503, "queue_k_norm": 1.6235, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8871, "sent_len_1": 66.4839, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5387, "stdk": 0.0493, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95300 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.1252, "doc_norm": 1.6213, "encoder_q-embeddings": 554.8084, "encoder_q-layer.0": 394.3258, "encoder_q-layer.1": 409.9684, "encoder_q-layer.10": 640.1682, "encoder_q-layer.11": 1437.4579, "encoder_q-layer.2": 457.2804, "encoder_q-layer.3": 451.3904, "encoder_q-layer.4": 489.8665, "encoder_q-layer.5": 533.4288, "encoder_q-layer.6": 550.2481, "encoder_q-layer.7": 609.0526, "encoder_q-layer.8": 669.5361, "encoder_q-layer.9": 600.7408, "epoch": 0.93, "inbatch_neg_score": 0.7921, "inbatch_pos_score": 1.4902, "learning_rate": 2.5555555555555557e-06, "loss": 3.1252, "norm_diff": 0.0646, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1016.7517, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.793, "query_norm": 1.5568, "queue_k_norm": 1.6249, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8644, "sent_len_1": 66.502, "sent_max_len_0": 128.0, "sent_max_len_1": 189.8275, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95400 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.0996, "doc_norm": 1.623, "encoder_q-embeddings": 529.6038, "encoder_q-layer.0": 370.7324, "encoder_q-layer.1": 416.7878, "encoder_q-layer.10": 709.1567, "encoder_q-layer.11": 1495.1127, "encoder_q-layer.2": 469.7054, "encoder_q-layer.3": 509.6913, "encoder_q-layer.4": 535.4832, "encoder_q-layer.5": 564.4781, "encoder_q-layer.6": 571.9897, "encoder_q-layer.7": 709.8152, "encoder_q-layer.8": 744.337, "encoder_q-layer.9": 654.9493, "epoch": 0.93, "inbatch_neg_score": 0.7938, "inbatch_pos_score": 1.4873, "learning_rate": 2.5e-06, "loss": 3.0996, "norm_diff": 0.0673, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1053.6685, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7939, "query_norm": 1.5557, "queue_k_norm": 1.6251, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9508, "sent_len_1": 66.7772, "sent_max_len_0": 128.0, "sent_max_len_1": 188.1562, "stdk": 0.049, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95500 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.1439, "doc_norm": 1.6248, "encoder_q-embeddings": 903.796, "encoder_q-layer.0": 638.885, "encoder_q-layer.1": 737.5099, "encoder_q-layer.10": 668.8021, "encoder_q-layer.11": 1452.6793, "encoder_q-layer.2": 919.2825, "encoder_q-layer.3": 995.9756, "encoder_q-layer.4": 1130.3774, "encoder_q-layer.5": 1184.4308, "encoder_q-layer.6": 1253.2374, "encoder_q-layer.7": 1433.2255, "encoder_q-layer.8": 1092.0228, "encoder_q-layer.9": 655.5226, "epoch": 0.93, "inbatch_neg_score": 0.7945, "inbatch_pos_score": 1.4707, "learning_rate": 2.4444444444444447e-06, "loss": 3.1439, "norm_diff": 0.0724, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1581.3633, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7949, "query_norm": 1.5524, "queue_k_norm": 1.625, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8521, "sent_len_1": 66.5261, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4375, "stdk": 0.0491, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95600 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1126, "doc_norm": 1.6223, "encoder_q-embeddings": 8563.6436, "encoder_q-layer.0": 5685.0005, "encoder_q-layer.1": 6085.9419, "encoder_q-layer.10": 671.2087, "encoder_q-layer.11": 1550.1488, "encoder_q-layer.2": 6863.8125, "encoder_q-layer.3": 7016.5239, "encoder_q-layer.4": 7301.9609, "encoder_q-layer.5": 7050.9326, "encoder_q-layer.6": 7035.1763, "encoder_q-layer.7": 8633.5469, "encoder_q-layer.8": 5967.6558, "encoder_q-layer.9": 1283.3192, "epoch": 0.93, "inbatch_neg_score": 0.7938, "inbatch_pos_score": 1.4658, "learning_rate": 2.388888888888889e-06, "loss": 3.1126, "norm_diff": 0.0743, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9772.355, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7939, "query_norm": 1.548, "queue_k_norm": 1.6264, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.065, "sent_len_1": 66.7889, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6912, "stdk": 0.049, "stdq": 0.0456, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 95700 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1389, "doc_norm": 1.6271, "encoder_q-embeddings": 712.3105, "encoder_q-layer.0": 511.8627, "encoder_q-layer.1": 545.293, "encoder_q-layer.10": 678.8977, "encoder_q-layer.11": 1529.5693, "encoder_q-layer.2": 643.7379, "encoder_q-layer.3": 684.3842, "encoder_q-layer.4": 755.1694, "encoder_q-layer.5": 802.2053, "encoder_q-layer.6": 879.5536, "encoder_q-layer.7": 867.3336, "encoder_q-layer.8": 830.3129, "encoder_q-layer.9": 654.4705, "epoch": 0.94, "inbatch_neg_score": 0.7955, "inbatch_pos_score": 1.4648, "learning_rate": 2.3333333333333336e-06, "loss": 3.1389, "norm_diff": 0.0737, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1260.5584, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7949, "query_norm": 1.5534, "queue_k_norm": 1.6264, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.146, "sent_len_1": 66.8189, "sent_max_len_0": 128.0, "sent_max_len_1": 191.1838, "stdk": 0.0492, "stdq": 0.0458, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 95800 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.1304, "doc_norm": 1.6278, "encoder_q-embeddings": 708.0601, "encoder_q-layer.0": 490.776, "encoder_q-layer.1": 519.9022, "encoder_q-layer.10": 635.7335, "encoder_q-layer.11": 1535.4153, "encoder_q-layer.2": 614.3389, "encoder_q-layer.3": 610.5404, "encoder_q-layer.4": 613.6389, "encoder_q-layer.5": 638.5349, "encoder_q-layer.6": 670.6155, "encoder_q-layer.7": 744.1726, "encoder_q-layer.8": 788.3381, "encoder_q-layer.9": 631.729, "epoch": 0.94, "inbatch_neg_score": 0.794, "inbatch_pos_score": 1.4648, "learning_rate": 2.277777777777778e-06, "loss": 3.1304, "norm_diff": 0.0733, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1179.5694, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7944, "query_norm": 1.5546, "queue_k_norm": 1.6256, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0049, "sent_len_1": 66.7911, "sent_max_len_0": 128.0, "sent_max_len_1": 187.5425, "stdk": 0.0492, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95900 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1319, "doc_norm": 1.6254, "encoder_q-embeddings": 465.5964, "encoder_q-layer.0": 313.2893, "encoder_q-layer.1": 317.8214, "encoder_q-layer.10": 614.6633, "encoder_q-layer.11": 1473.5298, "encoder_q-layer.2": 349.6156, "encoder_q-layer.3": 370.1321, "encoder_q-layer.4": 398.066, "encoder_q-layer.5": 417.4114, "encoder_q-layer.6": 469.3312, "encoder_q-layer.7": 562.9025, "encoder_q-layer.8": 682.2361, "encoder_q-layer.9": 634.7638, "epoch": 0.94, "inbatch_neg_score": 0.7949, "inbatch_pos_score": 1.4775, "learning_rate": 2.2222222222222225e-06, "loss": 3.1319, "norm_diff": 0.0706, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 982.619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7944, "query_norm": 1.5548, "queue_k_norm": 1.6262, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.909, "sent_len_1": 66.7446, "sent_max_len_0": 128.0, "sent_max_len_1": 186.5288, "stdk": 0.0491, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 96000 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.1176, "doc_norm": 1.6251, "encoder_q-embeddings": 534.8091, "encoder_q-layer.0": 356.9949, "encoder_q-layer.1": 397.0004, "encoder_q-layer.10": 666.274, "encoder_q-layer.11": 1502.5885, "encoder_q-layer.2": 451.3138, "encoder_q-layer.3": 438.6504, "encoder_q-layer.4": 475.6703, "encoder_q-layer.5": 488.7105, "encoder_q-layer.6": 519.5728, "encoder_q-layer.7": 634.561, "encoder_q-layer.8": 717.7823, "encoder_q-layer.9": 639.2882, "epoch": 0.94, "inbatch_neg_score": 0.7961, "inbatch_pos_score": 1.4883, "learning_rate": 2.166666666666667e-06, "loss": 3.1176, "norm_diff": 0.0682, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1032.9689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7954, "query_norm": 1.5569, "queue_k_norm": 1.6256, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8871, "sent_len_1": 66.6742, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3, "stdk": 0.0491, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 96100 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.1512, "doc_norm": 1.6312, "encoder_q-embeddings": 1040.3597, "encoder_q-layer.0": 659.2542, "encoder_q-layer.1": 717.5479, "encoder_q-layer.10": 1293.0209, "encoder_q-layer.11": 3053.6755, "encoder_q-layer.2": 828.1411, "encoder_q-layer.3": 870.7198, "encoder_q-layer.4": 963.8295, "encoder_q-layer.5": 1002.6949, "encoder_q-layer.6": 1167.6378, "encoder_q-layer.7": 1354.8229, "encoder_q-layer.8": 1540.9169, "encoder_q-layer.9": 1308.0304, "epoch": 0.94, "inbatch_neg_score": 0.7955, "inbatch_pos_score": 1.4531, "learning_rate": 2.1111111111111114e-06, "loss": 3.1512, "norm_diff": 0.0898, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2125.2491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7959, "query_norm": 1.5414, "queue_k_norm": 1.6263, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.1866, "sent_len_1": 66.9829, "sent_max_len_0": 128.0, "sent_max_len_1": 191.5012, "stdk": 0.0494, "stdq": 0.0453, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 96200 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 3.1238, "doc_norm": 1.6344, "encoder_q-embeddings": 1069.6962, "encoder_q-layer.0": 686.888, "encoder_q-layer.1": 720.5658, "encoder_q-layer.10": 1245.1848, "encoder_q-layer.11": 2913.3389, "encoder_q-layer.2": 823.8362, "encoder_q-layer.3": 896.2906, "encoder_q-layer.4": 971.2831, "encoder_q-layer.5": 990.0278, "encoder_q-layer.6": 1159.7642, "encoder_q-layer.7": 1275.7542, "encoder_q-layer.8": 1466.2408, "encoder_q-layer.9": 1231.5459, "epoch": 0.94, "inbatch_neg_score": 0.7951, "inbatch_pos_score": 1.5137, "learning_rate": 2.055555555555556e-06, "loss": 3.1238, "norm_diff": 0.0704, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2055.6705, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7954, "query_norm": 1.564, "queue_k_norm": 1.6256, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9192, "sent_len_1": 66.7571, "sent_max_len_0": 128.0, "sent_max_len_1": 191.0575, "stdk": 0.0495, "stdq": 0.0463, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 96300 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.1512, "doc_norm": 1.6268, "encoder_q-embeddings": 1023.031, "encoder_q-layer.0": 659.3455, "encoder_q-layer.1": 702.1702, "encoder_q-layer.10": 1343.1736, "encoder_q-layer.11": 3016.5334, "encoder_q-layer.2": 801.4954, "encoder_q-layer.3": 853.6965, "encoder_q-layer.4": 849.4606, "encoder_q-layer.5": 935.2035, "encoder_q-layer.6": 1015.8529, "encoder_q-layer.7": 1275.8925, "encoder_q-layer.8": 1421.6359, "encoder_q-layer.9": 1310.4215, "epoch": 0.94, "inbatch_neg_score": 0.7964, "inbatch_pos_score": 1.4756, "learning_rate": 2.0000000000000003e-06, "loss": 3.1512, "norm_diff": 0.0778, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2039.9316, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7954, "query_norm": 1.5491, "queue_k_norm": 1.6246, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.803, "sent_len_1": 66.7484, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5238, "stdk": 0.0491, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 96400 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 3.133, "doc_norm": 1.6205, "encoder_q-embeddings": 1000.5853, "encoder_q-layer.0": 655.7438, "encoder_q-layer.1": 672.6141, "encoder_q-layer.10": 1248.5323, "encoder_q-layer.11": 2938.5835, "encoder_q-layer.2": 768.9022, "encoder_q-layer.3": 793.8857, "encoder_q-layer.4": 865.8487, "encoder_q-layer.5": 901.8948, "encoder_q-layer.6": 1030.8973, "encoder_q-layer.7": 1214.2699, "encoder_q-layer.8": 1449.9358, "encoder_q-layer.9": 1243.8579, "epoch": 0.94, "inbatch_neg_score": 0.7982, "inbatch_pos_score": 1.4941, "learning_rate": 1.9444444444444444e-06, "loss": 3.133, "norm_diff": 0.0688, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2004.0568, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7974, "query_norm": 1.5517, "queue_k_norm": 1.627, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.5743, "sent_len_1": 66.4734, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3075, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 96500 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.1454, "doc_norm": 1.6247, "encoder_q-embeddings": 2428.9639, "encoder_q-layer.0": 1628.3413, "encoder_q-layer.1": 1872.749, "encoder_q-layer.10": 1332.3218, "encoder_q-layer.11": 2976.3494, "encoder_q-layer.2": 2319.9338, "encoder_q-layer.3": 2526.5972, "encoder_q-layer.4": 2833.4919, "encoder_q-layer.5": 2930.968, "encoder_q-layer.6": 3194.2434, "encoder_q-layer.7": 3998.7031, "encoder_q-layer.8": 3379.1235, "encoder_q-layer.9": 1488.2338, "epoch": 0.94, "inbatch_neg_score": 0.796, "inbatch_pos_score": 1.457, "learning_rate": 1.888888888888889e-06, "loss": 3.1454, "norm_diff": 0.0812, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4009.78, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7959, "query_norm": 1.5434, "queue_k_norm": 1.6272, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0721, "sent_len_1": 66.7491, "sent_max_len_0": 128.0, "sent_max_len_1": 190.1788, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 96600 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 3.1096, "doc_norm": 1.6271, "encoder_q-embeddings": 895.8323, "encoder_q-layer.0": 601.6376, "encoder_q-layer.1": 619.0306, "encoder_q-layer.10": 1338.7441, "encoder_q-layer.11": 3058.1116, "encoder_q-layer.2": 726.0092, "encoder_q-layer.3": 740.4032, "encoder_q-layer.4": 785.4417, "encoder_q-layer.5": 836.6, "encoder_q-layer.6": 990.3027, "encoder_q-layer.7": 1158.1127, "encoder_q-layer.8": 1400.0343, "encoder_q-layer.9": 1240.7571, "epoch": 0.94, "inbatch_neg_score": 0.7959, "inbatch_pos_score": 1.5166, "learning_rate": 1.8333333333333335e-06, "loss": 3.1096, "norm_diff": 0.0772, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1975.865, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7964, "query_norm": 1.5499, "queue_k_norm": 1.626, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9573, "sent_len_1": 66.9014, "sent_max_len_0": 128.0, "sent_max_len_1": 190.2688, "stdk": 0.0492, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 96700 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1476, "doc_norm": 1.6233, "encoder_q-embeddings": 1072.4094, "encoder_q-layer.0": 691.7136, "encoder_q-layer.1": 729.8138, "encoder_q-layer.10": 1228.2899, "encoder_q-layer.11": 2953.3293, "encoder_q-layer.2": 831.7639, "encoder_q-layer.3": 915.0835, "encoder_q-layer.4": 960.9299, "encoder_q-layer.5": 1000.2888, "encoder_q-layer.6": 1124.004, "encoder_q-layer.7": 1262.4054, "encoder_q-layer.8": 1439.8818, "encoder_q-layer.9": 1275.5792, "epoch": 0.95, "inbatch_neg_score": 0.7975, "inbatch_pos_score": 1.4873, "learning_rate": 1.777777777777778e-06, "loss": 3.1476, "norm_diff": 0.0718, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2062.5698, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7974, "query_norm": 1.5515, "queue_k_norm": 1.6268, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.6743, "sent_len_1": 66.5766, "sent_max_len_0": 128.0, "sent_max_len_1": 191.4175, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 96800 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 3.1105, "doc_norm": 1.6296, "encoder_q-embeddings": 1379.7034, "encoder_q-layer.0": 909.0817, "encoder_q-layer.1": 971.2291, "encoder_q-layer.10": 1415.696, "encoder_q-layer.11": 3088.835, "encoder_q-layer.2": 1136.6156, "encoder_q-layer.3": 1161.111, "encoder_q-layer.4": 1254.5299, "encoder_q-layer.5": 1245.7073, "encoder_q-layer.6": 1340.5813, "encoder_q-layer.7": 1423.2598, "encoder_q-layer.8": 1567.0791, "encoder_q-layer.9": 1344.4124, "epoch": 0.95, "inbatch_neg_score": 0.7964, "inbatch_pos_score": 1.4883, "learning_rate": 1.7222222222222222e-06, "loss": 3.1105, "norm_diff": 0.0818, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2285.5006, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7974, "query_norm": 1.5478, "queue_k_norm": 1.626, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.1162, "sent_len_1": 66.9289, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1325, "stdk": 0.0493, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 96900 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1255, "doc_norm": 1.6216, "encoder_q-embeddings": 1034.2961, "encoder_q-layer.0": 679.8314, "encoder_q-layer.1": 736.861, "encoder_q-layer.10": 1232.0051, "encoder_q-layer.11": 2942.1125, "encoder_q-layer.2": 858.4785, "encoder_q-layer.3": 874.2191, "encoder_q-layer.4": 963.8414, "encoder_q-layer.5": 985.099, "encoder_q-layer.6": 1090.0382, "encoder_q-layer.7": 1264.8972, "encoder_q-layer.8": 1424.5657, "encoder_q-layer.9": 1251.1136, "epoch": 0.95, "inbatch_neg_score": 0.7991, "inbatch_pos_score": 1.4854, "learning_rate": 1.6666666666666667e-06, "loss": 3.1255, "norm_diff": 0.069, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2047.1265, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7979, "query_norm": 1.5526, "queue_k_norm": 1.6257, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9657, "sent_len_1": 66.802, "sent_max_len_0": 128.0, "sent_max_len_1": 191.4125, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 97000 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.1147, "doc_norm": 1.6253, "encoder_q-embeddings": 1106.8784, "encoder_q-layer.0": 736.1497, "encoder_q-layer.1": 801.4086, "encoder_q-layer.10": 1374.5938, "encoder_q-layer.11": 2904.9907, "encoder_q-layer.2": 915.3994, "encoder_q-layer.3": 994.3898, "encoder_q-layer.4": 1078.8334, "encoder_q-layer.5": 1085.6284, "encoder_q-layer.6": 1187.9338, "encoder_q-layer.7": 1359.032, "encoder_q-layer.8": 1449.6924, "encoder_q-layer.9": 1200.0723, "epoch": 0.95, "inbatch_neg_score": 0.7994, "inbatch_pos_score": 1.4873, "learning_rate": 1.6111111111111111e-06, "loss": 3.1147, "norm_diff": 0.0735, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2076.5025, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7988, "query_norm": 1.5519, "queue_k_norm": 1.6267, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9508, "sent_len_1": 66.7869, "sent_max_len_0": 128.0, "sent_max_len_1": 188.6525, "stdk": 0.0491, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 97100 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1367, "doc_norm": 1.6286, "encoder_q-embeddings": 937.454, "encoder_q-layer.0": 626.9391, "encoder_q-layer.1": 661.9055, "encoder_q-layer.10": 1294.2898, "encoder_q-layer.11": 3006.8459, "encoder_q-layer.2": 743.9683, "encoder_q-layer.3": 778.4683, "encoder_q-layer.4": 843.2409, "encoder_q-layer.5": 913.2504, "encoder_q-layer.6": 1059.5433, "encoder_q-layer.7": 1225.5563, "encoder_q-layer.8": 1404.5641, "encoder_q-layer.9": 1289.9438, "epoch": 0.95, "inbatch_neg_score": 0.7981, "inbatch_pos_score": 1.4727, "learning_rate": 1.5555555555555556e-06, "loss": 3.1367, "norm_diff": 0.0848, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2001.1335, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7983, "query_norm": 1.5438, "queue_k_norm": 1.6274, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1752, "sent_len_1": 66.6953, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8113, "stdk": 0.0492, "stdq": 0.0454, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 97200 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1255, "doc_norm": 1.6357, "encoder_q-embeddings": 1087.1, "encoder_q-layer.0": 738.4236, "encoder_q-layer.1": 806.7236, "encoder_q-layer.10": 1271.7927, "encoder_q-layer.11": 2914.8582, "encoder_q-layer.2": 919.8989, "encoder_q-layer.3": 990.1647, "encoder_q-layer.4": 1055.4711, "encoder_q-layer.5": 1170.6295, "encoder_q-layer.6": 1284.8486, "encoder_q-layer.7": 1536.2731, "encoder_q-layer.8": 1572.863, "encoder_q-layer.9": 1283.5388, "epoch": 0.95, "inbatch_neg_score": 0.7978, "inbatch_pos_score": 1.5049, "learning_rate": 1.5e-06, "loss": 3.1255, "norm_diff": 0.0806, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2153.7599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7983, "query_norm": 1.555, "queue_k_norm": 1.6272, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8852, "sent_len_1": 66.8179, "sent_max_len_0": 128.0, "sent_max_len_1": 189.1362, "stdk": 0.0495, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 97300 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1482, "doc_norm": 1.6264, "encoder_q-embeddings": 1091.1449, "encoder_q-layer.0": 720.5469, "encoder_q-layer.1": 796.7941, "encoder_q-layer.10": 1239.9541, "encoder_q-layer.11": 2924.6785, "encoder_q-layer.2": 921.5251, "encoder_q-layer.3": 987.7806, "encoder_q-layer.4": 1046.2655, "encoder_q-layer.5": 1129.0002, "encoder_q-layer.6": 1217.1869, "encoder_q-layer.7": 1265.1534, "encoder_q-layer.8": 1371.9103, "encoder_q-layer.9": 1196.717, "epoch": 0.95, "inbatch_neg_score": 0.7984, "inbatch_pos_score": 1.4854, "learning_rate": 1.4444444444444445e-06, "loss": 3.1482, "norm_diff": 0.0725, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2079.6347, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7979, "query_norm": 1.5539, "queue_k_norm": 1.627, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8417, "sent_len_1": 66.5906, "sent_max_len_0": 128.0, "sent_max_len_1": 190.305, "stdk": 0.0491, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 97400 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.1654, "doc_norm": 1.631, "encoder_q-embeddings": 1137.7904, "encoder_q-layer.0": 789.3634, "encoder_q-layer.1": 910.019, "encoder_q-layer.10": 1375.2484, "encoder_q-layer.11": 3036.7383, "encoder_q-layer.2": 1068.483, "encoder_q-layer.3": 1139.3661, "encoder_q-layer.4": 1246.1377, "encoder_q-layer.5": 1352.0104, "encoder_q-layer.6": 1366.3429, "encoder_q-layer.7": 1501.8373, "encoder_q-layer.8": 1448.3319, "encoder_q-layer.9": 1210.7128, "epoch": 0.95, "inbatch_neg_score": 0.799, "inbatch_pos_score": 1.4824, "learning_rate": 1.388888888888889e-06, "loss": 3.1654, "norm_diff": 0.0866, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2227.7758, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7983, "query_norm": 1.5445, "queue_k_norm": 1.6272, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0257, "sent_len_1": 66.7733, "sent_max_len_0": 128.0, "sent_max_len_1": 191.2237, "stdk": 0.0493, "stdq": 0.0454, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 97500 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.1276, "doc_norm": 1.6267, "encoder_q-embeddings": 1003.2155, "encoder_q-layer.0": 683.9483, "encoder_q-layer.1": 712.5925, "encoder_q-layer.10": 1225.3713, "encoder_q-layer.11": 2938.7166, "encoder_q-layer.2": 819.6984, "encoder_q-layer.3": 879.6201, "encoder_q-layer.4": 984.6255, "encoder_q-layer.5": 1032.6326, "encoder_q-layer.6": 1148.5179, "encoder_q-layer.7": 1349.6171, "encoder_q-layer.8": 1468.3857, "encoder_q-layer.9": 1209.1268, "epoch": 0.95, "inbatch_neg_score": 0.7989, "inbatch_pos_score": 1.4648, "learning_rate": 1.3333333333333334e-06, "loss": 3.1276, "norm_diff": 0.0858, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2064.4211, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7988, "query_norm": 1.5409, "queue_k_norm": 1.6265, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.0727, "sent_len_1": 66.8338, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7088, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 97600 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1425, "doc_norm": 1.6264, "encoder_q-embeddings": 1273.8069, "encoder_q-layer.0": 847.3696, "encoder_q-layer.1": 959.4254, "encoder_q-layer.10": 1327.9318, "encoder_q-layer.11": 2920.1465, "encoder_q-layer.2": 1052.8948, "encoder_q-layer.3": 1156.9491, "encoder_q-layer.4": 1308.0109, "encoder_q-layer.5": 1456.3784, "encoder_q-layer.6": 1474.1522, "encoder_q-layer.7": 1539.7584, "encoder_q-layer.8": 1543.7385, "encoder_q-layer.9": 1295.0756, "epoch": 0.95, "inbatch_neg_score": 0.7987, "inbatch_pos_score": 1.498, "learning_rate": 1.2777777777777779e-06, "loss": 3.1425, "norm_diff": 0.0624, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2260.2361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7983, "query_norm": 1.564, "queue_k_norm": 1.6276, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0138, "sent_len_1": 66.8099, "sent_max_len_0": 128.0, "sent_max_len_1": 190.3088, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 97700 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1271, "doc_norm": 1.6231, "encoder_q-embeddings": 1776.0162, "encoder_q-layer.0": 1193.3483, "encoder_q-layer.1": 1287.1792, "encoder_q-layer.10": 1232.3416, "encoder_q-layer.11": 2841.4285, "encoder_q-layer.2": 1489.8282, "encoder_q-layer.3": 1344.4718, "encoder_q-layer.4": 1333.2289, "encoder_q-layer.5": 1089.1038, "encoder_q-layer.6": 1032.9999, "encoder_q-layer.7": 1178.9534, "encoder_q-layer.8": 1296.1869, "encoder_q-layer.9": 1201.9382, "epoch": 0.95, "inbatch_neg_score": 0.7976, "inbatch_pos_score": 1.4883, "learning_rate": 1.2222222222222223e-06, "loss": 3.1271, "norm_diff": 0.0699, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2327.3118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7983, "query_norm": 1.5532, "queue_k_norm": 1.628, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8478, "sent_len_1": 66.7556, "sent_max_len_0": 128.0, "sent_max_len_1": 192.7175, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 97800 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.1341, "doc_norm": 1.6252, "encoder_q-embeddings": 1520.3242, "encoder_q-layer.0": 1136.8524, "encoder_q-layer.1": 1277.5979, "encoder_q-layer.10": 1307.8444, "encoder_q-layer.11": 3081.2432, "encoder_q-layer.2": 1368.1368, "encoder_q-layer.3": 1401.3473, "encoder_q-layer.4": 1537.7885, "encoder_q-layer.5": 1487.6257, "encoder_q-layer.6": 1502.0864, "encoder_q-layer.7": 1656.632, "encoder_q-layer.8": 1565.696, "encoder_q-layer.9": 1308.951, "epoch": 0.96, "inbatch_neg_score": 0.8004, "inbatch_pos_score": 1.4639, "learning_rate": 1.1666666666666668e-06, "loss": 3.1341, "norm_diff": 0.0813, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2523.8068, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7988, "query_norm": 1.5438, "queue_k_norm": 1.6295, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.9144, "sent_len_1": 66.8402, "sent_max_len_0": 128.0, "sent_max_len_1": 190.7012, "stdk": 0.049, "stdq": 0.0454, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 97900 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1197, "doc_norm": 1.625, "encoder_q-embeddings": 1256.0249, "encoder_q-layer.0": 833.632, "encoder_q-layer.1": 937.6289, "encoder_q-layer.10": 1213.2897, "encoder_q-layer.11": 2972.5955, "encoder_q-layer.2": 1146.119, "encoder_q-layer.3": 1189.2988, "encoder_q-layer.4": 1334.484, "encoder_q-layer.5": 1417.0861, "encoder_q-layer.6": 1478.2388, "encoder_q-layer.7": 1738.4373, "encoder_q-layer.8": 1682.3075, "encoder_q-layer.9": 1300.598, "epoch": 0.96, "inbatch_neg_score": 0.8008, "inbatch_pos_score": 1.4688, "learning_rate": 1.1111111111111112e-06, "loss": 3.1197, "norm_diff": 0.0783, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2347.7945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7998, "query_norm": 1.5467, "queue_k_norm": 1.6271, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.8556, "sent_len_1": 66.683, "sent_max_len_0": 128.0, "sent_max_len_1": 190.6838, "stdk": 0.049, "stdq": 0.0455, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 98000 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.1427, "doc_norm": 1.6194, "encoder_q-embeddings": 2529.1221, "encoder_q-layer.0": 1625.2609, "encoder_q-layer.1": 1734.0674, "encoder_q-layer.10": 1380.8546, "encoder_q-layer.11": 3121.3818, "encoder_q-layer.2": 1897.4531, "encoder_q-layer.3": 2144.8906, "encoder_q-layer.4": 2497.7539, "encoder_q-layer.5": 2543.1848, "encoder_q-layer.6": 2715.6736, "encoder_q-layer.7": 2853.9067, "encoder_q-layer.8": 2208.7708, "encoder_q-layer.9": 1390.8766, "epoch": 0.96, "inbatch_neg_score": 0.8014, "inbatch_pos_score": 1.4551, "learning_rate": 1.0555555555555557e-06, "loss": 3.1427, "norm_diff": 0.0774, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3510.7584, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8003, "query_norm": 1.542, "queue_k_norm": 1.6269, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.2879, "sent_len_1": 66.976, "sent_max_len_0": 128.0, "sent_max_len_1": 192.015, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 98100 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1358, "doc_norm": 1.6271, "encoder_q-embeddings": 2122.8474, "encoder_q-layer.0": 1398.7823, "encoder_q-layer.1": 1452.5569, "encoder_q-layer.10": 2627.7222, "encoder_q-layer.11": 5939.418, "encoder_q-layer.2": 1619.7357, "encoder_q-layer.3": 1714.2858, "encoder_q-layer.4": 1808.1337, "encoder_q-layer.5": 1904.4634, "encoder_q-layer.6": 2131.8274, "encoder_q-layer.7": 2466.4387, "encoder_q-layer.8": 2750.6521, "encoder_q-layer.9": 2564.8376, "epoch": 0.96, "inbatch_neg_score": 0.7975, "inbatch_pos_score": 1.4922, "learning_rate": 1.0000000000000002e-06, "loss": 3.1358, "norm_diff": 0.069, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4071.7704, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7988, "query_norm": 1.558, "queue_k_norm": 1.6284, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 65.0622, "sent_len_1": 66.7078, "sent_max_len_0": 128.0, "sent_max_len_1": 188.3425, "stdk": 0.0491, "stdq": 0.046, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 98200 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1007, "doc_norm": 1.6293, "encoder_q-embeddings": 9076.2988, "encoder_q-layer.0": 6417.002, "encoder_q-layer.1": 7157.5718, "encoder_q-layer.10": 2388.9006, "encoder_q-layer.11": 5953.5908, "encoder_q-layer.2": 8745.0195, "encoder_q-layer.3": 7446.7686, "encoder_q-layer.4": 7703.9102, "encoder_q-layer.5": 6153.229, "encoder_q-layer.6": 4745.6431, "encoder_q-layer.7": 4861.5713, "encoder_q-layer.8": 3974.219, "encoder_q-layer.9": 2536.0923, "epoch": 0.96, "inbatch_neg_score": 0.8003, "inbatch_pos_score": 1.4785, "learning_rate": 9.444444444444445e-07, "loss": 3.1007, "norm_diff": 0.0845, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9812.5887, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7998, "query_norm": 1.5448, "queue_k_norm": 1.628, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.2252, "sent_len_1": 67.0728, "sent_max_len_0": 128.0, "sent_max_len_1": 191.505, "stdk": 0.0492, "stdq": 0.0454, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 98300 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.11, "doc_norm": 1.6224, "encoder_q-embeddings": 2195.0552, "encoder_q-layer.0": 1523.8815, "encoder_q-layer.1": 1700.2957, "encoder_q-layer.10": 2521.437, "encoder_q-layer.11": 6004.335, "encoder_q-layer.2": 2039.2294, "encoder_q-layer.3": 2134.2932, "encoder_q-layer.4": 2262.9504, "encoder_q-layer.5": 2381.4158, "encoder_q-layer.6": 2499.1975, "encoder_q-layer.7": 2775.4429, "encoder_q-layer.8": 3019.9565, "encoder_q-layer.9": 2418.0374, "epoch": 0.96, "inbatch_neg_score": 0.799, "inbatch_pos_score": 1.4902, "learning_rate": 8.88888888888889e-07, "loss": 3.11, "norm_diff": 0.0696, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4344.7306, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7983, "query_norm": 1.5528, "queue_k_norm": 1.6296, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.9862, "sent_len_1": 66.9573, "sent_max_len_0": 128.0, "sent_max_len_1": 192.3663, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 98400 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 3.122, "doc_norm": 1.6289, "encoder_q-embeddings": 1684.2423, "encoder_q-layer.0": 1119.2596, "encoder_q-layer.1": 1143.9159, "encoder_q-layer.10": 2616.3984, "encoder_q-layer.11": 5631.6655, "encoder_q-layer.2": 1282.3613, "encoder_q-layer.3": 1326.8058, "encoder_q-layer.4": 1399.1775, "encoder_q-layer.5": 1487.8159, "encoder_q-layer.6": 1783.7587, "encoder_q-layer.7": 2034.3096, "encoder_q-layer.8": 2402.0479, "encoder_q-layer.9": 2303.1887, "epoch": 0.96, "inbatch_neg_score": 0.7968, "inbatch_pos_score": 1.5713, "learning_rate": 8.333333333333333e-07, "loss": 3.122, "norm_diff": 0.0523, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3622.8317, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7979, "query_norm": 1.5765, "queue_k_norm": 1.6288, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.9663, "sent_len_1": 66.8059, "sent_max_len_0": 128.0, "sent_max_len_1": 188.8462, "stdk": 0.0492, "stdq": 0.0469, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 98500 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.1534, "doc_norm": 1.6281, "encoder_q-embeddings": 1240.9238, "encoder_q-layer.0": 805.614, "encoder_q-layer.1": 917.7928, "encoder_q-layer.10": 1338.5502, "encoder_q-layer.11": 3054.9529, "encoder_q-layer.2": 1103.6761, "encoder_q-layer.3": 1210.7523, "encoder_q-layer.4": 1348.4546, "encoder_q-layer.5": 1376.5708, "encoder_q-layer.6": 1474.2139, "encoder_q-layer.7": 1582.7113, "encoder_q-layer.8": 1650.1355, "encoder_q-layer.9": 1309.3778, "epoch": 0.96, "inbatch_neg_score": 0.7992, "inbatch_pos_score": 1.4854, "learning_rate": 7.777777777777778e-07, "loss": 3.1534, "norm_diff": 0.0774, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2340.9762, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7993, "query_norm": 1.5507, "queue_k_norm": 1.6285, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9041, "sent_len_1": 66.8534, "sent_max_len_0": 128.0, "sent_max_len_1": 189.3688, "stdk": 0.0491, "stdq": 0.0457, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 98600 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.1374, "doc_norm": 1.6229, "encoder_q-embeddings": 2149.7021, "encoder_q-layer.0": 1463.6196, "encoder_q-layer.1": 1689.7904, "encoder_q-layer.10": 1271.963, "encoder_q-layer.11": 3062.9973, "encoder_q-layer.2": 1977.5349, "encoder_q-layer.3": 2271.2505, "encoder_q-layer.4": 2577.2527, "encoder_q-layer.5": 2799.7515, "encoder_q-layer.6": 3248.8948, "encoder_q-layer.7": 3202.9636, "encoder_q-layer.8": 2740.9436, "encoder_q-layer.9": 1644.2767, "epoch": 0.96, "inbatch_neg_score": 0.7992, "inbatch_pos_score": 1.4941, "learning_rate": 7.222222222222222e-07, "loss": 3.1374, "norm_diff": 0.074, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3651.0146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7993, "query_norm": 1.5489, "queue_k_norm": 1.6274, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8583, "sent_len_1": 66.8656, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9425, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 98700 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1228, "doc_norm": 1.6353, "encoder_q-embeddings": 2815.772, "encoder_q-layer.0": 1941.7177, "encoder_q-layer.1": 2119.9092, "encoder_q-layer.10": 1365.4369, "encoder_q-layer.11": 3197.7788, "encoder_q-layer.2": 2345.6003, "encoder_q-layer.3": 2460.8271, "encoder_q-layer.4": 2667.4785, "encoder_q-layer.5": 2478.2627, "encoder_q-layer.6": 2820.7776, "encoder_q-layer.7": 2966.2373, "encoder_q-layer.8": 2592.6677, "encoder_q-layer.9": 1420.16, "epoch": 0.96, "inbatch_neg_score": 0.8012, "inbatch_pos_score": 1.4629, "learning_rate": 6.666666666666667e-07, "loss": 3.1228, "norm_diff": 0.0967, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3805.0481, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8018, "query_norm": 1.5386, "queue_k_norm": 1.6266, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.1485, "sent_len_1": 66.7839, "sent_max_len_0": 128.0, "sent_max_len_1": 190.65, "stdk": 0.0495, "stdq": 0.0451, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 98800 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.1449, "doc_norm": 1.6276, "encoder_q-embeddings": 505.4417, "encoder_q-layer.0": 332.3542, "encoder_q-layer.1": 372.5187, "encoder_q-layer.10": 630.0106, "encoder_q-layer.11": 1490.8594, "encoder_q-layer.2": 404.2332, "encoder_q-layer.3": 421.2356, "encoder_q-layer.4": 456.2198, "encoder_q-layer.5": 464.0778, "encoder_q-layer.6": 545.048, "encoder_q-layer.7": 639.564, "encoder_q-layer.8": 708.5679, "encoder_q-layer.9": 623.1411, "epoch": 0.97, "inbatch_neg_score": 0.7991, "inbatch_pos_score": 1.4844, "learning_rate": 6.111111111111112e-07, "loss": 3.1449, "norm_diff": 0.0776, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1029.6532, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7998, "query_norm": 1.55, "queue_k_norm": 1.626, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.7609, "sent_len_1": 66.7925, "sent_max_len_0": 128.0, "sent_max_len_1": 189.5575, "stdk": 0.0491, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 98900 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.1427, "doc_norm": 1.6257, "encoder_q-embeddings": 608.6304, "encoder_q-layer.0": 400.6755, "encoder_q-layer.1": 461.8012, "encoder_q-layer.10": 660.9629, "encoder_q-layer.11": 1544.9276, "encoder_q-layer.2": 543.2045, "encoder_q-layer.3": 569.2464, "encoder_q-layer.4": 582.9301, "encoder_q-layer.5": 610.5192, "encoder_q-layer.6": 685.8011, "encoder_q-layer.7": 679.9697, "encoder_q-layer.8": 723.1649, "encoder_q-layer.9": 646.743, "epoch": 0.97, "inbatch_neg_score": 0.8004, "inbatch_pos_score": 1.4512, "learning_rate": 5.555555555555556e-07, "loss": 3.1427, "norm_diff": 0.0859, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1124.8756, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7998, "query_norm": 1.5399, "queue_k_norm": 1.6268, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.8827, "sent_len_1": 66.7448, "sent_max_len_0": 128.0, "sent_max_len_1": 189.0337, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 99000 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.1169, "doc_norm": 1.625, "encoder_q-embeddings": 487.1332, "encoder_q-layer.0": 313.3098, "encoder_q-layer.1": 347.4796, "encoder_q-layer.10": 608.6552, "encoder_q-layer.11": 1494.085, "encoder_q-layer.2": 405.4938, "encoder_q-layer.3": 409.9704, "encoder_q-layer.4": 440.6172, "encoder_q-layer.5": 447.0775, "encoder_q-layer.6": 505.5657, "encoder_q-layer.7": 565.4058, "encoder_q-layer.8": 677.6221, "encoder_q-layer.9": 601.1205, "epoch": 0.97, "inbatch_neg_score": 0.8007, "inbatch_pos_score": 1.498, "learning_rate": 5.000000000000001e-07, "loss": 3.1169, "norm_diff": 0.0746, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 997.4508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7993, "query_norm": 1.5504, "queue_k_norm": 1.6275, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.8697, "sent_len_1": 66.6521, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4563, "stdk": 0.049, "stdq": 0.0457, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 99100 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 3.141, "doc_norm": 1.6252, "encoder_q-embeddings": 1485.5378, "encoder_q-layer.0": 971.4633, "encoder_q-layer.1": 1100.7433, "encoder_q-layer.10": 656.392, "encoder_q-layer.11": 1472.4589, "encoder_q-layer.2": 1290.7911, "encoder_q-layer.3": 1323.7185, "encoder_q-layer.4": 1414.8651, "encoder_q-layer.5": 1342.6287, "encoder_q-layer.6": 1052.2322, "encoder_q-layer.7": 999.6823, "encoder_q-layer.8": 883.146, "encoder_q-layer.9": 640.2141, "epoch": 0.97, "inbatch_neg_score": 0.8014, "inbatch_pos_score": 1.5059, "learning_rate": 4.444444444444445e-07, "loss": 3.141, "norm_diff": 0.0722, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1783.9466, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7998, "query_norm": 1.553, "queue_k_norm": 1.6258, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.059, "sent_len_1": 66.6816, "sent_max_len_0": 128.0, "sent_max_len_1": 188.0913, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99200 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.1111, "doc_norm": 1.6232, "encoder_q-embeddings": 531.3, "encoder_q-layer.0": 369.3612, "encoder_q-layer.1": 417.9042, "encoder_q-layer.10": 605.1995, "encoder_q-layer.11": 1401.8605, "encoder_q-layer.2": 493.4672, "encoder_q-layer.3": 533.6185, "encoder_q-layer.4": 572.4954, "encoder_q-layer.5": 606.019, "encoder_q-layer.6": 683.3998, "encoder_q-layer.7": 745.4193, "encoder_q-layer.8": 798.1882, "encoder_q-layer.9": 621.493, "epoch": 0.97, "inbatch_neg_score": 0.8005, "inbatch_pos_score": 1.5039, "learning_rate": 3.888888888888889e-07, "loss": 3.1111, "norm_diff": 0.0666, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1049.1002, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7998, "query_norm": 1.5566, "queue_k_norm": 1.6301, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 65.0489, "sent_len_1": 66.867, "sent_max_len_0": 128.0, "sent_max_len_1": 189.6425, "stdk": 0.0489, "stdq": 0.046, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 99300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.1356, "doc_norm": 1.6264, "encoder_q-embeddings": 544.3378, "encoder_q-layer.0": 360.8826, "encoder_q-layer.1": 396.7446, "encoder_q-layer.10": 680.9468, "encoder_q-layer.11": 1480.412, "encoder_q-layer.2": 444.3055, "encoder_q-layer.3": 458.0107, "encoder_q-layer.4": 471.8878, "encoder_q-layer.5": 500.0557, "encoder_q-layer.6": 550.7263, "encoder_q-layer.7": 609.4535, "encoder_q-layer.8": 689.3629, "encoder_q-layer.9": 618.5887, "epoch": 0.97, "inbatch_neg_score": 0.8011, "inbatch_pos_score": 1.4443, "learning_rate": 3.3333333333333335e-07, "loss": 3.1356, "norm_diff": 0.0908, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1040.6837, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8003, "query_norm": 1.5357, "queue_k_norm": 1.628, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9281, "sent_len_1": 66.732, "sent_max_len_0": 128.0, "sent_max_len_1": 189.2188, "stdk": 0.0491, "stdq": 0.045, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 99400 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 3.126, "doc_norm": 1.6303, "encoder_q-embeddings": 683.7311, "encoder_q-layer.0": 466.8259, "encoder_q-layer.1": 563.6986, "encoder_q-layer.10": 647.4399, "encoder_q-layer.11": 1406.7869, "encoder_q-layer.2": 685.073, "encoder_q-layer.3": 708.8535, "encoder_q-layer.4": 781.5479, "encoder_q-layer.5": 825.6468, "encoder_q-layer.6": 810.918, "encoder_q-layer.7": 917.209, "encoder_q-layer.8": 962.4536, "encoder_q-layer.9": 640.0618, "epoch": 0.97, "inbatch_neg_score": 0.7983, "inbatch_pos_score": 1.5176, "learning_rate": 2.777777777777778e-07, "loss": 3.126, "norm_diff": 0.0682, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1238.2339, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7983, "query_norm": 1.5621, "queue_k_norm": 1.6277, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 64.7305, "sent_len_1": 66.7192, "sent_max_len_0": 128.0, "sent_max_len_1": 187.8862, "stdk": 0.0492, "stdq": 0.0462, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 99500 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.1359, "doc_norm": 1.6268, "encoder_q-embeddings": 534.6382, "encoder_q-layer.0": 362.5091, "encoder_q-layer.1": 399.9092, "encoder_q-layer.10": 683.9824, "encoder_q-layer.11": 1520.4178, "encoder_q-layer.2": 440.8601, "encoder_q-layer.3": 481.3538, "encoder_q-layer.4": 542.3274, "encoder_q-layer.5": 542.2966, "encoder_q-layer.6": 588.402, "encoder_q-layer.7": 684.8657, "encoder_q-layer.8": 734.9099, "encoder_q-layer.9": 642.4219, "epoch": 0.97, "inbatch_neg_score": 0.8013, "inbatch_pos_score": 1.46, "learning_rate": 2.2222222222222224e-07, "loss": 3.1359, "norm_diff": 0.0802, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1076.4753, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8013, "query_norm": 1.5466, "queue_k_norm": 1.6286, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 64.963, "sent_len_1": 66.7582, "sent_max_len_0": 128.0, "sent_max_len_1": 188.84, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 99600 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.1285, "doc_norm": 1.6267, "encoder_q-embeddings": 509.1297, "encoder_q-layer.0": 342.2402, "encoder_q-layer.1": 366.0719, "encoder_q-layer.10": 688.582, "encoder_q-layer.11": 1499.7104, "encoder_q-layer.2": 403.1726, "encoder_q-layer.3": 416.599, "encoder_q-layer.4": 469.459, "encoder_q-layer.5": 477.6151, "encoder_q-layer.6": 530.834, "encoder_q-layer.7": 578.3519, "encoder_q-layer.8": 671.5649, "encoder_q-layer.9": 621.3677, "epoch": 0.97, "inbatch_neg_score": 0.8006, "inbatch_pos_score": 1.4785, "learning_rate": 1.6666666666666668e-07, "loss": 3.1285, "norm_diff": 0.0795, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1015.968, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8008, "query_norm": 1.5472, "queue_k_norm": 1.6279, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 64.8921, "sent_len_1": 66.7904, "sent_max_len_0": 128.0, "sent_max_len_1": 188.795, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 99700 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1258, "doc_norm": 1.6335, "encoder_q-embeddings": 468.8048, "encoder_q-layer.0": 309.5146, "encoder_q-layer.1": 334.0506, "encoder_q-layer.10": 697.8617, "encoder_q-layer.11": 1488.3055, "encoder_q-layer.2": 372.6438, "encoder_q-layer.3": 387.1603, "encoder_q-layer.4": 418.9378, "encoder_q-layer.5": 420.2871, "encoder_q-layer.6": 487.7356, "encoder_q-layer.7": 578.3751, "encoder_q-layer.8": 680.5081, "encoder_q-layer.9": 616.0317, "epoch": 0.97, "inbatch_neg_score": 0.8023, "inbatch_pos_score": 1.502, "learning_rate": 1.1111111111111112e-07, "loss": 3.1258, "norm_diff": 0.078, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 984.3736, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8008, "query_norm": 1.5555, "queue_k_norm": 1.6271, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 64.9765, "sent_len_1": 66.7563, "sent_max_len_0": 128.0, "sent_max_len_1": 189.4525, "stdk": 0.0494, "stdq": 0.0459, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 99800 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.1336, "doc_norm": 1.6221, "encoder_q-embeddings": 1234.7236, "encoder_q-layer.0": 860.2324, "encoder_q-layer.1": 1042.6755, "encoder_q-layer.10": 684.2162, "encoder_q-layer.11": 1423.9255, "encoder_q-layer.2": 1192.1025, "encoder_q-layer.3": 1288.9143, "encoder_q-layer.4": 1396.2981, "encoder_q-layer.5": 1508.1541, "encoder_q-layer.6": 1568.2279, "encoder_q-layer.7": 1458.5782, "encoder_q-layer.8": 1125.9252, "encoder_q-layer.9": 659.9648, "epoch": 0.98, "inbatch_neg_score": 0.7996, "inbatch_pos_score": 1.4941, "learning_rate": 5.555555555555556e-08, "loss": 3.1336, "norm_diff": 0.0588, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1844.0064, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7993, "query_norm": 1.5633, "queue_k_norm": 1.6291, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 65.0928, "sent_len_1": 66.9947, "sent_max_len_0": 128.0, "sent_max_len_1": 192.5962, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 99900 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1272, "doc_norm": 1.6259, "encoder_q-embeddings": 494.1666, "encoder_q-layer.0": 326.925, "encoder_q-layer.1": 333.7584, "encoder_q-layer.10": 614.2658, "encoder_q-layer.11": 1479.3057, "encoder_q-layer.2": 380.183, "encoder_q-layer.3": 389.9003, "encoder_q-layer.4": 425.15, "encoder_q-layer.5": 430.5507, "encoder_q-layer.6": 479.9345, "encoder_q-layer.7": 559.0207, "encoder_q-layer.8": 643.0842, "encoder_q-layer.9": 603.2275, "epoch": 0.98, "inbatch_neg_score": 0.8001, "inbatch_pos_score": 1.4961, "learning_rate": 0.0, "loss": 3.1272, "norm_diff": 0.074, "norm_loss": 0.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 982.2622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8003, "query_norm": 1.5519, "queue_k_norm": 1.6274, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 65.112, "sent_len_1": 66.5937, "sent_max_len_0": 128.0, "sent_max_len_1": 187.9462, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 100000 }, { "dev_runtime": 38.0401, "dev_samples_per_second": 1.682, "dev_steps_per_second": 0.026, "epoch": 0.98, "step": 100000, "test_accuracy": 94.62890625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.30579674243927, "test_doc_norm": 1.6166253089904785, "test_inbatch_neg_score": 1.167407512664795, "test_inbatch_pos_score": 2.140146255493164, "test_loss": 0.30579674243927, "test_loss_align": 0.9694724082946777, "test_loss_unif": 2.8071036338806152, "test_loss_unif_q@queue": 2.8071038722991943, "test_norm_diff": 0.006956934928894043, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.7821612358093262, "test_query_norm": 1.616883635520935, "test_queue_k_norm": 1.6272437572479248, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.043169643729925156, "test_stdq": 0.04317212849855423, "test_stdqueue_k": 0.04919165372848511, "test_stdqueue_q": 0.0 }, { "dev_runtime": 38.0401, "dev_samples_per_second": 1.682, "dev_steps_per_second": 0.026, "epoch": 0.98, "eval_beir-arguana_ndcg@10": 0.39138, "eval_beir-arguana_recall@10": 0.66216, "eval_beir-arguana_recall@100": 0.93599, "eval_beir-arguana_recall@20": 0.7909, "eval_beir-avg_ndcg@10": 0.3805048333333333, "eval_beir-avg_recall@10": 0.4477325, "eval_beir-avg_recall@100": 0.6274010833333333, "eval_beir-avg_recall@20": 0.5071286666666668, "eval_beir-cqadupstack_ndcg@10": 0.27104833333333334, "eval_beir-cqadupstack_recall@10": 0.363245, "eval_beir-cqadupstack_recall@100": 0.5892308333333333, "eval_beir-cqadupstack_recall@20": 0.4280566666666667, "eval_beir-fiqa_ndcg@10": 0.22343, "eval_beir-fiqa_recall@10": 0.28095, "eval_beir-fiqa_recall@100": 0.53699, "eval_beir-fiqa_recall@20": 0.3412, "eval_beir-nfcorpus_ndcg@10": 0.30319, "eval_beir-nfcorpus_recall@10": 0.1525, "eval_beir-nfcorpus_recall@100": 0.27586, "eval_beir-nfcorpus_recall@20": 0.18001, "eval_beir-nq_ndcg@10": 0.27168, "eval_beir-nq_recall@10": 0.44928, "eval_beir-nq_recall@100": 0.79179, "eval_beir-nq_recall@20": 0.56817, "eval_beir-quora_ndcg@10": 0.82685, "eval_beir-quora_recall@10": 0.91814, "eval_beir-quora_recall@100": 0.98627, "eval_beir-quora_recall@20": 0.95101, "eval_beir-scidocs_ndcg@10": 0.14703, "eval_beir-scidocs_recall@10": 0.15518, "eval_beir-scidocs_recall@100": 0.35427, "eval_beir-scidocs_recall@20": 0.21462, "eval_beir-scifact_ndcg@10": 0.64459, "eval_beir-scifact_recall@10": 0.78733, "eval_beir-scifact_recall@100": 0.93422, "eval_beir-scifact_recall@20": 0.84011, "eval_beir-trec-covid_ndcg@10": 0.53589, "eval_beir-trec-covid_recall@10": 0.572, "eval_beir-trec-covid_recall@100": 0.4254, "eval_beir-trec-covid_recall@20": 0.542, "eval_beir-webis-touche2020_ndcg@10": 0.18996, "eval_beir-webis-touche2020_recall@10": 0.13654, "eval_beir-webis-touche2020_recall@100": 0.44399, "eval_beir-webis-touche2020_recall@20": 0.21521, "eval_senteval-avg_sts": 0.7273805222302221, "eval_senteval-sickr_spearman": 0.6974671648657597, "eval_senteval-stsb_spearman": 0.7572938795946844, "step": 100000, "test_accuracy": 94.62890625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.30579674243927, "test_doc_norm": 1.6166253089904785, "test_inbatch_neg_score": 1.167407512664795, "test_inbatch_pos_score": 2.140146255493164, "test_loss": 0.30579674243927, "test_loss_align": 0.9694724082946777, "test_loss_unif": 2.8071036338806152, "test_loss_unif_q@queue": 2.8071038722991943, "test_norm_diff": 0.006956934928894043, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.7821612358093262, "test_query_norm": 1.616883635520935, "test_queue_k_norm": 1.6272437572479248, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.043169643729925156, "test_stdq": 0.04317212849855423, "test_stdqueue_k": 0.04919165372848511, "test_stdqueue_q": 0.0 }, { "epoch": 0.98, "step": 100000, "total_flos": 0, "train_runtime": 248357.4129, "train_samples_per_second": 0.403 } ], "max_steps": 100000, "num_train_epochs": 1, "total_flos": 0, "trial_name": null, "trial_params": null }