|
{ |
|
"best_metric": 0.301629900932312, |
|
"best_model_checkpoint": "saves/LLaMA2-7B-Chat/lora/2023-08-25-11-47-37/checkpoint-800", |
|
"epoch": 2.9978586723768736, |
|
"global_step": 2100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9999300626531746e-05, |
|
"logits/chosen": -0.7403622269630432, |
|
"logits/rejected": -0.6866486668586731, |
|
"logps/chosen": -181.39700317382812, |
|
"logps/rejected": -208.150146484375, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -5.543716179090552e-05, |
|
"rewards/margins": 0.008117685094475746, |
|
"rewards/rejected": -0.008173122070729733, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.999720254525684e-05, |
|
"logits/chosen": -0.7327243089675903, |
|
"logits/rejected": -0.6666526794433594, |
|
"logps/chosen": -231.14956665039062, |
|
"logps/rejected": -259.27978515625, |
|
"loss": 0.6702, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.010688358917832375, |
|
"rewards/margins": 0.04831721633672714, |
|
"rewards/rejected": -0.05900556966662407, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9995272362322506e-05, |
|
"logits/chosen": -0.7042385935783386, |
|
"logits/rejected": -0.6349071264266968, |
|
"logps/chosen": -188.58367919921875, |
|
"logps/rejected": -223.201416015625, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07146739959716797, |
|
"rewards/margins": 0.05914110690355301, |
|
"rewards/rejected": -0.13060849905014038, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.999093662527553e-05, |
|
"logits/chosen": -0.6996177434921265, |
|
"logits/rejected": -0.6077739000320435, |
|
"logps/chosen": -196.89700317382812, |
|
"logps/rejected": -243.52450561523438, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.0204925499856472, |
|
"rewards/margins": 0.22103042900562286, |
|
"rewards/rejected": -0.24152295291423798, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.998520264838675e-05, |
|
"logits/chosen": -0.6775780916213989, |
|
"logits/rejected": -0.6084710359573364, |
|
"logps/chosen": -194.9524688720703, |
|
"logps/rejected": -232.9259490966797, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.22763672471046448, |
|
"rewards/margins": 0.2767763137817383, |
|
"rewards/rejected": -0.5044130086898804, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.997807075247146e-05, |
|
"logits/chosen": -0.6930967569351196, |
|
"logits/rejected": -0.624418318271637, |
|
"logps/chosen": -201.62631225585938, |
|
"logps/rejected": -231.8871307373047, |
|
"loss": 0.5054, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.42100492119789124, |
|
"rewards/margins": 0.5007229447364807, |
|
"rewards/rejected": -0.9217279553413391, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.996954133655838e-05, |
|
"logits/chosen": -0.6575398445129395, |
|
"logits/rejected": -0.5734818577766418, |
|
"logps/chosen": -215.608154296875, |
|
"logps/rejected": -259.5364990234375, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.129244089126587, |
|
"rewards/margins": 0.6850830316543579, |
|
"rewards/rejected": -1.8143272399902344, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9959614877867264e-05, |
|
"logits/chosen": -0.5865448713302612, |
|
"logits/rejected": -0.5297074317932129, |
|
"logps/chosen": -251.30545043945312, |
|
"logps/rejected": -282.8074035644531, |
|
"loss": 0.4294, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.6625725030899048, |
|
"rewards/margins": 0.9844868779182434, |
|
"rewards/rejected": -2.647059440612793, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.994829193178228e-05, |
|
"logits/chosen": -0.6305592656135559, |
|
"logits/rejected": -0.5662962794303894, |
|
"logps/chosen": -238.34683227539062, |
|
"logps/rejected": -288.0771789550781, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.0896081924438477, |
|
"rewards/margins": 1.2067267894744873, |
|
"rewards/rejected": -3.296334743499756, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.9935573131820854e-05, |
|
"logits/chosen": -0.5488190054893494, |
|
"logits/rejected": -0.4458453059196472, |
|
"logps/chosen": -237.5546417236328, |
|
"logps/rejected": -296.1310119628906, |
|
"loss": 0.2968, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.8281638622283936, |
|
"rewards/margins": 1.8660491704940796, |
|
"rewards/rejected": -4.694213390350342, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.99214591895983e-05, |
|
"logits/chosen": -0.496940940618515, |
|
"logits/rejected": -0.39123407006263733, |
|
"logps/chosen": -235.4664764404297, |
|
"logps/rejected": -301.9604797363281, |
|
"loss": 0.3675, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -4.971009731292725, |
|
"rewards/margins": 2.1594393253326416, |
|
"rewards/rejected": -7.130448818206787, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.990595089478795e-05, |
|
"logits/chosen": -0.44920119643211365, |
|
"logits/rejected": -0.3571397066116333, |
|
"logps/chosen": -270.9496154785156, |
|
"logps/rejected": -346.5156555175781, |
|
"loss": 0.3613, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.407833576202393, |
|
"rewards/margins": 2.685551166534424, |
|
"rewards/rejected": -8.093385696411133, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9889049115077005e-05, |
|
"logits/chosen": -0.5083180665969849, |
|
"logits/rejected": -0.4510710835456848, |
|
"logps/chosen": -266.8662109375, |
|
"logps/rejected": -305.4253845214844, |
|
"loss": 0.4624, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.9966888427734375, |
|
"rewards/margins": 2.306278705596924, |
|
"rewards/rejected": -8.302966117858887, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.987075479611796e-05, |
|
"logits/chosen": -0.45440906286239624, |
|
"logits/rejected": -0.38472697138786316, |
|
"logps/chosen": -277.6520690917969, |
|
"logps/rejected": -340.0121765136719, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -6.467832088470459, |
|
"rewards/margins": 2.306380271911621, |
|
"rewards/rejected": -8.774212837219238, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9851068961475725e-05, |
|
"logits/chosen": -0.44587111473083496, |
|
"logits/rejected": -0.3460499048233032, |
|
"logps/chosen": -248.66030883789062, |
|
"logps/rejected": -324.4167785644531, |
|
"loss": 0.3326, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.31089973449707, |
|
"rewards/margins": 2.787933826446533, |
|
"rewards/rejected": -8.098833084106445, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.982999271257033e-05, |
|
"logits/chosen": -0.47116953134536743, |
|
"logits/rejected": -0.40097618103027344, |
|
"logps/chosen": -300.4339904785156, |
|
"logps/rejected": -352.66363525390625, |
|
"loss": 0.3232, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -5.735043525695801, |
|
"rewards/margins": 2.7127461433410645, |
|
"rewards/rejected": -8.447790145874023, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.980752722861531e-05, |
|
"logits/chosen": -0.5418936014175415, |
|
"logits/rejected": -0.48130369186401367, |
|
"logps/chosen": -273.4432067871094, |
|
"logps/rejected": -305.4323425292969, |
|
"loss": 0.4748, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -6.1163129806518555, |
|
"rewards/margins": 1.9569565057754517, |
|
"rewards/rejected": -8.07326889038086, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.978367376655177e-05, |
|
"logits/chosen": -0.6019693613052368, |
|
"logits/rejected": -0.501119077205658, |
|
"logps/chosen": -241.8748016357422, |
|
"logps/rejected": -303.2873840332031, |
|
"loss": 0.3463, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.80649471282959, |
|
"rewards/margins": 2.6070265769958496, |
|
"rewards/rejected": -7.413522243499756, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9758433660977964e-05, |
|
"logits/chosen": -0.5586158633232117, |
|
"logits/rejected": -0.46125540137290955, |
|
"logps/chosen": -253.9844512939453, |
|
"logps/rejected": -318.15960693359375, |
|
"loss": 0.2994, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -4.774440288543701, |
|
"rewards/margins": 2.6352882385253906, |
|
"rewards/rejected": -7.40972900390625, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9731808324074717e-05, |
|
"logits/chosen": -0.5857738256454468, |
|
"logits/rejected": -0.5078204274177551, |
|
"logps/chosen": -244.3525390625, |
|
"logps/rejected": -299.4289855957031, |
|
"loss": 0.4066, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -4.955862998962402, |
|
"rewards/margins": 2.2310631275177, |
|
"rewards/rejected": -7.186926364898682, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_logits/chosen": -0.6736606359481812, |
|
"eval_logits/rejected": -0.5934695601463318, |
|
"eval_logps/chosen": -269.2135009765625, |
|
"eval_logps/rejected": -330.87744140625, |
|
"eval_loss": 0.36347857117652893, |
|
"eval_rewards/accuracies": 0.8051801919937134, |
|
"eval_rewards/chosen": -5.528914451599121, |
|
"eval_rewards/margins": 2.5316162109375, |
|
"eval_rewards/rejected": -8.060530662536621, |
|
"eval_runtime": 618.2599, |
|
"eval_samples_per_second": 0.954, |
|
"eval_steps_per_second": 0.12, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.970379924552635e-05, |
|
"logits/chosen": -0.6208111047744751, |
|
"logits/rejected": -0.5168194770812988, |
|
"logps/chosen": -256.57330322265625, |
|
"logps/rejected": -336.573486328125, |
|
"loss": 0.2796, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.576841354370117, |
|
"rewards/margins": 2.8258020877838135, |
|
"rewards/rejected": -8.402643203735352, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9674407992437394e-05, |
|
"logits/chosen": -0.5667734742164612, |
|
"logits/rejected": -0.5014970302581787, |
|
"logps/chosen": -279.5911560058594, |
|
"logps/rejected": -323.6059875488281, |
|
"loss": 0.3282, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -6.653090476989746, |
|
"rewards/margins": 2.4938788414001465, |
|
"rewards/rejected": -9.14696979522705, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.964363620924485e-05, |
|
"logits/chosen": -0.5388852953910828, |
|
"logits/rejected": -0.4672119617462158, |
|
"logps/chosen": -275.47357177734375, |
|
"logps/rejected": -326.81182861328125, |
|
"loss": 0.4124, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -6.149296760559082, |
|
"rewards/margins": 2.311483860015869, |
|
"rewards/rejected": -8.460780143737793, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.961148561762622e-05, |
|
"logits/chosen": -0.5033109784126282, |
|
"logits/rejected": -0.405276358127594, |
|
"logps/chosen": -256.9668884277344, |
|
"logps/rejected": -325.78839111328125, |
|
"loss": 0.3181, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -6.381417274475098, |
|
"rewards/margins": 2.8026371002197266, |
|
"rewards/rejected": -9.184053421020508, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9577958016403156e-05, |
|
"logits/chosen": -0.5019578337669373, |
|
"logits/rejected": -0.42507410049438477, |
|
"logps/chosen": -298.7206115722656, |
|
"logps/rejected": -352.7537841796875, |
|
"loss": 0.4869, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -6.763588905334473, |
|
"rewards/margins": 2.4632394313812256, |
|
"rewards/rejected": -9.226827621459961, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.954305528144085e-05, |
|
"logits/chosen": -0.5088043808937073, |
|
"logits/rejected": -0.40408769249916077, |
|
"logps/chosen": -260.7892761230469, |
|
"logps/rejected": -330.84747314453125, |
|
"loss": 0.2442, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -6.382019996643066, |
|
"rewards/margins": 3.018307685852051, |
|
"rewards/rejected": -9.400327682495117, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9506779365543046e-05, |
|
"logits/chosen": -0.4910973608493805, |
|
"logits/rejected": -0.4001993238925934, |
|
"logps/chosen": -273.5351867675781, |
|
"logps/rejected": -341.2701110839844, |
|
"loss": 0.3465, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -6.231161117553711, |
|
"rewards/margins": 2.6937344074249268, |
|
"rewards/rejected": -8.924895286560059, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.946913229834279e-05, |
|
"logits/chosen": -0.5775797367095947, |
|
"logits/rejected": -0.48806411027908325, |
|
"logps/chosen": -259.29351806640625, |
|
"logps/rejected": -339.4101257324219, |
|
"loss": 0.3707, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -6.118729114532471, |
|
"rewards/margins": 3.231736421585083, |
|
"rewards/rejected": -9.350464820861816, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.943011618618887e-05, |
|
"logits/chosen": -0.4882396161556244, |
|
"logits/rejected": -0.4096860885620117, |
|
"logps/chosen": -258.9165954589844, |
|
"logps/rejected": -323.74822998046875, |
|
"loss": 0.3093, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.742823600769043, |
|
"rewards/margins": 3.2949295043945312, |
|
"rewards/rejected": -9.037752151489258, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.938973321202799e-05, |
|
"logits/chosen": -0.5276007056236267, |
|
"logits/rejected": -0.4654686450958252, |
|
"logps/chosen": -320.4021301269531, |
|
"logps/rejected": -362.60992431640625, |
|
"loss": 0.4327, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -7.360659122467041, |
|
"rewards/margins": 2.3584816455841064, |
|
"rewards/rejected": -9.719141006469727, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.93479856352826e-05, |
|
"logits/chosen": -0.6080215573310852, |
|
"logits/rejected": -0.561368465423584, |
|
"logps/chosen": -260.1334533691406, |
|
"logps/rejected": -304.76434326171875, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -5.641587257385254, |
|
"rewards/margins": 2.074352264404297, |
|
"rewards/rejected": -7.715939521789551, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.930487579172451e-05, |
|
"logits/chosen": -0.6534048318862915, |
|
"logits/rejected": -0.557198166847229, |
|
"logps/chosen": -238.5316925048828, |
|
"logps/rejected": -303.6593017578125, |
|
"loss": 0.3279, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.701598167419434, |
|
"rewards/margins": 2.6716597080230713, |
|
"rewards/rejected": -7.373257637023926, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.926040609334418e-05, |
|
"logits/chosen": -0.5949841141700745, |
|
"logits/rejected": -0.4924190938472748, |
|
"logps/chosen": -268.88604736328125, |
|
"logps/rejected": -336.0675048828125, |
|
"loss": 0.3907, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.271938800811768, |
|
"rewards/margins": 2.7763962745666504, |
|
"rewards/rejected": -8.048335075378418, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9214579028215776e-05, |
|
"logits/chosen": -0.5787319540977478, |
|
"logits/rejected": -0.5097864270210266, |
|
"logps/chosen": -270.76904296875, |
|
"logps/rejected": -330.5365905761719, |
|
"loss": 0.4062, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -5.298426628112793, |
|
"rewards/margins": 2.6809771060943604, |
|
"rewards/rejected": -7.979403495788574, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9167397160358e-05, |
|
"logits/chosen": -0.5979365110397339, |
|
"logits/rejected": -0.5267354846000671, |
|
"logps/chosen": -250.3006591796875, |
|
"logps/rejected": -310.52764892578125, |
|
"loss": 0.3127, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -5.677347660064697, |
|
"rewards/margins": 2.7255032062530518, |
|
"rewards/rejected": -8.402850151062012, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.911886312959055e-05, |
|
"logits/chosen": -0.600624144077301, |
|
"logits/rejected": -0.5323041677474976, |
|
"logps/chosen": -263.458984375, |
|
"logps/rejected": -324.83404541015625, |
|
"loss": 0.2828, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.505527496337891, |
|
"rewards/margins": 2.577638626098633, |
|
"rewards/rejected": -8.083166122436523, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.906897965138653e-05, |
|
"logits/chosen": -0.6335456371307373, |
|
"logits/rejected": -0.5383679270744324, |
|
"logps/chosen": -271.63812255859375, |
|
"logps/rejected": -345.01702880859375, |
|
"loss": 0.3611, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -6.672010898590088, |
|
"rewards/margins": 3.4995288848876953, |
|
"rewards/rejected": -10.171539306640625, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.90177495167204e-05, |
|
"logits/chosen": -0.6199727058410645, |
|
"logits/rejected": -0.5486131906509399, |
|
"logps/chosen": -282.41583251953125, |
|
"logps/rejected": -347.9653625488281, |
|
"loss": 0.4037, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -6.799245357513428, |
|
"rewards/margins": 2.8858962059020996, |
|
"rewards/rejected": -9.685141563415527, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.896517559191194e-05, |
|
"logits/chosen": -0.683984100818634, |
|
"logits/rejected": -0.5775423049926758, |
|
"logps/chosen": -262.58538818359375, |
|
"logps/rejected": -335.623779296875, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -7.013031959533691, |
|
"rewards/margins": 3.129578113555908, |
|
"rewards/rejected": -10.142609596252441, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.891126081846579e-05, |
|
"logits/chosen": -0.6567696332931519, |
|
"logits/rejected": -0.5673569440841675, |
|
"logps/chosen": -256.83892822265625, |
|
"logps/rejected": -325.61065673828125, |
|
"loss": 0.3115, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -6.3370866775512695, |
|
"rewards/margins": 3.1089577674865723, |
|
"rewards/rejected": -9.446043968200684, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -0.7238260507583618, |
|
"eval_logits/rejected": -0.6414477229118347, |
|
"eval_logps/chosen": -288.9779968261719, |
|
"eval_logps/rejected": -360.12066650390625, |
|
"eval_loss": 0.3467591404914856, |
|
"eval_rewards/accuracies": 0.832207202911377, |
|
"eval_rewards/chosen": -7.505363464355469, |
|
"eval_rewards/margins": 3.479483127593994, |
|
"eval_rewards/rejected": -10.984847068786621, |
|
"eval_runtime": 617.3232, |
|
"eval_samples_per_second": 0.956, |
|
"eval_steps_per_second": 0.12, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.8856008212906925e-05, |
|
"logits/chosen": -0.7143681049346924, |
|
"logits/rejected": -0.6183390021324158, |
|
"logps/chosen": -269.6623229980469, |
|
"logps/rejected": -350.52459716796875, |
|
"loss": 0.2657, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -7.174610137939453, |
|
"rewards/margins": 3.6835849285125732, |
|
"rewards/rejected": -10.858195304870605, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.879942086661184e-05, |
|
"logits/chosen": -0.6396993398666382, |
|
"logits/rejected": -0.5612179040908813, |
|
"logps/chosen": -320.8450622558594, |
|
"logps/rejected": -394.2232360839844, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.637298583984375, |
|
"rewards/margins": 3.9497833251953125, |
|
"rewards/rejected": -12.587082862854004, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.8741501945635656e-05, |
|
"logits/chosen": -0.6102300882339478, |
|
"logits/rejected": -0.5374587178230286, |
|
"logps/chosen": -261.81256103515625, |
|
"logps/rejected": -329.33453369140625, |
|
"loss": 0.3376, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -7.931007385253906, |
|
"rewards/margins": 3.3228728771209717, |
|
"rewards/rejected": -11.253880500793457, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.8682254690534876e-05, |
|
"logits/chosen": -0.589969277381897, |
|
"logits/rejected": -0.5134058594703674, |
|
"logps/chosen": -284.9112243652344, |
|
"logps/rejected": -351.90643310546875, |
|
"loss": 0.3639, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -7.687941551208496, |
|
"rewards/margins": 3.43585205078125, |
|
"rewards/rejected": -11.123793601989746, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.862168241618617e-05, |
|
"logits/chosen": -0.5980569124221802, |
|
"logits/rejected": -0.5199486017227173, |
|
"logps/chosen": -243.0383758544922, |
|
"logps/rejected": -318.08319091796875, |
|
"loss": 0.371, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -6.680181980133057, |
|
"rewards/margins": 3.751661777496338, |
|
"rewards/rejected": -10.431844711303711, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.8559788511600876e-05, |
|
"logits/chosen": -0.5468825101852417, |
|
"logits/rejected": -0.46182718873023987, |
|
"logps/chosen": -300.0223388671875, |
|
"logps/rejected": -365.42559814453125, |
|
"loss": 0.3014, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -8.608762741088867, |
|
"rewards/margins": 3.9094433784484863, |
|
"rewards/rejected": -12.518205642700195, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.849657643973535e-05, |
|
"logits/chosen": -0.5835626721382141, |
|
"logits/rejected": -0.46411094069480896, |
|
"logps/chosen": -289.1901550292969, |
|
"logps/rejected": -391.5763244628906, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -7.415358066558838, |
|
"rewards/margins": 4.35217809677124, |
|
"rewards/rejected": -11.767536163330078, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.843204973729729e-05, |
|
"logits/chosen": -0.5552169680595398, |
|
"logits/rejected": -0.4667941927909851, |
|
"logps/chosen": -262.4269714355469, |
|
"logps/rejected": -344.49462890625, |
|
"loss": 0.318, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -6.991745948791504, |
|
"rewards/margins": 3.809674024581909, |
|
"rewards/rejected": -10.801420211791992, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.8366212014547775e-05, |
|
"logits/chosen": -0.5869508981704712, |
|
"logits/rejected": -0.536375880241394, |
|
"logps/chosen": -263.1332092285156, |
|
"logps/rejected": -323.9794006347656, |
|
"loss": 0.3479, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -6.706090450286865, |
|
"rewards/margins": 3.0286171436309814, |
|
"rewards/rejected": -9.734708786010742, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8299066955099335e-05, |
|
"logits/chosen": -0.6492255330085754, |
|
"logits/rejected": -0.572007954120636, |
|
"logps/chosen": -283.6471862792969, |
|
"logps/rejected": -338.55535888671875, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -6.6781325340271, |
|
"rewards/margins": 1.9866206645965576, |
|
"rewards/rejected": -8.664752960205078, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.823061831570981e-05, |
|
"logits/chosen": -0.5709847807884216, |
|
"logits/rejected": -0.49480828642845154, |
|
"logps/chosen": -280.56005859375, |
|
"logps/rejected": -343.18511962890625, |
|
"loss": 0.3881, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.904370307922363, |
|
"rewards/margins": 2.5633044242858887, |
|
"rewards/rejected": -9.46767520904541, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.816086992607219e-05, |
|
"logits/chosen": -0.5884816646575928, |
|
"logits/rejected": -0.48144420981407166, |
|
"logps/chosen": -259.3785095214844, |
|
"logps/rejected": -330.3200988769531, |
|
"loss": 0.3031, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -6.253561973571777, |
|
"rewards/margins": 2.664248466491699, |
|
"rewards/rejected": -8.917810440063477, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.8089825688600324e-05, |
|
"logits/chosen": -0.6093907356262207, |
|
"logits/rejected": -0.5078362226486206, |
|
"logps/chosen": -264.2926025390625, |
|
"logps/rejected": -339.7705993652344, |
|
"loss": 0.2332, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.6783013343811035, |
|
"rewards/margins": 3.519818067550659, |
|
"rewards/rejected": -10.198118209838867, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8017489578210604e-05, |
|
"logits/chosen": -0.5917445421218872, |
|
"logits/rejected": -0.49844178557395935, |
|
"logps/chosen": -271.13677978515625, |
|
"logps/rejected": -333.9804382324219, |
|
"loss": 0.4124, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -7.525943756103516, |
|
"rewards/margins": 2.7923340797424316, |
|
"rewards/rejected": -10.318277359008789, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.794386564209953e-05, |
|
"logits/chosen": -0.6364978551864624, |
|
"logits/rejected": -0.5267010927200317, |
|
"logps/chosen": -282.91021728515625, |
|
"logps/rejected": -378.1858825683594, |
|
"loss": 0.2063, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.725545406341553, |
|
"rewards/margins": 4.508333206176758, |
|
"rewards/rejected": -11.233880043029785, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.786895799951732e-05, |
|
"logits/chosen": -0.5923280119895935, |
|
"logits/rejected": -0.512219250202179, |
|
"logps/chosen": -285.5169372558594, |
|
"logps/rejected": -352.58148193359375, |
|
"loss": 0.3159, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -8.174338340759277, |
|
"rewards/margins": 4.347126007080078, |
|
"rewards/rejected": -12.521463394165039, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.779277084153737e-05, |
|
"logits/chosen": -0.5970318913459778, |
|
"logits/rejected": -0.4951511025428772, |
|
"logps/chosen": -299.33294677734375, |
|
"logps/rejected": -385.574462890625, |
|
"loss": 0.3647, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -8.652000427246094, |
|
"rewards/margins": 3.701908588409424, |
|
"rewards/rejected": -12.353909492492676, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.7715308430821864e-05, |
|
"logits/chosen": -0.5532945394515991, |
|
"logits/rejected": -0.470702588558197, |
|
"logps/chosen": -289.81219482421875, |
|
"logps/rejected": -367.1038513183594, |
|
"loss": 0.3418, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -8.14123821258545, |
|
"rewards/margins": 4.235669136047363, |
|
"rewards/rejected": -12.376907348632812, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.765242323034498e-05, |
|
"logits/chosen": -0.5983850359916687, |
|
"logits/rejected": -0.5276827216148376, |
|
"logps/chosen": -291.8949890136719, |
|
"logps/rejected": -367.7808532714844, |
|
"loss": 0.2942, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -6.658424377441406, |
|
"rewards/margins": 4.356478691101074, |
|
"rewards/rejected": -11.01490306854248, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.757267633420931e-05, |
|
"logits/chosen": -0.5903723239898682, |
|
"logits/rejected": -0.470381498336792, |
|
"logps/chosen": -245.4473876953125, |
|
"logps/rejected": -322.921630859375, |
|
"loss": 0.2653, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -5.816718578338623, |
|
"rewards/margins": 3.5717501640319824, |
|
"rewards/rejected": -9.388467788696289, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_logits/chosen": -0.6423404216766357, |
|
"eval_logits/rejected": -0.5549700856208801, |
|
"eval_logps/chosen": -277.4593811035156, |
|
"eval_logps/rejected": -346.16680908203125, |
|
"eval_loss": 0.3175624907016754, |
|
"eval_rewards/accuracies": 0.8372747302055359, |
|
"eval_rewards/chosen": -6.353503227233887, |
|
"eval_rewards/margins": 3.2359619140625, |
|
"eval_rewards/rejected": -9.589465141296387, |
|
"eval_runtime": 619.6776, |
|
"eval_samples_per_second": 0.952, |
|
"eval_steps_per_second": 0.119, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7491666499598794e-05, |
|
"logits/chosen": -0.4752650856971741, |
|
"logits/rejected": -0.4291561245918274, |
|
"logps/chosen": -308.3399963378906, |
|
"logps/rejected": -351.7706604003906, |
|
"loss": 0.2566, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -6.376312255859375, |
|
"rewards/margins": 2.960242986679077, |
|
"rewards/rejected": -9.336555480957031, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7409398259003744e-05, |
|
"logits/chosen": -0.5722283124923706, |
|
"logits/rejected": -0.43486547470092773, |
|
"logps/chosen": -271.27618408203125, |
|
"logps/rejected": -350.6097412109375, |
|
"loss": 0.3905, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -6.20401668548584, |
|
"rewards/margins": 3.1277387142181396, |
|
"rewards/rejected": -9.331754684448242, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.732587621532214e-05, |
|
"logits/chosen": -0.4831075668334961, |
|
"logits/rejected": -0.3779456317424774, |
|
"logps/chosen": -275.4000549316406, |
|
"logps/rejected": -348.45404052734375, |
|
"loss": 0.3821, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -7.6802659034729, |
|
"rewards/margins": 2.4858384132385254, |
|
"rewards/rejected": -10.166104316711426, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.724110504160208e-05, |
|
"logits/chosen": -0.42866721749305725, |
|
"logits/rejected": -0.39401277899742126, |
|
"logps/chosen": -322.8934020996094, |
|
"logps/rejected": -362.2698669433594, |
|
"loss": 0.3577, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -8.4872407913208, |
|
"rewards/margins": 2.9192988872528076, |
|
"rewards/rejected": -11.406539916992188, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.715508948078037e-05, |
|
"logits/chosen": -0.43140649795532227, |
|
"logits/rejected": -0.382727712392807, |
|
"logps/chosen": -309.9305114746094, |
|
"logps/rejected": -364.6683044433594, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -8.847053527832031, |
|
"rewards/margins": 2.7970926761627197, |
|
"rewards/rejected": -11.644145965576172, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.706783434541708e-05, |
|
"logits/chosen": -0.43298617005348206, |
|
"logits/rejected": -0.35435712337493896, |
|
"logps/chosen": -274.5734558105469, |
|
"logps/rejected": -323.9297790527344, |
|
"loss": 0.3598, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -7.113492012023926, |
|
"rewards/margins": 2.833127737045288, |
|
"rewards/rejected": -9.946619033813477, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6979344517426345e-05, |
|
"logits/chosen": -0.44348543882369995, |
|
"logits/rejected": -0.3648318350315094, |
|
"logps/chosen": -311.8634338378906, |
|
"logps/rejected": -370.2818298339844, |
|
"loss": 0.3383, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -8.36948299407959, |
|
"rewards/margins": 2.6521434783935547, |
|
"rewards/rejected": -11.021625518798828, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6889624947803195e-05, |
|
"logits/chosen": -0.4924314618110657, |
|
"logits/rejected": -0.3927859663963318, |
|
"logps/chosen": -286.1222229003906, |
|
"logps/rejected": -348.00445556640625, |
|
"loss": 0.2847, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -6.249931812286377, |
|
"rewards/margins": 3.2729618549346924, |
|
"rewards/rejected": -9.522893905639648, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.679868065634656e-05, |
|
"logits/chosen": -0.4742973744869232, |
|
"logits/rejected": -0.3609599769115448, |
|
"logps/chosen": -267.9313049316406, |
|
"logps/rejected": -338.3818359375, |
|
"loss": 0.2471, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.949034690856934, |
|
"rewards/margins": 3.703294277191162, |
|
"rewards/rejected": -10.652329444885254, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6706516731378406e-05, |
|
"logits/chosen": -0.4882968068122864, |
|
"logits/rejected": -0.4227726459503174, |
|
"logps/chosen": -256.69964599609375, |
|
"logps/rejected": -336.7755432128906, |
|
"loss": 0.2902, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.316679954528809, |
|
"rewards/margins": 3.5853703022003174, |
|
"rewards/rejected": -9.902050971984863, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.661313832945904e-05, |
|
"logits/chosen": -0.45601844787597656, |
|
"logits/rejected": -0.35979634523391724, |
|
"logps/chosen": -297.2603759765625, |
|
"logps/rejected": -384.87481689453125, |
|
"loss": 0.2746, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -8.420430183410645, |
|
"rewards/margins": 4.463082313537598, |
|
"rewards/rejected": -12.883511543273926, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.65185506750986e-05, |
|
"logits/chosen": -0.39386382699012756, |
|
"logits/rejected": -0.28367191553115845, |
|
"logps/chosen": -309.7892150878906, |
|
"logps/rejected": -404.3554382324219, |
|
"loss": 0.3581, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -10.17359447479248, |
|
"rewards/margins": 4.7968430519104, |
|
"rewards/rejected": -14.970438003540039, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.642275906046475e-05, |
|
"logits/chosen": -0.42610305547714233, |
|
"logits/rejected": -0.32776302099227905, |
|
"logps/chosen": -308.56304931640625, |
|
"logps/rejected": -394.921142578125, |
|
"loss": 0.3434, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -9.843785285949707, |
|
"rewards/margins": 4.186667442321777, |
|
"rewards/rejected": -14.030451774597168, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.632576884508662e-05, |
|
"logits/chosen": -0.38670024275779724, |
|
"logits/rejected": -0.29175546765327454, |
|
"logps/chosen": -317.8299560546875, |
|
"logps/rejected": -382.1197814941406, |
|
"loss": 0.3862, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -10.289070129394531, |
|
"rewards/margins": 4.806598663330078, |
|
"rewards/rejected": -15.095669746398926, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.622758545555485e-05, |
|
"logits/chosen": -0.38157421350479126, |
|
"logits/rejected": -0.26931703090667725, |
|
"logps/chosen": -292.02069091796875, |
|
"logps/rejected": -388.94720458984375, |
|
"loss": 0.3453, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -7.471795558929443, |
|
"rewards/margins": 4.848240852355957, |
|
"rewards/rejected": -12.320035934448242, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.612821438521805e-05, |
|
"logits/chosen": -0.3492121398448944, |
|
"logits/rejected": -0.2813674807548523, |
|
"logps/chosen": -305.62554931640625, |
|
"logps/rejected": -356.89898681640625, |
|
"loss": 0.2206, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -8.157720565795898, |
|
"rewards/margins": 3.738807201385498, |
|
"rewards/rejected": -11.896527290344238, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.602766119387544e-05, |
|
"logits/chosen": -0.3653913736343384, |
|
"logits/rejected": -0.2597258687019348, |
|
"logps/chosen": -311.99371337890625, |
|
"logps/rejected": -382.8812255859375, |
|
"loss": 0.3092, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -8.370109558105469, |
|
"rewards/margins": 3.6346893310546875, |
|
"rewards/rejected": -12.004796981811523, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.592593150746576e-05, |
|
"logits/chosen": -0.3710058033466339, |
|
"logits/rejected": -0.2727561891078949, |
|
"logps/chosen": -288.8029479980469, |
|
"logps/rejected": -361.223388671875, |
|
"loss": 0.3955, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -7.695769309997559, |
|
"rewards/margins": 3.3667006492614746, |
|
"rewards/rejected": -11.062471389770508, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5823031017752485e-05, |
|
"logits/chosen": -0.3490789532661438, |
|
"logits/rejected": -0.2746933102607727, |
|
"logps/chosen": -281.90704345703125, |
|
"logps/rejected": -338.3580322265625, |
|
"loss": 0.328, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -7.884057521820068, |
|
"rewards/margins": 2.856874942779541, |
|
"rewards/rejected": -10.740933418273926, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.571896548200542e-05, |
|
"logits/chosen": -0.4303611218929291, |
|
"logits/rejected": -0.3262333273887634, |
|
"logps/chosen": -261.8278503417969, |
|
"logps/rejected": -337.40521240234375, |
|
"loss": 0.2372, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.010989189147949, |
|
"rewards/margins": 3.444431781768799, |
|
"rewards/rejected": -9.45542049407959, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_logits/chosen": -0.4868081510066986, |
|
"eval_logits/rejected": -0.3944084346294403, |
|
"eval_logps/chosen": -282.0663757324219, |
|
"eval_logps/rejected": -350.36456298828125, |
|
"eval_loss": 0.32044681906700134, |
|
"eval_rewards/accuracies": 0.8355855941772461, |
|
"eval_rewards/chosen": -6.814201831817627, |
|
"eval_rewards/margins": 3.195034980773926, |
|
"eval_rewards/rejected": -10.009236335754395, |
|
"eval_runtime": 635.268, |
|
"eval_samples_per_second": 0.929, |
|
"eval_steps_per_second": 0.116, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.5613740722678525e-05, |
|
"logits/chosen": -0.39522626996040344, |
|
"logits/rejected": -0.3030610680580139, |
|
"logps/chosen": -260.17266845703125, |
|
"logps/rejected": -320.7784118652344, |
|
"loss": 0.388, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -7.52327823638916, |
|
"rewards/margins": 3.621577739715576, |
|
"rewards/rejected": -11.144853591918945, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.550736262708418e-05, |
|
"logits/chosen": -0.4773409962654114, |
|
"logits/rejected": -0.3466936945915222, |
|
"logps/chosen": -277.6498107910156, |
|
"logps/rejected": -373.0572509765625, |
|
"loss": 0.2318, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -6.803755760192871, |
|
"rewards/margins": 3.537554979324341, |
|
"rewards/rejected": -10.34131145477295, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.5399837147063825e-05, |
|
"logits/chosen": -0.47985219955444336, |
|
"logits/rejected": -0.38592132925987244, |
|
"logps/chosen": -246.52841186523438, |
|
"logps/rejected": -332.86248779296875, |
|
"loss": 0.3023, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -6.49072265625, |
|
"rewards/margins": 3.5066089630126953, |
|
"rewards/rejected": -9.997331619262695, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.529117029865488e-05, |
|
"logits/chosen": -0.4670870900154114, |
|
"logits/rejected": -0.36911553144454956, |
|
"logps/chosen": -260.58758544921875, |
|
"logps/rejected": -342.8556823730469, |
|
"loss": 0.3234, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -7.461057186126709, |
|
"rewards/margins": 4.485016822814941, |
|
"rewards/rejected": -11.946073532104492, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.518136816175419e-05, |
|
"logits/chosen": -0.5157058835029602, |
|
"logits/rejected": -0.4533557891845703, |
|
"logps/chosen": -296.40618896484375, |
|
"logps/rejected": -343.72515869140625, |
|
"loss": 0.4552, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -8.75401496887207, |
|
"rewards/margins": 2.8250133991241455, |
|
"rewards/rejected": -11.57902717590332, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.5070436879777865e-05, |
|
"logits/chosen": -0.5464299917221069, |
|
"logits/rejected": -0.4678220748901367, |
|
"logps/chosen": -253.18478393554688, |
|
"logps/rejected": -321.50714111328125, |
|
"loss": 0.3686, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -7.0356292724609375, |
|
"rewards/margins": 3.3276419639587402, |
|
"rewards/rejected": -10.36327075958252, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.495838265931754e-05, |
|
"logits/chosen": -0.5636313557624817, |
|
"logits/rejected": -0.4402475357055664, |
|
"logps/chosen": -268.5367126464844, |
|
"logps/rejected": -361.71697998046875, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -7.160154819488525, |
|
"rewards/margins": 3.917328357696533, |
|
"rewards/rejected": -11.077482223510742, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.4845211769793116e-05, |
|
"logits/chosen": -0.5847368240356445, |
|
"logits/rejected": -0.4880955219268799, |
|
"logps/chosen": -264.53619384765625, |
|
"logps/rejected": -339.5098571777344, |
|
"loss": 0.2691, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -7.8903937339782715, |
|
"rewards/margins": 3.62628173828125, |
|
"rewards/rejected": -11.51667594909668, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.473093054310199e-05, |
|
"logits/chosen": -0.5972886085510254, |
|
"logits/rejected": -0.5250357389450073, |
|
"logps/chosen": -300.294189453125, |
|
"logps/rejected": -370.1164855957031, |
|
"loss": 0.3729, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -7.556139945983887, |
|
"rewards/margins": 4.214568614959717, |
|
"rewards/rejected": -11.770709037780762, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.46155453732648e-05, |
|
"logits/chosen": -0.5448333024978638, |
|
"logits/rejected": -0.4635641574859619, |
|
"logps/chosen": -265.34027099609375, |
|
"logps/rejected": -342.3074035644531, |
|
"loss": 0.3015, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -7.357121467590332, |
|
"rewards/margins": 4.111447334289551, |
|
"rewards/rejected": -11.468568801879883, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.449906271606766e-05, |
|
"logits/chosen": -0.5757399797439575, |
|
"logits/rejected": -0.5023797750473022, |
|
"logps/chosen": -283.24224853515625, |
|
"logps/rejected": -336.78485107421875, |
|
"loss": 0.309, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -8.1708345413208, |
|
"rewards/margins": 3.2705204486846924, |
|
"rewards/rejected": -11.441353797912598, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.438148908870095e-05, |
|
"logits/chosen": -0.560847282409668, |
|
"logits/rejected": -0.4687212407588959, |
|
"logps/chosen": -262.0433349609375, |
|
"logps/rejected": -343.6734313964844, |
|
"loss": 0.2833, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -9.314498901367188, |
|
"rewards/margins": 3.861955165863037, |
|
"rewards/rejected": -13.176454544067383, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.426283106939474e-05, |
|
"logits/chosen": -0.5405054092407227, |
|
"logits/rejected": -0.4934763014316559, |
|
"logps/chosen": -344.86859130859375, |
|
"logps/rejected": -396.4712219238281, |
|
"loss": 0.3875, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -11.307413101196289, |
|
"rewards/margins": 4.146143436431885, |
|
"rewards/rejected": -15.453557014465332, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.414309529705064e-05, |
|
"logits/chosen": -0.5560386776924133, |
|
"logits/rejected": -0.47137537598609924, |
|
"logps/chosen": -328.85467529296875, |
|
"logps/rejected": -410.0113830566406, |
|
"loss": 0.4106, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -10.061564445495605, |
|
"rewards/margins": 4.763692855834961, |
|
"rewards/rejected": -14.825258255004883, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.402228847087047e-05, |
|
"logits/chosen": -0.6119273900985718, |
|
"logits/rejected": -0.5192709565162659, |
|
"logps/chosen": -285.03509521484375, |
|
"logps/rejected": -359.7369689941406, |
|
"loss": 0.3005, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -9.684968948364258, |
|
"rewards/margins": 4.337248802185059, |
|
"rewards/rejected": -14.022216796875, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.390041734998132e-05, |
|
"logits/chosen": -0.6753469109535217, |
|
"logits/rejected": -0.5812332034111023, |
|
"logps/chosen": -275.0848083496094, |
|
"logps/rejected": -365.5320739746094, |
|
"loss": 0.2345, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -7.314253807067871, |
|
"rewards/margins": 4.565116882324219, |
|
"rewards/rejected": -11.879369735717773, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.3777488753057494e-05, |
|
"logits/chosen": -0.6396665573120117, |
|
"logits/rejected": -0.5838817358016968, |
|
"logps/chosen": -311.5262145996094, |
|
"logps/rejected": -385.2457580566406, |
|
"loss": 0.2286, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.526026725769043, |
|
"rewards/margins": 3.986698627471924, |
|
"rewards/rejected": -12.512724876403809, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.365350955793892e-05, |
|
"logits/chosen": -0.6760267019271851, |
|
"logits/rejected": -0.620951771736145, |
|
"logps/chosen": -313.78411865234375, |
|
"logps/rejected": -380.7740478515625, |
|
"loss": 0.3314, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -9.628170013427734, |
|
"rewards/margins": 3.3307273387908936, |
|
"rewards/rejected": -12.958897590637207, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.3528486701246376e-05, |
|
"logits/chosen": -0.6738308072090149, |
|
"logits/rejected": -0.5901960730552673, |
|
"logps/chosen": -285.67437744140625, |
|
"logps/rejected": -362.6622009277344, |
|
"loss": 0.3119, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -9.267045974731445, |
|
"rewards/margins": 3.928302049636841, |
|
"rewards/rejected": -13.195347785949707, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.3402427177993366e-05, |
|
"logits/chosen": -0.6766126155853271, |
|
"logits/rejected": -0.6122361421585083, |
|
"logps/chosen": -292.34326171875, |
|
"logps/rejected": -355.0762634277344, |
|
"loss": 0.4221, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -8.092927932739258, |
|
"rewards/margins": 4.039681911468506, |
|
"rewards/rejected": -12.132608413696289, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": -0.7289233803749084, |
|
"eval_logits/rejected": -0.6465120911598206, |
|
"eval_logps/chosen": -292.57330322265625, |
|
"eval_logps/rejected": -368.62078857421875, |
|
"eval_loss": 0.31609994173049927, |
|
"eval_rewards/accuracies": 0.8474099040031433, |
|
"eval_rewards/chosen": -7.864894390106201, |
|
"eval_rewards/margins": 3.9699699878692627, |
|
"eval_rewards/rejected": -11.83486557006836, |
|
"eval_runtime": 640.9294, |
|
"eval_samples_per_second": 0.921, |
|
"eval_steps_per_second": 0.115, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.327533804119476e-05, |
|
"logits/chosen": -0.6126315593719482, |
|
"logits/rejected": -0.5508357286453247, |
|
"logps/chosen": -282.2762756347656, |
|
"logps/rejected": -364.5578308105469, |
|
"loss": 0.316, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.242485046386719, |
|
"rewards/margins": 3.816096782684326, |
|
"rewards/rejected": -11.05858039855957, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.314722640147222e-05, |
|
"logits/chosen": -0.6599456071853638, |
|
"logits/rejected": -0.5939927697181702, |
|
"logps/chosen": -323.53668212890625, |
|
"logps/rejected": -396.10833740234375, |
|
"loss": 0.2941, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -8.693486213684082, |
|
"rewards/margins": 4.0846662521362305, |
|
"rewards/rejected": -12.778152465820312, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.301809942665625e-05, |
|
"logits/chosen": -0.6404463052749634, |
|
"logits/rejected": -0.5707911849021912, |
|
"logps/chosen": -307.43768310546875, |
|
"logps/rejected": -379.2852478027344, |
|
"loss": 0.4079, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -9.3197660446167, |
|
"rewards/margins": 3.961516857147217, |
|
"rewards/rejected": -13.281283378601074, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.28879643413853e-05, |
|
"logits/chosen": -0.65186607837677, |
|
"logits/rejected": -0.565552830696106, |
|
"logps/chosen": -310.2057189941406, |
|
"logps/rejected": -402.9633483886719, |
|
"loss": 0.2945, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -10.662870407104492, |
|
"rewards/margins": 4.87128210067749, |
|
"rewards/rejected": -15.534152030944824, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.2756828426701426e-05, |
|
"logits/chosen": -0.648743212223053, |
|
"logits/rejected": -0.5434762239456177, |
|
"logps/chosen": -288.9256591796875, |
|
"logps/rejected": -386.8144836425781, |
|
"loss": 0.2866, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -10.066654205322266, |
|
"rewards/margins": 5.4559478759765625, |
|
"rewards/rejected": -15.522601127624512, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.2624699019643e-05, |
|
"logits/chosen": -0.6845074892044067, |
|
"logits/rejected": -0.6216086149215698, |
|
"logps/chosen": -298.6322937011719, |
|
"logps/rejected": -368.7320861816406, |
|
"loss": 0.3515, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -8.717851638793945, |
|
"rewards/margins": 4.482119560241699, |
|
"rewards/rejected": -13.199971199035645, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.249158351283414e-05, |
|
"logits/chosen": -0.6760630011558533, |
|
"logits/rejected": -0.5973928570747375, |
|
"logps/chosen": -334.3371887207031, |
|
"logps/rejected": -412.73712158203125, |
|
"loss": 0.2374, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -11.443408012390137, |
|
"rewards/margins": 5.742681503295898, |
|
"rewards/rejected": -17.18608856201172, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.235748935407115e-05, |
|
"logits/chosen": -0.6781556010246277, |
|
"logits/rejected": -0.6254957914352417, |
|
"logps/chosen": -314.7560729980469, |
|
"logps/rejected": -381.03436279296875, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -10.918689727783203, |
|
"rewards/margins": 4.366137981414795, |
|
"rewards/rejected": -15.284828186035156, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.2222424045905774e-05, |
|
"logits/chosen": -0.703301727771759, |
|
"logits/rejected": -0.6157757639884949, |
|
"logps/chosen": -298.96417236328125, |
|
"logps/rejected": -374.37408447265625, |
|
"loss": 0.1956, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -10.03012752532959, |
|
"rewards/margins": 4.996109962463379, |
|
"rewards/rejected": -15.026239395141602, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.2086395145225456e-05, |
|
"logits/chosen": -0.6199958920478821, |
|
"logits/rejected": -0.5463480353355408, |
|
"logps/chosen": -297.76715087890625, |
|
"logps/rejected": -383.3893737792969, |
|
"loss": 0.3658, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -9.150928497314453, |
|
"rewards/margins": 4.323057651519775, |
|
"rewards/rejected": -13.473986625671387, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.1949410262830525e-05, |
|
"logits/chosen": -0.6586548686027527, |
|
"logits/rejected": -0.598033607006073, |
|
"logps/chosen": -298.9888610839844, |
|
"logps/rejected": -358.97113037109375, |
|
"loss": 0.3435, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -7.790317535400391, |
|
"rewards/margins": 3.196791172027588, |
|
"rewards/rejected": -10.987109184265137, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.1811477063008357e-05, |
|
"logits/chosen": -0.647259533405304, |
|
"logits/rejected": -0.5749470591545105, |
|
"logps/chosen": -283.01129150390625, |
|
"logps/rejected": -346.770263671875, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.713510513305664, |
|
"rewards/margins": 3.8825290203094482, |
|
"rewards/rejected": -12.596040725708008, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.167260326310459e-05, |
|
"logits/chosen": -0.655887246131897, |
|
"logits/rejected": -0.5856226682662964, |
|
"logps/chosen": -353.996337890625, |
|
"logps/rejected": -423.36199951171875, |
|
"loss": 0.3649, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -7.39592981338501, |
|
"rewards/margins": 4.420022010803223, |
|
"rewards/rejected": -11.81595230102539, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.1532796633091296e-05, |
|
"logits/chosen": -0.6630634069442749, |
|
"logits/rejected": -0.5999043583869934, |
|
"logps/chosen": -311.5198669433594, |
|
"logps/rejected": -382.8631896972656, |
|
"loss": 0.2865, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -7.376175880432129, |
|
"rewards/margins": 4.25052547454834, |
|
"rewards/rejected": -11.626702308654785, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.139206499513231e-05, |
|
"logits/chosen": -0.6371768712997437, |
|
"logits/rejected": -0.552124559879303, |
|
"logps/chosen": -315.57696533203125, |
|
"logps/rejected": -386.8127746582031, |
|
"loss": 0.3623, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -7.146079063415527, |
|
"rewards/margins": 3.366482973098755, |
|
"rewards/rejected": -10.51256275177002, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1250416223145515e-05, |
|
"logits/chosen": -0.6766015887260437, |
|
"logits/rejected": -0.5857943296432495, |
|
"logps/chosen": -259.1465759277344, |
|
"logps/rejected": -349.38995361328125, |
|
"loss": 0.3386, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -6.320498466491699, |
|
"rewards/margins": 3.3259353637695312, |
|
"rewards/rejected": -9.64643383026123, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.110785824236236e-05, |
|
"logits/chosen": -0.6103043556213379, |
|
"logits/rejected": -0.5319772958755493, |
|
"logps/chosen": -256.13446044921875, |
|
"logps/rejected": -331.8528137207031, |
|
"loss": 0.1824, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -6.32611608505249, |
|
"rewards/margins": 3.831328868865967, |
|
"rewards/rejected": -10.157445907592773, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.0964399028884394e-05, |
|
"logits/chosen": -0.6808601021766663, |
|
"logits/rejected": -0.5574377179145813, |
|
"logps/chosen": -255.42105102539062, |
|
"logps/rejected": -355.5065002441406, |
|
"loss": 0.2103, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.239567756652832, |
|
"rewards/margins": 4.258026599884033, |
|
"rewards/rejected": -10.497593879699707, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.082004660923703e-05, |
|
"logits/chosen": -0.6791261434555054, |
|
"logits/rejected": -0.5921913385391235, |
|
"logps/chosen": -267.8202819824219, |
|
"logps/rejected": -350.561767578125, |
|
"loss": 0.2129, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -7.478656768798828, |
|
"rewards/margins": 4.9322614669799805, |
|
"rewards/rejected": -12.410919189453125, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.067480905992045e-05, |
|
"logits/chosen": -0.7032105326652527, |
|
"logits/rejected": -0.600141167640686, |
|
"logps/chosen": -297.6030578613281, |
|
"logps/rejected": -384.7508850097656, |
|
"loss": 0.2602, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -9.610403060913086, |
|
"rewards/margins": 5.1422200202941895, |
|
"rewards/rejected": -14.7526216506958, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": -0.7646234631538391, |
|
"eval_logits/rejected": -0.6766347289085388, |
|
"eval_logps/chosen": -310.1993103027344, |
|
"eval_logps/rejected": -401.2223205566406, |
|
"eval_loss": 0.3386208117008209, |
|
"eval_rewards/accuracies": 0.8237612247467041, |
|
"eval_rewards/chosen": -9.627497673034668, |
|
"eval_rewards/margins": 5.467515468597412, |
|
"eval_rewards/rejected": -15.095013618469238, |
|
"eval_runtime": 635.6148, |
|
"eval_samples_per_second": 0.928, |
|
"eval_steps_per_second": 0.116, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.052869450695776e-05, |
|
"logits/chosen": -0.7094699144363403, |
|
"logits/rejected": -0.6273935437202454, |
|
"logps/chosen": -315.97381591796875, |
|
"logps/rejected": -408.48388671875, |
|
"loss": 0.2588, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -10.845590591430664, |
|
"rewards/margins": 6.2558746337890625, |
|
"rewards/rejected": -17.101465225219727, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.0381711125440284e-05, |
|
"logits/chosen": -0.6483619213104248, |
|
"logits/rejected": -0.6038894653320312, |
|
"logps/chosen": -354.60809326171875, |
|
"logps/rejected": -419.0562438964844, |
|
"loss": 0.4409, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -11.688039779663086, |
|
"rewards/margins": 6.04089879989624, |
|
"rewards/rejected": -17.728939056396484, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.023386713907021e-05, |
|
"logits/chosen": -0.7812396883964539, |
|
"logits/rejected": -0.6916221976280212, |
|
"logps/chosen": -264.0221862792969, |
|
"logps/rejected": -359.23480224609375, |
|
"loss": 0.2446, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -8.11898136138916, |
|
"rewards/margins": 5.1189188957214355, |
|
"rewards/rejected": -13.237902641296387, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.0085170819700446e-05, |
|
"logits/chosen": -0.693078339099884, |
|
"logits/rejected": -0.5796866416931152, |
|
"logps/chosen": -304.8370056152344, |
|
"logps/rejected": -397.3229675292969, |
|
"loss": 0.2739, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -7.398787021636963, |
|
"rewards/margins": 4.530589580535889, |
|
"rewards/rejected": -11.929376602172852, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9935630486871854e-05, |
|
"logits/chosen": -0.7021247744560242, |
|
"logits/rejected": -0.6098093390464783, |
|
"logps/chosen": -297.61724853515625, |
|
"logps/rejected": -383.40618896484375, |
|
"loss": 0.2087, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -7.8868536949157715, |
|
"rewards/margins": 4.646349906921387, |
|
"rewards/rejected": -12.533205032348633, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.9785254507347725e-05, |
|
"logits/chosen": -0.7181745171546936, |
|
"logits/rejected": -0.6586878299713135, |
|
"logps/chosen": -266.53192138671875, |
|
"logps/rejected": -329.3477478027344, |
|
"loss": 0.3401, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -7.311608791351318, |
|
"rewards/margins": 3.7649588584899902, |
|
"rewards/rejected": -11.076568603515625, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.963405129464569e-05, |
|
"logits/chosen": -0.7155243158340454, |
|
"logits/rejected": -0.6209796667098999, |
|
"logps/chosen": -287.1348571777344, |
|
"logps/rejected": -365.0896911621094, |
|
"loss": 0.3035, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -7.610356330871582, |
|
"rewards/margins": 4.313301086425781, |
|
"rewards/rejected": -11.923657417297363, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.948202930856697e-05, |
|
"logits/chosen": -0.7253776788711548, |
|
"logits/rejected": -0.6412879228591919, |
|
"logps/chosen": -294.8027038574219, |
|
"logps/rejected": -365.4157409667969, |
|
"loss": 0.2924, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -7.643288612365723, |
|
"rewards/margins": 4.211095809936523, |
|
"rewards/rejected": -11.85438346862793, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.932919705472306e-05, |
|
"logits/chosen": -0.7267691493034363, |
|
"logits/rejected": -0.6244961023330688, |
|
"logps/chosen": -308.15771484375, |
|
"logps/rejected": -402.1585693359375, |
|
"loss": 0.256, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -7.583883762359619, |
|
"rewards/margins": 4.4898681640625, |
|
"rewards/rejected": -12.073751449584961, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.917556308405983e-05, |
|
"logits/chosen": -0.7271707653999329, |
|
"logits/rejected": -0.652643084526062, |
|
"logps/chosen": -300.73663330078125, |
|
"logps/rejected": -384.3062438964844, |
|
"loss": 0.3276, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -7.197851657867432, |
|
"rewards/margins": 3.703601121902466, |
|
"rewards/rejected": -10.901453018188477, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.902113599237911e-05, |
|
"logits/chosen": -0.7691100835800171, |
|
"logits/rejected": -0.6458523273468018, |
|
"logps/chosen": -280.18316650390625, |
|
"logps/rejected": -383.2926940917969, |
|
"loss": 0.2926, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -7.250345706939697, |
|
"rewards/margins": 4.688068866729736, |
|
"rewards/rejected": -11.938413619995117, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8865924419857754e-05, |
|
"logits/chosen": -0.7814677953720093, |
|
"logits/rejected": -0.6641688942909241, |
|
"logps/chosen": -278.87969970703125, |
|
"logps/rejected": -368.9512939453125, |
|
"loss": 0.2021, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -7.232874393463135, |
|
"rewards/margins": 4.8439531326293945, |
|
"rewards/rejected": -12.076827049255371, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8709937050564236e-05, |
|
"logits/chosen": -0.7687026262283325, |
|
"logits/rejected": -0.6667603254318237, |
|
"logps/chosen": -292.98126220703125, |
|
"logps/rejected": -387.62799072265625, |
|
"loss": 0.277, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -9.241242408752441, |
|
"rewards/margins": 5.332201957702637, |
|
"rewards/rejected": -14.573443412780762, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.855318261197275e-05, |
|
"logits/chosen": -0.7737613916397095, |
|
"logits/rejected": -0.6474000215530396, |
|
"logps/chosen": -336.3672790527344, |
|
"logps/rejected": -445.0223083496094, |
|
"loss": 0.2395, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -9.492551803588867, |
|
"rewards/margins": 6.300028324127197, |
|
"rewards/rejected": -15.792581558227539, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.8395669874474915e-05, |
|
"logits/chosen": -0.7845865488052368, |
|
"logits/rejected": -0.6610395312309265, |
|
"logps/chosen": -248.314697265625, |
|
"logps/rejected": -367.29705810546875, |
|
"loss": 0.1917, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -8.131852149963379, |
|
"rewards/margins": 5.7781267166137695, |
|
"rewards/rejected": -13.909977912902832, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.823740765088911e-05, |
|
"logits/chosen": -0.8033556938171387, |
|
"logits/rejected": -0.6713747978210449, |
|
"logps/chosen": -300.24530029296875, |
|
"logps/rejected": -392.0542907714844, |
|
"loss": 0.2671, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -9.415604591369629, |
|
"rewards/margins": 5.034509658813477, |
|
"rewards/rejected": -14.450113296508789, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8078404795967346e-05, |
|
"logits/chosen": -0.8138594627380371, |
|
"logits/rejected": -0.7433984875679016, |
|
"logps/chosen": -287.4174499511719, |
|
"logps/rejected": -362.7212829589844, |
|
"loss": 0.3496, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -9.672359466552734, |
|
"rewards/margins": 4.173836708068848, |
|
"rewards/rejected": -13.846196174621582, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7918670205899863e-05, |
|
"logits/chosen": -0.8317422866821289, |
|
"logits/rejected": -0.7558469772338867, |
|
"logps/chosen": -288.2098388671875, |
|
"logps/rejected": -361.7750549316406, |
|
"loss": 0.2927, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -8.647623062133789, |
|
"rewards/margins": 4.320425987243652, |
|
"rewards/rejected": -12.968048095703125, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7758212817817405e-05, |
|
"logits/chosen": -0.7649298906326294, |
|
"logits/rejected": -0.6621825695037842, |
|
"logps/chosen": -288.03863525390625, |
|
"logps/rejected": -373.8728942871094, |
|
"loss": 0.3193, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -9.204707145690918, |
|
"rewards/margins": 4.832235813140869, |
|
"rewards/rejected": -14.036943435668945, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.759704160929119e-05, |
|
"logits/chosen": -0.7728848457336426, |
|
"logits/rejected": -0.6979540586471558, |
|
"logps/chosen": -301.2487487792969, |
|
"logps/rejected": -388.1095275878906, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.232488632202148, |
|
"rewards/margins": 4.96901273727417, |
|
"rewards/rejected": -13.201499938964844, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -0.8436519503593445, |
|
"eval_logits/rejected": -0.7541723251342773, |
|
"eval_logps/chosen": -293.4511413574219, |
|
"eval_logps/rejected": -372.1683044433594, |
|
"eval_loss": 0.3109963834285736, |
|
"eval_rewards/accuracies": 0.8372747302055359, |
|
"eval_rewards/chosen": -7.952679634094238, |
|
"eval_rewards/margins": 4.236935138702393, |
|
"eval_rewards/rejected": -12.189615249633789, |
|
"eval_runtime": 628.464, |
|
"eval_samples_per_second": 0.939, |
|
"eval_steps_per_second": 0.118, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.743516559783055e-05, |
|
"logits/chosen": -0.7652101516723633, |
|
"logits/rejected": -0.6921292543411255, |
|
"logps/chosen": -280.3987121582031, |
|
"logps/rejected": -345.48663330078125, |
|
"loss": 0.283, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -8.315412521362305, |
|
"rewards/margins": 4.362612724304199, |
|
"rewards/rejected": -12.67802619934082, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.727259384037852e-05, |
|
"logits/chosen": -0.8166988492012024, |
|
"logits/rejected": -0.7282482385635376, |
|
"logps/chosen": -304.59136962890625, |
|
"logps/rejected": -391.103759765625, |
|
"loss": 0.2604, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -7.748098850250244, |
|
"rewards/margins": 4.206179618835449, |
|
"rewards/rejected": -11.954277992248535, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.7109335432805006e-05, |
|
"logits/chosen": -0.8081442713737488, |
|
"logits/rejected": -0.7295960783958435, |
|
"logps/chosen": -281.9895324707031, |
|
"logps/rejected": -376.1358947753906, |
|
"loss": 0.167, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.398708343505859, |
|
"rewards/margins": 4.381220817565918, |
|
"rewards/rejected": -11.779929161071777, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6945399509397894e-05, |
|
"logits/chosen": -0.7130390405654907, |
|
"logits/rejected": -0.6480911374092102, |
|
"logps/chosen": -279.84307861328125, |
|
"logps/rejected": -350.45172119140625, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.203062057495117, |
|
"rewards/margins": 4.551394462585449, |
|
"rewards/rejected": -12.754457473754883, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.678079524235201e-05, |
|
"logits/chosen": -0.7252935171127319, |
|
"logits/rejected": -0.6565495133399963, |
|
"logps/chosen": -294.0028076171875, |
|
"logps/rejected": -342.4349365234375, |
|
"loss": 0.317, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -9.260039329528809, |
|
"rewards/margins": 4.053720951080322, |
|
"rewards/rejected": -13.313758850097656, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6615531841255925e-05, |
|
"logits/chosen": -0.8046767115592957, |
|
"logits/rejected": -0.7409691214561462, |
|
"logps/chosen": -289.1624755859375, |
|
"logps/rejected": -357.64154052734375, |
|
"loss": 0.3597, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -9.035421371459961, |
|
"rewards/margins": 4.042548179626465, |
|
"rewards/rejected": -13.077969551086426, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6449618552576695e-05, |
|
"logits/chosen": -0.7710264921188354, |
|
"logits/rejected": -0.6653739213943481, |
|
"logps/chosen": -302.7008056640625, |
|
"logps/rejected": -397.70733642578125, |
|
"loss": 0.1649, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -7.9936842918396, |
|
"rewards/margins": 4.4242682456970215, |
|
"rewards/rejected": -12.417953491210938, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.628306465914247e-05, |
|
"logits/chosen": -0.7142969369888306, |
|
"logits/rejected": -0.6375448107719421, |
|
"logps/chosen": -302.502685546875, |
|
"logps/rejected": -367.00189208984375, |
|
"loss": 0.1794, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.712922096252441, |
|
"rewards/margins": 5.476689338684082, |
|
"rewards/rejected": -15.189611434936523, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.611587947962319e-05, |
|
"logits/chosen": -0.7714215517044067, |
|
"logits/rejected": -0.66948401927948, |
|
"logps/chosen": -301.31634521484375, |
|
"logps/rejected": -381.21990966796875, |
|
"loss": 0.2573, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -10.052844047546387, |
|
"rewards/margins": 4.719965934753418, |
|
"rewards/rejected": -14.772811889648438, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5948072368009164e-05, |
|
"logits/chosen": -0.7102927565574646, |
|
"logits/rejected": -0.6355124711990356, |
|
"logps/chosen": -282.0082702636719, |
|
"logps/rejected": -378.452880859375, |
|
"loss": 0.107, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.155842781066895, |
|
"rewards/margins": 5.713255405426025, |
|
"rewards/rejected": -14.869099617004395, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.577965271308771e-05, |
|
"logits/chosen": -0.7958633303642273, |
|
"logits/rejected": -0.68883216381073, |
|
"logps/chosen": -325.7532653808594, |
|
"logps/rejected": -427.8883361816406, |
|
"loss": 0.2044, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -10.508358001708984, |
|
"rewards/margins": 6.410652160644531, |
|
"rewards/rejected": -16.919010162353516, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.56106299379179e-05, |
|
"logits/chosen": -0.7746936082839966, |
|
"logits/rejected": -0.6815862655639648, |
|
"logps/chosen": -318.01556396484375, |
|
"logps/rejected": -420.08782958984375, |
|
"loss": 0.1296, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.266778945922852, |
|
"rewards/margins": 6.939404487609863, |
|
"rewards/rejected": -17.2061824798584, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.544101349930328e-05, |
|
"logits/chosen": -0.8044699430465698, |
|
"logits/rejected": -0.7396459579467773, |
|
"logps/chosen": -339.45635986328125, |
|
"logps/rejected": -424.66339111328125, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.193872451782227, |
|
"rewards/margins": 6.339139938354492, |
|
"rewards/rejected": -18.533010482788086, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.527081288726282e-05, |
|
"logits/chosen": -0.7859062552452087, |
|
"logits/rejected": -0.6897066831588745, |
|
"logps/chosen": -325.6007080078125, |
|
"logps/rejected": -410.3528747558594, |
|
"loss": 0.218, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -10.483186721801758, |
|
"rewards/margins": 6.411290168762207, |
|
"rewards/rejected": -16.89447784423828, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.510003762449988e-05, |
|
"logits/chosen": -0.7973426580429077, |
|
"logits/rejected": -0.6915854215621948, |
|
"logps/chosen": -297.2774353027344, |
|
"logps/rejected": -404.8102722167969, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.953185081481934, |
|
"rewards/margins": 6.822778224945068, |
|
"rewards/rejected": -16.775964736938477, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.4928697265869515e-05, |
|
"logits/chosen": -0.787533164024353, |
|
"logits/rejected": -0.6797725558280945, |
|
"logps/chosen": -309.8548583984375, |
|
"logps/rejected": -425.79150390625, |
|
"loss": 0.2192, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -11.761067390441895, |
|
"rewards/margins": 6.672986030578613, |
|
"rewards/rejected": -18.434053421020508, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.475680139784379e-05, |
|
"logits/chosen": -0.7720845937728882, |
|
"logits/rejected": -0.6839457154273987, |
|
"logps/chosen": -337.7498779296875, |
|
"logps/rejected": -457.72076416015625, |
|
"loss": 0.2444, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -11.690729141235352, |
|
"rewards/margins": 6.822265625, |
|
"rewards/rejected": -18.51299285888672, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.458435963797543e-05, |
|
"logits/chosen": -0.8231949806213379, |
|
"logits/rejected": -0.7576412558555603, |
|
"logps/chosen": -294.1539306640625, |
|
"logps/rejected": -383.0396728515625, |
|
"loss": 0.3039, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -10.36955738067627, |
|
"rewards/margins": 5.448855876922607, |
|
"rewards/rejected": -15.818414688110352, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.44113816343598e-05, |
|
"logits/chosen": -0.7150863409042358, |
|
"logits/rejected": -0.6669724583625793, |
|
"logps/chosen": -310.73748779296875, |
|
"logps/rejected": -375.4198303222656, |
|
"loss": 0.3313, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -9.648468971252441, |
|
"rewards/margins": 4.457318305969238, |
|
"rewards/rejected": -14.105786323547363, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4237877065094994e-05, |
|
"logits/chosen": -0.767318606376648, |
|
"logits/rejected": -0.6701061725616455, |
|
"logps/chosen": -275.4406433105469, |
|
"logps/rejected": -367.7728576660156, |
|
"loss": 0.1817, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.383347511291504, |
|
"rewards/margins": 4.588535308837891, |
|
"rewards/rejected": -11.971882820129395, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_logits/chosen": -0.818303108215332, |
|
"eval_logits/rejected": -0.7322244644165039, |
|
"eval_logps/chosen": -289.8185119628906, |
|
"eval_logps/rejected": -367.3077087402344, |
|
"eval_loss": 0.301629900932312, |
|
"eval_rewards/accuracies": 0.8490990996360779, |
|
"eval_rewards/chosen": -7.58941650390625, |
|
"eval_rewards/margins": 4.114140033721924, |
|
"eval_rewards/rejected": -11.703556060791016, |
|
"eval_runtime": 629.8382, |
|
"eval_samples_per_second": 0.937, |
|
"eval_steps_per_second": 0.117, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.40638556377404e-05, |
|
"logits/chosen": -0.728428304195404, |
|
"logits/rejected": -0.6463292837142944, |
|
"logps/chosen": -318.2840270996094, |
|
"logps/rejected": -395.8282165527344, |
|
"loss": 0.2384, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -7.830063819885254, |
|
"rewards/margins": 4.368121147155762, |
|
"rewards/rejected": -12.198186874389648, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.388932708877355e-05, |
|
"logits/chosen": -0.7160743474960327, |
|
"logits/rejected": -0.6455902457237244, |
|
"logps/chosen": -251.85696411132812, |
|
"logps/rejected": -321.5653381347656, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -6.794950008392334, |
|
"rewards/margins": 3.9946556091308594, |
|
"rewards/rejected": -10.789606094360352, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.3714301183045385e-05, |
|
"logits/chosen": -0.764244794845581, |
|
"logits/rejected": -0.688468337059021, |
|
"logps/chosen": -280.7123718261719, |
|
"logps/rejected": -356.2121887207031, |
|
"loss": 0.1289, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -7.16657018661499, |
|
"rewards/margins": 4.953124046325684, |
|
"rewards/rejected": -12.119695663452148, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3538787713233864e-05, |
|
"logits/chosen": -0.731895923614502, |
|
"logits/rejected": -0.6309586763381958, |
|
"logps/chosen": -307.6092529296875, |
|
"logps/rejected": -397.4252014160156, |
|
"loss": 0.1751, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -8.668050765991211, |
|
"rewards/margins": 4.992732048034668, |
|
"rewards/rejected": -13.660783767700195, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.336279649929614e-05, |
|
"logits/chosen": -0.7539547681808472, |
|
"logits/rejected": -0.6342164278030396, |
|
"logps/chosen": -263.11358642578125, |
|
"logps/rejected": -358.7630920410156, |
|
"loss": 0.2056, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -8.6781644821167, |
|
"rewards/margins": 5.381811141967773, |
|
"rewards/rejected": -14.059976577758789, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.318633738791904e-05, |
|
"logits/chosen": -0.7153991460800171, |
|
"logits/rejected": -0.6307059526443481, |
|
"logps/chosen": -311.097412109375, |
|
"logps/rejected": -402.82525634765625, |
|
"loss": 0.1759, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -10.348584175109863, |
|
"rewards/margins": 6.3924970626831055, |
|
"rewards/rejected": -16.74108123779297, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3009420251968244e-05, |
|
"logits/chosen": -0.6473314762115479, |
|
"logits/rejected": -0.5811099410057068, |
|
"logps/chosen": -352.05438232421875, |
|
"logps/rejected": -461.4651794433594, |
|
"loss": 0.1742, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -12.2769193649292, |
|
"rewards/margins": 7.059269905090332, |
|
"rewards/rejected": -19.336191177368164, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.2832054989935816e-05, |
|
"logits/chosen": -0.6122779846191406, |
|
"logits/rejected": -0.535004734992981, |
|
"logps/chosen": -347.81536865234375, |
|
"logps/rejected": -453.3810119628906, |
|
"loss": 0.2875, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -15.12812328338623, |
|
"rewards/margins": 6.798358917236328, |
|
"rewards/rejected": -21.926481246948242, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.2654251525386446e-05, |
|
"logits/chosen": -0.6338608860969543, |
|
"logits/rejected": -0.5856156945228577, |
|
"logps/chosen": -412.0361328125, |
|
"logps/rejected": -496.10009765625, |
|
"loss": 0.4185, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -17.997289657592773, |
|
"rewards/margins": 6.917158603668213, |
|
"rewards/rejected": -24.914447784423828, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.247601980640217e-05, |
|
"logits/chosen": -0.5718280076980591, |
|
"logits/rejected": -0.4849475026130676, |
|
"logps/chosen": -344.85693359375, |
|
"logps/rejected": -459.1637268066406, |
|
"loss": 0.2713, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -13.744694709777832, |
|
"rewards/margins": 8.065214157104492, |
|
"rewards/rejected": -21.80990982055664, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.229736980502584e-05, |
|
"logits/chosen": -0.6040414571762085, |
|
"logits/rejected": -0.5074735283851624, |
|
"logps/chosen": -340.82037353515625, |
|
"logps/rejected": -451.6734924316406, |
|
"loss": 0.0962, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -14.090229988098145, |
|
"rewards/margins": 7.7687201499938965, |
|
"rewards/rejected": -21.858951568603516, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2118311516703126e-05, |
|
"logits/chosen": -0.6098260879516602, |
|
"logits/rejected": -0.5122631788253784, |
|
"logps/chosen": -334.35614013671875, |
|
"logps/rejected": -455.30645751953125, |
|
"loss": 0.2506, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -11.668330192565918, |
|
"rewards/margins": 7.914732933044434, |
|
"rewards/rejected": -19.583065032958984, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.193885495972333e-05, |
|
"logits/chosen": -0.5786959528923035, |
|
"logits/rejected": -0.46283912658691406, |
|
"logps/chosen": -279.6617431640625, |
|
"logps/rejected": -409.13555908203125, |
|
"loss": 0.1431, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.93445110321045, |
|
"rewards/margins": 7.9315080642700195, |
|
"rewards/rejected": -17.86595916748047, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.175901017465882e-05, |
|
"logits/chosen": -0.5994729399681091, |
|
"logits/rejected": -0.49313443899154663, |
|
"logps/chosen": -322.4575500488281, |
|
"logps/rejected": -422.0381774902344, |
|
"loss": 0.2778, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -11.083158493041992, |
|
"rewards/margins": 5.571134090423584, |
|
"rewards/rejected": -16.654294967651367, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.15787872238033e-05, |
|
"logits/chosen": -0.537551999092102, |
|
"logits/rejected": -0.4302655756473541, |
|
"logps/chosen": -306.2028503417969, |
|
"logps/rejected": -396.2867736816406, |
|
"loss": 0.145, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.533339500427246, |
|
"rewards/margins": 6.1773786544799805, |
|
"rewards/rejected": -15.710718154907227, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.139819619060875e-05, |
|
"logits/chosen": -0.6228593587875366, |
|
"logits/rejected": -0.48516249656677246, |
|
"logps/chosen": -295.23992919921875, |
|
"logps/rejected": -409.243408203125, |
|
"loss": 0.1149, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -9.989641189575195, |
|
"rewards/margins": 5.8016133308410645, |
|
"rewards/rejected": -15.791254997253418, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.121724717912138e-05, |
|
"logits/chosen": -0.5744537711143494, |
|
"logits/rejected": -0.48622965812683105, |
|
"logps/chosen": -287.59918212890625, |
|
"logps/rejected": -393.9739990234375, |
|
"loss": 0.1598, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -10.127786636352539, |
|
"rewards/margins": 6.246671676635742, |
|
"rewards/rejected": -16.37445831298828, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.1035950313416186e-05, |
|
"logits/chosen": -0.5441586375236511, |
|
"logits/rejected": -0.4760468900203705, |
|
"logps/chosen": -310.5946350097656, |
|
"logps/rejected": -382.4266052246094, |
|
"loss": 0.2672, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -10.724095344543457, |
|
"rewards/margins": 4.989101409912109, |
|
"rewards/rejected": -15.71319580078125, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.0854315737030596e-05, |
|
"logits/chosen": -0.5022346377372742, |
|
"logits/rejected": -0.4010813236236572, |
|
"logps/chosen": -339.79217529296875, |
|
"logps/rejected": -446.3185119628906, |
|
"loss": 0.1778, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.776400566101074, |
|
"rewards/margins": 6.131360054016113, |
|
"rewards/rejected": -15.907760620117188, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.067235361239691e-05, |
|
"logits/chosen": -0.5425149202346802, |
|
"logits/rejected": -0.4898713231086731, |
|
"logps/chosen": -321.54107666015625, |
|
"logps/rejected": -396.061767578125, |
|
"loss": 0.1302, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.591651916503906, |
|
"rewards/margins": 5.938723087310791, |
|
"rewards/rejected": -15.530374526977539, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_logits/chosen": -0.6444261074066162, |
|
"eval_logits/rejected": -0.5499116778373718, |
|
"eval_logps/chosen": -311.4747314453125, |
|
"eval_logps/rejected": -402.7172546386719, |
|
"eval_loss": 0.31004929542541504, |
|
"eval_rewards/accuracies": 0.8440315127372742, |
|
"eval_rewards/chosen": -9.75503921508789, |
|
"eval_rewards/margins": 5.48947286605835, |
|
"eval_rewards/rejected": -15.244511604309082, |
|
"eval_runtime": 630.05, |
|
"eval_samples_per_second": 0.936, |
|
"eval_steps_per_second": 0.117, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.0490074120273705e-05, |
|
"logits/chosen": -0.5694734454154968, |
|
"logits/rejected": -0.4494093060493469, |
|
"logps/chosen": -316.00225830078125, |
|
"logps/rejected": -421.5968322753906, |
|
"loss": 0.1442, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -9.85560417175293, |
|
"rewards/margins": 6.093335151672363, |
|
"rewards/rejected": -15.948938369750977, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.0307487459176214e-05, |
|
"logits/chosen": -0.5880926847457886, |
|
"logits/rejected": -0.5118281245231628, |
|
"logps/chosen": -308.32135009765625, |
|
"logps/rejected": -398.5103454589844, |
|
"loss": 0.1556, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.929667472839355, |
|
"rewards/margins": 6.390011310577393, |
|
"rewards/rejected": -17.319677352905273, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.0124603844805767e-05, |
|
"logits/chosen": -0.6120959520339966, |
|
"logits/rejected": -0.5137495994567871, |
|
"logps/chosen": -274.90301513671875, |
|
"logps/rejected": -376.3819580078125, |
|
"loss": 0.1425, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -8.89677906036377, |
|
"rewards/margins": 6.732755184173584, |
|
"rewards/rejected": -15.629533767700195, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.9941433509478156e-05, |
|
"logits/chosen": -0.6175707578659058, |
|
"logits/rejected": -0.5255206823348999, |
|
"logps/chosen": -344.87469482421875, |
|
"logps/rejected": -474.46417236328125, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -11.884271621704102, |
|
"rewards/margins": 8.336640357971191, |
|
"rewards/rejected": -20.22091293334961, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.9757986701551217e-05, |
|
"logits/chosen": -0.6097782850265503, |
|
"logits/rejected": -0.5127190351486206, |
|
"logps/chosen": -341.8258361816406, |
|
"logps/rejected": -455.5184631347656, |
|
"loss": 0.3434, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -13.099444389343262, |
|
"rewards/margins": 6.77566385269165, |
|
"rewards/rejected": -19.87510871887207, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.9574273684851368e-05, |
|
"logits/chosen": -0.6266240477561951, |
|
"logits/rejected": -0.5253201723098755, |
|
"logps/chosen": -338.4235534667969, |
|
"logps/rejected": -465.1094665527344, |
|
"loss": 0.2116, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -11.452531814575195, |
|
"rewards/margins": 8.521638870239258, |
|
"rewards/rejected": -19.97416877746582, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.9390304738099384e-05, |
|
"logits/chosen": -0.5922500491142273, |
|
"logits/rejected": -0.4831523299217224, |
|
"logps/chosen": -318.95013427734375, |
|
"logps/rejected": -428.1212463378906, |
|
"loss": 0.2513, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -10.582112312316895, |
|
"rewards/margins": 6.628012180328369, |
|
"rewards/rejected": -17.21012306213379, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.9206090154335286e-05, |
|
"logits/chosen": -0.6134576201438904, |
|
"logits/rejected": -0.5388885140419006, |
|
"logps/chosen": -346.55096435546875, |
|
"logps/rejected": -444.6732482910156, |
|
"loss": 0.1774, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.223394393920898, |
|
"rewards/margins": 7.667645454406738, |
|
"rewards/rejected": -19.891040802001953, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.902164024034246e-05, |
|
"logits/chosen": -0.6513973474502563, |
|
"logits/rejected": -0.5483208894729614, |
|
"logps/chosen": -284.67681884765625, |
|
"logps/rejected": -405.2633361816406, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -9.958810806274414, |
|
"rewards/margins": 7.701412200927734, |
|
"rewards/rejected": -17.66022491455078, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8836965316071008e-05, |
|
"logits/chosen": -0.5772289037704468, |
|
"logits/rejected": -0.46622151136398315, |
|
"logps/chosen": -317.7015686035156, |
|
"logps/rejected": -425.06414794921875, |
|
"loss": 0.2158, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -10.088019371032715, |
|
"rewards/margins": 6.411609649658203, |
|
"rewards/rejected": -16.4996280670166, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8652075714060295e-05, |
|
"logits/chosen": -0.5903247594833374, |
|
"logits/rejected": -0.46506476402282715, |
|
"logps/chosen": -282.30987548828125, |
|
"logps/rejected": -395.7618103027344, |
|
"loss": 0.144, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.98021125793457, |
|
"rewards/margins": 6.78778076171875, |
|
"rewards/rejected": -15.76799201965332, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.84669817788609e-05, |
|
"logits/chosen": -0.6248329877853394, |
|
"logits/rejected": -0.5722588300704956, |
|
"logps/chosen": -274.6296081542969, |
|
"logps/rejected": -356.05841064453125, |
|
"loss": 0.2905, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -8.918261528015137, |
|
"rewards/margins": 4.942833423614502, |
|
"rewards/rejected": -13.86109447479248, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.8281693866455828e-05, |
|
"logits/chosen": -0.6164138913154602, |
|
"logits/rejected": -0.487932026386261, |
|
"logps/chosen": -297.5748291015625, |
|
"logps/rejected": -422.21270751953125, |
|
"loss": 0.1084, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.948514938354492, |
|
"rewards/margins": 6.124258995056152, |
|
"rewards/rejected": -15.072774887084961, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.809622234368106e-05, |
|
"logits/chosen": -0.5711289644241333, |
|
"logits/rejected": -0.48795753717422485, |
|
"logps/chosen": -316.45428466796875, |
|
"logps/rejected": -398.5165710449219, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -9.117523193359375, |
|
"rewards/margins": 5.211916923522949, |
|
"rewards/rejected": -14.329440116882324, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.791057758764557e-05, |
|
"logits/chosen": -0.6409615874290466, |
|
"logits/rejected": -0.5637831687927246, |
|
"logps/chosen": -284.11212158203125, |
|
"logps/rejected": -370.08154296875, |
|
"loss": 0.1588, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -8.732550621032715, |
|
"rewards/margins": 5.230337142944336, |
|
"rewards/rejected": -13.962888717651367, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.7724769985150724e-05, |
|
"logits/chosen": -0.6284760236740112, |
|
"logits/rejected": -0.5196130871772766, |
|
"logps/chosen": -282.87725830078125, |
|
"logps/rejected": -395.02667236328125, |
|
"loss": 0.1299, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -8.227498054504395, |
|
"rewards/margins": 6.107052803039551, |
|
"rewards/rejected": -14.334550857543945, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.7538809932109088e-05, |
|
"logits/chosen": -0.5886726379394531, |
|
"logits/rejected": -0.5417076945304871, |
|
"logps/chosen": -316.87481689453125, |
|
"logps/rejected": -394.08331298828125, |
|
"loss": 0.2341, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.180241584777832, |
|
"rewards/margins": 5.964137554168701, |
|
"rewards/rejected": -15.144380569458008, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.7352707832962865e-05, |
|
"logits/chosen": -0.6544710397720337, |
|
"logits/rejected": -0.5368794202804565, |
|
"logps/chosen": -300.3536682128906, |
|
"logps/rejected": -411.3946228027344, |
|
"loss": 0.1395, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.438309669494629, |
|
"rewards/margins": 6.873599052429199, |
|
"rewards/rejected": -16.311908721923828, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.7166474100101673e-05, |
|
"logits/chosen": -0.5713576078414917, |
|
"logits/rejected": -0.5301340818405151, |
|
"logps/chosen": -340.8475341796875, |
|
"logps/rejected": -415.6228942871094, |
|
"loss": 0.3981, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -11.409144401550293, |
|
"rewards/margins": 6.274621963500977, |
|
"rewards/rejected": -17.683767318725586, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.698011915328007e-05, |
|
"logits/chosen": -0.599279522895813, |
|
"logits/rejected": -0.5524182915687561, |
|
"logps/chosen": -303.6630554199219, |
|
"logps/rejected": -378.23028564453125, |
|
"loss": 0.1827, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.248080253601074, |
|
"rewards/margins": 5.6905198097229, |
|
"rewards/rejected": -14.93860149383545, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_logits/chosen": -0.6628807783126831, |
|
"eval_logits/rejected": -0.5718752145767212, |
|
"eval_logps/chosen": -301.6939697265625, |
|
"eval_logps/rejected": -390.1686096191406, |
|
"eval_loss": 0.30422672629356384, |
|
"eval_rewards/accuracies": 0.8474099040031433, |
|
"eval_rewards/chosen": -8.776962280273438, |
|
"eval_rewards/margins": 5.212679386138916, |
|
"eval_rewards/rejected": -13.989642143249512, |
|
"eval_runtime": 634.3305, |
|
"eval_samples_per_second": 0.93, |
|
"eval_steps_per_second": 0.117, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6793653419034482e-05, |
|
"logits/chosen": -0.6186808943748474, |
|
"logits/rejected": -0.48634958267211914, |
|
"logps/chosen": -316.30010986328125, |
|
"logps/rejected": -424.89190673828125, |
|
"loss": 0.2464, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -8.771645545959473, |
|
"rewards/margins": 5.069327354431152, |
|
"rewards/rejected": -13.840974807739258, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6607087330099896e-05, |
|
"logits/chosen": -0.5876252055168152, |
|
"logits/rejected": -0.47562646865844727, |
|
"logps/chosen": -290.5619201660156, |
|
"logps/rejected": -375.8335876464844, |
|
"loss": 0.1158, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.724277496337891, |
|
"rewards/margins": 4.774792194366455, |
|
"rewards/rejected": -12.499069213867188, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.6420431324826117e-05, |
|
"logits/chosen": -0.5867003202438354, |
|
"logits/rejected": -0.4697963297367096, |
|
"logps/chosen": -310.0220642089844, |
|
"logps/rejected": -403.21875, |
|
"loss": 0.2792, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -9.85572338104248, |
|
"rewards/margins": 4.663787841796875, |
|
"rewards/rejected": -14.519511222839355, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.623369584659378e-05, |
|
"logits/chosen": -0.6174901127815247, |
|
"logits/rejected": -0.5109848976135254, |
|
"logps/chosen": -287.13739013671875, |
|
"logps/rejected": -391.56988525390625, |
|
"loss": 0.1586, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.303506851196289, |
|
"rewards/margins": 6.212575912475586, |
|
"rewards/rejected": -14.516082763671875, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.604689134322999e-05, |
|
"logits/chosen": -0.6351348161697388, |
|
"logits/rejected": -0.5409465432167053, |
|
"logps/chosen": -311.77081298828125, |
|
"logps/rejected": -398.3648681640625, |
|
"loss": 0.247, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -10.259727478027344, |
|
"rewards/margins": 5.910742282867432, |
|
"rewards/rejected": -16.170469284057617, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5860028266423842e-05, |
|
"logits/chosen": -0.6340417265892029, |
|
"logits/rejected": -0.5169082283973694, |
|
"logps/chosen": -298.0379638671875, |
|
"logps/rejected": -422.7288513183594, |
|
"loss": 0.1215, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.591285705566406, |
|
"rewards/margins": 7.814417839050293, |
|
"rewards/rejected": -17.405704498291016, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5673117071141572e-05, |
|
"logits/chosen": -0.6154165267944336, |
|
"logits/rejected": -0.5234094858169556, |
|
"logps/chosen": -323.8004150390625, |
|
"logps/rejected": -435.37017822265625, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.34335708618164, |
|
"rewards/margins": 6.607995510101318, |
|
"rewards/rejected": -15.9513521194458, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5486168215041658e-05, |
|
"logits/chosen": -0.681925892829895, |
|
"logits/rejected": -0.589966893196106, |
|
"logps/chosen": -333.8614196777344, |
|
"logps/rejected": -436.3663024902344, |
|
"loss": 0.1047, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.642461776733398, |
|
"rewards/margins": 7.235697269439697, |
|
"rewards/rejected": -17.87816047668457, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5299192157889684e-05, |
|
"logits/chosen": -0.7273809313774109, |
|
"logits/rejected": -0.6066460013389587, |
|
"logps/chosen": -306.0868835449219, |
|
"logps/rejected": -421.70843505859375, |
|
"loss": 0.2175, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -11.231630325317383, |
|
"rewards/margins": 6.70697021484375, |
|
"rewards/rejected": -17.938602447509766, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5112199360973144e-05, |
|
"logits/chosen": -0.6637213230133057, |
|
"logits/rejected": -0.5721598863601685, |
|
"logps/chosen": -306.43194580078125, |
|
"logps/rejected": -416.2913513183594, |
|
"loss": 0.2024, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -11.468119621276855, |
|
"rewards/margins": 7.181309700012207, |
|
"rewards/rejected": -18.649429321289062, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4925200286516097e-05, |
|
"logits/chosen": -0.6478602886199951, |
|
"logits/rejected": -0.5861232876777649, |
|
"logps/chosen": -358.63763427734375, |
|
"logps/rejected": -457.5341796875, |
|
"loss": 0.1814, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -13.788897514343262, |
|
"rewards/margins": 7.263192653656006, |
|
"rewards/rejected": -21.05208969116211, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4738205397093864e-05, |
|
"logits/chosen": -0.6312528848648071, |
|
"logits/rejected": -0.5815685987472534, |
|
"logps/chosen": -361.5733337402344, |
|
"logps/rejected": -460.53643798828125, |
|
"loss": 0.1815, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -13.369549751281738, |
|
"rewards/margins": 7.296360015869141, |
|
"rewards/rejected": -20.665910720825195, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4551225155047573e-05, |
|
"logits/chosen": -0.7103615999221802, |
|
"logits/rejected": -0.6112437844276428, |
|
"logps/chosen": -295.20159912109375, |
|
"logps/rejected": -415.26708984375, |
|
"loss": 0.2122, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -11.453139305114746, |
|
"rewards/margins": 6.4525604248046875, |
|
"rewards/rejected": -17.90570068359375, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.436427002189887e-05, |
|
"logits/chosen": -0.7065337896347046, |
|
"logits/rejected": -0.6106780767440796, |
|
"logps/chosen": -296.2615661621094, |
|
"logps/rejected": -409.4453430175781, |
|
"loss": 0.2441, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.548809051513672, |
|
"rewards/margins": 6.738284111022949, |
|
"rewards/rejected": -16.287092208862305, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.417735045776453e-05, |
|
"logits/chosen": -0.7064138650894165, |
|
"logits/rejected": -0.6145733594894409, |
|
"logps/chosen": -307.0589294433594, |
|
"logps/rejected": -408.3307189941406, |
|
"loss": 0.2607, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.794357299804688, |
|
"rewards/margins": 6.440938472747803, |
|
"rewards/rejected": -16.235294342041016, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.3990476920771286e-05, |
|
"logits/chosen": -0.7192695140838623, |
|
"logits/rejected": -0.6189507246017456, |
|
"logps/chosen": -304.00958251953125, |
|
"logps/rejected": -403.37261962890625, |
|
"loss": 0.2878, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -10.866254806518555, |
|
"rewards/margins": 5.947203636169434, |
|
"rewards/rejected": -16.813457489013672, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.380365986647061e-05, |
|
"logits/chosen": -0.7065758109092712, |
|
"logits/rejected": -0.641872227191925, |
|
"logps/chosen": -302.8485412597656, |
|
"logps/rejected": -375.76678466796875, |
|
"loss": 0.3072, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -8.800479888916016, |
|
"rewards/margins": 5.099647521972656, |
|
"rewards/rejected": -13.900128364562988, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.3616909747253813e-05, |
|
"logits/chosen": -0.7004716992378235, |
|
"logits/rejected": -0.6112024188041687, |
|
"logps/chosen": -309.6910095214844, |
|
"logps/rejected": -411.49603271484375, |
|
"loss": 0.1917, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.332594871520996, |
|
"rewards/margins": 5.8438544273376465, |
|
"rewards/rejected": -15.176447868347168, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.3430237011767167e-05, |
|
"logits/chosen": -0.6590038537979126, |
|
"logits/rejected": -0.6286568641662598, |
|
"logps/chosen": -323.1142578125, |
|
"logps/rejected": -386.729248046875, |
|
"loss": 0.1316, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -10.204951286315918, |
|
"rewards/margins": 5.9645094871521, |
|
"rewards/rejected": -16.16946029663086, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.324365210432736e-05, |
|
"logits/chosen": -0.5852640867233276, |
|
"logits/rejected": -0.5034244656562805, |
|
"logps/chosen": -326.63592529296875, |
|
"logps/rejected": -411.2947692871094, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.820871353149414, |
|
"rewards/margins": 6.314654350280762, |
|
"rewards/rejected": -17.13552474975586, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_logits/chosen": -0.7270016670227051, |
|
"eval_logits/rejected": -0.6384946703910828, |
|
"eval_logps/chosen": -315.1874084472656, |
|
"eval_logps/rejected": -408.69952392578125, |
|
"eval_loss": 0.31625449657440186, |
|
"eval_rewards/accuracies": 0.8462837934494019, |
|
"eval_rewards/chosen": -10.126302719116211, |
|
"eval_rewards/margins": 5.716432571411133, |
|
"eval_rewards/rejected": -15.842737197875977, |
|
"eval_runtime": 634.9153, |
|
"eval_samples_per_second": 0.929, |
|
"eval_steps_per_second": 0.117, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.305716546433709e-05, |
|
"logits/chosen": -0.6512206792831421, |
|
"logits/rejected": -0.5776611566543579, |
|
"logps/chosen": -314.0259094238281, |
|
"logps/rejected": -414.47381591796875, |
|
"loss": 0.0902, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.528992652893066, |
|
"rewards/margins": 6.7770538330078125, |
|
"rewards/rejected": -16.306045532226562, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.287078752570102e-05, |
|
"logits/chosen": -0.6705536246299744, |
|
"logits/rejected": -0.598176121711731, |
|
"logps/chosen": -297.4195251464844, |
|
"logps/rejected": -379.747314453125, |
|
"loss": 0.1952, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -10.207822799682617, |
|
"rewards/margins": 6.272828102111816, |
|
"rewards/rejected": -16.480653762817383, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.268452871624197e-05, |
|
"logits/chosen": -0.6773346662521362, |
|
"logits/rejected": -0.5620519518852234, |
|
"logps/chosen": -294.04931640625, |
|
"logps/rejected": -409.580078125, |
|
"loss": 0.2763, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -9.497777938842773, |
|
"rewards/margins": 6.8513360023498535, |
|
"rewards/rejected": -16.3491153717041, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.2498399457117507e-05, |
|
"logits/chosen": -0.6227138638496399, |
|
"logits/rejected": -0.5035305023193359, |
|
"logps/chosen": -294.11920166015625, |
|
"logps/rejected": -401.7522888183594, |
|
"loss": 0.1382, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -11.312589645385742, |
|
"rewards/margins": 6.311239719390869, |
|
"rewards/rejected": -17.623828887939453, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.2312410162236883e-05, |
|
"logits/chosen": -0.6217914819717407, |
|
"logits/rejected": -0.5321698188781738, |
|
"logps/chosen": -295.13458251953125, |
|
"logps/rejected": -377.95477294921875, |
|
"loss": 0.1539, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.668817520141602, |
|
"rewards/margins": 5.681490421295166, |
|
"rewards/rejected": -15.350309371948242, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.212657123767834e-05, |
|
"logits/chosen": -0.5903616547584534, |
|
"logits/rejected": -0.5240530371665955, |
|
"logps/chosen": -316.3643798828125, |
|
"logps/rejected": -406.33770751953125, |
|
"loss": 0.3331, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.005239486694336, |
|
"rewards/margins": 5.931736946105957, |
|
"rewards/rejected": -14.936976432800293, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.1940893081106945e-05, |
|
"logits/chosen": -0.6103485822677612, |
|
"logits/rejected": -0.5175549387931824, |
|
"logps/chosen": -305.53564453125, |
|
"logps/rejected": -410.45703125, |
|
"loss": 0.255, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -9.499643325805664, |
|
"rewards/margins": 6.158064365386963, |
|
"rewards/rejected": -15.657705307006836, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.1755386081192787e-05, |
|
"logits/chosen": -0.59885174036026, |
|
"logits/rejected": -0.5194819569587708, |
|
"logps/chosen": -310.4798278808594, |
|
"logps/rejected": -393.2937927246094, |
|
"loss": 0.2257, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -9.330568313598633, |
|
"rewards/margins": 5.730391502380371, |
|
"rewards/rejected": -15.060958862304688, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.1570060617029803e-05, |
|
"logits/chosen": -0.636796236038208, |
|
"logits/rejected": -0.5199950933456421, |
|
"logps/chosen": -274.0980529785156, |
|
"logps/rejected": -394.688232421875, |
|
"loss": 0.1851, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.185386657714844, |
|
"rewards/margins": 6.295451641082764, |
|
"rewards/rejected": -14.48083782196045, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.1384927057554994e-05, |
|
"logits/chosen": -0.6535948514938354, |
|
"logits/rejected": -0.5714461207389832, |
|
"logps/chosen": -307.36199951171875, |
|
"logps/rejected": -401.22943115234375, |
|
"loss": 0.1526, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.084749221801758, |
|
"rewards/margins": 6.451979160308838, |
|
"rewards/rejected": -15.536727905273438, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.119999576096832e-05, |
|
"logits/chosen": -0.6453255414962769, |
|
"logits/rejected": -0.5674460530281067, |
|
"logps/chosen": -276.06915283203125, |
|
"logps/rejected": -355.5702819824219, |
|
"loss": 0.122, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.452573776245117, |
|
"rewards/margins": 6.0047831535339355, |
|
"rewards/rejected": -14.457359313964844, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.1015277074153173e-05, |
|
"logits/chosen": -0.6841253042221069, |
|
"logits/rejected": -0.5945171117782593, |
|
"logps/chosen": -268.9891357421875, |
|
"logps/rejected": -360.72113037109375, |
|
"loss": 0.169, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.454917907714844, |
|
"rewards/margins": 5.437428951263428, |
|
"rewards/rejected": -14.892346382141113, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.0830781332097446e-05, |
|
"logits/chosen": -0.6696738600730896, |
|
"logits/rejected": -0.5968156456947327, |
|
"logps/chosen": -316.0697937011719, |
|
"logps/rejected": -404.7145080566406, |
|
"loss": 0.2108, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -10.760259628295898, |
|
"rewards/margins": 5.666402816772461, |
|
"rewards/rejected": -16.42666244506836, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.0646518857315286e-05, |
|
"logits/chosen": -0.678158164024353, |
|
"logits/rejected": -0.5591440796852112, |
|
"logps/chosen": -289.26123046875, |
|
"logps/rejected": -401.099609375, |
|
"loss": 0.1788, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.927151679992676, |
|
"rewards/margins": 6.700024604797363, |
|
"rewards/rejected": -15.627174377441406, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.0462499959269593e-05, |
|
"logits/chosen": -0.6390899419784546, |
|
"logits/rejected": -0.548464834690094, |
|
"logps/chosen": -317.1548156738281, |
|
"logps/rejected": -421.1346130371094, |
|
"loss": 0.1597, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -10.989806175231934, |
|
"rewards/margins": 7.106358528137207, |
|
"rewards/rejected": -18.09616470336914, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.027873493379515e-05, |
|
"logits/chosen": -0.6277307271957397, |
|
"logits/rejected": -0.5807863473892212, |
|
"logps/chosen": -339.5156555175781, |
|
"logps/rejected": -441.6835021972656, |
|
"loss": 0.1767, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -12.768059730529785, |
|
"rewards/margins": 8.134899139404297, |
|
"rewards/rejected": -20.9029598236084, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.009523406252263e-05, |
|
"logits/chosen": -0.6854568719863892, |
|
"logits/rejected": -0.5896847248077393, |
|
"logps/chosen": -311.6221008300781, |
|
"logps/rejected": -419.89874267578125, |
|
"loss": 0.1899, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -11.535701751708984, |
|
"rewards/margins": 7.6829938888549805, |
|
"rewards/rejected": -19.218698501586914, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.9912007612303273e-05, |
|
"logits/chosen": -0.6906639933586121, |
|
"logits/rejected": -0.5620826482772827, |
|
"logps/chosen": -312.9720458984375, |
|
"logps/rejected": -456.7838439941406, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -12.278552055358887, |
|
"rewards/margins": 8.670220375061035, |
|
"rewards/rejected": -20.94877052307129, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.972906583463453e-05, |
|
"logits/chosen": -0.6635552644729614, |
|
"logits/rejected": -0.5383149981498718, |
|
"logps/chosen": -345.0733337402344, |
|
"logps/rejected": -486.62255859375, |
|
"loss": 0.2801, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -12.4635648727417, |
|
"rewards/margins": 9.190757751464844, |
|
"rewards/rejected": -21.654321670532227, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.9546418965086442e-05, |
|
"logits/chosen": -0.6591445207595825, |
|
"logits/rejected": -0.5743043422698975, |
|
"logps/chosen": -331.17236328125, |
|
"logps/rejected": -429.6659240722656, |
|
"loss": 0.3806, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -13.678789138793945, |
|
"rewards/margins": 7.728486061096191, |
|
"rewards/rejected": -21.407276153564453, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_logits/chosen": -0.7206234335899353, |
|
"eval_logits/rejected": -0.6278703808784485, |
|
"eval_logps/chosen": -342.28253173828125, |
|
"eval_logps/rejected": -453.8127136230469, |
|
"eval_loss": 0.36858823895454407, |
|
"eval_rewards/accuracies": 0.8389639258384705, |
|
"eval_rewards/chosen": -12.83581256866455, |
|
"eval_rewards/margins": 7.5182414054870605, |
|
"eval_rewards/rejected": -20.354055404663086, |
|
"eval_runtime": 636.0122, |
|
"eval_samples_per_second": 0.928, |
|
"eval_steps_per_second": 0.116, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.936407722272897e-05, |
|
"logits/chosen": -0.6339019536972046, |
|
"logits/rejected": -0.5375872850418091, |
|
"logps/chosen": -344.52325439453125, |
|
"logps/rejected": -457.71044921875, |
|
"loss": 0.2081, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -12.950579643249512, |
|
"rewards/margins": 7.515866756439209, |
|
"rewards/rejected": -20.466445922851562, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.918205080956027e-05, |
|
"logits/chosen": -0.666244626045227, |
|
"logits/rejected": -0.5665922164916992, |
|
"logps/chosen": -329.7124938964844, |
|
"logps/rejected": -429.80743408203125, |
|
"loss": 0.2238, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -12.665807723999023, |
|
"rewards/margins": 6.334603309631348, |
|
"rewards/rejected": -19.000411987304688, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.9000349909935853e-05, |
|
"logits/chosen": -0.6387889981269836, |
|
"logits/rejected": -0.5700687170028687, |
|
"logps/chosen": -349.22650146484375, |
|
"logps/rejected": -453.98126220703125, |
|
"loss": 0.3322, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -11.654592514038086, |
|
"rewards/margins": 8.054789543151855, |
|
"rewards/rejected": -19.70938491821289, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.881898468999877e-05, |
|
"logits/chosen": -0.6908236145973206, |
|
"logits/rejected": -0.5804257392883301, |
|
"logps/chosen": -322.011962890625, |
|
"logps/rejected": -424.2376403808594, |
|
"loss": 0.2416, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -11.60191822052002, |
|
"rewards/margins": 5.517712116241455, |
|
"rewards/rejected": -17.119630813598633, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.8637965297110875e-05, |
|
"logits/chosen": -0.7101341485977173, |
|
"logits/rejected": -0.5983768701553345, |
|
"logps/chosen": -301.9000549316406, |
|
"logps/rejected": -410.75616455078125, |
|
"loss": 0.2319, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.447723388671875, |
|
"rewards/margins": 6.480834007263184, |
|
"rewards/rejected": -15.928556442260742, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.8457301859284997e-05, |
|
"logits/chosen": -0.634274423122406, |
|
"logits/rejected": -0.5649651288986206, |
|
"logps/chosen": -323.43743896484375, |
|
"logps/rejected": -414.6199645996094, |
|
"loss": 0.1706, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.627046585083008, |
|
"rewards/margins": 6.101047992706299, |
|
"rewards/rejected": -14.728096008300781, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.827700448461836e-05, |
|
"logits/chosen": -0.689033567905426, |
|
"logits/rejected": -0.5954291820526123, |
|
"logps/chosen": -299.60467529296875, |
|
"logps/rejected": -390.62445068359375, |
|
"loss": 0.2506, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -8.309510231018066, |
|
"rewards/margins": 5.420113563537598, |
|
"rewards/rejected": -13.729623794555664, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.809708326072697e-05, |
|
"logits/chosen": -0.7266713380813599, |
|
"logits/rejected": -0.6097759008407593, |
|
"logps/chosen": -285.2210388183594, |
|
"logps/rejected": -398.92987060546875, |
|
"loss": 0.1802, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.085926055908203, |
|
"rewards/margins": 5.3229451179504395, |
|
"rewards/rejected": -13.4088716506958, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.7917548254181273e-05, |
|
"logits/chosen": -0.6782897114753723, |
|
"logits/rejected": -0.5887526273727417, |
|
"logps/chosen": -302.5890197753906, |
|
"logps/rejected": -385.482421875, |
|
"loss": 0.2692, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -9.081730842590332, |
|
"rewards/margins": 4.592134952545166, |
|
"rewards/rejected": -13.673864364624023, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.773840950994286e-05, |
|
"logits/chosen": -0.7145626544952393, |
|
"logits/rejected": -0.6292673945426941, |
|
"logps/chosen": -280.5959167480469, |
|
"logps/rejected": -367.8932189941406, |
|
"loss": 0.2564, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.174077987670898, |
|
"rewards/margins": 5.061230182647705, |
|
"rewards/rejected": -13.235307693481445, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.7559677050802544e-05, |
|
"logits/chosen": -0.7608879208564758, |
|
"logits/rejected": -0.675665020942688, |
|
"logps/chosen": -273.0861511230469, |
|
"logps/rejected": -371.0169677734375, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.474713325500488, |
|
"rewards/margins": 5.627039909362793, |
|
"rewards/rejected": -13.101751327514648, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.7381360876819498e-05, |
|
"logits/chosen": -0.6994717121124268, |
|
"logits/rejected": -0.615476667881012, |
|
"logps/chosen": -297.80438232421875, |
|
"logps/rejected": -379.27227783203125, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.676013946533203, |
|
"rewards/margins": 5.39193058013916, |
|
"rewards/rejected": -14.067944526672363, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.7203470964761803e-05, |
|
"logits/chosen": -0.779729425907135, |
|
"logits/rejected": -0.6966686248779297, |
|
"logps/chosen": -306.7568359375, |
|
"logps/rejected": -396.3369140625, |
|
"loss": 0.1812, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.232124328613281, |
|
"rewards/margins": 5.344344139099121, |
|
"rewards/rejected": -14.576467514038086, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.702601726754825e-05, |
|
"logits/chosen": -0.7657872438430786, |
|
"logits/rejected": -0.695815920829773, |
|
"logps/chosen": -310.4090881347656, |
|
"logps/rejected": -391.716552734375, |
|
"loss": 0.1966, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -10.204607009887695, |
|
"rewards/margins": 5.683955669403076, |
|
"rewards/rejected": -15.888562202453613, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.6849009713691454e-05, |
|
"logits/chosen": -0.7923716306686401, |
|
"logits/rejected": -0.7177401781082153, |
|
"logps/chosen": -327.0381164550781, |
|
"logps/rejected": -423.6458435058594, |
|
"loss": 0.1769, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -10.632159233093262, |
|
"rewards/margins": 5.828563213348389, |
|
"rewards/rejected": -16.460723876953125, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.6672458206742357e-05, |
|
"logits/chosen": -0.8086712956428528, |
|
"logits/rejected": -0.7244886755943298, |
|
"logps/chosen": -305.0154113769531, |
|
"logps/rejected": -400.279296875, |
|
"loss": 0.1933, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -10.2609224319458, |
|
"rewards/margins": 5.673158168792725, |
|
"rewards/rejected": -15.934080123901367, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.6496372624736144e-05, |
|
"logits/chosen": -0.7907301187515259, |
|
"logits/rejected": -0.6912959814071655, |
|
"logps/chosen": -317.41009521484375, |
|
"logps/rejected": -433.7093200683594, |
|
"loss": 0.2232, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -10.225587844848633, |
|
"rewards/margins": 7.162631988525391, |
|
"rewards/rejected": -17.388219833374023, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.632076281963954e-05, |
|
"logits/chosen": -0.7801695466041565, |
|
"logits/rejected": -0.7278872728347778, |
|
"logps/chosen": -302.41412353515625, |
|
"logps/rejected": -372.16424560546875, |
|
"loss": 0.2812, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -10.3536376953125, |
|
"rewards/margins": 5.181726932525635, |
|
"rewards/rejected": -15.535364151000977, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.6145638616799635e-05, |
|
"logits/chosen": -0.7852599024772644, |
|
"logits/rejected": -0.6998826265335083, |
|
"logps/chosen": -294.0755920410156, |
|
"logps/rejected": -397.5310363769531, |
|
"loss": 0.1557, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.05544662475586, |
|
"rewards/margins": 7.03853702545166, |
|
"rewards/rejected": -17.093982696533203, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.5971009814394106e-05, |
|
"logits/chosen": -0.7821277379989624, |
|
"logits/rejected": -0.7008212208747864, |
|
"logps/chosen": -301.183837890625, |
|
"logps/rejected": -388.2525329589844, |
|
"loss": 0.1469, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -10.988730430603027, |
|
"rewards/margins": 6.046961784362793, |
|
"rewards/rejected": -17.035694122314453, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_logits/chosen": -0.8328903913497925, |
|
"eval_logits/rejected": -0.7482640147209167, |
|
"eval_logps/chosen": -325.64190673828125, |
|
"eval_logps/rejected": -427.86767578125, |
|
"eval_loss": 0.3321731686592102, |
|
"eval_rewards/accuracies": 0.8423423171043396, |
|
"eval_rewards/chosen": -11.17175579071045, |
|
"eval_rewards/margins": 6.587794303894043, |
|
"eval_rewards/rejected": -17.759550094604492, |
|
"eval_runtime": 635.9076, |
|
"eval_samples_per_second": 0.928, |
|
"eval_steps_per_second": 0.116, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.5796886182883053e-05, |
|
"logits/chosen": -0.7993537783622742, |
|
"logits/rejected": -0.7007473707199097, |
|
"logps/chosen": -317.75762939453125, |
|
"logps/rejected": -439.30316162109375, |
|
"loss": 0.0972, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -11.04917049407959, |
|
"rewards/margins": 8.292473793029785, |
|
"rewards/rejected": -19.341646194458008, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.5623277464462328e-05, |
|
"logits/chosen": -0.8289450407028198, |
|
"logits/rejected": -0.7163116931915283, |
|
"logps/chosen": -315.34765625, |
|
"logps/rejected": -438.7181091308594, |
|
"loss": 0.2319, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -10.446613311767578, |
|
"rewards/margins": 7.03295373916626, |
|
"rewards/rejected": -17.479564666748047, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.545019337251844e-05, |
|
"logits/chosen": -0.7775189280509949, |
|
"logits/rejected": -0.6993460655212402, |
|
"logps/chosen": -307.9075927734375, |
|
"logps/rejected": -410.87091064453125, |
|
"loss": 0.2345, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -12.067144393920898, |
|
"rewards/margins": 7.187569618225098, |
|
"rewards/rejected": -19.254711151123047, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.527764359108514e-05, |
|
"logits/chosen": -0.7477961778640747, |
|
"logits/rejected": -0.7031870484352112, |
|
"logps/chosen": -342.15264892578125, |
|
"logps/rejected": -424.57745361328125, |
|
"loss": 0.2554, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -11.66588306427002, |
|
"rewards/margins": 5.763236045837402, |
|
"rewards/rejected": -17.429119110107422, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.5105637774301538e-05, |
|
"logits/chosen": -0.7917758226394653, |
|
"logits/rejected": -0.6779271364212036, |
|
"logps/chosen": -323.00006103515625, |
|
"logps/rejected": -441.6917419433594, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -11.066459655761719, |
|
"rewards/margins": 7.040872097015381, |
|
"rewards/rejected": -18.107330322265625, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.4934185545872018e-05, |
|
"logits/chosen": -0.8037103414535522, |
|
"logits/rejected": -0.6963068246841431, |
|
"logps/chosen": -310.79595947265625, |
|
"logps/rejected": -422.8213806152344, |
|
"loss": 0.1437, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -12.192479133605957, |
|
"rewards/margins": 6.472376346588135, |
|
"rewards/rejected": -18.66485595703125, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.4763296498527743e-05, |
|
"logits/chosen": -0.759530782699585, |
|
"logits/rejected": -0.6498897671699524, |
|
"logps/chosen": -321.39703369140625, |
|
"logps/rejected": -416.76788330078125, |
|
"loss": 0.2091, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -11.570536613464355, |
|
"rewards/margins": 6.058943271636963, |
|
"rewards/rejected": -17.629478454589844, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.4592980193489975e-05, |
|
"logits/chosen": -0.8029254674911499, |
|
"logits/rejected": -0.7215882539749146, |
|
"logps/chosen": -262.2799072265625, |
|
"logps/rejected": -352.44500732421875, |
|
"loss": 0.1489, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.85411548614502, |
|
"rewards/margins": 5.6461181640625, |
|
"rewards/rejected": -15.50023365020752, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.4423246159935106e-05, |
|
"logits/chosen": -0.8059856295585632, |
|
"logits/rejected": -0.7341285347938538, |
|
"logps/chosen": -325.6473693847656, |
|
"logps/rejected": -416.6791076660156, |
|
"loss": 0.1912, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -10.563329696655273, |
|
"rewards/margins": 6.393132209777832, |
|
"rewards/rejected": -16.956462860107422, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.4254103894461523e-05, |
|
"logits/chosen": -0.7335131764411926, |
|
"logits/rejected": -0.6533200144767761, |
|
"logps/chosen": -318.139892578125, |
|
"logps/rejected": -401.5143127441406, |
|
"loss": 0.2516, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -10.073238372802734, |
|
"rewards/margins": 6.2794013023376465, |
|
"rewards/rejected": -16.35264015197754, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.4085562860558255e-05, |
|
"logits/chosen": -0.719249963760376, |
|
"logits/rejected": -0.6460169553756714, |
|
"logps/chosen": -322.72747802734375, |
|
"logps/rejected": -429.255615234375, |
|
"loss": 0.1255, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.514101028442383, |
|
"rewards/margins": 7.0062384605407715, |
|
"rewards/rejected": -17.520339965820312, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.3917632488075478e-05, |
|
"logits/chosen": -0.7693089246749878, |
|
"logits/rejected": -0.7073890566825867, |
|
"logps/chosen": -286.15875244140625, |
|
"logps/rejected": -377.4967956542969, |
|
"loss": 0.1379, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.781511306762695, |
|
"rewards/margins": 5.981696128845215, |
|
"rewards/rejected": -14.763208389282227, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.3750322172696972e-05, |
|
"logits/chosen": -0.7237778902053833, |
|
"logits/rejected": -0.6620101928710938, |
|
"logps/chosen": -343.70208740234375, |
|
"logps/rejected": -417.9403381347656, |
|
"loss": 0.1845, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -9.25228214263916, |
|
"rewards/margins": 6.4635820388793945, |
|
"rewards/rejected": -15.715863227844238, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.3583641275414374e-05, |
|
"logits/chosen": -0.7408095598220825, |
|
"logits/rejected": -0.675685703754425, |
|
"logps/chosen": -324.8341979980469, |
|
"logps/rejected": -430.85870361328125, |
|
"loss": 0.1623, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -10.316813468933105, |
|
"rewards/margins": 7.033916473388672, |
|
"rewards/rejected": -17.35072898864746, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.3417599122003464e-05, |
|
"logits/chosen": -0.711288332939148, |
|
"logits/rejected": -0.6432172060012817, |
|
"logps/chosen": -334.97637939453125, |
|
"logps/rejected": -435.75775146484375, |
|
"loss": 0.2307, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -10.687657356262207, |
|
"rewards/margins": 6.426644802093506, |
|
"rewards/rejected": -17.114303588867188, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.3252205002502366e-05, |
|
"logits/chosen": -0.7844573259353638, |
|
"logits/rejected": -0.6775509715080261, |
|
"logps/chosen": -284.2342834472656, |
|
"logps/rejected": -379.44110107421875, |
|
"loss": 0.1668, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.471349716186523, |
|
"rewards/margins": 5.543021202087402, |
|
"rewards/rejected": -15.014370918273926, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.3087468170691817e-05, |
|
"logits/chosen": -0.750018835067749, |
|
"logits/rejected": -0.6729363203048706, |
|
"logps/chosen": -306.68292236328125, |
|
"logps/rejected": -400.35662841796875, |
|
"loss": 0.1697, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -10.160202026367188, |
|
"rewards/margins": 6.081552982330322, |
|
"rewards/rejected": -16.24175453186035, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.2923397843577362e-05, |
|
"logits/chosen": -0.754466712474823, |
|
"logits/rejected": -0.6770204901695251, |
|
"logps/chosen": -278.1436462402344, |
|
"logps/rejected": -368.91998291015625, |
|
"loss": 0.1467, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -8.730508804321289, |
|
"rewards/margins": 5.559985637664795, |
|
"rewards/rejected": -14.290494918823242, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.2760003200873699e-05, |
|
"logits/chosen": -0.7139883637428284, |
|
"logits/rejected": -0.6362855434417725, |
|
"logps/chosen": -325.02447509765625, |
|
"logps/rejected": -417.4795837402344, |
|
"loss": 0.3396, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -9.703086853027344, |
|
"rewards/margins": 6.167611122131348, |
|
"rewards/rejected": -15.870697021484375, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.2597293384491055e-05, |
|
"logits/chosen": -0.7074969410896301, |
|
"logits/rejected": -0.6166056394577026, |
|
"logps/chosen": -294.9327087402344, |
|
"logps/rejected": -406.3798828125, |
|
"loss": 0.1569, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.556290626525879, |
|
"rewards/margins": 6.053108215332031, |
|
"rewards/rejected": -14.609397888183594, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -0.789574384689331, |
|
"eval_logits/rejected": -0.7062578201293945, |
|
"eval_logps/chosen": -301.232421875, |
|
"eval_logps/rejected": -388.99957275390625, |
|
"eval_loss": 0.31303778290748596, |
|
"eval_rewards/accuracies": 0.840653121471405, |
|
"eval_rewards/chosen": -8.730807304382324, |
|
"eval_rewards/margins": 5.1419358253479, |
|
"eval_rewards/rejected": -13.872743606567383, |
|
"eval_runtime": 632.3153, |
|
"eval_samples_per_second": 0.933, |
|
"eval_steps_per_second": 0.117, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.2435277498023726e-05, |
|
"logits/chosen": -0.7611679434776306, |
|
"logits/rejected": -0.6773897409439087, |
|
"logps/chosen": -317.03436279296875, |
|
"logps/rejected": -415.4639587402344, |
|
"loss": 0.1077, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.259374618530273, |
|
"rewards/margins": 5.53312349319458, |
|
"rewards/rejected": -13.792497634887695, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.2273964606240718e-05, |
|
"logits/chosen": -0.7133153080940247, |
|
"logits/rejected": -0.6482645273208618, |
|
"logps/chosen": -302.9320983886719, |
|
"logps/rejected": -396.37603759765625, |
|
"loss": 0.097, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.211008071899414, |
|
"rewards/margins": 6.492849826812744, |
|
"rewards/rejected": -14.703857421875, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.2113363734578548e-05, |
|
"logits/chosen": -0.7752204537391663, |
|
"logits/rejected": -0.669845700263977, |
|
"logps/chosen": -272.97967529296875, |
|
"logps/rejected": -356.39300537109375, |
|
"loss": 0.1588, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.287445068359375, |
|
"rewards/margins": 5.196799278259277, |
|
"rewards/rejected": -14.484247207641602, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.1953483868636297e-05, |
|
"logits/chosen": -0.7488294839859009, |
|
"logits/rejected": -0.6603050231933594, |
|
"logps/chosen": -281.25152587890625, |
|
"logps/rejected": -383.7618103027344, |
|
"loss": 0.1373, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.339924812316895, |
|
"rewards/margins": 5.9965314865112305, |
|
"rewards/rejected": -15.336456298828125, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.1794333953672893e-05, |
|
"logits/chosen": -0.7725504040718079, |
|
"logits/rejected": -0.6966471076011658, |
|
"logps/chosen": -263.1004638671875, |
|
"logps/rejected": -341.1067199707031, |
|
"loss": 0.1322, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.56114673614502, |
|
"rewards/margins": 5.4928059577941895, |
|
"rewards/rejected": -15.053950309753418, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.1635922894106563e-05, |
|
"logits/chosen": -0.7949464917182922, |
|
"logits/rejected": -0.6885765790939331, |
|
"logps/chosen": -277.63470458984375, |
|
"logps/rejected": -396.4646911621094, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -9.648672103881836, |
|
"rewards/margins": 6.952709197998047, |
|
"rewards/rejected": -16.60137939453125, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.1478259553016682e-05, |
|
"logits/chosen": -0.7254813313484192, |
|
"logits/rejected": -0.6469436883926392, |
|
"logps/chosen": -305.3290710449219, |
|
"logps/rejected": -412.3343200683594, |
|
"loss": 0.1569, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.462736129760742, |
|
"rewards/margins": 7.043429374694824, |
|
"rewards/rejected": -16.506162643432617, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.132135275164785e-05, |
|
"logits/chosen": -0.715414822101593, |
|
"logits/rejected": -0.633540153503418, |
|
"logps/chosen": -326.2629699707031, |
|
"logps/rejected": -436.6180725097656, |
|
"loss": 0.0853, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.169827461242676, |
|
"rewards/margins": 7.911678314208984, |
|
"rewards/rejected": -19.08150863647461, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.11652112689164e-05, |
|
"logits/chosen": -0.7707673907279968, |
|
"logits/rejected": -0.6575115919113159, |
|
"logps/chosen": -291.2886657714844, |
|
"logps/rejected": -415.09991455078125, |
|
"loss": 0.0722, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -11.116385459899902, |
|
"rewards/margins": 8.105655670166016, |
|
"rewards/rejected": -19.222042083740234, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.1009843840919148e-05, |
|
"logits/chosen": -0.7095931768417358, |
|
"logits/rejected": -0.6079090237617493, |
|
"logps/chosen": -335.9273681640625, |
|
"logps/rejected": -473.82720947265625, |
|
"loss": 0.1153, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -12.183424949645996, |
|
"rewards/margins": 9.362308502197266, |
|
"rewards/rejected": -21.545734405517578, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.0855259160444639e-05, |
|
"logits/chosen": -0.720962405204773, |
|
"logits/rejected": -0.6497917771339417, |
|
"logps/chosen": -356.9051513671875, |
|
"logps/rejected": -463.81121826171875, |
|
"loss": 0.1042, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.96000862121582, |
|
"rewards/margins": 7.875531196594238, |
|
"rewards/rejected": -20.83553695678711, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.070146587648682e-05, |
|
"logits/chosen": -0.6962164640426636, |
|
"logits/rejected": -0.6002769470214844, |
|
"logps/chosen": -313.9371643066406, |
|
"logps/rejected": -452.33734130859375, |
|
"loss": 0.0977, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -10.213385581970215, |
|
"rewards/margins": 8.161968231201172, |
|
"rewards/rejected": -18.375354766845703, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.054847259376108e-05, |
|
"logits/chosen": -0.7321516275405884, |
|
"logits/rejected": -0.6194690465927124, |
|
"logps/chosen": -328.2876281738281, |
|
"logps/rejected": -443.07080078125, |
|
"loss": 0.1543, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.295181274414062, |
|
"rewards/margins": 7.805327415466309, |
|
"rewards/rejected": -20.100509643554688, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.0396287872222826e-05, |
|
"logits/chosen": -0.6940537691116333, |
|
"logits/rejected": -0.5973513722419739, |
|
"logps/chosen": -344.5832824707031, |
|
"logps/rejected": -469.13226318359375, |
|
"loss": 0.0966, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.552555084228516, |
|
"rewards/margins": 8.099371910095215, |
|
"rewards/rejected": -19.651926040649414, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.0244920226588597e-05, |
|
"logits/chosen": -0.7023944854736328, |
|
"logits/rejected": -0.5980740785598755, |
|
"logps/chosen": -326.51611328125, |
|
"logps/rejected": -451.57196044921875, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.211535453796387, |
|
"rewards/margins": 7.994822025299072, |
|
"rewards/rejected": -19.20635986328125, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.0094378125859602e-05, |
|
"logits/chosen": -0.6640266180038452, |
|
"logits/rejected": -0.5848334431648254, |
|
"logps/chosen": -310.6120910644531, |
|
"logps/rejected": -409.9443359375, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.730935096740723, |
|
"rewards/margins": 6.782107353210449, |
|
"rewards/rejected": -18.513042449951172, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.944669992847946e-06, |
|
"logits/chosen": -0.6944273710250854, |
|
"logits/rejected": -0.604742705821991, |
|
"logps/chosen": -330.4608154296875, |
|
"logps/rejected": -453.2232360839844, |
|
"loss": 0.086, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -12.015411376953125, |
|
"rewards/margins": 8.11739730834961, |
|
"rewards/rejected": -20.132808685302734, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.79580420370532e-06, |
|
"logits/chosen": -0.7286392450332642, |
|
"logits/rejected": -0.6263021230697632, |
|
"logps/chosen": -308.3360595703125, |
|
"logps/rejected": -432.9368591308594, |
|
"loss": 0.165, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -11.494908332824707, |
|
"rewards/margins": 7.952043056488037, |
|
"rewards/rejected": -19.44695281982422, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.64778908745437e-06, |
|
"logits/chosen": -0.716744601726532, |
|
"logits/rejected": -0.6369553804397583, |
|
"logps/chosen": -316.4634094238281, |
|
"logps/rejected": -428.46661376953125, |
|
"loss": 0.1728, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -12.187715530395508, |
|
"rewards/margins": 7.782134056091309, |
|
"rewards/rejected": -19.969850540161133, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.500632925522735e-06, |
|
"logits/chosen": -0.6882919073104858, |
|
"logits/rejected": -0.6268382668495178, |
|
"logps/chosen": -319.6679992675781, |
|
"logps/rejected": -438.73016357421875, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -12.020519256591797, |
|
"rewards/margins": 8.925442695617676, |
|
"rewards/rejected": -20.945960998535156, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_logits/chosen": -0.7649368047714233, |
|
"eval_logits/rejected": -0.6812288165092468, |
|
"eval_logps/chosen": -335.2308654785156, |
|
"eval_logps/rejected": -445.3587951660156, |
|
"eval_loss": 0.3642928898334503, |
|
"eval_rewards/accuracies": 0.8389639258384705, |
|
"eval_rewards/chosen": -12.13064956665039, |
|
"eval_rewards/margins": 7.378009796142578, |
|
"eval_rewards/rejected": -19.5086612701416, |
|
"eval_runtime": 631.0725, |
|
"eval_samples_per_second": 0.935, |
|
"eval_steps_per_second": 0.117, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.354343951279632e-06, |
|
"logits/chosen": -0.7252810001373291, |
|
"logits/rejected": -0.6079670190811157, |
|
"logps/chosen": -309.0469970703125, |
|
"logps/rejected": -457.93267822265625, |
|
"loss": 0.2084, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -12.472055435180664, |
|
"rewards/margins": 9.21606731414795, |
|
"rewards/rejected": -21.688121795654297, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.208930349575242e-06, |
|
"logits/chosen": -0.6720947027206421, |
|
"logits/rejected": -0.5662213563919067, |
|
"logps/chosen": -321.1401062011719, |
|
"logps/rejected": -448.83868408203125, |
|
"loss": 0.1943, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -11.870892524719238, |
|
"rewards/margins": 8.259675025939941, |
|
"rewards/rejected": -20.130569458007812, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.064400256282757e-06, |
|
"logits/chosen": -0.6956688165664673, |
|
"logits/rejected": -0.6111762523651123, |
|
"logps/chosen": -315.5310974121094, |
|
"logps/rejected": -440.07305908203125, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.427092552185059, |
|
"rewards/margins": 8.057827949523926, |
|
"rewards/rejected": -19.484920501708984, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.920761757843201e-06, |
|
"logits/chosen": -0.7292866706848145, |
|
"logits/rejected": -0.6416276693344116, |
|
"logps/chosen": -322.6993103027344, |
|
"logps/rejected": -446.1142578125, |
|
"loss": 0.1744, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -10.683124542236328, |
|
"rewards/margins": 7.721262454986572, |
|
"rewards/rejected": -18.404386520385742, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.778022890812951e-06, |
|
"logits/chosen": -0.6863434314727783, |
|
"logits/rejected": -0.6112778782844543, |
|
"logps/chosen": -297.8013916015625, |
|
"logps/rejected": -401.1690368652344, |
|
"loss": 0.1268, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.715972900390625, |
|
"rewards/margins": 8.0502290725708, |
|
"rewards/rejected": -18.766202926635742, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.636191641414126e-06, |
|
"logits/chosen": -0.7288874387741089, |
|
"logits/rejected": -0.633169949054718, |
|
"logps/chosen": -291.8887939453125, |
|
"logps/rejected": -412.6988830566406, |
|
"loss": 0.0766, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.809249877929688, |
|
"rewards/margins": 6.968530178070068, |
|
"rewards/rejected": -17.777782440185547, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.495275945087744e-06, |
|
"logits/chosen": -0.7463115453720093, |
|
"logits/rejected": -0.663573145866394, |
|
"logps/chosen": -305.2309265136719, |
|
"logps/rejected": -413.68060302734375, |
|
"loss": 0.075, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.54316520690918, |
|
"rewards/margins": 7.828655242919922, |
|
"rewards/rejected": -18.3718204498291, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.355283686049753e-06, |
|
"logits/chosen": -0.6592190861701965, |
|
"logits/rejected": -0.6037066578865051, |
|
"logps/chosen": -350.1833801269531, |
|
"logps/rejected": -446.6246032714844, |
|
"loss": 0.1614, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -12.453142166137695, |
|
"rewards/margins": 7.875624179840088, |
|
"rewards/rejected": -20.328765869140625, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.2162226968499e-06, |
|
"logits/chosen": -0.7309607863426208, |
|
"logits/rejected": -0.6280379295349121, |
|
"logps/chosen": -320.71087646484375, |
|
"logps/rejected": -439.3594665527344, |
|
"loss": 0.1038, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -11.36469841003418, |
|
"rewards/margins": 7.764462947845459, |
|
"rewards/rejected": -19.129161834716797, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.078100757933485e-06, |
|
"logits/chosen": -0.752869725227356, |
|
"logits/rejected": -0.6576402187347412, |
|
"logps/chosen": -331.00958251953125, |
|
"logps/rejected": -480.381591796875, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -10.68550968170166, |
|
"rewards/margins": 9.434442520141602, |
|
"rewards/rejected": -20.119953155517578, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.940925597206054e-06, |
|
"logits/chosen": -0.7123974561691284, |
|
"logits/rejected": -0.63076251745224, |
|
"logps/chosen": -336.4826965332031, |
|
"logps/rejected": -456.6839904785156, |
|
"loss": 0.0782, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -12.438753128051758, |
|
"rewards/margins": 8.677999496459961, |
|
"rewards/rejected": -21.11675453186035, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.804704889601053e-06, |
|
"logits/chosen": -0.7371711730957031, |
|
"logits/rejected": -0.6661837100982666, |
|
"logps/chosen": -346.4564208984375, |
|
"logps/rejected": -456.84149169921875, |
|
"loss": 0.2248, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -13.385701179504395, |
|
"rewards/margins": 7.154642581939697, |
|
"rewards/rejected": -20.540346145629883, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.669446256650379e-06, |
|
"logits/chosen": -0.762246310710907, |
|
"logits/rejected": -0.6955366730690002, |
|
"logps/chosen": -311.168212890625, |
|
"logps/rejected": -435.25042724609375, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.985504150390625, |
|
"rewards/margins": 8.26388931274414, |
|
"rewards/rejected": -19.249393463134766, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.535157266057966e-06, |
|
"logits/chosen": -0.7201426029205322, |
|
"logits/rejected": -0.6314554214477539, |
|
"logps/chosen": -316.18902587890625, |
|
"logps/rejected": -429.396484375, |
|
"loss": 0.1438, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -12.557358741760254, |
|
"rewards/margins": 8.245777130126953, |
|
"rewards/rejected": -20.803136825561523, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.401845431276378e-06, |
|
"logits/chosen": -0.7455998659133911, |
|
"logits/rejected": -0.6421800851821899, |
|
"logps/chosen": -313.2838134765625, |
|
"logps/rejected": -451.022216796875, |
|
"loss": 0.0919, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -12.479084014892578, |
|
"rewards/margins": 9.705238342285156, |
|
"rewards/rejected": -22.1843204498291, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.2959045297832655e-06, |
|
"logits/chosen": -0.6873536705970764, |
|
"logits/rejected": -0.6031038761138916, |
|
"logps/chosen": -368.57135009765625, |
|
"logps/rejected": -476.9346618652344, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -14.671506881713867, |
|
"rewards/margins": 8.930734634399414, |
|
"rewards/rejected": -23.60224151611328, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.164370334582929e-06, |
|
"logits/chosen": -0.7369526028633118, |
|
"logits/rejected": -0.6584600210189819, |
|
"logps/chosen": -343.1117248535156, |
|
"logps/rejected": -468.9244689941406, |
|
"loss": 0.1545, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -13.25184440612793, |
|
"rewards/margins": 9.72932243347168, |
|
"rewards/rejected": -22.98116683959961, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.0338340406768125e-06, |
|
"logits/chosen": -0.772254467010498, |
|
"logits/rejected": -0.6851469278335571, |
|
"logps/chosen": -354.40570068359375, |
|
"logps/rejected": -454.22650146484375, |
|
"loss": 0.4876, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -14.60003662109375, |
|
"rewards/margins": 6.612873077392578, |
|
"rewards/rejected": -21.212909698486328, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 6.904302951554553e-06, |
|
"logits/chosen": -0.7557543516159058, |
|
"logits/rejected": -0.6616033911705017, |
|
"logps/chosen": -361.57037353515625, |
|
"logps/rejected": -494.0552673339844, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -14.2002592086792, |
|
"rewards/margins": 8.30376148223877, |
|
"rewards/rejected": -22.504018783569336, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 6.775784314464717e-06, |
|
"logits/chosen": -0.7427912950515747, |
|
"logits/rejected": -0.682418942451477, |
|
"logps/chosen": -352.435546875, |
|
"logps/rejected": -462.44061279296875, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -13.251668930053711, |
|
"rewards/margins": 8.912009239196777, |
|
"rewards/rejected": -22.163679122924805, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_logits/chosen": -0.7945932745933533, |
|
"eval_logits/rejected": -0.7141597270965576, |
|
"eval_logps/chosen": -342.12542724609375, |
|
"eval_logps/rejected": -455.8727722167969, |
|
"eval_loss": 0.37223583459854126, |
|
"eval_rewards/accuracies": 0.840653121471405, |
|
"eval_rewards/chosen": -12.820108413696289, |
|
"eval_rewards/margins": 7.73995304107666, |
|
"eval_rewards/rejected": -20.560060501098633, |
|
"eval_runtime": 629.4936, |
|
"eval_samples_per_second": 0.937, |
|
"eval_steps_per_second": 0.118, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 6.6482853200092924e-06, |
|
"logits/chosen": -0.6936749219894409, |
|
"logits/rejected": -0.6220188736915588, |
|
"logps/chosen": -356.90338134765625, |
|
"logps/rejected": -475.64691162109375, |
|
"loss": 0.1111, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -13.1315336227417, |
|
"rewards/margins": 8.700096130371094, |
|
"rewards/rejected": -21.83163070678711, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.521813101741411e-06, |
|
"logits/chosen": -0.7499922513961792, |
|
"logits/rejected": -0.6749961972236633, |
|
"logps/chosen": -344.0547180175781, |
|
"logps/rejected": -466.3595275878906, |
|
"loss": 0.1059, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -12.227682113647461, |
|
"rewards/margins": 8.883087158203125, |
|
"rewards/rejected": -21.11077117919922, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.396374735766181e-06, |
|
"logits/chosen": -0.7167544960975647, |
|
"logits/rejected": -0.6709025502204895, |
|
"logps/chosen": -351.1544189453125, |
|
"logps/rejected": -438.2394104003906, |
|
"loss": 0.1059, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -13.60278606414795, |
|
"rewards/margins": 7.707704067230225, |
|
"rewards/rejected": -21.310489654541016, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.271977240344795e-06, |
|
"logits/chosen": -0.7438760995864868, |
|
"logits/rejected": -0.6707813143730164, |
|
"logps/chosen": -321.48309326171875, |
|
"logps/rejected": -439.22149658203125, |
|
"loss": 0.0866, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -12.498029708862305, |
|
"rewards/margins": 8.192224502563477, |
|
"rewards/rejected": -20.69025421142578, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.1486275755019005e-06, |
|
"logits/chosen": -0.6989586353302002, |
|
"logits/rejected": -0.6493550539016724, |
|
"logps/chosen": -329.2831726074219, |
|
"logps/rejected": -439.18218994140625, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -13.665060043334961, |
|
"rewards/margins": 8.314728736877441, |
|
"rewards/rejected": -21.979787826538086, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.026332642636115e-06, |
|
"logits/chosen": -0.702304482460022, |
|
"logits/rejected": -0.6438758373260498, |
|
"logps/chosen": -390.09967041015625, |
|
"logps/rejected": -493.91180419921875, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -13.747671127319336, |
|
"rewards/margins": 8.659051895141602, |
|
"rewards/rejected": -22.406721115112305, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 5.905099284133952e-06, |
|
"logits/chosen": -0.7772132754325867, |
|
"logits/rejected": -0.6689720153808594, |
|
"logps/chosen": -308.82843017578125, |
|
"logps/rejected": -431.65509033203125, |
|
"loss": 0.1382, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.163544654846191, |
|
"rewards/margins": 8.15145492553711, |
|
"rewards/rejected": -20.314998626708984, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 5.784934282986956e-06, |
|
"logits/chosen": -0.7181901335716248, |
|
"logits/rejected": -0.6271171569824219, |
|
"logps/chosen": -342.0475158691406, |
|
"logps/rejected": -475.50592041015625, |
|
"loss": 0.1081, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.426340103149414, |
|
"rewards/margins": 9.301948547363281, |
|
"rewards/rejected": -21.728286743164062, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 5.66584436241222e-06, |
|
"logits/chosen": -0.7596362233161926, |
|
"logits/rejected": -0.6781996488571167, |
|
"logps/chosen": -330.19781494140625, |
|
"logps/rejected": -452.943115234375, |
|
"loss": 0.1819, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -11.281097412109375, |
|
"rewards/margins": 8.911158561706543, |
|
"rewards/rejected": -20.192258834838867, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.547836185476215e-06, |
|
"logits/chosen": -0.7507878541946411, |
|
"logits/rejected": -0.7018341422080994, |
|
"logps/chosen": -341.07794189453125, |
|
"logps/rejected": -448.7339782714844, |
|
"loss": 0.0879, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -11.908292770385742, |
|
"rewards/margins": 8.718576431274414, |
|
"rewards/rejected": -20.626869201660156, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.430916354721979e-06, |
|
"logits/chosen": -0.7594354748725891, |
|
"logits/rejected": -0.6595009565353394, |
|
"logps/chosen": -316.389892578125, |
|
"logps/rejected": -454.554443359375, |
|
"loss": 0.1113, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.677900314331055, |
|
"rewards/margins": 8.170553207397461, |
|
"rewards/rejected": -20.84845542907715, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.3150914117996995e-06, |
|
"logits/chosen": -0.7210731506347656, |
|
"logits/rejected": -0.6538037061691284, |
|
"logps/chosen": -318.8692932128906, |
|
"logps/rejected": -421.8642578125, |
|
"loss": 0.1365, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.26388168334961, |
|
"rewards/margins": 7.935153961181641, |
|
"rewards/rejected": -20.199037551879883, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.200367837100758e-06, |
|
"logits/chosen": -0.7334044575691223, |
|
"logits/rejected": -0.672054648399353, |
|
"logps/chosen": -323.24078369140625, |
|
"logps/rejected": -429.05157470703125, |
|
"loss": 0.069, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -12.442166328430176, |
|
"rewards/margins": 7.741090297698975, |
|
"rewards/rejected": -20.18325424194336, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.086752049395094e-06, |
|
"logits/chosen": -0.7441354990005493, |
|
"logits/rejected": -0.6552435755729675, |
|
"logps/chosen": -315.9906921386719, |
|
"logps/rejected": -443.126220703125, |
|
"loss": 0.1109, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -11.681642532348633, |
|
"rewards/margins": 8.423504829406738, |
|
"rewards/rejected": -20.105148315429688, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.974250405472109e-06, |
|
"logits/chosen": -0.7096320986747742, |
|
"logits/rejected": -0.6288970112800598, |
|
"logps/chosen": -354.1776123046875, |
|
"logps/rejected": -479.00018310546875, |
|
"loss": 0.0848, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.93957233428955, |
|
"rewards/margins": 8.176461219787598, |
|
"rewards/rejected": -21.11603355407715, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.862869199784984e-06, |
|
"logits/chosen": -0.7399638891220093, |
|
"logits/rejected": -0.6677154898643494, |
|
"logps/chosen": -356.2599792480469, |
|
"logps/rejected": -480.15069580078125, |
|
"loss": 0.2023, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.257336616516113, |
|
"rewards/margins": 8.49841594696045, |
|
"rewards/rejected": -20.755756378173828, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.752614664098543e-06, |
|
"logits/chosen": -0.7103666067123413, |
|
"logits/rejected": -0.639140248298645, |
|
"logps/chosen": -376.16302490234375, |
|
"logps/rejected": -511.90496826171875, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -13.711847305297852, |
|
"rewards/margins": 10.188641548156738, |
|
"rewards/rejected": -23.900487899780273, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.643492967140539e-06, |
|
"logits/chosen": -0.7156988978385925, |
|
"logits/rejected": -0.6765825748443604, |
|
"logps/chosen": -373.48577880859375, |
|
"logps/rejected": -482.2682189941406, |
|
"loss": 0.2513, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -13.767964363098145, |
|
"rewards/margins": 8.47219467163086, |
|
"rewards/rejected": -22.240158081054688, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.535510214256549e-06, |
|
"logits/chosen": -0.7317145466804504, |
|
"logits/rejected": -0.6762410402297974, |
|
"logps/chosen": -327.09478759765625, |
|
"logps/rejected": -449.31414794921875, |
|
"loss": 0.1438, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -11.390679359436035, |
|
"rewards/margins": 8.416214942932129, |
|
"rewards/rejected": -19.806894302368164, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.428672447068357e-06, |
|
"logits/chosen": -0.7378804683685303, |
|
"logits/rejected": -0.6945641040802002, |
|
"logps/chosen": -323.4637756347656, |
|
"logps/rejected": -419.35260009765625, |
|
"loss": 0.1131, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -11.466388702392578, |
|
"rewards/margins": 7.729311466217041, |
|
"rewards/rejected": -19.195703506469727, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_logits/chosen": -0.7948499321937561, |
|
"eval_logits/rejected": -0.715542733669281, |
|
"eval_logps/chosen": -339.3921813964844, |
|
"eval_logps/rejected": -452.3672790527344, |
|
"eval_loss": 0.36875221133232117, |
|
"eval_rewards/accuracies": 0.8474099040031433, |
|
"eval_rewards/chosen": -12.546786308288574, |
|
"eval_rewards/margins": 7.662726402282715, |
|
"eval_rewards/rejected": -20.20951271057129, |
|
"eval_runtime": 625.8738, |
|
"eval_samples_per_second": 0.943, |
|
"eval_steps_per_second": 0.118, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.322985643135952e-06, |
|
"logits/chosen": -0.7170902490615845, |
|
"logits/rejected": -0.6458307504653931, |
|
"logps/chosen": -341.5028991699219, |
|
"logps/rejected": -437.49224853515625, |
|
"loss": 0.1824, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.686590194702148, |
|
"rewards/margins": 7.4898223876953125, |
|
"rewards/rejected": -20.176410675048828, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.218455715623068e-06, |
|
"logits/chosen": -0.7318763136863708, |
|
"logits/rejected": -0.6571904420852661, |
|
"logps/chosen": -300.69110107421875, |
|
"logps/rejected": -413.32635498046875, |
|
"loss": 0.1268, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.804631233215332, |
|
"rewards/margins": 6.896049499511719, |
|
"rewards/rejected": -19.700679779052734, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.115088512966334e-06, |
|
"logits/chosen": -0.7219916582107544, |
|
"logits/rejected": -0.6459473967552185, |
|
"logps/chosen": -341.9270935058594, |
|
"logps/rejected": -463.9122009277344, |
|
"loss": 0.1514, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -11.602262496948242, |
|
"rewards/margins": 8.365216255187988, |
|
"rewards/rejected": -19.967479705810547, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.012889818548069e-06, |
|
"logits/chosen": -0.7364938855171204, |
|
"logits/rejected": -0.6087579727172852, |
|
"logps/chosen": -348.5492248535156, |
|
"logps/rejected": -495.57550048828125, |
|
"loss": 0.0564, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -13.234907150268555, |
|
"rewards/margins": 9.206663131713867, |
|
"rewards/rejected": -22.441570281982422, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.911865350372709e-06, |
|
"logits/chosen": -0.7059997916221619, |
|
"logits/rejected": -0.6595257520675659, |
|
"logps/chosen": -325.22308349609375, |
|
"logps/rejected": -446.0380859375, |
|
"loss": 0.0853, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.88878059387207, |
|
"rewards/margins": 8.93287181854248, |
|
"rewards/rejected": -21.8216552734375, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.812020760746868e-06, |
|
"logits/chosen": -0.7647970914840698, |
|
"logits/rejected": -0.6486034393310547, |
|
"logps/chosen": -317.36492919921875, |
|
"logps/rejected": -447.95867919921875, |
|
"loss": 0.0711, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -12.881604194641113, |
|
"rewards/margins": 8.319483757019043, |
|
"rewards/rejected": -21.201087951660156, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.7133616359630974e-06, |
|
"logits/chosen": -0.7516032457351685, |
|
"logits/rejected": -0.658551812171936, |
|
"logps/chosen": -330.45745849609375, |
|
"logps/rejected": -453.15594482421875, |
|
"loss": 0.0743, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -12.388571739196777, |
|
"rewards/margins": 9.21638298034668, |
|
"rewards/rejected": -21.604957580566406, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.6158934959873353e-06, |
|
"logits/chosen": -0.784338116645813, |
|
"logits/rejected": -0.6736604571342468, |
|
"logps/chosen": -320.1543884277344, |
|
"logps/rejected": -450.89013671875, |
|
"loss": 0.1555, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -12.086525917053223, |
|
"rewards/margins": 8.345721244812012, |
|
"rewards/rejected": -20.4322452545166, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.5196217941500796e-06, |
|
"logits/chosen": -0.7751826047897339, |
|
"logits/rejected": -0.6881800889968872, |
|
"logps/chosen": -339.41839599609375, |
|
"logps/rejected": -474.77801513671875, |
|
"loss": 0.0875, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -13.182660102844238, |
|
"rewards/margins": 8.881035804748535, |
|
"rewards/rejected": -22.063697814941406, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.4245519168412454e-06, |
|
"logits/chosen": -0.7259628176689148, |
|
"logits/rejected": -0.6622989773750305, |
|
"logps/chosen": -355.24053955078125, |
|
"logps/rejected": -447.45086669921875, |
|
"loss": 0.2138, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -13.35826587677002, |
|
"rewards/margins": 7.497842311859131, |
|
"rewards/rejected": -20.856107711791992, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.3306891832088156e-06, |
|
"logits/chosen": -0.746944010257721, |
|
"logits/rejected": -0.6582907438278198, |
|
"logps/chosen": -380.9755859375, |
|
"logps/rejected": -486.35308837890625, |
|
"loss": 0.1279, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.789942741394043, |
|
"rewards/margins": 8.4408597946167, |
|
"rewards/rejected": -21.230802536010742, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2380388448612437e-06, |
|
"logits/chosen": -0.6990784406661987, |
|
"logits/rejected": -0.6312959790229797, |
|
"logps/chosen": -366.27655029296875, |
|
"logps/rejected": -469.2547302246094, |
|
"loss": 0.244, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -15.525199890136719, |
|
"rewards/margins": 7.310397148132324, |
|
"rewards/rejected": -22.83559799194336, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.146606085573603e-06, |
|
"logits/chosen": -0.740514874458313, |
|
"logits/rejected": -0.6599899530410767, |
|
"logps/chosen": -317.57574462890625, |
|
"logps/rejected": -435.7705993652344, |
|
"loss": 0.1435, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.04985237121582, |
|
"rewards/margins": 7.957073211669922, |
|
"rewards/rejected": -20.006925582885742, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.056396020997562e-06, |
|
"logits/chosen": -0.7322230339050293, |
|
"logits/rejected": -0.6569544076919556, |
|
"logps/chosen": -362.6092834472656, |
|
"logps/rejected": -485.44482421875, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -13.219217300415039, |
|
"rewards/margins": 8.730463027954102, |
|
"rewards/rejected": -21.949678421020508, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.967413698375196e-06, |
|
"logits/chosen": -0.7420450448989868, |
|
"logits/rejected": -0.6468544006347656, |
|
"logps/chosen": -327.250732421875, |
|
"logps/rejected": -459.04473876953125, |
|
"loss": 0.177, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -13.451974868774414, |
|
"rewards/margins": 8.368537902832031, |
|
"rewards/rejected": -21.820512771606445, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.8796640962565374e-06, |
|
"logits/chosen": -0.7958037853240967, |
|
"logits/rejected": -0.6800004243850708, |
|
"logps/chosen": -296.9697265625, |
|
"logps/rejected": -447.215576171875, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -11.82789421081543, |
|
"rewards/margins": 8.446623802185059, |
|
"rewards/rejected": -20.274517059326172, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.793152124221082e-06, |
|
"logits/chosen": -0.7644537687301636, |
|
"logits/rejected": -0.7251614332199097, |
|
"logps/chosen": -329.49261474609375, |
|
"logps/rejected": -430.5299377441406, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -11.930797576904297, |
|
"rewards/margins": 8.091941833496094, |
|
"rewards/rejected": -20.02273941040039, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.707882622603064e-06, |
|
"logits/chosen": -0.7441886067390442, |
|
"logits/rejected": -0.6562716960906982, |
|
"logps/chosen": -357.31890869140625, |
|
"logps/rejected": -461.20294189453125, |
|
"loss": 0.1297, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -12.62116527557373, |
|
"rewards/margins": 7.356168270111084, |
|
"rewards/rejected": -19.977331161499023, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.6238603622206427e-06, |
|
"logits/chosen": -0.7436205744743347, |
|
"logits/rejected": -0.6629747152328491, |
|
"logps/chosen": -311.5270080566406, |
|
"logps/rejected": -435.06500244140625, |
|
"loss": 0.0931, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.621973037719727, |
|
"rewards/margins": 8.590081214904785, |
|
"rewards/rejected": -20.212055206298828, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.5410900441089903e-06, |
|
"logits/chosen": -0.7431643009185791, |
|
"logits/rejected": -0.6770168542861938, |
|
"logps/chosen": -330.6737976074219, |
|
"logps/rejected": -447.3936462402344, |
|
"loss": 0.0945, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.168190956115723, |
|
"rewards/margins": 8.586854934692383, |
|
"rewards/rejected": -20.755046844482422, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_logits/chosen": -0.7968736886978149, |
|
"eval_logits/rejected": -0.7183612585067749, |
|
"eval_logps/chosen": -335.7042236328125, |
|
"eval_logps/rejected": -446.875732421875, |
|
"eval_loss": 0.3584381639957428, |
|
"eval_rewards/accuracies": 0.8457207083702087, |
|
"eval_rewards/chosen": -12.177987098693848, |
|
"eval_rewards/margins": 7.48237419128418, |
|
"eval_rewards/rejected": -19.66036033630371, |
|
"eval_runtime": 624.735, |
|
"eval_samples_per_second": 0.944, |
|
"eval_steps_per_second": 0.118, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.4595762992572712e-06, |
|
"logits/chosen": -0.7891380190849304, |
|
"logits/rejected": -0.6826705932617188, |
|
"logps/chosen": -301.73529052734375, |
|
"logps/rejected": -426.7586364746094, |
|
"loss": 0.089, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.779993057250977, |
|
"rewards/margins": 8.001260757446289, |
|
"rewards/rejected": -19.781253814697266, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.379323688349516e-06, |
|
"logits/chosen": -0.774692952632904, |
|
"logits/rejected": -0.6912479400634766, |
|
"logps/chosen": -312.35345458984375, |
|
"logps/rejected": -437.408935546875, |
|
"loss": 0.1415, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -11.091418266296387, |
|
"rewards/margins": 8.911345481872559, |
|
"rewards/rejected": -20.002765655517578, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.3003367015094648e-06, |
|
"logits/chosen": -0.7375775575637817, |
|
"logits/rejected": -0.6502904295921326, |
|
"logps/chosen": -316.27899169921875, |
|
"logps/rejected": -445.7583923339844, |
|
"loss": 0.1636, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -11.737090110778809, |
|
"rewards/margins": 8.128774642944336, |
|
"rewards/rejected": -19.865863800048828, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.222619758049366e-06, |
|
"logits/chosen": -0.7506482005119324, |
|
"logits/rejected": -0.6760163307189941, |
|
"logps/chosen": -322.274658203125, |
|
"logps/rejected": -427.0292053222656, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -11.803396224975586, |
|
"rewards/margins": 8.132063865661621, |
|
"rewards/rejected": -19.935461044311523, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.1461772062226744e-06, |
|
"logits/chosen": -0.7350204586982727, |
|
"logits/rejected": -0.6686005592346191, |
|
"logps/chosen": -341.1342468261719, |
|
"logps/rejected": -450.25115966796875, |
|
"loss": 0.1273, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -13.146771430969238, |
|
"rewards/margins": 7.856252193450928, |
|
"rewards/rejected": -21.00302505493164, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.0710133229807972e-06, |
|
"logits/chosen": -0.715478777885437, |
|
"logits/rejected": -0.6499828100204468, |
|
"logps/chosen": -338.44708251953125, |
|
"logps/rejected": -469.2776794433594, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -12.38154411315918, |
|
"rewards/margins": 9.532751083374023, |
|
"rewards/rejected": -21.914297103881836, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9971323137337877e-06, |
|
"logits/chosen": -0.7852704524993896, |
|
"logits/rejected": -0.6784430146217346, |
|
"logps/chosen": -316.7117919921875, |
|
"logps/rejected": -447.6055603027344, |
|
"loss": 0.0958, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.501298904418945, |
|
"rewards/margins": 8.513867378234863, |
|
"rewards/rejected": -20.015165328979492, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.9245383121150677e-06, |
|
"logits/chosen": -0.7602156400680542, |
|
"logits/rejected": -0.644091784954071, |
|
"logps/chosen": -325.88238525390625, |
|
"logps/rejected": -452.2569274902344, |
|
"loss": 0.0752, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.122355461120605, |
|
"rewards/margins": 8.712382316589355, |
|
"rewards/rejected": -20.83473777770996, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.8532353797501318e-06, |
|
"logits/chosen": -0.8067118525505066, |
|
"logits/rejected": -0.6883231401443481, |
|
"logps/chosen": -263.7755126953125, |
|
"logps/rejected": -394.8865966796875, |
|
"loss": 0.1782, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -11.147211074829102, |
|
"rewards/margins": 8.368277549743652, |
|
"rewards/rejected": -19.51548957824707, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.7832275060293058e-06, |
|
"logits/chosen": -0.713138222694397, |
|
"logits/rejected": -0.6749216318130493, |
|
"logps/chosen": -358.03326416015625, |
|
"logps/rejected": -430.595458984375, |
|
"loss": 0.192, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -12.651468276977539, |
|
"rewards/margins": 6.105168342590332, |
|
"rewards/rejected": -18.756637573242188, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.714518607884541e-06, |
|
"logits/chosen": -0.7321309447288513, |
|
"logits/rejected": -0.695332407951355, |
|
"logps/chosen": -344.478515625, |
|
"logps/rejected": -447.49676513671875, |
|
"loss": 0.0926, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -14.228265762329102, |
|
"rewards/margins": 8.79620361328125, |
|
"rewards/rejected": -23.02446746826172, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.6471125295702771e-06, |
|
"logits/chosen": -0.6923691034317017, |
|
"logits/rejected": -0.6331754326820374, |
|
"logps/chosen": -372.4197998046875, |
|
"logps/rejected": -483.35211181640625, |
|
"loss": 0.1032, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -13.578657150268555, |
|
"rewards/margins": 8.160131454467773, |
|
"rewards/rejected": -21.738788604736328, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.5810130424483344e-06, |
|
"logits/chosen": -0.7160879373550415, |
|
"logits/rejected": -0.6329125165939331, |
|
"logps/chosen": -336.08477783203125, |
|
"logps/rejected": -432.1241760253906, |
|
"loss": 0.2121, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -13.814013481140137, |
|
"rewards/margins": 6.619012355804443, |
|
"rewards/rejected": -20.433029174804688, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.516223844776915e-06, |
|
"logits/chosen": -0.7514700889587402, |
|
"logits/rejected": -0.6609387993812561, |
|
"logps/chosen": -324.404052734375, |
|
"logps/rejected": -448.9908752441406, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -11.760614395141602, |
|
"rewards/margins": 8.642805099487305, |
|
"rewards/rejected": -20.403419494628906, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.4527485615036862e-06, |
|
"logits/chosen": -0.7465354204177856, |
|
"logits/rejected": -0.6571397185325623, |
|
"logps/chosen": -353.6893005371094, |
|
"logps/rejected": -489.797607421875, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.575372695922852, |
|
"rewards/margins": 8.94145393371582, |
|
"rewards/rejected": -21.51682472229004, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.3905907440629752e-06, |
|
"logits/chosen": -0.6932064890861511, |
|
"logits/rejected": -0.6302663683891296, |
|
"logps/chosen": -348.0553283691406, |
|
"logps/rejected": -456.8697814941406, |
|
"loss": 0.2112, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -12.977668762207031, |
|
"rewards/margins": 8.577461242675781, |
|
"rewards/rejected": -21.555130004882812, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.3297538701770501e-06, |
|
"logits/chosen": -0.747500479221344, |
|
"logits/rejected": -0.7013819813728333, |
|
"logps/chosen": -332.62005615234375, |
|
"logps/rejected": -440.78790283203125, |
|
"loss": 0.0986, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.874519348144531, |
|
"rewards/margins": 8.623730659484863, |
|
"rewards/rejected": -20.498249053955078, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.2702413436615462e-06, |
|
"logits/chosen": -0.7028972506523132, |
|
"logits/rejected": -0.6385982036590576, |
|
"logps/chosen": -361.61181640625, |
|
"logps/rejected": -468.5482482910156, |
|
"loss": 0.1186, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.61416244506836, |
|
"rewards/margins": 8.866883277893066, |
|
"rewards/rejected": -21.48104476928711, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.2120564942350283e-06, |
|
"logits/chosen": -0.7638375759124756, |
|
"logits/rejected": -0.6577700972557068, |
|
"logps/chosen": -319.9593811035156, |
|
"logps/rejected": -437.13031005859375, |
|
"loss": 0.1401, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.249938011169434, |
|
"rewards/margins": 8.148392677307129, |
|
"rewards/rejected": -20.398330688476562, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.1552025773327007e-06, |
|
"logits/chosen": -0.7245215773582458, |
|
"logits/rejected": -0.6284134387969971, |
|
"logps/chosen": -306.7867431640625, |
|
"logps/rejected": -436.78546142578125, |
|
"loss": 0.1278, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -12.182191848754883, |
|
"rewards/margins": 9.15956974029541, |
|
"rewards/rejected": -21.34176254272461, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_logits/chosen": -0.7998992800712585, |
|
"eval_logits/rejected": -0.7217458486557007, |
|
"eval_logps/chosen": -342.2319641113281, |
|
"eval_logps/rejected": -457.3778991699219, |
|
"eval_loss": 0.36866995692253113, |
|
"eval_rewards/accuracies": 0.8440315127372742, |
|
"eval_rewards/chosen": -12.830760955810547, |
|
"eval_rewards/margins": 7.879815101623535, |
|
"eval_rewards/rejected": -20.7105770111084, |
|
"eval_runtime": 624.3362, |
|
"eval_samples_per_second": 0.945, |
|
"eval_steps_per_second": 0.119, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.0996827739242378e-06, |
|
"logits/chosen": -0.7412150502204895, |
|
"logits/rejected": -0.6861370801925659, |
|
"logps/chosen": -306.575927734375, |
|
"logps/rejected": -408.7833557128906, |
|
"loss": 0.1435, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -12.888595581054688, |
|
"rewards/margins": 7.791186332702637, |
|
"rewards/rejected": -20.679784774780273, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.0455001903358408e-06, |
|
"logits/chosen": -0.709639847278595, |
|
"logits/rejected": -0.6203404068946838, |
|
"logps/chosen": -321.64569091796875, |
|
"logps/rejected": -453.91485595703125, |
|
"loss": 0.1028, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.887872695922852, |
|
"rewards/margins": 8.95976448059082, |
|
"rewards/rejected": -20.847637176513672, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.926578580764234e-07, |
|
"logits/chosen": -0.7054871320724487, |
|
"logits/rejected": -0.6629317402839661, |
|
"logps/chosen": -357.4303894042969, |
|
"logps/rejected": -468.74737548828125, |
|
"loss": 0.2004, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -13.1585111618042, |
|
"rewards/margins": 8.137038230895996, |
|
"rewards/rejected": -21.295551300048828, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.411587336679989e-07, |
|
"logits/chosen": -0.7350466251373291, |
|
"logits/rejected": -0.6697758436203003, |
|
"logps/chosen": -312.005615234375, |
|
"logps/rejected": -406.21966552734375, |
|
"loss": 0.1247, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -12.038698196411133, |
|
"rewards/margins": 7.214744567871094, |
|
"rewards/rejected": -19.25344467163086, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.910056984802684e-07, |
|
"logits/chosen": -0.7347787022590637, |
|
"logits/rejected": -0.6246198415756226, |
|
"logps/chosen": -298.8125305175781, |
|
"logps/rejected": -445.503662109375, |
|
"loss": 0.0763, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.131376266479492, |
|
"rewards/margins": 9.458544731140137, |
|
"rewards/rejected": -21.589921951293945, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.422015585694027e-07, |
|
"logits/chosen": -0.7699988484382629, |
|
"logits/rejected": -0.6832550168037415, |
|
"logps/chosen": -343.7002868652344, |
|
"logps/rejected": -494.98895263671875, |
|
"loss": 0.0463, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -12.285306930541992, |
|
"rewards/margins": 9.926998138427734, |
|
"rewards/rejected": -22.212303161621094, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.947490445210481e-07, |
|
"logits/chosen": -0.7716695666313171, |
|
"logits/rejected": -0.6654036641120911, |
|
"logps/chosen": -318.41204833984375, |
|
"logps/rejected": -472.1630859375, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.414375305175781, |
|
"rewards/margins": 9.393988609313965, |
|
"rewards/rejected": -19.808364868164062, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.486508112975548e-07, |
|
"logits/chosen": -0.750883162021637, |
|
"logits/rejected": -0.6417092084884644, |
|
"logps/chosen": -358.035888671875, |
|
"logps/rejected": -483.28375244140625, |
|
"loss": 0.1304, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -12.721799850463867, |
|
"rewards/margins": 9.087457656860352, |
|
"rewards/rejected": -21.809255599975586, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.039094380894201e-07, |
|
"logits/chosen": -0.7995635271072388, |
|
"logits/rejected": -0.7029562592506409, |
|
"logps/chosen": -294.6755065917969, |
|
"logps/rejected": -413.42633056640625, |
|
"loss": 0.1785, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -11.831644058227539, |
|
"rewards/margins": 7.644218444824219, |
|
"rewards/rejected": -19.47586441040039, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.605274281709928e-07, |
|
"logits/chosen": -0.7642998099327087, |
|
"logits/rejected": -0.659366250038147, |
|
"logps/chosen": -306.3656005859375, |
|
"logps/rejected": -450.6410217285156, |
|
"loss": 0.1146, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.251351356506348, |
|
"rewards/margins": 10.174342155456543, |
|
"rewards/rejected": -20.425691604614258, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.18507208760416e-07, |
|
"logits/chosen": -0.7471681833267212, |
|
"logits/rejected": -0.6478559374809265, |
|
"logps/chosen": -321.161865234375, |
|
"logps/rejected": -449.05914306640625, |
|
"loss": 0.0901, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.283639907836914, |
|
"rewards/margins": 8.744890213012695, |
|
"rewards/rejected": -21.028532028198242, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.778511308838108e-07, |
|
"logits/chosen": -0.7544344663619995, |
|
"logits/rejected": -0.6768537759780884, |
|
"logps/chosen": -310.51556396484375, |
|
"logps/rejected": -426.0118103027344, |
|
"loss": 0.1287, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.344531059265137, |
|
"rewards/margins": 6.812464237213135, |
|
"rewards/rejected": -19.156993865966797, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.385614692437535e-07, |
|
"logits/chosen": -0.6924375295639038, |
|
"logits/rejected": -0.6150094270706177, |
|
"logps/chosen": -387.09197998046875, |
|
"logps/rejected": -509.143798828125, |
|
"loss": 0.1562, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -13.612237930297852, |
|
"rewards/margins": 9.396891593933105, |
|
"rewards/rejected": -23.00912857055664, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.006404220919996e-07, |
|
"logits/chosen": -0.754596471786499, |
|
"logits/rejected": -0.6776602864265442, |
|
"logps/chosen": -318.8907165527344, |
|
"logps/rejected": -433.22845458984375, |
|
"loss": 0.1572, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.657855033874512, |
|
"rewards/margins": 8.882246971130371, |
|
"rewards/rejected": -21.54010009765625, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.6409011110648824e-07, |
|
"logits/chosen": -0.7340957522392273, |
|
"logits/rejected": -0.6476359963417053, |
|
"logps/chosen": -379.16302490234375, |
|
"logps/rejected": -497.436767578125, |
|
"loss": 0.1936, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -12.644585609436035, |
|
"rewards/margins": 8.512834548950195, |
|
"rewards/rejected": -21.157419204711914, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.2891258127264745e-07, |
|
"logits/chosen": -0.7442411184310913, |
|
"logits/rejected": -0.705703616142273, |
|
"logps/chosen": -350.7422790527344, |
|
"logps/rejected": -448.9573669433594, |
|
"loss": 0.116, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -13.5606050491333, |
|
"rewards/margins": 8.004423141479492, |
|
"rewards/rejected": -21.56502914428711, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.95109800768953e-07, |
|
"logits/chosen": -0.7254571914672852, |
|
"logits/rejected": -0.6887027621269226, |
|
"logps/chosen": -375.9730224609375, |
|
"logps/rejected": -458.26971435546875, |
|
"loss": 0.2051, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -13.190668106079102, |
|
"rewards/margins": 7.557926177978516, |
|
"rewards/rejected": -20.748594284057617, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.626836608568329e-07, |
|
"logits/chosen": -0.7006704211235046, |
|
"logits/rejected": -0.6205529570579529, |
|
"logps/chosen": -319.3720703125, |
|
"logps/rejected": -416.69512939453125, |
|
"loss": 0.1203, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.687978744506836, |
|
"rewards/margins": 7.960215091705322, |
|
"rewards/rejected": -19.648197174072266, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.3163597577484386e-07, |
|
"logits/chosen": -0.7121957540512085, |
|
"logits/rejected": -0.6481103897094727, |
|
"logps/chosen": -326.4841003417969, |
|
"logps/rejected": -440.3439025878906, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.64727783203125, |
|
"rewards/margins": 9.399308204650879, |
|
"rewards/rejected": -21.046586990356445, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.0196848263716327e-07, |
|
"logits/chosen": -0.7509000897407532, |
|
"logits/rejected": -0.6495534181594849, |
|
"logps/chosen": -319.08905029296875, |
|
"logps/rejected": -482.9366760253906, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -11.420373916625977, |
|
"rewards/margins": 11.027167320251465, |
|
"rewards/rejected": -22.44754409790039, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_logits/chosen": -0.8029840588569641, |
|
"eval_logits/rejected": -0.725413978099823, |
|
"eval_logps/chosen": -341.44500732421875, |
|
"eval_logps/rejected": -456.2197570800781, |
|
"eval_loss": 0.3673795163631439, |
|
"eval_rewards/accuracies": 0.8423423171043396, |
|
"eval_rewards/chosen": -12.752063751220703, |
|
"eval_rewards/margins": 7.842697620391846, |
|
"eval_rewards/rejected": -20.59476089477539, |
|
"eval_runtime": 623.8707, |
|
"eval_samples_per_second": 0.946, |
|
"eval_steps_per_second": 0.119, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.7368284133639234e-07, |
|
"logits/chosen": -0.7552281618118286, |
|
"logits/rejected": -0.6545037627220154, |
|
"logps/chosen": -314.4807434082031, |
|
"logps/rejected": -443.82147216796875, |
|
"loss": 0.1253, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.672876358032227, |
|
"rewards/margins": 7.915417671203613, |
|
"rewards/rejected": -20.58829116821289, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.4678063445069677e-07, |
|
"logits/chosen": -0.7692059278488159, |
|
"logits/rejected": -0.7040669322013855, |
|
"logps/chosen": -350.35260009765625, |
|
"logps/rejected": -466.9951171875, |
|
"loss": 0.1242, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -14.1748685836792, |
|
"rewards/margins": 8.309191703796387, |
|
"rewards/rejected": -22.48406219482422, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2126336715526108e-07, |
|
"logits/chosen": -0.8056436777114868, |
|
"logits/rejected": -0.7118881344795227, |
|
"logps/chosen": -284.54278564453125, |
|
"logps/rejected": -399.5000915527344, |
|
"loss": 0.1557, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -11.914950370788574, |
|
"rewards/margins": 7.418919563293457, |
|
"rewards/rejected": -19.33386993408203, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.9713246713805588e-07, |
|
"logits/chosen": -0.8005222082138062, |
|
"logits/rejected": -0.6936267018318176, |
|
"logps/chosen": -307.55511474609375, |
|
"logps/rejected": -454.9164123535156, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -11.771246910095215, |
|
"rewards/margins": 9.68768310546875, |
|
"rewards/rejected": -21.45892906188965, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.7438928451998237e-07, |
|
"logits/chosen": -0.7469397187232971, |
|
"logits/rejected": -0.6772466897964478, |
|
"logps/chosen": -342.0335998535156, |
|
"logps/rejected": -445.16015625, |
|
"loss": 0.1061, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -11.809127807617188, |
|
"rewards/margins": 8.282861709594727, |
|
"rewards/rejected": -20.091991424560547, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.5303509177932162e-07, |
|
"logits/chosen": -0.7542864680290222, |
|
"logits/rejected": -0.6823698282241821, |
|
"logps/chosen": -313.5861511230469, |
|
"logps/rejected": -404.9496765136719, |
|
"loss": 0.1119, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -12.185118675231934, |
|
"rewards/margins": 7.000368595123291, |
|
"rewards/rejected": -19.185487747192383, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.3307108368054155e-07, |
|
"logits/chosen": -0.7539202570915222, |
|
"logits/rejected": -0.645233154296875, |
|
"logps/chosen": -308.0301208496094, |
|
"logps/rejected": -441.741943359375, |
|
"loss": 0.1458, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.790322303771973, |
|
"rewards/margins": 8.550907135009766, |
|
"rewards/rejected": -20.341230392456055, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.1449837720745037e-07, |
|
"logits/chosen": -0.718715488910675, |
|
"logits/rejected": -0.6458573341369629, |
|
"logps/chosen": -330.89447021484375, |
|
"logps/rejected": -441.72528076171875, |
|
"loss": 0.1692, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -12.986010551452637, |
|
"rewards/margins": 8.552804946899414, |
|
"rewards/rejected": -21.538816452026367, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 9.731801150069387e-08, |
|
"logits/chosen": -0.7210200428962708, |
|
"logits/rejected": -0.6217471957206726, |
|
"logps/chosen": -347.4426574707031, |
|
"logps/rejected": -486.0657653808594, |
|
"loss": 0.0726, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -12.151016235351562, |
|
"rewards/margins": 9.202622413635254, |
|
"rewards/rejected": -21.3536376953125, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.15309477996351e-08, |
|
"logits/chosen": -0.7119767069816589, |
|
"logits/rejected": -0.6514331698417664, |
|
"logps/chosen": -362.55950927734375, |
|
"logps/rejected": -488.556396484375, |
|
"loss": 0.1991, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -13.719714164733887, |
|
"rewards/margins": 9.306211471557617, |
|
"rewards/rejected": -23.025928497314453, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.71380693885476e-08, |
|
"logits/chosen": -0.7274218797683716, |
|
"logits/rejected": -0.6286214590072632, |
|
"logps/chosen": -301.7664794921875, |
|
"logps/rejected": -432.122802734375, |
|
"loss": 0.1054, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.399335861206055, |
|
"rewards/margins": 8.370085716247559, |
|
"rewards/rejected": -19.769420623779297, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.414018154721867e-08, |
|
"logits/chosen": -0.7270965576171875, |
|
"logits/rejected": -0.6529392004013062, |
|
"logps/chosen": -382.65313720703125, |
|
"logps/rejected": -480.0885314941406, |
|
"loss": 0.1839, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -13.652270317077637, |
|
"rewards/margins": 7.76800537109375, |
|
"rewards/rejected": -21.420276641845703, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.253801150587711e-08, |
|
"logits/chosen": -0.7643383145332336, |
|
"logits/rejected": -0.667759895324707, |
|
"logps/chosen": -337.26776123046875, |
|
"logps/rejected": -496.50762939453125, |
|
"loss": 0.0747, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -10.862228393554688, |
|
"rewards/margins": 10.453422546386719, |
|
"rewards/rejected": -21.31565284729004, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.233220840451745e-08, |
|
"logits/chosen": -0.7620392441749573, |
|
"logits/rejected": -0.6975805759429932, |
|
"logps/chosen": -349.519287109375, |
|
"logps/rejected": -454.963623046875, |
|
"loss": 0.2762, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -14.199236869812012, |
|
"rewards/margins": 7.715222358703613, |
|
"rewards/rejected": -21.914459228515625, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.352334325657346e-08, |
|
"logits/chosen": -0.7281954884529114, |
|
"logits/rejected": -0.6507598161697388, |
|
"logps/chosen": -318.5377197265625, |
|
"logps/rejected": -432.486328125, |
|
"loss": 0.1795, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -12.938772201538086, |
|
"rewards/margins": 8.980487823486328, |
|
"rewards/rejected": -21.919260025024414, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.6111908916965903e-08, |
|
"logits/chosen": -0.7682954668998718, |
|
"logits/rejected": -0.6592772006988525, |
|
"logps/chosen": -321.8499755859375, |
|
"logps/rejected": -453.6134338378906, |
|
"loss": 0.1563, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -12.17408275604248, |
|
"rewards/margins": 7.791375160217285, |
|
"rewards/rejected": -19.965457916259766, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.009832005454403e-08, |
|
"logits/chosen": -0.7626203298568726, |
|
"logits/rejected": -0.7161679267883301, |
|
"logps/chosen": -344.42767333984375, |
|
"logps/rejected": -449.30120849609375, |
|
"loss": 0.1605, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -12.562573432922363, |
|
"rewards/margins": 7.796416282653809, |
|
"rewards/rejected": -20.35898780822754, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 5.48291312886251e-09, |
|
"logits/chosen": -0.7352942228317261, |
|
"logits/rejected": -0.6795369982719421, |
|
"logps/chosen": -343.33673095703125, |
|
"logps/rejected": -459.80108642578125, |
|
"loss": 0.0967, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -12.095025062561035, |
|
"rewards/margins": 8.376882553100586, |
|
"rewards/rejected": -20.471906661987305, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.2659463713770036e-09, |
|
"logits/chosen": -0.7337998151779175, |
|
"logits/rejected": -0.6700155138969421, |
|
"logps/chosen": -360.0134582519531, |
|
"logps/rejected": -472.6907653808594, |
|
"loss": 0.0913, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -13.066427230834961, |
|
"rewards/margins": 7.702609062194824, |
|
"rewards/rejected": -20.76903533935547, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.4759977098074446e-10, |
|
"logits/chosen": -0.6764062643051147, |
|
"logits/rejected": -0.5925078392028809, |
|
"logps/chosen": -360.7391052246094, |
|
"logps/rejected": -490.27947998046875, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -13.079617500305176, |
|
"rewards/margins": 9.26874828338623, |
|
"rewards/rejected": -22.348365783691406, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -0.8027873635292053, |
|
"eval_logits/rejected": -0.7252189517021179, |
|
"eval_logps/chosen": -341.1743469238281, |
|
"eval_logps/rejected": -455.748779296875, |
|
"eval_loss": 0.36684781312942505, |
|
"eval_rewards/accuracies": 0.8423423171043396, |
|
"eval_rewards/chosen": -12.725000381469727, |
|
"eval_rewards/margins": 7.822659015655518, |
|
"eval_rewards/rejected": -20.547658920288086, |
|
"eval_runtime": 632.3932, |
|
"eval_samples_per_second": 0.933, |
|
"eval_steps_per_second": 0.117, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2100, |
|
"total_flos": 2.1332577741304627e+18, |
|
"train_loss": 0.22352480982031142, |
|
"train_runtime": 78395.9769, |
|
"train_samples_per_second": 0.429, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"max_steps": 2100, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.1332577741304627e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|