|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.6155917425310937, |
|
"eval_steps": 10, |
|
"global_step": 700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004615976407231696, |
|
"grad_norm": 60.83765068742266, |
|
"learning_rate": 1.1494252873563218e-08, |
|
"logits/chosen": 0.4711977541446686, |
|
"logits/rejected": 0.4847034811973572, |
|
"logps/chosen": -41.84939193725586, |
|
"logps/rejected": -44.508792877197266, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.009231952814463392, |
|
"grad_norm": 89.49857504360673, |
|
"learning_rate": 2.2988505747126436e-08, |
|
"logits/chosen": 0.4102262556552887, |
|
"logits/rejected": 0.4489870071411133, |
|
"logps/chosen": -33.33359909057617, |
|
"logps/rejected": -48.11466979980469, |
|
"loss": 0.6965, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.016991177573800087, |
|
"rewards/margins": -0.00249446090310812, |
|
"rewards/rejected": 0.01948563940823078, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01384792922169509, |
|
"grad_norm": 91.503921376188, |
|
"learning_rate": 3.448275862068965e-08, |
|
"logits/chosen": 0.4212642312049866, |
|
"logits/rejected": 0.448761522769928, |
|
"logps/chosen": -39.75364685058594, |
|
"logps/rejected": -51.98044967651367, |
|
"loss": 0.7058, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": 0.01352924108505249, |
|
"rewards/margins": -0.021095700562000275, |
|
"rewards/rejected": 0.034624941647052765, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.018463905628926785, |
|
"grad_norm": 74.67414376851612, |
|
"learning_rate": 4.597701149425287e-08, |
|
"logits/chosen": 0.3533351719379425, |
|
"logits/rejected": 0.38716739416122437, |
|
"logps/chosen": -42.66749954223633, |
|
"logps/rejected": -59.93525695800781, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.5972222089767456, |
|
"rewards/chosen": 0.15689438581466675, |
|
"rewards/margins": 0.02920585870742798, |
|
"rewards/rejected": 0.12768852710723877, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.023079882036158482, |
|
"grad_norm": 68.70984683419653, |
|
"learning_rate": 5.747126436781609e-08, |
|
"logits/chosen": 0.49728691577911377, |
|
"logits/rejected": 0.5158182978630066, |
|
"logps/chosen": -40.442108154296875, |
|
"logps/rejected": -47.894962310791016, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": 0.20257243514060974, |
|
"rewards/margins": 0.033093564212322235, |
|
"rewards/rejected": 0.1694788932800293, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.023079882036158482, |
|
"eval_logits/chosen": 0.3284655511379242, |
|
"eval_logits/rejected": 0.3523290753364563, |
|
"eval_logps/chosen": -41.368160247802734, |
|
"eval_logps/rejected": -47.68316650390625, |
|
"eval_loss": 0.6900005340576172, |
|
"eval_rewards/accuracies": 0.5040322542190552, |
|
"eval_rewards/chosen": 0.18856020271778107, |
|
"eval_rewards/margins": 0.010975954122841358, |
|
"eval_rewards/rejected": 0.1775842159986496, |
|
"eval_runtime": 223.5149, |
|
"eval_samples_per_second": 7.758, |
|
"eval_steps_per_second": 1.942, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02769585844339018, |
|
"grad_norm": 83.08821357925031, |
|
"learning_rate": 6.89655172413793e-08, |
|
"logits/chosen": 0.39168137311935425, |
|
"logits/rejected": 0.428312748670578, |
|
"logps/chosen": -40.189659118652344, |
|
"logps/rejected": -55.229732513427734, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": 0.10889428108930588, |
|
"rewards/margins": 0.025906018912792206, |
|
"rewards/rejected": 0.08298826217651367, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.032311834850621876, |
|
"grad_norm": 83.2788469670015, |
|
"learning_rate": 8.045977011494252e-08, |
|
"logits/chosen": 0.4244603216648102, |
|
"logits/rejected": 0.45606857538223267, |
|
"logps/chosen": -45.81875228881836, |
|
"logps/rejected": -59.79555130004883, |
|
"loss": 0.707, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.0007680323324166238, |
|
"rewards/margins": -0.02245757356286049, |
|
"rewards/rejected": 0.023225605487823486, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03692781125785357, |
|
"grad_norm": 65.27464739827121, |
|
"learning_rate": 9.195402298850574e-08, |
|
"logits/chosen": 0.43778783082962036, |
|
"logits/rejected": 0.47771337628364563, |
|
"logps/chosen": -33.643489837646484, |
|
"logps/rejected": -47.315940856933594, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.16155114769935608, |
|
"rewards/margins": 0.009526676498353481, |
|
"rewards/rejected": 0.15202444791793823, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04154378766508527, |
|
"grad_norm": 60.46601141846051, |
|
"learning_rate": 1.0344827586206897e-07, |
|
"logits/chosen": 0.4576772153377533, |
|
"logits/rejected": 0.4669303894042969, |
|
"logps/chosen": -49.01601791381836, |
|
"logps/rejected": -44.165489196777344, |
|
"loss": 0.7024, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": 0.1639019399881363, |
|
"rewards/margins": -0.013081331737339497, |
|
"rewards/rejected": 0.1769832819700241, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.046159764072316964, |
|
"grad_norm": 79.31933330847342, |
|
"learning_rate": 1.1494252873563217e-07, |
|
"logits/chosen": 0.40101033449172974, |
|
"logits/rejected": 0.4429229199886322, |
|
"logps/chosen": -42.295860290527344, |
|
"logps/rejected": -61.62363052368164, |
|
"loss": 0.6993, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/chosen": 0.1744026243686676, |
|
"rewards/margins": -0.005564332008361816, |
|
"rewards/rejected": 0.17996692657470703, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.046159764072316964, |
|
"eval_logits/chosen": 0.3300890624523163, |
|
"eval_logits/rejected": 0.3539319634437561, |
|
"eval_logps/chosen": -41.36879348754883, |
|
"eval_logps/rejected": -47.67192840576172, |
|
"eval_loss": 0.6927710771560669, |
|
"eval_rewards/accuracies": 0.4694700539112091, |
|
"eval_rewards/chosen": 0.1882432997226715, |
|
"eval_rewards/margins": 0.005040565971285105, |
|
"eval_rewards/rejected": 0.18320275843143463, |
|
"eval_runtime": 220.5959, |
|
"eval_samples_per_second": 7.861, |
|
"eval_steps_per_second": 1.967, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05077574047954866, |
|
"grad_norm": 74.95614553584633, |
|
"learning_rate": 1.2643678160919542e-07, |
|
"logits/chosen": 0.35644879937171936, |
|
"logits/rejected": 0.39824995398521423, |
|
"logps/chosen": -44.09666442871094, |
|
"logps/rejected": -67.98532104492188, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.061192478984594345, |
|
"rewards/margins": 0.024405598640441895, |
|
"rewards/rejected": 0.03678688034415245, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05539171688678036, |
|
"grad_norm": 59.95529358393387, |
|
"learning_rate": 1.379310344827586e-07, |
|
"logits/chosen": 0.4076593816280365, |
|
"logits/rejected": 0.4187220335006714, |
|
"logps/chosen": -50.34169006347656, |
|
"logps/rejected": -52.33488464355469, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.5972222089767456, |
|
"rewards/chosen": 0.18035806715488434, |
|
"rewards/margins": 0.046983275562524796, |
|
"rewards/rejected": 0.13337479531764984, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06000769329401205, |
|
"grad_norm": 58.82337491053465, |
|
"learning_rate": 1.4942528735632184e-07, |
|
"logits/chosen": 0.38400429487228394, |
|
"logits/rejected": 0.3896331191062927, |
|
"logps/chosen": -45.30482482910156, |
|
"logps/rejected": -38.63485336303711, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 0.12515152990818024, |
|
"rewards/margins": 0.005594419315457344, |
|
"rewards/rejected": 0.11955711245536804, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06462366970124375, |
|
"grad_norm": 79.835671840217, |
|
"learning_rate": 1.6091954022988505e-07, |
|
"logits/chosen": 0.38133352994918823, |
|
"logits/rejected": 0.4193841814994812, |
|
"logps/chosen": -46.66801452636719, |
|
"logps/rejected": -66.68572998046875, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 0.1615171581506729, |
|
"rewards/margins": 0.03017430752515793, |
|
"rewards/rejected": 0.1313428282737732, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06923964610847544, |
|
"grad_norm": 63.4551490148668, |
|
"learning_rate": 1.7241379310344828e-07, |
|
"logits/chosen": 0.38135284185409546, |
|
"logits/rejected": 0.4095006585121155, |
|
"logps/chosen": -40.06434631347656, |
|
"logps/rejected": -49.53153610229492, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.18324324488639832, |
|
"rewards/margins": 0.04405728355050087, |
|
"rewards/rejected": 0.13918595016002655, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06923964610847544, |
|
"eval_logits/chosen": 0.33052363991737366, |
|
"eval_logits/rejected": 0.3544217050075531, |
|
"eval_logps/chosen": -41.5091438293457, |
|
"eval_logps/rejected": -47.86111068725586, |
|
"eval_loss": 0.6812014579772949, |
|
"eval_rewards/accuracies": 0.546658992767334, |
|
"eval_rewards/chosen": 0.11806601285934448, |
|
"eval_rewards/margins": 0.029455602169036865, |
|
"eval_rewards/rejected": 0.08861041069030762, |
|
"eval_runtime": 220.5898, |
|
"eval_samples_per_second": 7.861, |
|
"eval_steps_per_second": 1.967, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07385562251570714, |
|
"grad_norm": 54.00177702879831, |
|
"learning_rate": 1.839080459770115e-07, |
|
"logits/chosen": 0.4309755563735962, |
|
"logits/rejected": 0.45285335183143616, |
|
"logps/chosen": -42.45962905883789, |
|
"logps/rejected": -47.46916198730469, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 0.059671804308891296, |
|
"rewards/margins": 0.03700065612792969, |
|
"rewards/rejected": 0.022671150043606758, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07847159892293884, |
|
"grad_norm": 55.987754524641964, |
|
"learning_rate": 1.9540229885057472e-07, |
|
"logits/chosen": 0.3958838880062103, |
|
"logits/rejected": 0.43136459589004517, |
|
"logps/chosen": -37.61958694458008, |
|
"logps/rejected": -52.296146392822266, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": 0.2221948355436325, |
|
"rewards/margins": 0.04236772283911705, |
|
"rewards/rejected": 0.17982712388038635, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.08308757533017054, |
|
"grad_norm": 67.31410028619514, |
|
"learning_rate": 2.0689655172413793e-07, |
|
"logits/chosen": 0.44812121987342834, |
|
"logits/rejected": 0.46431127190589905, |
|
"logps/chosen": -42.98078155517578, |
|
"logps/rejected": -41.65153884887695, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.6388888955116272, |
|
"rewards/chosen": 0.30534830689430237, |
|
"rewards/margins": 0.09969804435968399, |
|
"rewards/rejected": 0.20565026998519897, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08770355173740223, |
|
"grad_norm": 57.904024813693326, |
|
"learning_rate": 2.1839080459770114e-07, |
|
"logits/chosen": 0.49128374457359314, |
|
"logits/rejected": 0.5145975351333618, |
|
"logps/chosen": -44.50560760498047, |
|
"logps/rejected": -49.38070297241211, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.5972222089767456, |
|
"rewards/chosen": 0.18226391077041626, |
|
"rewards/margins": 0.0719500482082367, |
|
"rewards/rejected": 0.11031384021043777, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.09231952814463393, |
|
"grad_norm": 64.05496800782066, |
|
"learning_rate": 2.2988505747126435e-07, |
|
"logits/chosen": 0.46414005756378174, |
|
"logits/rejected": 0.47909700870513916, |
|
"logps/chosen": -45.80656433105469, |
|
"logps/rejected": -48.13614273071289, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.6388888955116272, |
|
"rewards/chosen": 0.1019454374909401, |
|
"rewards/margins": 0.07323868572711945, |
|
"rewards/rejected": 0.028706755489110947, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09231952814463393, |
|
"eval_logits/chosen": 0.3304091989994049, |
|
"eval_logits/rejected": 0.35432368516921997, |
|
"eval_logps/chosen": -41.51032638549805, |
|
"eval_logps/rejected": -47.951194763183594, |
|
"eval_loss": 0.6623325347900391, |
|
"eval_rewards/accuracies": 0.5748847723007202, |
|
"eval_rewards/chosen": 0.11747448146343231, |
|
"eval_rewards/margins": 0.07390521466732025, |
|
"eval_rewards/rejected": 0.04356926307082176, |
|
"eval_runtime": 220.5888, |
|
"eval_samples_per_second": 7.861, |
|
"eval_steps_per_second": 1.967, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09693550455186563, |
|
"grad_norm": 57.52177717893154, |
|
"learning_rate": 2.413793103448276e-07, |
|
"logits/chosen": 0.40689817070961, |
|
"logits/rejected": 0.427402138710022, |
|
"logps/chosen": -38.75439453125, |
|
"logps/rejected": -44.31669235229492, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.7222222089767456, |
|
"rewards/chosen": 0.2862703502178192, |
|
"rewards/margins": 0.14747940003871918, |
|
"rewards/rejected": 0.13879093527793884, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.10155148095909731, |
|
"grad_norm": 64.43087177898828, |
|
"learning_rate": 2.5287356321839084e-07, |
|
"logits/chosen": 0.38502392172813416, |
|
"logits/rejected": 0.42915642261505127, |
|
"logps/chosen": -44.23611831665039, |
|
"logps/rejected": -70.150634765625, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.29370981454849243, |
|
"rewards/margins": 0.11090421676635742, |
|
"rewards/rejected": 0.18280558288097382, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.10616745736632902, |
|
"grad_norm": 60.64457511313282, |
|
"learning_rate": 2.64367816091954e-07, |
|
"logits/chosen": 0.4625084698200226, |
|
"logits/rejected": 0.47940170764923096, |
|
"logps/chosen": -47.40989685058594, |
|
"logps/rejected": -50.2266731262207, |
|
"loss": 0.6586, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 0.19395428895950317, |
|
"rewards/margins": 0.09819034487009048, |
|
"rewards/rejected": 0.09576395153999329, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.11078343377356072, |
|
"grad_norm": 48.97275141927136, |
|
"learning_rate": 2.758620689655172e-07, |
|
"logits/chosen": 0.40377330780029297, |
|
"logits/rejected": 0.4251302480697632, |
|
"logps/chosen": -40.91835021972656, |
|
"logps/rejected": -46.69221878051758, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.19001546502113342, |
|
"rewards/margins": 0.11491294950246811, |
|
"rewards/rejected": 0.07510250806808472, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.1153994101807924, |
|
"grad_norm": 50.405334242894924, |
|
"learning_rate": 2.873563218390804e-07, |
|
"logits/chosen": 0.42986366152763367, |
|
"logits/rejected": 0.4425734579563141, |
|
"logps/chosen": -45.240882873535156, |
|
"logps/rejected": -45.33219528198242, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.5972222089767456, |
|
"rewards/chosen": 0.27720221877098083, |
|
"rewards/margins": 0.10149689018726349, |
|
"rewards/rejected": 0.17570529878139496, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1153994101807924, |
|
"eval_logits/chosen": 0.3320508301258087, |
|
"eval_logits/rejected": 0.35591834783554077, |
|
"eval_logps/chosen": -41.36655807495117, |
|
"eval_logps/rejected": -48.028892517089844, |
|
"eval_loss": 0.6255878210067749, |
|
"eval_rewards/accuracies": 0.6278801560401917, |
|
"eval_rewards/chosen": 0.1893603652715683, |
|
"eval_rewards/margins": 0.18464061617851257, |
|
"eval_rewards/rejected": 0.004719759337604046, |
|
"eval_runtime": 220.5277, |
|
"eval_samples_per_second": 7.863, |
|
"eval_steps_per_second": 1.968, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1200153865880241, |
|
"grad_norm": 59.45036930086866, |
|
"learning_rate": 2.988505747126437e-07, |
|
"logits/chosen": 0.4412320852279663, |
|
"logits/rejected": 0.47745391726493835, |
|
"logps/chosen": -38.808204650878906, |
|
"logps/rejected": -57.61214828491211, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.24403540790081024, |
|
"rewards/margins": 0.14384596049785614, |
|
"rewards/rejected": 0.10018942505121231, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1246313629952558, |
|
"grad_norm": 54.50350019894718, |
|
"learning_rate": 3.103448275862069e-07, |
|
"logits/chosen": 0.305615097284317, |
|
"logits/rejected": 0.3378358781337738, |
|
"logps/chosen": -41.46311950683594, |
|
"logps/rejected": -55.873138427734375, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/chosen": 0.2176976054906845, |
|
"rewards/margins": 0.17212893068790436, |
|
"rewards/rejected": 0.045568663626909256, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.1292473394024875, |
|
"grad_norm": 49.044811443941626, |
|
"learning_rate": 3.218390804597701e-07, |
|
"logits/chosen": 0.4806426763534546, |
|
"logits/rejected": 0.5007810592651367, |
|
"logps/chosen": -37.00300216674805, |
|
"logps/rejected": -42.795040130615234, |
|
"loss": 0.6005, |
|
"rewards/accuracies": 0.6944444179534912, |
|
"rewards/chosen": 0.41853082180023193, |
|
"rewards/margins": 0.23157899081707, |
|
"rewards/rejected": 0.18695180118083954, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1338633158097192, |
|
"grad_norm": 54.19272761171978, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": 0.4073159098625183, |
|
"logits/rejected": 0.4315372109413147, |
|
"logps/chosen": -39.63461685180664, |
|
"logps/rejected": -41.75359344482422, |
|
"loss": 0.5767, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.488341748714447, |
|
"rewards/margins": 0.2964838743209839, |
|
"rewards/rejected": 0.19185791909694672, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.13847929221695088, |
|
"grad_norm": 45.35161413256781, |
|
"learning_rate": 3.4482758620689656e-07, |
|
"logits/chosen": 0.3869187831878662, |
|
"logits/rejected": 0.4154462218284607, |
|
"logps/chosen": -40.21774673461914, |
|
"logps/rejected": -49.05698013305664, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.6944444179534912, |
|
"rewards/chosen": 0.4058813452720642, |
|
"rewards/margins": 0.2916874289512634, |
|
"rewards/rejected": 0.11419390141963959, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13847929221695088, |
|
"eval_logits/chosen": 0.3351307511329651, |
|
"eval_logits/rejected": 0.35898876190185547, |
|
"eval_logps/chosen": -40.8883171081543, |
|
"eval_logps/rejected": -47.73066711425781, |
|
"eval_loss": 0.5981891751289368, |
|
"eval_rewards/accuracies": 0.6745391488075256, |
|
"eval_rewards/chosen": 0.4284805655479431, |
|
"eval_rewards/margins": 0.2746467888355255, |
|
"eval_rewards/rejected": 0.15383380651474, |
|
"eval_runtime": 220.5776, |
|
"eval_samples_per_second": 7.861, |
|
"eval_steps_per_second": 1.968, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1430952686241826, |
|
"grad_norm": 43.3905484769897, |
|
"learning_rate": 3.5632183908045977e-07, |
|
"logits/chosen": 0.4458725154399872, |
|
"logits/rejected": 0.46262863278388977, |
|
"logps/chosen": -40.7205924987793, |
|
"logps/rejected": -47.21548080444336, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": 0.42287477850914, |
|
"rewards/margins": 0.2391357719898224, |
|
"rewards/rejected": 0.18373897671699524, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.14771124503141428, |
|
"grad_norm": 46.32576522285691, |
|
"learning_rate": 3.67816091954023e-07, |
|
"logits/chosen": 0.42775771021842957, |
|
"logits/rejected": 0.4581214487552643, |
|
"logps/chosen": -42.59015655517578, |
|
"logps/rejected": -51.6392822265625, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.43508684635162354, |
|
"rewards/margins": 0.3611146807670593, |
|
"rewards/rejected": 0.0739721804857254, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.152327221438646, |
|
"grad_norm": 42.51968356117282, |
|
"learning_rate": 3.793103448275862e-07, |
|
"logits/chosen": 0.4170711636543274, |
|
"logits/rejected": 0.45475757122039795, |
|
"logps/chosen": -38.8193359375, |
|
"logps/rejected": -59.24808120727539, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.7222222089767456, |
|
"rewards/chosen": 0.5248206257820129, |
|
"rewards/margins": 0.48520517349243164, |
|
"rewards/rejected": 0.03961547836661339, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.15694319784587768, |
|
"grad_norm": 59.1347578756685, |
|
"learning_rate": 3.9080459770114945e-07, |
|
"logits/chosen": 0.3444980978965759, |
|
"logits/rejected": 0.38142290711402893, |
|
"logps/chosen": -37.63268280029297, |
|
"logps/rejected": -56.55868911743164, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.6805555820465088, |
|
"rewards/chosen": 0.28356897830963135, |
|
"rewards/margins": 0.37217438220977783, |
|
"rewards/rejected": -0.0886053591966629, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.16155917425310937, |
|
"grad_norm": 43.01512643240851, |
|
"learning_rate": 4.0229885057471266e-07, |
|
"logits/chosen": 0.47459664940834045, |
|
"logits/rejected": 0.5048218369483948, |
|
"logps/chosen": -37.06074905395508, |
|
"logps/rejected": -41.83311462402344, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.7238032221794128, |
|
"rewards/margins": 0.6140663623809814, |
|
"rewards/rejected": 0.10973668098449707, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16155917425310937, |
|
"eval_logits/chosen": 0.3373754024505615, |
|
"eval_logits/rejected": 0.361337274312973, |
|
"eval_logps/chosen": -40.72093200683594, |
|
"eval_logps/rejected": -47.9627685546875, |
|
"eval_loss": 0.5673334002494812, |
|
"eval_rewards/accuracies": 0.7073732614517212, |
|
"eval_rewards/chosen": 0.512172520160675, |
|
"eval_rewards/margins": 0.47438928484916687, |
|
"eval_rewards/rejected": 0.03778325766324997, |
|
"eval_runtime": 220.4667, |
|
"eval_samples_per_second": 7.865, |
|
"eval_steps_per_second": 1.969, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16617515066034108, |
|
"grad_norm": 58.03313651952633, |
|
"learning_rate": 4.1379310344827586e-07, |
|
"logits/chosen": 0.47328370809555054, |
|
"logits/rejected": 0.516916036605835, |
|
"logps/chosen": -39.381927490234375, |
|
"logps/rejected": -63.04606628417969, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.7222222089767456, |
|
"rewards/chosen": 0.5364831686019897, |
|
"rewards/margins": 0.5998459458351135, |
|
"rewards/rejected": -0.06336280703544617, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.17079112706757277, |
|
"grad_norm": 38.72118579621577, |
|
"learning_rate": 4.25287356321839e-07, |
|
"logits/chosen": 0.4743606746196747, |
|
"logits/rejected": 0.48414406180381775, |
|
"logps/chosen": -47.13395690917969, |
|
"logps/rejected": -47.23988723754883, |
|
"loss": 0.6296, |
|
"rewards/accuracies": 0.6944444179534912, |
|
"rewards/chosen": 0.46243613958358765, |
|
"rewards/margins": 0.4000816345214844, |
|
"rewards/rejected": 0.06235449016094208, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.17540710347480445, |
|
"grad_norm": 52.02457163056824, |
|
"learning_rate": 4.367816091954023e-07, |
|
"logits/chosen": 0.4869605302810669, |
|
"logits/rejected": 0.5183277726173401, |
|
"logps/chosen": -41.5470085144043, |
|
"logps/rejected": -52.64150619506836, |
|
"loss": 0.5554, |
|
"rewards/accuracies": 0.7361111044883728, |
|
"rewards/chosen": 0.6152575612068176, |
|
"rewards/margins": 0.5033391118049622, |
|
"rewards/rejected": 0.11191850155591965, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.18002307988203617, |
|
"grad_norm": 39.942306949985145, |
|
"learning_rate": 4.482758620689655e-07, |
|
"logits/chosen": 0.4678427278995514, |
|
"logits/rejected": 0.4919649660587311, |
|
"logps/chosen": -36.33488845825195, |
|
"logps/rejected": -46.28294372558594, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.6944444179534912, |
|
"rewards/chosen": 0.7630440592765808, |
|
"rewards/margins": 0.5350204110145569, |
|
"rewards/rejected": 0.22802363336086273, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.18463905628926786, |
|
"grad_norm": 49.68265630941736, |
|
"learning_rate": 4.597701149425287e-07, |
|
"logits/chosen": 0.4118601381778717, |
|
"logits/rejected": 0.4340742528438568, |
|
"logps/chosen": -36.466026306152344, |
|
"logps/rejected": -40.359230041503906, |
|
"loss": 0.5161, |
|
"rewards/accuracies": 0.7222222089767456, |
|
"rewards/chosen": 0.7488323450088501, |
|
"rewards/margins": 0.6430253982543945, |
|
"rewards/rejected": 0.1058068722486496, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18463905628926786, |
|
"eval_logits/chosen": 0.3424670994281769, |
|
"eval_logits/rejected": 0.3665529489517212, |
|
"eval_logps/chosen": -40.603668212890625, |
|
"eval_logps/rejected": -48.121337890625, |
|
"eval_loss": 0.5314013957977295, |
|
"eval_rewards/accuracies": 0.7206221222877502, |
|
"eval_rewards/chosen": 0.5708039999008179, |
|
"eval_rewards/margins": 0.6123039126396179, |
|
"eval_rewards/rejected": -0.04149990156292915, |
|
"eval_runtime": 220.269, |
|
"eval_samples_per_second": 7.872, |
|
"eval_steps_per_second": 1.97, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18925503269649954, |
|
"grad_norm": 40.18854063526523, |
|
"learning_rate": 4.712643678160919e-07, |
|
"logits/chosen": 0.4146896302700043, |
|
"logits/rejected": 0.44372716546058655, |
|
"logps/chosen": -44.112205505371094, |
|
"logps/rejected": -54.97979736328125, |
|
"loss": 0.5066, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.5255990624427795, |
|
"rewards/margins": 0.6622204780578613, |
|
"rewards/rejected": -0.1366213709115982, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.19387100910373126, |
|
"grad_norm": 35.8737356471814, |
|
"learning_rate": 4.827586206896552e-07, |
|
"logits/chosen": 0.46901315450668335, |
|
"logits/rejected": 0.5202505588531494, |
|
"logps/chosen": -37.11308288574219, |
|
"logps/rejected": -64.51854705810547, |
|
"loss": 0.4791, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.6412889957427979, |
|
"rewards/margins": 0.8545607924461365, |
|
"rewards/rejected": -0.213271826505661, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.19848698551096294, |
|
"grad_norm": 38.06201763208911, |
|
"learning_rate": 4.942528735632184e-07, |
|
"logits/chosen": 0.4869195520877838, |
|
"logits/rejected": 0.5158190727233887, |
|
"logps/chosen": -41.02754592895508, |
|
"logps/rejected": -52.270511627197266, |
|
"loss": 0.4592, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.6756889820098877, |
|
"rewards/margins": 0.815551221370697, |
|
"rewards/rejected": -0.13986223936080933, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.20310296191819463, |
|
"grad_norm": 36.34414677933188, |
|
"learning_rate": 4.999979670146248e-07, |
|
"logits/chosen": 0.4226466119289398, |
|
"logits/rejected": 0.4440222680568695, |
|
"logps/chosen": -45.02897644042969, |
|
"logps/rejected": -53.814674377441406, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5262306928634644, |
|
"rewards/margins": 0.772502601146698, |
|
"rewards/rejected": -0.24627192318439484, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.20771893832542634, |
|
"grad_norm": 47.62232189356859, |
|
"learning_rate": 4.99981703330008e-07, |
|
"logits/chosen": 0.43458905816078186, |
|
"logits/rejected": 0.45631253719329834, |
|
"logps/chosen": -39.44232177734375, |
|
"logps/rejected": -49.5074462890625, |
|
"loss": 0.508, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.6604294180870056, |
|
"rewards/margins": 0.6735664010047913, |
|
"rewards/rejected": -0.013136889785528183, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20771893832542634, |
|
"eval_logits/chosen": 0.3509339988231659, |
|
"eval_logits/rejected": 0.37502503395080566, |
|
"eval_logps/chosen": -40.31688690185547, |
|
"eval_logps/rejected": -48.16210174560547, |
|
"eval_loss": 0.4914422631263733, |
|
"eval_rewards/accuracies": 0.7263824939727783, |
|
"eval_rewards/chosen": 0.7141958475112915, |
|
"eval_rewards/margins": 0.7760785222053528, |
|
"eval_rewards/rejected": -0.06188271939754486, |
|
"eval_runtime": 220.2045, |
|
"eval_samples_per_second": 7.874, |
|
"eval_steps_per_second": 1.971, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21233491473265803, |
|
"grad_norm": 33.53538998473167, |
|
"learning_rate": 4.99949177018813e-07, |
|
"logits/chosen": 0.4293578863143921, |
|
"logits/rejected": 0.46211349964141846, |
|
"logps/chosen": -34.20891571044922, |
|
"logps/rejected": -45.82402420043945, |
|
"loss": 0.4007, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.9528428316116333, |
|
"rewards/margins": 1.0176244974136353, |
|
"rewards/rejected": -0.06478171050548553, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.21695089113988972, |
|
"grad_norm": 47.45137697847349, |
|
"learning_rate": 4.999003901970474e-07, |
|
"logits/chosen": 0.4385245442390442, |
|
"logits/rejected": 0.45108774304389954, |
|
"logps/chosen": -47.24710464477539, |
|
"logps/rejected": -47.30147171020508, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.6944444179534912, |
|
"rewards/chosen": 0.644627034664154, |
|
"rewards/margins": 0.6343204975128174, |
|
"rewards/rejected": 0.010306484065949917, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.22156686754712143, |
|
"grad_norm": 33.39629568865361, |
|
"learning_rate": 4.998353460385512e-07, |
|
"logits/chosen": 0.4504711329936981, |
|
"logits/rejected": 0.48663392663002014, |
|
"logps/chosen": -40.03446578979492, |
|
"logps/rejected": -55.506591796875, |
|
"loss": 0.4222, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.6967446804046631, |
|
"rewards/margins": 1.0778069496154785, |
|
"rewards/rejected": -0.381062388420105, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.22618284395435312, |
|
"grad_norm": 34.18594601316725, |
|
"learning_rate": 4.997540487747892e-07, |
|
"logits/chosen": 0.38444679975509644, |
|
"logits/rejected": 0.4130491614341736, |
|
"logps/chosen": -37.72957992553711, |
|
"logps/rejected": -57.71113967895508, |
|
"loss": 0.4716, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.864948570728302, |
|
"rewards/margins": 1.0170652866363525, |
|
"rewards/rejected": -0.152116596698761, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2307988203615848, |
|
"grad_norm": 31.852168197293704, |
|
"learning_rate": 4.996565036945769e-07, |
|
"logits/chosen": 0.4658397436141968, |
|
"logits/rejected": 0.4849558472633362, |
|
"logps/chosen": -44.069618225097656, |
|
"logps/rejected": -46.06491470336914, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.5598255395889282, |
|
"rewards/margins": 0.8147852420806885, |
|
"rewards/rejected": -0.25495976209640503, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2307988203615848, |
|
"eval_logits/chosen": 0.3590577244758606, |
|
"eval_logits/rejected": 0.38313183188438416, |
|
"eval_logps/chosen": -40.04033660888672, |
|
"eval_logps/rejected": -48.23354721069336, |
|
"eval_loss": 0.4618569314479828, |
|
"eval_rewards/accuracies": 0.7298387289047241, |
|
"eval_rewards/chosen": 0.852470874786377, |
|
"eval_rewards/margins": 0.9500778913497925, |
|
"eval_rewards/rejected": -0.09760700911283493, |
|
"eval_runtime": 220.4716, |
|
"eval_samples_per_second": 7.865, |
|
"eval_steps_per_second": 1.969, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23541479676881652, |
|
"grad_norm": 32.563251146586694, |
|
"learning_rate": 4.995427171437356e-07, |
|
"logits/chosen": 0.41394177079200745, |
|
"logits/rejected": 0.4560126066207886, |
|
"logps/chosen": -36.68212890625, |
|
"logps/rejected": -56.006553649902344, |
|
"loss": 0.3851, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.7943739891052246, |
|
"rewards/margins": 1.1945956945419312, |
|
"rewards/rejected": -0.40022173523902893, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2400307731760482, |
|
"grad_norm": 35.159104202159625, |
|
"learning_rate": 4.994126965246796e-07, |
|
"logits/chosen": 0.43339937925338745, |
|
"logits/rejected": 0.45789778232574463, |
|
"logps/chosen": -40.00631332397461, |
|
"logps/rejected": -48.161224365234375, |
|
"loss": 0.4153, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.7441484928131104, |
|
"rewards/margins": 1.0307202339172363, |
|
"rewards/rejected": -0.28657177090644836, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.24464674958327992, |
|
"grad_norm": 35.54279884741835, |
|
"learning_rate": 4.992664502959351e-07, |
|
"logits/chosen": 0.42503511905670166, |
|
"logits/rejected": 0.48626741766929626, |
|
"logps/chosen": -36.73310852050781, |
|
"logps/rejected": -73.78736877441406, |
|
"loss": 0.3536, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.8739730715751648, |
|
"rewards/margins": 1.558694839477539, |
|
"rewards/rejected": -0.6847219467163086, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.2492627259905116, |
|
"grad_norm": 45.173611856138976, |
|
"learning_rate": 4.991039879715898e-07, |
|
"logits/chosen": 0.4289478361606598, |
|
"logits/rejected": 0.46912992000579834, |
|
"logps/chosen": -40.94606399536133, |
|
"logps/rejected": -58.62925338745117, |
|
"loss": 0.4057, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 1.023528814315796, |
|
"rewards/margins": 1.251956582069397, |
|
"rewards/rejected": -0.22842761874198914, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2538787023977433, |
|
"grad_norm": 25.213187587591246, |
|
"learning_rate": 4.989253201206736e-07, |
|
"logits/chosen": 0.4647282361984253, |
|
"logits/rejected": 0.4716295003890991, |
|
"logps/chosen": -40.334922790527344, |
|
"logps/rejected": -41.65603256225586, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.7222222089767456, |
|
"rewards/chosen": 0.9747889637947083, |
|
"rewards/margins": 1.0528353452682495, |
|
"rewards/rejected": -0.07804636657238007, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2538787023977433, |
|
"eval_logits/chosen": 0.36145398020744324, |
|
"eval_logits/rejected": 0.38587653636932373, |
|
"eval_logps/chosen": -39.77558135986328, |
|
"eval_logps/rejected": -48.301231384277344, |
|
"eval_loss": 0.43463748693466187, |
|
"eval_rewards/accuracies": 0.7379032373428345, |
|
"eval_rewards/chosen": 0.9848493337631226, |
|
"eval_rewards/margins": 1.1162999868392944, |
|
"eval_rewards/rejected": -0.1314505934715271, |
|
"eval_runtime": 220.4446, |
|
"eval_samples_per_second": 7.866, |
|
"eval_steps_per_second": 1.969, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.258494678804975, |
|
"grad_norm": 39.895524747857486, |
|
"learning_rate": 4.987304583664712e-07, |
|
"logits/chosen": 0.4972270429134369, |
|
"logits/rejected": 0.5156663060188293, |
|
"logps/chosen": -46.859134674072266, |
|
"logps/rejected": -53.12602996826172, |
|
"loss": 0.4463, |
|
"rewards/accuracies": 0.7361111044883728, |
|
"rewards/chosen": 0.8810398578643799, |
|
"rewards/margins": 0.9829990863800049, |
|
"rewards/rejected": -0.10195919126272202, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.26311065521220667, |
|
"grad_norm": 36.88032065773003, |
|
"learning_rate": 4.985194153857662e-07, |
|
"logits/chosen": 0.4386284351348877, |
|
"logits/rejected": 0.4557953476905823, |
|
"logps/chosen": -36.74658203125, |
|
"logps/rejected": -39.56464767456055, |
|
"loss": 0.4788, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.9255303144454956, |
|
"rewards/margins": 0.9156983494758606, |
|
"rewards/rejected": 0.009831971488893032, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2677266316194384, |
|
"grad_norm": 23.636821560598456, |
|
"learning_rate": 4.982922049080163e-07, |
|
"logits/chosen": 0.40630775690078735, |
|
"logits/rejected": 0.4236665964126587, |
|
"logps/chosen": -35.141971588134766, |
|
"logps/rejected": -42.14583969116211, |
|
"loss": 0.3872, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.8281899690628052, |
|
"rewards/margins": 1.215153455734253, |
|
"rewards/rejected": -0.386963427066803, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.2723426080266701, |
|
"grad_norm": 38.873691089935114, |
|
"learning_rate": 4.980488417144599e-07, |
|
"logits/chosen": 0.37884485721588135, |
|
"logits/rejected": 0.4280329644680023, |
|
"logps/chosen": -41.57583999633789, |
|
"logps/rejected": -71.53160095214844, |
|
"loss": 0.4818, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.7444247603416443, |
|
"rewards/margins": 1.1956461668014526, |
|
"rewards/rejected": -0.4512213468551636, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.27695858443390176, |
|
"grad_norm": 27.126567445081033, |
|
"learning_rate": 4.977893416371544e-07, |
|
"logits/chosen": 0.4753592908382416, |
|
"logits/rejected": 0.4997613728046417, |
|
"logps/chosen": -34.07433319091797, |
|
"logps/rejected": -45.33045959472656, |
|
"loss": 0.3838, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.8865776062011719, |
|
"rewards/margins": 1.4225349426269531, |
|
"rewards/rejected": -0.5359571576118469, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27695858443390176, |
|
"eval_logits/chosen": 0.3657575249671936, |
|
"eval_logits/rejected": 0.39033937454223633, |
|
"eval_logps/chosen": -39.95652770996094, |
|
"eval_logps/rejected": -48.739437103271484, |
|
"eval_loss": 0.410579651594162, |
|
"eval_rewards/accuracies": 0.7540322542190552, |
|
"eval_rewards/chosen": 0.8943750858306885, |
|
"eval_rewards/margins": 1.2449262142181396, |
|
"eval_rewards/rejected": -0.35055097937583923, |
|
"eval_runtime": 220.2442, |
|
"eval_samples_per_second": 7.873, |
|
"eval_steps_per_second": 1.971, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.28157456084113347, |
|
"grad_norm": 26.815456443053705, |
|
"learning_rate": 4.975137215579469e-07, |
|
"logits/chosen": 0.5420396327972412, |
|
"logits/rejected": 0.5500521659851074, |
|
"logps/chosen": -45.788516998291016, |
|
"logps/rejected": -42.21580505371094, |
|
"loss": 0.4117, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.8019249439239502, |
|
"rewards/margins": 1.2268595695495605, |
|
"rewards/rejected": -0.42493465542793274, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.2861905372483652, |
|
"grad_norm": 30.749785890404876, |
|
"learning_rate": 4.972219994073755e-07, |
|
"logits/chosen": 0.49169254302978516, |
|
"logits/rejected": 0.5404393672943115, |
|
"logps/chosen": -38.644107818603516, |
|
"logps/rejected": -67.01266479492188, |
|
"loss": 0.3844, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.8940033316612244, |
|
"rewards/margins": 1.6317830085754395, |
|
"rewards/rejected": -0.7377796173095703, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2908065136555969, |
|
"grad_norm": 29.538977791375373, |
|
"learning_rate": 4.969141941635025e-07, |
|
"logits/chosen": 0.47598233819007874, |
|
"logits/rejected": 0.5060492753982544, |
|
"logps/chosen": -40.60331344604492, |
|
"logps/rejected": -59.37862014770508, |
|
"loss": 0.4476, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5469496250152588, |
|
"rewards/margins": 1.4448275566101074, |
|
"rewards/rejected": -0.8978776931762695, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.29542249006282856, |
|
"grad_norm": 50.663011631161446, |
|
"learning_rate": 4.965903258506806e-07, |
|
"logits/chosen": 0.49228647351264954, |
|
"logits/rejected": 0.5329996943473816, |
|
"logps/chosen": -39.90941619873047, |
|
"logps/rejected": -61.23884963989258, |
|
"loss": 0.347, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.8301137685775757, |
|
"rewards/margins": 1.5421695709228516, |
|
"rewards/rejected": -0.7120558619499207, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.30003846647006027, |
|
"grad_norm": 32.98345370505989, |
|
"learning_rate": 4.962504155382493e-07, |
|
"logits/chosen": 0.4239842891693115, |
|
"logits/rejected": 0.44136151671409607, |
|
"logps/chosen": -36.07121276855469, |
|
"logps/rejected": -41.06203079223633, |
|
"loss": 0.3667, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.7973310351371765, |
|
"rewards/margins": 1.2334084510803223, |
|
"rewards/rejected": -0.4360772669315338, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.30003846647006027, |
|
"eval_logits/chosen": 0.3723231256008148, |
|
"eval_logits/rejected": 0.3968786299228668, |
|
"eval_logps/chosen": -39.925048828125, |
|
"eval_logps/rejected": -48.9533576965332, |
|
"eval_loss": 0.39173391461372375, |
|
"eval_rewards/accuracies": 0.7753456234931946, |
|
"eval_rewards/chosen": 0.9101160168647766, |
|
"eval_rewards/margins": 1.3676302433013916, |
|
"eval_rewards/rejected": -0.4575144052505493, |
|
"eval_runtime": 220.267, |
|
"eval_samples_per_second": 7.872, |
|
"eval_steps_per_second": 1.97, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.304654442877292, |
|
"grad_norm": 28.392702162901728, |
|
"learning_rate": 4.958944853391652e-07, |
|
"logits/chosen": 0.520796537399292, |
|
"logits/rejected": 0.5420558452606201, |
|
"logps/chosen": -37.87763595581055, |
|
"logps/rejected": -46.05318069458008, |
|
"loss": 0.3819, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.932469367980957, |
|
"rewards/margins": 1.2907413244247437, |
|
"rewards/rejected": -0.3582719564437866, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.30927041928452365, |
|
"grad_norm": 27.83688192223066, |
|
"learning_rate": 4.955225584085624e-07, |
|
"logits/chosen": 0.42395105957984924, |
|
"logits/rejected": 0.44882073998451233, |
|
"logps/chosen": -36.98991775512695, |
|
"logps/rejected": -51.79054260253906, |
|
"loss": 0.3951, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.9578195214271545, |
|
"rewards/margins": 1.4403272867202759, |
|
"rewards/rejected": -0.48250770568847656, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.31388639569175536, |
|
"grad_norm": 27.432482792006017, |
|
"learning_rate": 4.951346589422467e-07, |
|
"logits/chosen": 0.483965128660202, |
|
"logits/rejected": 0.5153691172599792, |
|
"logps/chosen": -37.48245620727539, |
|
"logps/rejected": -54.50342559814453, |
|
"loss": 0.3942, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 1.0384331941604614, |
|
"rewards/margins": 1.5820738077163696, |
|
"rewards/rejected": -0.5436408519744873, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.3185023720989871, |
|
"grad_norm": 46.62557646275611, |
|
"learning_rate": 4.94730812175122e-07, |
|
"logits/chosen": 0.43841731548309326, |
|
"logits/rejected": 0.4499746561050415, |
|
"logps/chosen": -38.93119812011719, |
|
"logps/rejected": -42.26424026489258, |
|
"loss": 0.4384, |
|
"rewards/accuracies": 0.6527777910232544, |
|
"rewards/chosen": 0.8961164951324463, |
|
"rewards/margins": 1.247178554534912, |
|
"rewards/rejected": -0.3510621190071106, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.32311834850621873, |
|
"grad_norm": 34.05743924648359, |
|
"learning_rate": 4.943110443795476e-07, |
|
"logits/chosen": 0.49757227301597595, |
|
"logits/rejected": 0.5091323852539062, |
|
"logps/chosen": -42.93407440185547, |
|
"logps/rejected": -45.01084899902344, |
|
"loss": 0.4061, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.8557752370834351, |
|
"rewards/margins": 1.3424351215362549, |
|
"rewards/rejected": -0.48665979504585266, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.32311834850621873, |
|
"eval_logits/chosen": 0.3763599395751953, |
|
"eval_logits/rejected": 0.4011194705963135, |
|
"eval_logps/chosen": -39.798316955566406, |
|
"eval_logps/rejected": -49.11299133300781, |
|
"eval_loss": 0.3788905441761017, |
|
"eval_rewards/accuracies": 0.764976978302002, |
|
"eval_rewards/chosen": 0.9734821915626526, |
|
"eval_rewards/margins": 1.5108132362365723, |
|
"eval_rewards/rejected": -0.5373309850692749, |
|
"eval_runtime": 220.3233, |
|
"eval_samples_per_second": 7.87, |
|
"eval_steps_per_second": 1.97, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.32773432491345045, |
|
"grad_norm": 36.481001944632766, |
|
"learning_rate": 4.938753828636297e-07, |
|
"logits/chosen": 0.4888935089111328, |
|
"logits/rejected": 0.4963880777359009, |
|
"logps/chosen": -46.02848815917969, |
|
"logps/rejected": -44.94346237182617, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.7767104506492615, |
|
"rewards/margins": 1.235382080078125, |
|
"rewards/rejected": -0.45867156982421875, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.33235030132068216, |
|
"grad_norm": 27.008693506029694, |
|
"learning_rate": 4.934238559694447e-07, |
|
"logits/chosen": 0.460690975189209, |
|
"logits/rejected": 0.5057052969932556, |
|
"logps/chosen": -38.473411560058594, |
|
"logps/rejected": -54.91615295410156, |
|
"loss": 0.3338, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.8698298335075378, |
|
"rewards/margins": 1.6055673360824585, |
|
"rewards/rejected": -0.7357374429702759, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3369662777279138, |
|
"grad_norm": 32.261266015848825, |
|
"learning_rate": 4.929564930711957e-07, |
|
"logits/chosen": 0.4295574426651001, |
|
"logits/rejected": 0.4522492587566376, |
|
"logps/chosen": -39.829490661621094, |
|
"logps/rejected": -44.733333587646484, |
|
"loss": 0.3533, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.7346515655517578, |
|
"rewards/margins": 1.3469676971435547, |
|
"rewards/rejected": -0.6123161315917969, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.34158225413514554, |
|
"grad_norm": 28.797840444924386, |
|
"learning_rate": 4.924733245733008e-07, |
|
"logits/chosen": 0.5410254001617432, |
|
"logits/rejected": 0.5485421419143677, |
|
"logps/chosen": -43.81610870361328, |
|
"logps/rejected": -40.52272033691406, |
|
"loss": 0.3651, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.9063374996185303, |
|
"rewards/margins": 1.2729685306549072, |
|
"rewards/rejected": -0.366630882024765, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.34619823054237725, |
|
"grad_norm": 30.202896827963542, |
|
"learning_rate": 4.91974381908416e-07, |
|
"logits/chosen": 0.42066994309425354, |
|
"logits/rejected": 0.4589553475379944, |
|
"logps/chosen": -38.81809997558594, |
|
"logps/rejected": -58.59386444091797, |
|
"loss": 0.3446, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.6800815463066101, |
|
"rewards/margins": 1.928250789642334, |
|
"rewards/rejected": -1.2481693029403687, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.34619823054237725, |
|
"eval_logits/chosen": 0.3821311295032501, |
|
"eval_logits/rejected": 0.40684476494789124, |
|
"eval_logps/chosen": -40.001861572265625, |
|
"eval_logps/rejected": -49.4797477722168, |
|
"eval_loss": 0.3633531332015991, |
|
"eval_rewards/accuracies": 0.7724654674530029, |
|
"eval_rewards/chosen": 0.8717083930969238, |
|
"eval_rewards/margins": 1.5924171209335327, |
|
"eval_rewards/rejected": -0.7207087278366089, |
|
"eval_runtime": 220.1362, |
|
"eval_samples_per_second": 7.877, |
|
"eval_steps_per_second": 1.972, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3508142069496089, |
|
"grad_norm": 26.385033894551455, |
|
"learning_rate": 4.914596975353898e-07, |
|
"logits/chosen": 0.4991176426410675, |
|
"logits/rejected": 0.5242553353309631, |
|
"logps/chosen": -38.974281311035156, |
|
"logps/rejected": -48.54939270019531, |
|
"loss": 0.3721, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.771294355392456, |
|
"rewards/margins": 1.5243595838546753, |
|
"rewards/rejected": -0.7530653476715088, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3554301833568406, |
|
"grad_norm": 42.428423927932826, |
|
"learning_rate": 4.909293049371519e-07, |
|
"logits/chosen": 0.5288230180740356, |
|
"logits/rejected": 0.5352779626846313, |
|
"logps/chosen": -45.90478515625, |
|
"logps/rejected": -44.53614044189453, |
|
"loss": 0.3542, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.7464312314987183, |
|
"rewards/margins": 1.5150079727172852, |
|
"rewards/rejected": -0.7685766220092773, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.36004615976407234, |
|
"grad_norm": 36.75812549927479, |
|
"learning_rate": 4.903832386185343e-07, |
|
"logits/chosen": 0.47585126757621765, |
|
"logits/rejected": 0.49040529131889343, |
|
"logps/chosen": -44.172325134277344, |
|
"logps/rejected": -43.98606872558594, |
|
"loss": 0.3956, |
|
"rewards/accuracies": 0.7361111044883728, |
|
"rewards/chosen": 0.5973650813102722, |
|
"rewards/margins": 1.340658187866211, |
|
"rewards/rejected": -0.743293046951294, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.364662136171304, |
|
"grad_norm": 26.152211217958236, |
|
"learning_rate": 4.89821534104028e-07, |
|
"logits/chosen": 0.39484938979148865, |
|
"logits/rejected": 0.42477357387542725, |
|
"logps/chosen": -41.93134307861328, |
|
"logps/rejected": -56.39106750488281, |
|
"loss": 0.3275, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.827555239200592, |
|
"rewards/margins": 1.9599329233169556, |
|
"rewards/rejected": -1.1323776245117188, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.3692781125785357, |
|
"grad_norm": 29.041350828980583, |
|
"learning_rate": 4.892442279354698e-07, |
|
"logits/chosen": 0.4744550287723541, |
|
"logits/rejected": 0.5093830227851868, |
|
"logps/chosen": -42.794578552246094, |
|
"logps/rejected": -59.93064498901367, |
|
"loss": 0.3605, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.540644645690918, |
|
"rewards/margins": 1.6665728092193604, |
|
"rewards/rejected": -1.125928282737732, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3692781125785357, |
|
"eval_logits/chosen": 0.38920047879219055, |
|
"eval_logits/rejected": 0.41388043761253357, |
|
"eval_logps/chosen": -40.3745231628418, |
|
"eval_logps/rejected": -49.94725036621094, |
|
"eval_loss": 0.3510279059410095, |
|
"eval_rewards/accuracies": 0.7920507192611694, |
|
"eval_rewards/chosen": 0.6853779554367065, |
|
"eval_rewards/margins": 1.6398398876190186, |
|
"eval_rewards/rejected": -0.9544618129730225, |
|
"eval_runtime": 220.1812, |
|
"eval_samples_per_second": 7.875, |
|
"eval_steps_per_second": 1.971, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3738940889857674, |
|
"grad_norm": 32.36067481486556, |
|
"learning_rate": 4.886513576696673e-07, |
|
"logits/chosen": 0.4680570960044861, |
|
"logits/rejected": 0.5030277371406555, |
|
"logps/chosen": -42.39280700683594, |
|
"logps/rejected": -58.18678283691406, |
|
"loss": 0.392, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.7217347621917725, |
|
"rewards/margins": 1.8615412712097168, |
|
"rewards/rejected": -1.1398065090179443, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.3785100653929991, |
|
"grad_norm": 27.802667550507227, |
|
"learning_rate": 4.880429618759543e-07, |
|
"logits/chosen": 0.46893131732940674, |
|
"logits/rejected": 0.4787411093711853, |
|
"logps/chosen": -45.52459716796875, |
|
"logps/rejected": -46.459312438964844, |
|
"loss": 0.3819, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.870037317276001, |
|
"rewards/margins": 1.4820109605789185, |
|
"rewards/rejected": -0.6119736433029175, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.3831260418002308, |
|
"grad_norm": 27.278325528930967, |
|
"learning_rate": 4.874190801336817e-07, |
|
"logits/chosen": 0.46610963344573975, |
|
"logits/rejected": 0.4872422218322754, |
|
"logps/chosen": -44.28363037109375, |
|
"logps/rejected": -51.54701232910156, |
|
"loss": 0.323, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.6502636075019836, |
|
"rewards/margins": 1.7216179370880127, |
|
"rewards/rejected": -1.0713541507720947, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.3877420182074625, |
|
"grad_norm": 25.09454062223173, |
|
"learning_rate": 4.867797530296431e-07, |
|
"logits/chosen": 0.4582709074020386, |
|
"logits/rejected": 0.48244646191596985, |
|
"logps/chosen": -45.76988983154297, |
|
"logps/rejected": -55.2458610534668, |
|
"loss": 0.2842, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.6319215297698975, |
|
"rewards/margins": 2.007154941558838, |
|
"rewards/rejected": -1.3752332925796509, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.39235799461469417, |
|
"grad_norm": 25.014228656107395, |
|
"learning_rate": 4.861250221554343e-07, |
|
"logits/chosen": 0.4760267436504364, |
|
"logits/rejected": 0.5161222219467163, |
|
"logps/chosen": -36.09988021850586, |
|
"logps/rejected": -58.49198913574219, |
|
"loss": 0.317, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.6937295794487, |
|
"rewards/margins": 2.0070507526397705, |
|
"rewards/rejected": -1.3133213520050049, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.39235799461469417, |
|
"eval_logits/chosen": 0.39130648970603943, |
|
"eval_logits/rejected": 0.41622862219810486, |
|
"eval_logps/chosen": -40.4833984375, |
|
"eval_logps/rejected": -50.18775177001953, |
|
"eval_loss": 0.343056857585907, |
|
"eval_rewards/accuracies": 0.796658992767334, |
|
"eval_rewards/chosen": 0.6309407949447632, |
|
"eval_rewards/margins": 1.7056493759155273, |
|
"eval_rewards/rejected": -1.0747085809707642, |
|
"eval_runtime": 220.3261, |
|
"eval_samples_per_second": 7.87, |
|
"eval_steps_per_second": 1.97, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3969739710219259, |
|
"grad_norm": 21.660777806253456, |
|
"learning_rate": 4.854549301047476e-07, |
|
"logits/chosen": 0.5408195853233337, |
|
"logits/rejected": 0.5565234422683716, |
|
"logps/chosen": -42.90623474121094, |
|
"logps/rejected": -43.590702056884766, |
|
"loss": 0.373, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.6533936262130737, |
|
"rewards/margins": 1.5095248222351074, |
|
"rewards/rejected": -0.8561312556266785, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4015899474291576, |
|
"grad_norm": 32.27746768838142, |
|
"learning_rate": 4.847695204706005e-07, |
|
"logits/chosen": 0.47649839520454407, |
|
"logits/rejected": 0.49190616607666016, |
|
"logps/chosen": -38.49553680419922, |
|
"logps/rejected": -40.65150451660156, |
|
"loss": 0.3558, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.7918031811714172, |
|
"rewards/margins": 1.4087355136871338, |
|
"rewards/rejected": -0.6169323325157166, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.40620592383638926, |
|
"grad_norm": 31.844706703711985, |
|
"learning_rate": 4.840688378425e-07, |
|
"logits/chosen": 0.5188453793525696, |
|
"logits/rejected": 0.5562708973884583, |
|
"logps/chosen": -46.135372161865234, |
|
"logps/rejected": -56.292930603027344, |
|
"loss": 0.261, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.7925480604171753, |
|
"rewards/margins": 2.1678171157836914, |
|
"rewards/rejected": -1.3752690553665161, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.410821900243621, |
|
"grad_norm": 26.376171346573187, |
|
"learning_rate": 4.833529278035422e-07, |
|
"logits/chosen": 0.357127845287323, |
|
"logits/rejected": 0.4103134572505951, |
|
"logps/chosen": -37.78556442260742, |
|
"logps/rejected": -67.52072143554688, |
|
"loss": 0.2899, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.9015005826950073, |
|
"rewards/margins": 2.719820261001587, |
|
"rewards/rejected": -1.81831955909729, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4154378766508527, |
|
"grad_norm": 26.0393680431703, |
|
"learning_rate": 4.826218369274459e-07, |
|
"logits/chosen": 0.4666251540184021, |
|
"logits/rejected": 0.5160384178161621, |
|
"logps/chosen": -39.356258392333984, |
|
"logps/rejected": -62.83391571044922, |
|
"loss": 0.3066, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.8675535917282104, |
|
"rewards/margins": 2.234145164489746, |
|
"rewards/rejected": -1.3665915727615356, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4154378766508527, |
|
"eval_logits/chosen": 0.3935144245624542, |
|
"eval_logits/rejected": 0.41844189167022705, |
|
"eval_logps/chosen": -39.861793518066406, |
|
"eval_logps/rejected": -49.855037689208984, |
|
"eval_loss": 0.3321295380592346, |
|
"eval_rewards/accuracies": 0.7926267385482788, |
|
"eval_rewards/chosen": 0.941743791103363, |
|
"eval_rewards/margins": 1.8500969409942627, |
|
"eval_rewards/rejected": -0.9083530902862549, |
|
"eval_runtime": 220.3176, |
|
"eval_samples_per_second": 7.87, |
|
"eval_steps_per_second": 1.97, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.42005385305808435, |
|
"grad_norm": 23.061889635448846, |
|
"learning_rate": 4.818756127755237e-07, |
|
"logits/chosen": 0.49034425616264343, |
|
"logits/rejected": 0.5069853663444519, |
|
"logps/chosen": -37.846553802490234, |
|
"logps/rejected": -41.30693817138672, |
|
"loss": 0.2693, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 1.0121811628341675, |
|
"rewards/margins": 1.8432265520095825, |
|
"rewards/rejected": -0.831045389175415, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.42466982946531606, |
|
"grad_norm": 22.17904586209137, |
|
"learning_rate": 4.811143038935873e-07, |
|
"logits/chosen": 0.5580455660820007, |
|
"logits/rejected": 0.5748550295829773, |
|
"logps/chosen": -42.32413101196289, |
|
"logps/rejected": -46.0750732421875, |
|
"loss": 0.3264, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 1.0455000400543213, |
|
"rewards/margins": 1.93173086643219, |
|
"rewards/rejected": -0.8862307667732239, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.4292858058725478, |
|
"grad_norm": 30.29917055573095, |
|
"learning_rate": 4.803379598087899e-07, |
|
"logits/chosen": 0.5174715518951416, |
|
"logits/rejected": 0.5311744213104248, |
|
"logps/chosen": -40.50711441040039, |
|
"logps/rejected": -40.298824310302734, |
|
"loss": 0.316, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.9772326350212097, |
|
"rewards/margins": 1.7723863124847412, |
|
"rewards/rejected": -0.795153796672821, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.43390178227977944, |
|
"grad_norm": 40.38001852713412, |
|
"learning_rate": 4.795466310264034e-07, |
|
"logits/chosen": 0.42736437916755676, |
|
"logits/rejected": 0.463912695646286, |
|
"logps/chosen": -39.35895919799805, |
|
"logps/rejected": -64.93545532226562, |
|
"loss": 0.4185, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.5966134667396545, |
|
"rewards/margins": 1.980704665184021, |
|
"rewards/rejected": -1.3840913772583008, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.43851775868701115, |
|
"grad_norm": 17.949323810733784, |
|
"learning_rate": 4.787403690265335e-07, |
|
"logits/chosen": 0.5044853091239929, |
|
"logits/rejected": 0.5284148454666138, |
|
"logps/chosen": -39.47854995727539, |
|
"logps/rejected": -49.92608642578125, |
|
"loss": 0.3266, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 1.0101630687713623, |
|
"rewards/margins": 1.9091652631759644, |
|
"rewards/rejected": -0.8990020751953125, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.43851775868701115, |
|
"eval_logits/chosen": 0.3972060978412628, |
|
"eval_logits/rejected": 0.4221220314502716, |
|
"eval_logps/chosen": -39.831119537353516, |
|
"eval_logps/rejected": -50.09023666381836, |
|
"eval_loss": 0.3243154287338257, |
|
"eval_rewards/accuracies": 0.7914746403694153, |
|
"eval_rewards/chosen": 0.9570826292037964, |
|
"eval_rewards/margins": 1.9830337762832642, |
|
"eval_rewards/rejected": -1.0259510278701782, |
|
"eval_runtime": 220.3237, |
|
"eval_samples_per_second": 7.87, |
|
"eval_steps_per_second": 1.97, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.44313373509424286, |
|
"grad_norm": 36.065620072852695, |
|
"learning_rate": 4.779192262607702e-07, |
|
"logits/chosen": 0.5138534903526306, |
|
"logits/rejected": 0.544155478477478, |
|
"logps/chosen": -43.310760498046875, |
|
"logps/rejected": -59.56623840332031, |
|
"loss": 0.3542, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.9537274837493896, |
|
"rewards/margins": 2.111888885498047, |
|
"rewards/rejected": -1.1581614017486572, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.4477497115014745, |
|
"grad_norm": 24.653058016123207, |
|
"learning_rate": 4.770832561487758e-07, |
|
"logits/chosen": 0.4504295885562897, |
|
"logits/rejected": 0.46597781777381897, |
|
"logps/chosen": -41.51498794555664, |
|
"logps/rejected": -43.07120132446289, |
|
"loss": 0.2587, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.9096213579177856, |
|
"rewards/margins": 2.131375551223755, |
|
"rewards/rejected": -1.2217543125152588, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.45236568790870624, |
|
"grad_norm": 36.95305184003922, |
|
"learning_rate": 4.762325130748097e-07, |
|
"logits/chosen": 0.5585076808929443, |
|
"logits/rejected": 0.5717556476593018, |
|
"logps/chosen": -47.50046920776367, |
|
"logps/rejected": -44.811973571777344, |
|
"loss": 0.3412, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.9956084489822388, |
|
"rewards/margins": 1.8879083395004272, |
|
"rewards/rejected": -0.8922999501228333, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.45698166431593795, |
|
"grad_norm": 16.999205852011567, |
|
"learning_rate": 4.7536705238418995e-07, |
|
"logits/chosen": 0.47373294830322266, |
|
"logits/rejected": 0.49137142300605774, |
|
"logps/chosen": -42.69048309326172, |
|
"logps/rejected": -50.26279067993164, |
|
"loss": 0.275, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.8595349788665771, |
|
"rewards/margins": 2.2136645317077637, |
|
"rewards/rejected": -1.3541297912597656, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.4615976407231696, |
|
"grad_norm": 33.06750404898565, |
|
"learning_rate": 4.7448693037969336e-07, |
|
"logits/chosen": 0.5136507749557495, |
|
"logits/rejected": 0.527184247970581, |
|
"logps/chosen": -41.794132232666016, |
|
"logps/rejected": -48.2490119934082, |
|
"loss": 0.2986, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.8771340847015381, |
|
"rewards/margins": 1.9797013998031616, |
|
"rewards/rejected": -1.102567195892334, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4615976407231696, |
|
"eval_logits/chosen": 0.39842745661735535, |
|
"eval_logits/rejected": 0.42355066537857056, |
|
"eval_logps/chosen": -40.12582778930664, |
|
"eval_logps/rejected": -50.502620697021484, |
|
"eval_loss": 0.3160472810268402, |
|
"eval_rewards/accuracies": 0.7978110313415527, |
|
"eval_rewards/chosen": 0.8097268342971802, |
|
"eval_rewards/margins": 2.041868209838867, |
|
"eval_rewards/rejected": -1.2321414947509766, |
|
"eval_runtime": 220.4769, |
|
"eval_samples_per_second": 7.865, |
|
"eval_steps_per_second": 1.968, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4662136171304013, |
|
"grad_norm": 27.6688088244827, |
|
"learning_rate": 4.735922043178923e-07, |
|
"logits/chosen": 0.5529847741127014, |
|
"logits/rejected": 0.5818406939506531, |
|
"logps/chosen": -42.29270553588867, |
|
"logps/rejected": -57.84202575683594, |
|
"loss": 0.2725, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.8104487061500549, |
|
"rewards/margins": 2.3321969509124756, |
|
"rewards/rejected": -1.521748423576355, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.47082959353763304, |
|
"grad_norm": 23.404484719369563, |
|
"learning_rate": 4.7268293240543017e-07, |
|
"logits/chosen": 0.48225533962249756, |
|
"logits/rejected": 0.5109025239944458, |
|
"logps/chosen": -40.953433990478516, |
|
"logps/rejected": -55.026153564453125, |
|
"loss": 0.3435, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.8147386908531189, |
|
"rewards/margins": 2.057671546936035, |
|
"rewards/rejected": -1.2429331541061401, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.4754455699448647, |
|
"grad_norm": 29.663210206611154, |
|
"learning_rate": 4.717591737952344e-07, |
|
"logits/chosen": 0.48208919167518616, |
|
"logits/rejected": 0.517291247844696, |
|
"logps/chosen": -36.30723190307617, |
|
"logps/rejected": -54.3764533996582, |
|
"loss": 0.3135, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.8137081861495972, |
|
"rewards/margins": 2.101260185241699, |
|
"rewards/rejected": -1.287551999092102, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4800615463520964, |
|
"grad_norm": 29.39474251364716, |
|
"learning_rate": 4.7082098858266837e-07, |
|
"logits/chosen": 0.48040205240249634, |
|
"logits/rejected": 0.5284512042999268, |
|
"logps/chosen": -31.84227180480957, |
|
"logps/rejected": -61.47830581665039, |
|
"loss": 0.3821, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.455925315618515, |
|
"rewards/margins": 2.105367422103882, |
|
"rewards/rejected": -1.649442195892334, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4846775227593281, |
|
"grad_norm": 15.879511628269139, |
|
"learning_rate": 4.698684378016222e-07, |
|
"logits/chosen": 0.4825616478919983, |
|
"logits/rejected": 0.5131646394729614, |
|
"logps/chosen": -43.97586441040039, |
|
"logps/rejected": -58.62031936645508, |
|
"loss": 0.271, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.6879336833953857, |
|
"rewards/margins": 2.212796211242676, |
|
"rewards/rejected": -1.5248624086380005, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4846775227593281, |
|
"eval_logits/chosen": 0.40579578280448914, |
|
"eval_logits/rejected": 0.43089571595191956, |
|
"eval_logps/chosen": -40.55123519897461, |
|
"eval_logps/rejected": -51.04283905029297, |
|
"eval_loss": 0.3111670911312103, |
|
"eval_rewards/accuracies": 0.804147481918335, |
|
"eval_rewards/chosen": 0.597020149230957, |
|
"eval_rewards/margins": 2.099271535873413, |
|
"eval_rewards/rejected": -1.502251386642456, |
|
"eval_runtime": 220.3759, |
|
"eval_samples_per_second": 7.868, |
|
"eval_steps_per_second": 1.969, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.48929349916655984, |
|
"grad_norm": 33.220388342252136, |
|
"learning_rate": 4.6890158342054174e-07, |
|
"logits/chosen": 0.46122825145721436, |
|
"logits/rejected": 0.48773014545440674, |
|
"logps/chosen": -38.094722747802734, |
|
"logps/rejected": -50.649871826171875, |
|
"loss": 0.3288, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.5131194591522217, |
|
"rewards/margins": 2.1884312629699707, |
|
"rewards/rejected": -1.6753116846084595, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.4939094755737915, |
|
"grad_norm": 27.37607169791161, |
|
"learning_rate": 4.679204883383973e-07, |
|
"logits/chosen": 0.45677465200424194, |
|
"logits/rejected": 0.5006839632987976, |
|
"logps/chosen": -36.343292236328125, |
|
"logps/rejected": -65.76275634765625, |
|
"loss": 0.301, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.5972538590431213, |
|
"rewards/margins": 2.6644716262817383, |
|
"rewards/rejected": -2.0672178268432617, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.4985254519810232, |
|
"grad_norm": 28.712191033509406, |
|
"learning_rate": 4.669252163805919e-07, |
|
"logits/chosen": 0.48203393816947937, |
|
"logits/rejected": 0.5129568576812744, |
|
"logps/chosen": -40.263328552246094, |
|
"logps/rejected": -53.96393966674805, |
|
"loss": 0.3434, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.3674449920654297, |
|
"rewards/margins": 2.094463348388672, |
|
"rewards/rejected": -1.7270184755325317, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5031414283882549, |
|
"grad_norm": 21.430060439194165, |
|
"learning_rate": 4.65915832294809e-07, |
|
"logits/chosen": 0.5647565722465515, |
|
"logits/rejected": 0.6052375435829163, |
|
"logps/chosen": -37.24385070800781, |
|
"logps/rejected": -58.28202438354492, |
|
"loss": 0.2945, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.5437911748886108, |
|
"rewards/margins": 2.518171787261963, |
|
"rewards/rejected": -1.9743802547454834, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5077574047954866, |
|
"grad_norm": 24.194015322932014, |
|
"learning_rate": 4.6489240174680026e-07, |
|
"logits/chosen": 0.5365298390388489, |
|
"logits/rejected": 0.5451048612594604, |
|
"logps/chosen": -40.26055145263672, |
|
"logps/rejected": -40.11984634399414, |
|
"loss": 0.4064, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.4349411725997925, |
|
"rewards/margins": 1.4253244400024414, |
|
"rewards/rejected": -0.9903832674026489, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5077574047954866, |
|
"eval_logits/chosen": 0.40611767768859863, |
|
"eval_logits/rejected": 0.43133166432380676, |
|
"eval_logps/chosen": -40.628150939941406, |
|
"eval_logps/rejected": -51.22826385498047, |
|
"eval_loss": 0.30713996291160583, |
|
"eval_rewards/accuracies": 0.8018433451652527, |
|
"eval_rewards/chosen": 0.5585668087005615, |
|
"eval_rewards/margins": 2.153529644012451, |
|
"eval_rewards/rejected": -1.5949628353118896, |
|
"eval_runtime": 220.3416, |
|
"eval_samples_per_second": 7.87, |
|
"eval_steps_per_second": 1.97, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5123733812027182, |
|
"grad_norm": 23.39715012730976, |
|
"learning_rate": 4.638549913161138e-07, |
|
"logits/chosen": 0.5600088834762573, |
|
"logits/rejected": 0.5736495852470398, |
|
"logps/chosen": -46.20627212524414, |
|
"logps/rejected": -47.1099739074707, |
|
"loss": 0.2227, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.7162383794784546, |
|
"rewards/margins": 2.4795196056365967, |
|
"rewards/rejected": -1.763281226158142, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.51698935760995, |
|
"grad_norm": 23.70013936518676, |
|
"learning_rate": 4.6280366849176267e-07, |
|
"logits/chosen": 0.553576648235321, |
|
"logits/rejected": 0.5800661444664001, |
|
"logps/chosen": -41.73429870605469, |
|
"logps/rejected": -47.09934997558594, |
|
"loss": 0.2708, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6174063682556152, |
|
"rewards/margins": 2.10538649559021, |
|
"rewards/rejected": -1.4879801273345947, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5216053340171817, |
|
"grad_norm": 19.39438827436505, |
|
"learning_rate": 4.6173850166783446e-07, |
|
"logits/chosen": 0.5699052810668945, |
|
"logits/rejected": 0.5908712148666382, |
|
"logps/chosen": -40.74462127685547, |
|
"logps/rejected": -53.7403450012207, |
|
"loss": 0.2716, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.5502187609672546, |
|
"rewards/margins": 2.0002176761627197, |
|
"rewards/rejected": -1.4499988555908203, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5262213104244133, |
|
"grad_norm": 24.49934372199594, |
|
"learning_rate": 4.606595601390417e-07, |
|
"logits/chosen": 0.46904435753822327, |
|
"logits/rejected": 0.5106580257415771, |
|
"logps/chosen": -39.85272979736328, |
|
"logps/rejected": -61.70741653442383, |
|
"loss": 0.2336, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3319948613643646, |
|
"rewards/margins": 2.6446897983551025, |
|
"rewards/rejected": -2.312695026397705, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5308372868316451, |
|
"grad_norm": 28.165420212664795, |
|
"learning_rate": 4.595669140962143e-07, |
|
"logits/chosen": 0.4127655625343323, |
|
"logits/rejected": 0.479299396276474, |
|
"logps/chosen": -34.939422607421875, |
|
"logps/rejected": -78.63516235351562, |
|
"loss": 0.3107, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.18619215488433838, |
|
"rewards/margins": 2.8822548389434814, |
|
"rewards/rejected": -2.6960630416870117, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5308372868316451, |
|
"eval_logits/chosen": 0.4082220494747162, |
|
"eval_logits/rejected": 0.4335884749889374, |
|
"eval_logps/chosen": -40.824676513671875, |
|
"eval_logps/rejected": -51.529090881347656, |
|
"eval_loss": 0.30161648988723755, |
|
"eval_rewards/accuracies": 0.8104838728904724, |
|
"eval_rewards/chosen": 0.4603023827075958, |
|
"eval_rewards/margins": 2.205678939819336, |
|
"eval_rewards/rejected": -1.745376706123352, |
|
"eval_runtime": 220.269, |
|
"eval_samples_per_second": 7.872, |
|
"eval_steps_per_second": 1.97, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5354532632388768, |
|
"grad_norm": 16.564014282307436, |
|
"learning_rate": 4.5846063462173284e-07, |
|
"logits/chosen": 0.5141347050666809, |
|
"logits/rejected": 0.5398997664451599, |
|
"logps/chosen": -38.93478012084961, |
|
"logps/rejected": -53.1637077331543, |
|
"loss": 0.2932, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.3137105405330658, |
|
"rewards/margins": 2.214162826538086, |
|
"rewards/rejected": -1.9004522562026978, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5400692396461084, |
|
"grad_norm": 30.180896923031582, |
|
"learning_rate": 4.573407936849044e-07, |
|
"logits/chosen": 0.49748367071151733, |
|
"logits/rejected": 0.502750039100647, |
|
"logps/chosen": -46.67736053466797, |
|
"logps/rejected": -48.594566345214844, |
|
"loss": 0.3143, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.39324572682380676, |
|
"rewards/margins": 1.9298076629638672, |
|
"rewards/rejected": -1.5365619659423828, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5446852160533402, |
|
"grad_norm": 43.03719615392396, |
|
"learning_rate": 4.5620746413728063e-07, |
|
"logits/chosen": 0.5845724940299988, |
|
"logits/rejected": 0.5915371775627136, |
|
"logps/chosen": -52.0160026550293, |
|
"logps/rejected": -49.12672805786133, |
|
"loss": 0.2833, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.14373371005058289, |
|
"rewards/margins": 2.1639184951782227, |
|
"rewards/rejected": -2.0201845169067383, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.5493011924605719, |
|
"grad_norm": 21.1030283537707, |
|
"learning_rate": 4.550607197079185e-07, |
|
"logits/chosen": 0.552834153175354, |
|
"logits/rejected": 0.5818264484405518, |
|
"logps/chosen": -38.04405212402344, |
|
"logps/rejected": -46.87253189086914, |
|
"loss": 0.2897, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.4428212344646454, |
|
"rewards/margins": 1.7602063417434692, |
|
"rewards/rejected": -1.317385196685791, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5539171688678035, |
|
"grad_norm": 14.340136381864786, |
|
"learning_rate": 4.5390063499858353e-07, |
|
"logits/chosen": 0.5454181432723999, |
|
"logits/rejected": 0.5769542455673218, |
|
"logps/chosen": -47.16811752319336, |
|
"logps/rejected": -62.15293884277344, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.36544325947761536, |
|
"rewards/margins": 2.6488418579101562, |
|
"rewards/rejected": -2.2833986282348633, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5539171688678035, |
|
"eval_logits/chosen": 0.41252779960632324, |
|
"eval_logits/rejected": 0.4378991425037384, |
|
"eval_logps/chosen": -40.974342346191406, |
|
"eval_logps/rejected": -51.8930778503418, |
|
"eval_loss": 0.2962896525859833, |
|
"eval_rewards/accuracies": 0.8070276379585266, |
|
"eval_rewards/chosen": 0.3854685127735138, |
|
"eval_rewards/margins": 2.312840223312378, |
|
"eval_rewards/rejected": -1.927371859550476, |
|
"eval_runtime": 220.4271, |
|
"eval_samples_per_second": 7.867, |
|
"eval_steps_per_second": 1.969, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5585331452750353, |
|
"grad_norm": 20.121912107871452, |
|
"learning_rate": 4.5272728547889687e-07, |
|
"logits/chosen": 0.5017317533493042, |
|
"logits/rejected": 0.5252359509468079, |
|
"logps/chosen": -43.418678283691406, |
|
"logps/rejected": -51.78999710083008, |
|
"loss": 0.2157, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.29254353046417236, |
|
"rewards/margins": 2.571570873260498, |
|
"rewards/rejected": -2.2790274620056152, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.5631491216822669, |
|
"grad_norm": 36.79556689673262, |
|
"learning_rate": 4.5154074748142535e-07, |
|
"logits/chosen": 0.5326908230781555, |
|
"logits/rejected": 0.5592876672744751, |
|
"logps/chosen": -45.176578521728516, |
|
"logps/rejected": -55.26374053955078, |
|
"loss": 0.2959, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.26444554328918457, |
|
"rewards/margins": 2.2498509883880615, |
|
"rewards/rejected": -1.985405445098877, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.5677650980894986, |
|
"grad_norm": 30.279268688467162, |
|
"learning_rate": 4.503410981967158e-07, |
|
"logits/chosen": 0.508591890335083, |
|
"logits/rejected": 0.5472189784049988, |
|
"logps/chosen": -37.81255340576172, |
|
"logps/rejected": -59.81355285644531, |
|
"loss": 0.3387, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.324074387550354, |
|
"rewards/margins": 2.479010581970215, |
|
"rewards/rejected": -2.1549363136291504, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5723810744967304, |
|
"grad_norm": 32.696656835575155, |
|
"learning_rate": 4.4912841566827333e-07, |
|
"logits/chosen": 0.5358154773712158, |
|
"logits/rejected": 0.572979211807251, |
|
"logps/chosen": -40.84016799926758, |
|
"logps/rejected": -57.57326889038086, |
|
"loss": 0.2559, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.6461736559867859, |
|
"rewards/margins": 2.717188835144043, |
|
"rewards/rejected": -2.0710153579711914, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.576997050903962, |
|
"grad_norm": 26.864795137183627, |
|
"learning_rate": 4.4790277878748415e-07, |
|
"logits/chosen": 0.5129296779632568, |
|
"logits/rejected": 0.543644368648529, |
|
"logps/chosen": -36.90694046020508, |
|
"logps/rejected": -51.41253662109375, |
|
"loss": 0.2466, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.4497109651565552, |
|
"rewards/margins": 2.559537172317505, |
|
"rewards/rejected": -2.1098265647888184, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.576997050903962, |
|
"eval_logits/chosen": 0.4140053689479828, |
|
"eval_logits/rejected": 0.43953680992126465, |
|
"eval_logps/chosen": -40.92128372192383, |
|
"eval_logps/rejected": -52.06728744506836, |
|
"eval_loss": 0.29202744364738464, |
|
"eval_rewards/accuracies": 0.8064516186714172, |
|
"eval_rewards/chosen": 0.41199636459350586, |
|
"eval_rewards/margins": 2.4264743328094482, |
|
"eval_rewards/rejected": -2.0144779682159424, |
|
"eval_runtime": 220.3958, |
|
"eval_samples_per_second": 7.868, |
|
"eval_steps_per_second": 1.969, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5816130273111938, |
|
"grad_norm": 34.34355868179491, |
|
"learning_rate": 4.466642672884835e-07, |
|
"logits/chosen": 0.5273095965385437, |
|
"logits/rejected": 0.5604310631752014, |
|
"logps/chosen": -39.039512634277344, |
|
"logps/rejected": -52.470951080322266, |
|
"loss": 0.2676, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.256040096282959, |
|
"rewards/margins": 2.4306235313415527, |
|
"rewards/rejected": -2.1745834350585938, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.5862290037184255, |
|
"grad_norm": 27.545044099293104, |
|
"learning_rate": 4.454129617429682e-07, |
|
"logits/chosen": 0.515310525894165, |
|
"logits/rejected": 0.5264334678649902, |
|
"logps/chosen": -41.25297546386719, |
|
"logps/rejected": -44.831031799316406, |
|
"loss": 0.2921, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.2963744103908539, |
|
"rewards/margins": 2.2201662063598633, |
|
"rewards/rejected": -1.9237921237945557, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5908449801256571, |
|
"grad_norm": 16.22258168997157, |
|
"learning_rate": 4.441489435549551e-07, |
|
"logits/chosen": 0.5497354865074158, |
|
"logits/rejected": 0.5820472240447998, |
|
"logps/chosen": -45.16104507446289, |
|
"logps/rejected": -60.09016799926758, |
|
"loss": 0.2492, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.36222705245018005, |
|
"rewards/margins": 2.6290435791015625, |
|
"rewards/rejected": -2.2668166160583496, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5954609565328889, |
|
"grad_norm": 22.519317936372268, |
|
"learning_rate": 4.4287229495548573e-07, |
|
"logits/chosen": 0.5290111303329468, |
|
"logits/rejected": 0.550987184047699, |
|
"logps/chosen": -45.896942138671875, |
|
"logps/rejected": -57.38431930541992, |
|
"loss": 0.2158, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.3132680654525757, |
|
"rewards/margins": 2.935549020767212, |
|
"rewards/rejected": -2.622281074523926, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.6000769329401205, |
|
"grad_norm": 33.27879387908239, |
|
"learning_rate": 4.415830989972761e-07, |
|
"logits/chosen": 0.613827645778656, |
|
"logits/rejected": 0.6395273208618164, |
|
"logps/chosen": -40.98984146118164, |
|
"logps/rejected": -48.8809700012207, |
|
"loss": 0.3209, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.3634183704853058, |
|
"rewards/margins": 2.285569190979004, |
|
"rewards/rejected": -1.922150731086731, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6000769329401205, |
|
"eval_logits/chosen": 0.41586774587631226, |
|
"eval_logits/rejected": 0.4413994550704956, |
|
"eval_logps/chosen": -41.435340881347656, |
|
"eval_logps/rejected": -52.66230773925781, |
|
"eval_loss": 0.28806936740875244, |
|
"eval_rewards/accuracies": 0.8116359710693359, |
|
"eval_rewards/chosen": 0.15496963262557983, |
|
"eval_rewards/margins": 2.4669582843780518, |
|
"eval_rewards/rejected": -2.3119888305664062, |
|
"eval_runtime": 220.1153, |
|
"eval_samples_per_second": 7.878, |
|
"eval_steps_per_second": 1.972, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6046929093473522, |
|
"grad_norm": 28.090703957454657, |
|
"learning_rate": 4.402814395493142e-07, |
|
"logits/chosen": 0.49612462520599365, |
|
"logits/rejected": 0.4979320168495178, |
|
"logps/chosen": -40.7058219909668, |
|
"logps/rejected": -38.908050537109375, |
|
"loss": 0.3653, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.15811699628829956, |
|
"rewards/margins": 1.8890395164489746, |
|
"rewards/rejected": -1.7309226989746094, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.609308885754584, |
|
"grad_norm": 20.963207734816056, |
|
"learning_rate": 4.3896740129140354e-07, |
|
"logits/chosen": 0.49926820397377014, |
|
"logits/rejected": 0.518930196762085, |
|
"logps/chosen": -41.947425842285156, |
|
"logps/rejected": -42.273597717285156, |
|
"loss": 0.2493, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.2666120231151581, |
|
"rewards/margins": 2.4279704093933105, |
|
"rewards/rejected": -2.161358594894409, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.6139248621618156, |
|
"grad_norm": 24.847993356607933, |
|
"learning_rate": 4.3764106970865456e-07, |
|
"logits/chosen": 0.5007407665252686, |
|
"logits/rejected": 0.5330516695976257, |
|
"logps/chosen": -36.07570266723633, |
|
"logps/rejected": -50.92935562133789, |
|
"loss": 0.3174, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.02925288677215576, |
|
"rewards/margins": 2.231614589691162, |
|
"rewards/rejected": -2.202361583709717, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6185408385690473, |
|
"grad_norm": 26.539349634561272, |
|
"learning_rate": 4.3630253108592305e-07, |
|
"logits/chosen": 0.5235443115234375, |
|
"logits/rejected": 0.5463228821754456, |
|
"logps/chosen": -48.52283477783203, |
|
"logps/rejected": -54.78059387207031, |
|
"loss": 0.2266, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.005189484916627407, |
|
"rewards/margins": 2.9114773273468018, |
|
"rewards/rejected": -2.9062881469726562, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.6231568149762791, |
|
"grad_norm": 35.3397663590889, |
|
"learning_rate": 4.3495187250219723e-07, |
|
"logits/chosen": 0.4959086775779724, |
|
"logits/rejected": 0.5330989360809326, |
|
"logps/chosen": -37.50285339355469, |
|
"logps/rejected": -56.99623489379883, |
|
"loss": 0.2865, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.16485626995563507, |
|
"rewards/margins": 2.9254465103149414, |
|
"rewards/rejected": -3.0903029441833496, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6231568149762791, |
|
"eval_logits/chosen": 0.4182251989841461, |
|
"eval_logits/rejected": 0.44391536712646484, |
|
"eval_logps/chosen": -41.51067352294922, |
|
"eval_logps/rejected": -52.77988052368164, |
|
"eval_loss": 0.2869359254837036, |
|
"eval_rewards/accuracies": 0.8116359710693359, |
|
"eval_rewards/chosen": 0.11730305105447769, |
|
"eval_rewards/margins": 2.488077163696289, |
|
"eval_rewards/rejected": -2.3707735538482666, |
|
"eval_runtime": 220.1579, |
|
"eval_samples_per_second": 7.876, |
|
"eval_steps_per_second": 1.971, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6277727913835107, |
|
"grad_norm": 23.403340630174217, |
|
"learning_rate": 4.3358918182493253e-07, |
|
"logits/chosen": 0.5670427083969116, |
|
"logits/rejected": 0.5846278071403503, |
|
"logps/chosen": -41.197166442871094, |
|
"logps/rejected": -48.75783920288086, |
|
"loss": 0.229, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.05103777348995209, |
|
"rewards/margins": 2.2875313758850098, |
|
"rewards/rejected": -2.338569164276123, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.6323887677907424, |
|
"grad_norm": 31.35543837574939, |
|
"learning_rate": 4.3221454770433554e-07, |
|
"logits/chosen": 0.5044899582862854, |
|
"logits/rejected": 0.5252879858016968, |
|
"logps/chosen": -46.43470764160156, |
|
"logps/rejected": -50.872764587402344, |
|
"loss": 0.2558, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.030280061066150665, |
|
"rewards/margins": 2.529269218444824, |
|
"rewards/rejected": -2.4989893436431885, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.6370047441979741, |
|
"grad_norm": 27.239886684790495, |
|
"learning_rate": 4.308280595675966e-07, |
|
"logits/chosen": 0.5399680733680725, |
|
"logits/rejected": 0.5539530515670776, |
|
"logps/chosen": -45.22441101074219, |
|
"logps/rejected": -51.61985397338867, |
|
"loss": 0.3439, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": -0.1256939023733139, |
|
"rewards/margins": 2.2664339542388916, |
|
"rewards/rejected": -2.392127752304077, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.6416207206052058, |
|
"grad_norm": 29.254953852014435, |
|
"learning_rate": 4.2942980761307227e-07, |
|
"logits/chosen": 0.5513600707054138, |
|
"logits/rejected": 0.5763798356056213, |
|
"logps/chosen": -42.95576477050781, |
|
"logps/rejected": -53.852542877197266, |
|
"loss": 0.2795, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.3323157727718353, |
|
"rewards/margins": 2.3478498458862305, |
|
"rewards/rejected": -2.680166006088257, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.6462366970124375, |
|
"grad_norm": 16.01715280590405, |
|
"learning_rate": 4.2801988280441765e-07, |
|
"logits/chosen": 0.5487841367721558, |
|
"logits/rejected": 0.5692893862724304, |
|
"logps/chosen": -45.817508697509766, |
|
"logps/rejected": -54.61252975463867, |
|
"loss": 0.2162, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.03073420189321041, |
|
"rewards/margins": 2.809882402420044, |
|
"rewards/rejected": -2.840616226196289, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6462366970124375, |
|
"eval_logits/chosen": 0.41910773515701294, |
|
"eval_logits/rejected": 0.44490164518356323, |
|
"eval_logps/chosen": -41.43645477294922, |
|
"eval_logps/rejected": -52.90102005004883, |
|
"eval_loss": 0.2802717387676239, |
|
"eval_rewards/accuracies": 0.8104838728904724, |
|
"eval_rewards/chosen": 0.15440984070301056, |
|
"eval_rewards/margins": 2.585754156112671, |
|
"eval_rewards/rejected": -2.431344509124756, |
|
"eval_runtime": 220.3099, |
|
"eval_samples_per_second": 7.871, |
|
"eval_steps_per_second": 1.97, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6508526734196692, |
|
"grad_norm": 21.181113416054586, |
|
"learning_rate": 4.2659837686466813e-07, |
|
"logits/chosen": 0.498602032661438, |
|
"logits/rejected": 0.5217832922935486, |
|
"logps/chosen": -40.613285064697266, |
|
"logps/rejected": -50.06806945800781, |
|
"loss": 0.262, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.07628664374351501, |
|
"rewards/margins": 2.542593240737915, |
|
"rewards/rejected": -2.466306447982788, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.6554686498269009, |
|
"grad_norm": 27.465624654814576, |
|
"learning_rate": 4.25165382270273e-07, |
|
"logits/chosen": 0.5099713206291199, |
|
"logits/rejected": 0.5337219834327698, |
|
"logps/chosen": -37.57986831665039, |
|
"logps/rejected": -45.39601516723633, |
|
"loss": 0.2483, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.15927743911743164, |
|
"rewards/margins": 2.373776912689209, |
|
"rewards/rejected": -2.2144994735717773, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.6600846262341326, |
|
"grad_norm": 24.232084058794833, |
|
"learning_rate": 4.2372099224507875e-07, |
|
"logits/chosen": 0.47430500388145447, |
|
"logits/rejected": 0.5168524980545044, |
|
"logps/chosen": -34.61323547363281, |
|
"logps/rejected": -60.36859130859375, |
|
"loss": 0.2904, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.013289166614413261, |
|
"rewards/margins": 2.84716534614563, |
|
"rewards/rejected": -2.860454797744751, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6647006026413643, |
|
"grad_norm": 28.26074226923709, |
|
"learning_rate": 4.2226530075426503e-07, |
|
"logits/chosen": 0.5559656620025635, |
|
"logits/rejected": 0.562049150466919, |
|
"logps/chosen": -48.77291488647461, |
|
"logps/rejected": -52.30695343017578, |
|
"loss": 0.2904, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.016986362636089325, |
|
"rewards/margins": 2.4160873889923096, |
|
"rewards/rejected": -2.3991012573242188, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.669316579048596, |
|
"grad_norm": 25.964047989048964, |
|
"learning_rate": 4.2079840249823106e-07, |
|
"logits/chosen": 0.5188059210777283, |
|
"logits/rejected": 0.5476034879684448, |
|
"logps/chosen": -43.39430236816406, |
|
"logps/rejected": -63.02970886230469, |
|
"loss": 0.2964, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.22233732044696808, |
|
"rewards/margins": 2.6584837436676025, |
|
"rewards/rejected": -2.8808212280273438, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.669316579048596, |
|
"eval_logits/chosen": 0.41873642802238464, |
|
"eval_logits/rejected": 0.44454658031463623, |
|
"eval_logps/chosen": -41.64173126220703, |
|
"eval_logps/rejected": -53.234169006347656, |
|
"eval_loss": 0.27578282356262207, |
|
"eval_rewards/accuracies": 0.8127880096435547, |
|
"eval_rewards/chosen": 0.05177304521203041, |
|
"eval_rewards/margins": 2.6496896743774414, |
|
"eval_rewards/rejected": -2.597916603088379, |
|
"eval_runtime": 220.2319, |
|
"eval_samples_per_second": 7.874, |
|
"eval_steps_per_second": 1.971, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6739325554558276, |
|
"grad_norm": 28.11981406671555, |
|
"learning_rate": 4.193203929064353e-07, |
|
"logits/chosen": 0.5352766513824463, |
|
"logits/rejected": 0.5633915066719055, |
|
"logps/chosen": -43.08574676513672, |
|
"logps/rejected": -63.65277099609375, |
|
"loss": 0.292, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": -0.09769348800182343, |
|
"rewards/margins": 2.7585980892181396, |
|
"rewards/rejected": -2.8562917709350586, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.6785485318630594, |
|
"grad_norm": 22.159785280949862, |
|
"learning_rate": 4.1783136813118705e-07, |
|
"logits/chosen": 0.5104035139083862, |
|
"logits/rejected": 0.5326347947120667, |
|
"logps/chosen": -44.235877990722656, |
|
"logps/rejected": -53.24985885620117, |
|
"loss": 0.2764, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.3315318822860718, |
|
"rewards/margins": 2.574824810028076, |
|
"rewards/rejected": -2.9063568115234375, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.6831645082702911, |
|
"grad_norm": 16.58376439365046, |
|
"learning_rate": 4.163314250413913e-07, |
|
"logits/chosen": 0.5757681131362915, |
|
"logits/rejected": 0.6053035855293274, |
|
"logps/chosen": -40.00181579589844, |
|
"logps/rejected": -50.29273986816406, |
|
"loss": 0.193, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.18691450357437134, |
|
"rewards/margins": 2.6521503925323486, |
|
"rewards/rejected": -2.465236186981201, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6877804846775227, |
|
"grad_norm": 32.319500846176076, |
|
"learning_rate": 4.1482066121624716e-07, |
|
"logits/chosen": 0.5265994668006897, |
|
"logits/rejected": 0.5376725792884827, |
|
"logps/chosen": -42.3819580078125, |
|
"logps/rejected": -43.448524475097656, |
|
"loss": 0.3285, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.1531985104084015, |
|
"rewards/margins": 2.268404245376587, |
|
"rewards/rejected": -2.115206003189087, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6923964610847545, |
|
"grad_norm": 23.349636529497012, |
|
"learning_rate": 4.1329917493889933e-07, |
|
"logits/chosen": 0.43518775701522827, |
|
"logits/rejected": 0.46238911151885986, |
|
"logps/chosen": -39.432003021240234, |
|
"logps/rejected": -52.38154983520508, |
|
"loss": 0.2382, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.1465599089860916, |
|
"rewards/margins": 2.628819704055786, |
|
"rewards/rejected": -2.7753796577453613, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6923964610847545, |
|
"eval_logits/chosen": 0.4236195683479309, |
|
"eval_logits/rejected": 0.4493381381034851, |
|
"eval_logps/chosen": -41.62788009643555, |
|
"eval_logps/rejected": -53.235809326171875, |
|
"eval_loss": 0.2743636965751648, |
|
"eval_rewards/accuracies": 0.8122119903564453, |
|
"eval_rewards/chosen": 0.05869903042912483, |
|
"eval_rewards/margins": 2.6574366092681885, |
|
"eval_rewards/rejected": -2.5987374782562256, |
|
"eval_runtime": 220.281, |
|
"eval_samples_per_second": 7.872, |
|
"eval_steps_per_second": 1.97, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6970124374919862, |
|
"grad_norm": 23.497513813632327, |
|
"learning_rate": 4.117670651900446e-07, |
|
"logits/chosen": 0.5692274570465088, |
|
"logits/rejected": 0.5857737064361572, |
|
"logps/chosen": -44.88375473022461, |
|
"logps/rejected": -50.89904022216797, |
|
"loss": 0.3059, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": -0.18370471894741058, |
|
"rewards/margins": 2.1322684288024902, |
|
"rewards/rejected": -2.3159730434417725, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.7016284138992178, |
|
"grad_norm": 31.67224576363876, |
|
"learning_rate": 4.1022443164149237e-07, |
|
"logits/chosen": 0.48219427466392517, |
|
"logits/rejected": 0.5107440948486328, |
|
"logps/chosen": -46.37804412841797, |
|
"logps/rejected": -62.33393859863281, |
|
"loss": 0.2685, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.04567752406001091, |
|
"rewards/margins": 2.84682559967041, |
|
"rewards/rejected": -2.892503261566162, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.7062443903064496, |
|
"grad_norm": 19.857257644454698, |
|
"learning_rate": 4.086713746496808e-07, |
|
"logits/chosen": 0.5637336373329163, |
|
"logits/rejected": 0.588976263999939, |
|
"logps/chosen": -39.28482437133789, |
|
"logps/rejected": -50.71957778930664, |
|
"loss": 0.2575, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.2317693531513214, |
|
"rewards/margins": 2.6872549057006836, |
|
"rewards/rejected": -2.4554860591888428, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.7108603667136812, |
|
"grad_norm": 17.71463775233371, |
|
"learning_rate": 4.0710799524914805e-07, |
|
"logits/chosen": 0.5934479832649231, |
|
"logits/rejected": 0.6081465482711792, |
|
"logps/chosen": -50.33334732055664, |
|
"logps/rejected": -55.25143814086914, |
|
"loss": 0.2103, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.034923017024993896, |
|
"rewards/margins": 2.6961231231689453, |
|
"rewards/rejected": -2.731046199798584, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.7154763431209129, |
|
"grad_norm": 19.132153588643654, |
|
"learning_rate": 4.055343951459592e-07, |
|
"logits/chosen": 0.5560102462768555, |
|
"logits/rejected": 0.5947719812393188, |
|
"logps/chosen": -37.43670654296875, |
|
"logps/rejected": -57.06461715698242, |
|
"loss": 0.226, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.07254935055971146, |
|
"rewards/margins": 2.918682336807251, |
|
"rewards/rejected": -2.991231918334961, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7154763431209129, |
|
"eval_logits/chosen": 0.42303159832954407, |
|
"eval_logits/rejected": 0.44889286160469055, |
|
"eval_logps/chosen": -41.60685348510742, |
|
"eval_logps/rejected": -53.284358978271484, |
|
"eval_loss": 0.27253130078315735, |
|
"eval_rewards/accuracies": 0.8133640289306641, |
|
"eval_rewards/chosen": 0.06921074539422989, |
|
"eval_rewards/margins": 2.692223072052002, |
|
"eval_rewards/rejected": -2.6230127811431885, |
|
"eval_runtime": 220.2961, |
|
"eval_samples_per_second": 7.871, |
|
"eval_steps_per_second": 1.97, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7200923195281447, |
|
"grad_norm": 20.574269162073108, |
|
"learning_rate": 4.0395067671108985e-07, |
|
"logits/chosen": 0.47218936681747437, |
|
"logits/rejected": 0.5014721155166626, |
|
"logps/chosen": -35.916664123535156, |
|
"logps/rejected": -44.856101989746094, |
|
"loss": 0.2579, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.182376891374588, |
|
"rewards/margins": 2.569021701812744, |
|
"rewards/rejected": -2.3866446018218994, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.7247082959353763, |
|
"grad_norm": 30.250167869534483, |
|
"learning_rate": 4.0235694297376637e-07, |
|
"logits/chosen": 0.5631113648414612, |
|
"logits/rejected": 0.5769122242927551, |
|
"logps/chosen": -49.87733459472656, |
|
"logps/rejected": -55.8229866027832, |
|
"loss": 0.2861, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.1988232433795929, |
|
"rewards/margins": 2.635685443878174, |
|
"rewards/rejected": -2.4368624687194824, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.729324272342608, |
|
"grad_norm": 32.09859733085628, |
|
"learning_rate": 4.0075329761476347e-07, |
|
"logits/chosen": 0.5582194924354553, |
|
"logits/rejected": 0.5716796517372131, |
|
"logps/chosen": -44.06077575683594, |
|
"logps/rejected": -48.060577392578125, |
|
"loss": 0.2637, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14417774975299835, |
|
"rewards/margins": 2.182429313659668, |
|
"rewards/rejected": -2.3266072273254395, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.7339402487498398, |
|
"grad_norm": 20.839702603979845, |
|
"learning_rate": 3.991398449596588e-07, |
|
"logits/chosen": 0.5104639530181885, |
|
"logits/rejected": 0.5302228331565857, |
|
"logps/chosen": -46.450565338134766, |
|
"logps/rejected": -56.8250732421875, |
|
"loss": 0.2178, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.05337013676762581, |
|
"rewards/margins": 2.7899389266967773, |
|
"rewards/rejected": -2.7365689277648926, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.7385562251570714, |
|
"grad_norm": 35.607964067039056, |
|
"learning_rate": 3.9751668997204647e-07, |
|
"logits/chosen": 0.573165774345398, |
|
"logits/rejected": 0.592732310295105, |
|
"logps/chosen": -46.10280990600586, |
|
"logps/rejected": -53.3104248046875, |
|
"loss": 0.238, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.08940169960260391, |
|
"rewards/margins": 2.5656909942626953, |
|
"rewards/rejected": -2.655092716217041, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7385562251570714, |
|
"eval_logits/chosen": 0.4224054217338562, |
|
"eval_logits/rejected": 0.4482380449771881, |
|
"eval_logps/chosen": -41.65960693359375, |
|
"eval_logps/rejected": -53.47556686401367, |
|
"eval_loss": 0.2701371908187866, |
|
"eval_rewards/accuracies": 0.8185483813285828, |
|
"eval_rewards/chosen": 0.04283595457673073, |
|
"eval_rewards/margins": 2.761453866958618, |
|
"eval_rewards/rejected": -2.718618154525757, |
|
"eval_runtime": 220.4956, |
|
"eval_samples_per_second": 7.864, |
|
"eval_steps_per_second": 1.968, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7431722015643031, |
|
"grad_norm": 40.34998221595971, |
|
"learning_rate": 3.958839382467084e-07, |
|
"logits/chosen": 0.5077357888221741, |
|
"logits/rejected": 0.5302278995513916, |
|
"logps/chosen": -38.23583984375, |
|
"logps/rejected": -49.62001037597656, |
|
"loss": 0.2911, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.431808739900589, |
|
"rewards/margins": 2.4383790493011475, |
|
"rewards/rejected": -2.0065698623657227, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.7477881779715349, |
|
"grad_norm": 37.34949673704143, |
|
"learning_rate": 3.9424169600274494e-07, |
|
"logits/chosen": 0.5166856646537781, |
|
"logits/rejected": 0.5311781167984009, |
|
"logps/chosen": -43.24025344848633, |
|
"logps/rejected": -48.49333190917969, |
|
"loss": 0.3054, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.1698003113269806, |
|
"rewards/margins": 2.2522177696228027, |
|
"rewards/rejected": -2.422018051147461, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.7524041543787665, |
|
"grad_norm": 25.91010722050029, |
|
"learning_rate": 3.9259007007666436e-07, |
|
"logits/chosen": 0.5167285203933716, |
|
"logits/rejected": 0.5338759422302246, |
|
"logps/chosen": -44.82267761230469, |
|
"logps/rejected": -55.40620803833008, |
|
"loss": 0.2723, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.06528851389884949, |
|
"rewards/margins": 2.759828805923462, |
|
"rewards/rejected": -2.694540023803711, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.7570201307859982, |
|
"grad_norm": 30.862683948057615, |
|
"learning_rate": 3.909291679154332e-07, |
|
"logits/chosen": 0.5040656328201294, |
|
"logits/rejected": 0.5386430025100708, |
|
"logps/chosen": -42.25190734863281, |
|
"logps/rejected": -62.51930618286133, |
|
"loss": 0.2759, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.2548324167728424, |
|
"rewards/margins": 3.0783848762512207, |
|
"rewards/rejected": -3.333217144012451, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.7616361071932299, |
|
"grad_norm": 19.125155732205084, |
|
"learning_rate": 3.892590975694858e-07, |
|
"logits/chosen": 0.49563461542129517, |
|
"logits/rejected": 0.539116621017456, |
|
"logps/chosen": -39.31736755371094, |
|
"logps/rejected": -60.45228576660156, |
|
"loss": 0.2182, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.16796639561653137, |
|
"rewards/margins": 3.4695467948913574, |
|
"rewards/rejected": -3.301579713821411, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7616361071932299, |
|
"eval_logits/chosen": 0.4225333333015442, |
|
"eval_logits/rejected": 0.44842836260795593, |
|
"eval_logps/chosen": -41.670494079589844, |
|
"eval_logps/rejected": -53.553314208984375, |
|
"eval_loss": 0.2688952684402466, |
|
"eval_rewards/accuracies": 0.8145161271095276, |
|
"eval_rewards/chosen": 0.037393342703580856, |
|
"eval_rewards/margins": 2.7948849201202393, |
|
"eval_rewards/rejected": -2.7574915885925293, |
|
"eval_runtime": 220.4734, |
|
"eval_samples_per_second": 7.865, |
|
"eval_steps_per_second": 1.968, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7662520836004616, |
|
"grad_norm": 20.197390141727503, |
|
"learning_rate": 3.875799676856952e-07, |
|
"logits/chosen": 0.5481100082397461, |
|
"logits/rejected": 0.5680783987045288, |
|
"logps/chosen": -43.26856994628906, |
|
"logps/rejected": -54.90293884277344, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.2920362651348114, |
|
"rewards/margins": 2.9112956523895264, |
|
"rewards/rejected": -3.20333194732666, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.7708680600076933, |
|
"grad_norm": 28.41138671183374, |
|
"learning_rate": 3.858918875003053e-07, |
|
"logits/chosen": 0.5375738143920898, |
|
"logits/rejected": 0.5755133628845215, |
|
"logps/chosen": -41.622859954833984, |
|
"logps/rejected": -61.92311096191406, |
|
"loss": 0.2733, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.09028860926628113, |
|
"rewards/margins": 3.286768674850464, |
|
"rewards/rejected": -3.3770573139190674, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.775484036414925, |
|
"grad_norm": 16.265551276537238, |
|
"learning_rate": 3.8419496683182396e-07, |
|
"logits/chosen": 0.5556432604789734, |
|
"logits/rejected": 0.5942565202713013, |
|
"logps/chosen": -41.74842071533203, |
|
"logps/rejected": -57.50096893310547, |
|
"loss": 0.1896, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.0623447448015213, |
|
"rewards/margins": 2.878957748413086, |
|
"rewards/rejected": -2.941302537918091, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.7801000128221567, |
|
"grad_norm": 26.59915287717055, |
|
"learning_rate": 3.824893160738792e-07, |
|
"logits/chosen": 0.5246456861495972, |
|
"logits/rejected": 0.553848385810852, |
|
"logps/chosen": -42.39156723022461, |
|
"logps/rejected": -57.20592498779297, |
|
"loss": 0.2682, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": -0.07438618689775467, |
|
"rewards/margins": 3.046879291534424, |
|
"rewards/rejected": -3.1212656497955322, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.7847159892293883, |
|
"grad_norm": 23.023616857684974, |
|
"learning_rate": 3.8077504618803737e-07, |
|
"logits/chosen": 0.580450713634491, |
|
"logits/rejected": 0.5835237503051758, |
|
"logps/chosen": -48.9189567565918, |
|
"logps/rejected": -47.836578369140625, |
|
"loss": 0.2668, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.06672815978527069, |
|
"rewards/margins": 2.457933187484741, |
|
"rewards/rejected": -2.5246615409851074, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7847159892293883, |
|
"eval_logits/chosen": 0.4240727126598358, |
|
"eval_logits/rejected": 0.4500102698802948, |
|
"eval_logps/chosen": -41.714290618896484, |
|
"eval_logps/rejected": -53.6696662902832, |
|
"eval_loss": 0.2670309841632843, |
|
"eval_rewards/accuracies": 0.8179723620414734, |
|
"eval_rewards/chosen": 0.015493539161980152, |
|
"eval_rewards/margins": 2.83115816116333, |
|
"eval_rewards/rejected": -2.815664768218994, |
|
"eval_runtime": 220.6721, |
|
"eval_samples_per_second": 7.858, |
|
"eval_steps_per_second": 1.967, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7893319656366201, |
|
"grad_norm": 16.479244956266236, |
|
"learning_rate": 3.7905226869658446e-07, |
|
"logits/chosen": 0.4684799015522003, |
|
"logits/rejected": 0.4874458909034729, |
|
"logps/chosen": -43.62626647949219, |
|
"logps/rejected": -55.70362854003906, |
|
"loss": 0.2494, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.13190358877182007, |
|
"rewards/margins": 2.8091206550598145, |
|
"rewards/rejected": -2.6772167682647705, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.7939479420438518, |
|
"grad_norm": 24.369877883114157, |
|
"learning_rate": 3.773210956752709e-07, |
|
"logits/chosen": 0.544243574142456, |
|
"logits/rejected": 0.5578660368919373, |
|
"logps/chosen": -40.1495246887207, |
|
"logps/rejected": -44.17314910888672, |
|
"loss": 0.2798, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.020625757053494453, |
|
"rewards/margins": 2.502214193344116, |
|
"rewards/rejected": -2.481588363647461, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7985639184510834, |
|
"grad_norm": 25.623903462647995, |
|
"learning_rate": 3.7558163974602093e-07, |
|
"logits/chosen": 0.474899560213089, |
|
"logits/rejected": 0.5161857008934021, |
|
"logps/chosen": -37.74607467651367, |
|
"logps/rejected": -55.48906707763672, |
|
"loss": 0.2419, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.001830246765166521, |
|
"rewards/margins": 2.923034906387329, |
|
"rewards/rejected": -2.924865245819092, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.8031798948583152, |
|
"grad_norm": 25.184522607734593, |
|
"learning_rate": 3.73834014069605e-07, |
|
"logits/chosen": 0.558302104473114, |
|
"logits/rejected": 0.5833041667938232, |
|
"logps/chosen": -48.4046630859375, |
|
"logps/rejected": -61.20756149291992, |
|
"loss": 0.2374, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.1346227377653122, |
|
"rewards/margins": 2.8843278884887695, |
|
"rewards/rejected": -3.0189502239227295, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.8077958712655469, |
|
"grad_norm": 24.77024105098058, |
|
"learning_rate": 3.7207833233827914e-07, |
|
"logits/chosen": 0.4649287462234497, |
|
"logits/rejected": 0.482571542263031, |
|
"logps/chosen": -44.39389419555664, |
|
"logps/rejected": -58.24624252319336, |
|
"loss": 0.2534, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.14530682563781738, |
|
"rewards/margins": 3.2228527069091797, |
|
"rewards/rejected": -3.368159532546997, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8077958712655469, |
|
"eval_logits/chosen": 0.42746230959892273, |
|
"eval_logits/rejected": 0.45336535573005676, |
|
"eval_logps/chosen": -42.037269592285156, |
|
"eval_logps/rejected": -54.03358459472656, |
|
"eval_loss": 0.2634715437889099, |
|
"eval_rewards/accuracies": 0.8168202638626099, |
|
"eval_rewards/chosen": -0.1459963023662567, |
|
"eval_rewards/margins": 2.8516335487365723, |
|
"eval_rewards/rejected": -2.9976296424865723, |
|
"eval_runtime": 220.3701, |
|
"eval_samples_per_second": 7.869, |
|
"eval_steps_per_second": 1.969, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8124118476727785, |
|
"grad_norm": 26.201135314502036, |
|
"learning_rate": 3.7031470876838786e-07, |
|
"logits/chosen": 0.5293068289756775, |
|
"logits/rejected": 0.5655782222747803, |
|
"logps/chosen": -42.89842224121094, |
|
"logps/rejected": -63.14483642578125, |
|
"loss": 0.2516, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.4706004559993744, |
|
"rewards/margins": 2.8817062377929688, |
|
"rewards/rejected": -3.352307081222534, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.8170278240800103, |
|
"grad_norm": 22.294887268242963, |
|
"learning_rate": 3.6854325809293455e-07, |
|
"logits/chosen": 0.49771615862846375, |
|
"logits/rejected": 0.5413529276847839, |
|
"logps/chosen": -36.90867233276367, |
|
"logps/rejected": -64.4770278930664, |
|
"loss": 0.2284, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.27428972721099854, |
|
"rewards/margins": 3.501157522201538, |
|
"rewards/rejected": -3.775447368621826, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.821643800487242, |
|
"grad_norm": 28.188753078893058, |
|
"learning_rate": 3.6676409555411653e-07, |
|
"logits/chosen": 0.5484297871589661, |
|
"logits/rejected": 0.5813949704170227, |
|
"logps/chosen": -45.460365295410156, |
|
"logps/rejected": -60.86439895629883, |
|
"loss": 0.2542, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.4246326684951782, |
|
"rewards/margins": 3.2056918144226074, |
|
"rewards/rejected": -3.630324363708496, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.8262597768944736, |
|
"grad_norm": 17.14121226520804, |
|
"learning_rate": 3.6497733689582866e-07, |
|
"logits/chosen": 0.48876845836639404, |
|
"logits/rejected": 0.5145962238311768, |
|
"logps/chosen": -39.37761688232422, |
|
"logps/rejected": -49.643211364746094, |
|
"loss": 0.2016, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21149006485939026, |
|
"rewards/margins": 2.893353223800659, |
|
"rewards/rejected": -3.1048433780670166, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.8308757533017054, |
|
"grad_norm": 35.17955186267088, |
|
"learning_rate": 3.631830983561335e-07, |
|
"logits/chosen": 0.573662519454956, |
|
"logits/rejected": 0.5948094725608826, |
|
"logps/chosen": -47.85080337524414, |
|
"logps/rejected": -52.225006103515625, |
|
"loss": 0.2586, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.3559052646160126, |
|
"rewards/margins": 2.786222219467163, |
|
"rewards/rejected": -3.142127513885498, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8308757533017054, |
|
"eval_logits/chosen": 0.42756161093711853, |
|
"eval_logits/rejected": 0.45349106192588806, |
|
"eval_logps/chosen": -42.38340759277344, |
|
"eval_logps/rejected": -54.44844436645508, |
|
"eval_loss": 0.2630784213542938, |
|
"eval_rewards/accuracies": 0.8179723620414734, |
|
"eval_rewards/chosen": -0.31906506419181824, |
|
"eval_rewards/margins": 2.8859920501708984, |
|
"eval_rewards/rejected": -3.205056667327881, |
|
"eval_runtime": 220.2057, |
|
"eval_samples_per_second": 7.874, |
|
"eval_steps_per_second": 1.971, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.835491729708937, |
|
"grad_norm": 36.03053976982613, |
|
"learning_rate": 3.613814966596991e-07, |
|
"logits/chosen": 0.5263631343841553, |
|
"logits/rejected": 0.5573300123214722, |
|
"logps/chosen": -43.24696731567383, |
|
"logps/rejected": -57.23331069946289, |
|
"loss": 0.2526, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.4683598279953003, |
|
"rewards/margins": 3.082267999649048, |
|
"rewards/rejected": -3.5506277084350586, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.8401077061161687, |
|
"grad_norm": 15.328563865471402, |
|
"learning_rate": 3.595726490102059e-07, |
|
"logits/chosen": 0.5707637071609497, |
|
"logits/rejected": 0.6143693327903748, |
|
"logps/chosen": -40.44147491455078, |
|
"logps/rejected": -62.61209487915039, |
|
"loss": 0.1294, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": -0.3496915102005005, |
|
"rewards/margins": 3.486618995666504, |
|
"rewards/rejected": -3.836310863494873, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.8447236825234005, |
|
"grad_norm": 15.002635114989888, |
|
"learning_rate": 3.577566730827214e-07, |
|
"logits/chosen": 0.5126733779907227, |
|
"logits/rejected": 0.5439874529838562, |
|
"logps/chosen": -40.29549789428711, |
|
"logps/rejected": -56.204898834228516, |
|
"loss": 0.2951, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": -0.3362084925174713, |
|
"rewards/margins": 2.846021890640259, |
|
"rewards/rejected": -3.182230234146118, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.8493396589306321, |
|
"grad_norm": 25.52691859216037, |
|
"learning_rate": 3.559336870160453e-07, |
|
"logits/chosen": 0.5128374099731445, |
|
"logits/rejected": 0.5424924492835999, |
|
"logps/chosen": -38.71543884277344, |
|
"logps/rejected": -52.61689758300781, |
|
"loss": 0.2084, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.28658950328826904, |
|
"rewards/margins": 3.0817792415618896, |
|
"rewards/rejected": -3.368368625640869, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.8539556353378638, |
|
"grad_norm": 30.283513234320385, |
|
"learning_rate": 3.541038094050241e-07, |
|
"logits/chosen": 0.515430212020874, |
|
"logits/rejected": 0.5466374158859253, |
|
"logps/chosen": -45.59136962890625, |
|
"logps/rejected": -63.18849182128906, |
|
"loss": 0.2378, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.5768634676933289, |
|
"rewards/margins": 3.5630674362182617, |
|
"rewards/rejected": -4.139930725097656, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8539556353378638, |
|
"eval_logits/chosen": 0.4274056553840637, |
|
"eval_logits/rejected": 0.45338377356529236, |
|
"eval_logps/chosen": -43.063682556152344, |
|
"eval_logps/rejected": -55.225093841552734, |
|
"eval_loss": 0.2617854177951813, |
|
"eval_rewards/accuracies": 0.817396342754364, |
|
"eval_rewards/chosen": -0.659203290939331, |
|
"eval_rewards/margins": 2.9341788291931152, |
|
"eval_rewards/rejected": -3.5933821201324463, |
|
"eval_runtime": 220.2088, |
|
"eval_samples_per_second": 7.874, |
|
"eval_steps_per_second": 1.971, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8585716117450956, |
|
"grad_norm": 30.9826241797592, |
|
"learning_rate": 3.52267159292835e-07, |
|
"logits/chosen": 0.4993041455745697, |
|
"logits/rejected": 0.5248599052429199, |
|
"logps/chosen": -44.83211898803711, |
|
"logps/rejected": -61.29323959350586, |
|
"loss": 0.2333, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": -0.7047384977340698, |
|
"rewards/margins": 3.358118772506714, |
|
"rewards/rejected": -4.062856674194336, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.8631875881523272, |
|
"grad_norm": 16.52463887201103, |
|
"learning_rate": 3.5042385616324236e-07, |
|
"logits/chosen": 0.4287330210208893, |
|
"logits/rejected": 0.46707651019096375, |
|
"logps/chosen": -36.363590240478516, |
|
"logps/rejected": -59.82657241821289, |
|
"loss": 0.22, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.8720024228096008, |
|
"rewards/margins": 3.389249086380005, |
|
"rewards/rejected": -4.261251449584961, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8678035645595589, |
|
"grad_norm": 15.500715269356169, |
|
"learning_rate": 3.485740199328244e-07, |
|
"logits/chosen": 0.5408291816711426, |
|
"logits/rejected": 0.5578600764274597, |
|
"logps/chosen": -50.285335540771484, |
|
"logps/rejected": -54.07209014892578, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.5448592305183411, |
|
"rewards/margins": 3.2346181869506836, |
|
"rewards/rejected": -3.779477119445801, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8724195409667906, |
|
"grad_norm": 12.222084345575727, |
|
"learning_rate": 3.4671777094317196e-07, |
|
"logits/chosen": 0.5013281106948853, |
|
"logits/rejected": 0.5262949466705322, |
|
"logps/chosen": -46.47956848144531, |
|
"logps/rejected": -53.49814224243164, |
|
"loss": 0.1677, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.7341945767402649, |
|
"rewards/margins": 3.0543222427368164, |
|
"rewards/rejected": -3.7885169982910156, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.8770355173740223, |
|
"grad_norm": 22.531696347522484, |
|
"learning_rate": 3.448552299530595e-07, |
|
"logits/chosen": 0.5649933218955994, |
|
"logits/rejected": 0.5860426425933838, |
|
"logps/chosen": -42.52098083496094, |
|
"logps/rejected": -52.308616638183594, |
|
"loss": 0.3071, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": -0.9869860410690308, |
|
"rewards/margins": 2.7113142013549805, |
|
"rewards/rejected": -3.698300361633301, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8770355173740223, |
|
"eval_logits/chosen": 0.4274827539920807, |
|
"eval_logits/rejected": 0.45349830389022827, |
|
"eval_logps/chosen": -43.129615783691406, |
|
"eval_logps/rejected": -55.33893585205078, |
|
"eval_loss": 0.2627149224281311, |
|
"eval_rewards/accuracies": 0.8156682252883911, |
|
"eval_rewards/chosen": -0.6921693086624146, |
|
"eval_rewards/margins": 2.958131790161133, |
|
"eval_rewards/rejected": -3.650301218032837, |
|
"eval_runtime": 220.3046, |
|
"eval_samples_per_second": 7.871, |
|
"eval_steps_per_second": 1.97, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.881651493781254, |
|
"grad_norm": 39.03269809250303, |
|
"learning_rate": 3.429865181305894e-07, |
|
"logits/chosen": 0.5594089031219482, |
|
"logits/rejected": 0.5762946605682373, |
|
"logps/chosen": -46.85918045043945, |
|
"logps/rejected": -55.68655776977539, |
|
"loss": 0.2915, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.9153691530227661, |
|
"rewards/margins": 2.779404401779175, |
|
"rewards/rejected": -3.694772958755493, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8862674701884857, |
|
"grad_norm": 25.617922410092657, |
|
"learning_rate": 3.411117570453091e-07, |
|
"logits/chosen": 0.5484945774078369, |
|
"logits/rejected": 0.5738579034805298, |
|
"logps/chosen": -42.73631286621094, |
|
"logps/rejected": -53.853271484375, |
|
"loss": 0.2369, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.7328565120697021, |
|
"rewards/margins": 2.8266656398773193, |
|
"rewards/rejected": -3.5595223903656006, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.8908834465957174, |
|
"grad_norm": 30.869961559508535, |
|
"learning_rate": 3.392310686603025e-07, |
|
"logits/chosen": 0.534080982208252, |
|
"logits/rejected": 0.5444844365119934, |
|
"logps/chosen": -42.41215515136719, |
|
"logps/rejected": -50.85294723510742, |
|
"loss": 0.2909, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": -0.9006066918373108, |
|
"rewards/margins": 2.361262559890747, |
|
"rewards/rejected": -3.261868953704834, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.895499423002949, |
|
"grad_norm": 19.657432685783167, |
|
"learning_rate": 3.3734457532425554e-07, |
|
"logits/chosen": 0.5231594443321228, |
|
"logits/rejected": 0.5530441403388977, |
|
"logps/chosen": -42.48830795288086, |
|
"logps/rejected": -57.00692367553711, |
|
"loss": 0.2606, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.6170899271965027, |
|
"rewards/margins": 3.237041711807251, |
|
"rewards/rejected": -3.8541314601898193, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.9001153994101808, |
|
"grad_norm": 24.399140672578795, |
|
"learning_rate": 3.354523997634969e-07, |
|
"logits/chosen": 0.540899932384491, |
|
"logits/rejected": 0.5695917010307312, |
|
"logps/chosen": -44.531185150146484, |
|
"logps/rejected": -58.8494873046875, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.7790883183479309, |
|
"rewards/margins": 3.128167152404785, |
|
"rewards/rejected": -3.9072554111480713, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9001153994101808, |
|
"eval_logits/chosen": 0.42857107520103455, |
|
"eval_logits/rejected": 0.4546278119087219, |
|
"eval_logps/chosen": -43.16852951049805, |
|
"eval_logps/rejected": -55.42344665527344, |
|
"eval_loss": 0.2621525228023529, |
|
"eval_rewards/accuracies": 0.8179723620414734, |
|
"eval_rewards/chosen": -0.7116276621818542, |
|
"eval_rewards/margins": 2.980929374694824, |
|
"eval_rewards/rejected": -3.6925570964813232, |
|
"eval_runtime": 220.3143, |
|
"eval_samples_per_second": 7.871, |
|
"eval_steps_per_second": 1.97, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9047313758174125, |
|
"grad_norm": 35.01908054863291, |
|
"learning_rate": 3.3355466507401374e-07, |
|
"logits/chosen": 0.5315423607826233, |
|
"logits/rejected": 0.5454668998718262, |
|
"logps/chosen": -42.16218185424805, |
|
"logps/rejected": -44.85585403442383, |
|
"loss": 0.372, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": -0.805086612701416, |
|
"rewards/margins": 2.338005542755127, |
|
"rewards/rejected": -3.143092155456543, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.9093473522246441, |
|
"grad_norm": 21.288998506479572, |
|
"learning_rate": 3.3165149471344394e-07, |
|
"logits/chosen": 0.5552914142608643, |
|
"logits/rejected": 0.5818530321121216, |
|
"logps/chosen": -42.95904541015625, |
|
"logps/rejected": -52.76212692260742, |
|
"loss": 0.2934, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.9580552577972412, |
|
"rewards/margins": 2.6676671504974365, |
|
"rewards/rejected": -3.6257221698760986, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.9139633286318759, |
|
"grad_norm": 25.556003693396036, |
|
"learning_rate": 3.297430124930444e-07, |
|
"logits/chosen": 0.582655668258667, |
|
"logits/rejected": 0.5952574014663696, |
|
"logps/chosen": -48.771934509277344, |
|
"logps/rejected": -54.426483154296875, |
|
"loss": 0.3223, |
|
"rewards/accuracies": 0.7361111044883728, |
|
"rewards/chosen": -0.6146318912506104, |
|
"rewards/margins": 2.4974234104156494, |
|
"rewards/rejected": -3.112055540084839, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.9185793050391076, |
|
"grad_norm": 23.905362174336005, |
|
"learning_rate": 3.2782934256963647e-07, |
|
"logits/chosen": 0.5089656114578247, |
|
"logits/rejected": 0.5398065447807312, |
|
"logps/chosen": -45.75530242919922, |
|
"logps/rejected": -61.64253234863281, |
|
"loss": 0.2549, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.6105983853340149, |
|
"rewards/margins": 3.1589841842651367, |
|
"rewards/rejected": -3.769582509994507, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.9231952814463392, |
|
"grad_norm": 24.17532494020093, |
|
"learning_rate": 3.259106094375289e-07, |
|
"logits/chosen": 0.539167046546936, |
|
"logits/rejected": 0.5812445282936096, |
|
"logps/chosen": -39.31736755371094, |
|
"logps/rejected": -63.33793640136719, |
|
"loss": 0.2698, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.3948301374912262, |
|
"rewards/margins": 3.442387819290161, |
|
"rewards/rejected": -3.8372182846069336, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9231952814463392, |
|
"eval_logits/chosen": 0.42656469345092773, |
|
"eval_logits/rejected": 0.45276370644569397, |
|
"eval_logps/chosen": -42.66855239868164, |
|
"eval_logps/rejected": -55.0075798034668, |
|
"eval_loss": 0.2560158371925354, |
|
"eval_rewards/accuracies": 0.8231566548347473, |
|
"eval_rewards/chosen": -0.46163854002952576, |
|
"eval_rewards/margins": 3.0229856967926025, |
|
"eval_rewards/rejected": -3.4846243858337402, |
|
"eval_runtime": 220.2216, |
|
"eval_samples_per_second": 7.874, |
|
"eval_steps_per_second": 1.971, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.927811257853571, |
|
"grad_norm": 30.671620714098214, |
|
"learning_rate": 3.239869379204189e-07, |
|
"logits/chosen": 0.4974105656147003, |
|
"logits/rejected": 0.5221477746963501, |
|
"logps/chosen": -45.057281494140625, |
|
"logps/rejected": -56.83816909790039, |
|
"loss": 0.2017, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": -0.5868238210678101, |
|
"rewards/margins": 3.3964414596557617, |
|
"rewards/rejected": -3.9832653999328613, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9324272342608027, |
|
"grad_norm": 24.915176146115876, |
|
"learning_rate": 3.2205845316327144e-07, |
|
"logits/chosen": 0.5429517030715942, |
|
"logits/rejected": 0.5683455467224121, |
|
"logps/chosen": -34.97327423095703, |
|
"logps/rejected": -46.666717529296875, |
|
"loss": 0.3399, |
|
"rewards/accuracies": 0.7222222089767456, |
|
"rewards/chosen": -0.43591320514678955, |
|
"rewards/margins": 2.185106039047241, |
|
"rewards/rejected": -2.6210196018218994, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.9370432106680343, |
|
"grad_norm": 23.867375292949593, |
|
"learning_rate": 3.2012528062417845e-07, |
|
"logits/chosen": 0.5323294997215271, |
|
"logits/rejected": 0.5459015369415283, |
|
"logps/chosen": -43.10551071166992, |
|
"logps/rejected": -47.71934127807617, |
|
"loss": 0.2436, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.7240028977394104, |
|
"rewards/margins": 2.4708030223846436, |
|
"rewards/rejected": -3.1948060989379883, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.9416591870752661, |
|
"grad_norm": 15.007721932706033, |
|
"learning_rate": 3.1818754606619643e-07, |
|
"logits/chosen": 0.5331852436065674, |
|
"logits/rejected": 0.564946174621582, |
|
"logps/chosen": -36.540283203125, |
|
"logps/rejected": -57.03317642211914, |
|
"loss": 0.2822, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16474466025829315, |
|
"rewards/margins": 3.167923927307129, |
|
"rewards/rejected": -3.3326683044433594, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.9462751634824977, |
|
"grad_norm": 22.364487052769828, |
|
"learning_rate": 3.162453755491655e-07, |
|
"logits/chosen": 0.49684393405914307, |
|
"logits/rejected": 0.5316374897956848, |
|
"logps/chosen": -38.39241027832031, |
|
"logps/rejected": -59.15244674682617, |
|
"loss": 0.1874, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.322665810585022, |
|
"rewards/margins": 3.4969892501831055, |
|
"rewards/rejected": -3.819655179977417, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9462751634824977, |
|
"eval_logits/chosen": 0.4290708899497986, |
|
"eval_logits/rejected": 0.45515918731689453, |
|
"eval_logps/chosen": -42.679603576660156, |
|
"eval_logps/rejected": -55.10276412963867, |
|
"eval_loss": 0.2565246820449829, |
|
"eval_rewards/accuracies": 0.8191244006156921, |
|
"eval_rewards/chosen": -0.467162162065506, |
|
"eval_rewards/margins": 3.065053939819336, |
|
"eval_rewards/rejected": -3.5322158336639404, |
|
"eval_runtime": 220.2891, |
|
"eval_samples_per_second": 7.871, |
|
"eval_steps_per_second": 1.97, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9508911398897294, |
|
"grad_norm": 25.19862106785063, |
|
"learning_rate": 3.142988954215079e-07, |
|
"logits/chosen": 0.5264102816581726, |
|
"logits/rejected": 0.5622512698173523, |
|
"logps/chosen": -43.48373794555664, |
|
"logps/rejected": -66.42120361328125, |
|
"loss": 0.2996, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.48827776312828064, |
|
"rewards/margins": 3.3450686931610107, |
|
"rewards/rejected": -3.833346128463745, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.9555071162969612, |
|
"grad_norm": 21.74301345510537, |
|
"learning_rate": 3.1234823231200925e-07, |
|
"logits/chosen": 0.5031583309173584, |
|
"logits/rejected": 0.5540390014648438, |
|
"logps/chosen": -40.93600845336914, |
|
"logps/rejected": -66.30878448486328, |
|
"loss": 0.2428, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.5792509317398071, |
|
"rewards/margins": 3.6368870735168457, |
|
"rewards/rejected": -4.2161383628845215, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.9601230927041928, |
|
"grad_norm": 22.436508219334904, |
|
"learning_rate": 3.1039351312157993e-07, |
|
"logits/chosen": 0.56053227186203, |
|
"logits/rejected": 0.590539813041687, |
|
"logps/chosen": -41.67660140991211, |
|
"logps/rejected": -58.28109359741211, |
|
"loss": 0.2048, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.6333367228507996, |
|
"rewards/margins": 3.312451124191284, |
|
"rewards/rejected": -3.9457881450653076, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.9647390691114246, |
|
"grad_norm": 36.50210265432233, |
|
"learning_rate": 3.0843486501499967e-07, |
|
"logits/chosen": 0.508413553237915, |
|
"logits/rejected": 0.5429882407188416, |
|
"logps/chosen": -42.58755111694336, |
|
"logps/rejected": -52.10399627685547, |
|
"loss": 0.3069, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.4402269721031189, |
|
"rewards/margins": 2.6428239345550537, |
|
"rewards/rejected": -3.0830507278442383, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.9693550455186563, |
|
"grad_norm": 19.432988353108243, |
|
"learning_rate": 3.064724154126449e-07, |
|
"logits/chosen": 0.48101869225502014, |
|
"logits/rejected": 0.49470260739326477, |
|
"logps/chosen": -43.99076461791992, |
|
"logps/rejected": -47.8154411315918, |
|
"loss": 0.2486, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.6770768761634827, |
|
"rewards/margins": 2.6182446479797363, |
|
"rewards/rejected": -3.2953217029571533, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9693550455186563, |
|
"eval_logits/chosen": 0.4298844337463379, |
|
"eval_logits/rejected": 0.45596131682395935, |
|
"eval_logps/chosen": -42.74457550048828, |
|
"eval_logps/rejected": -55.1827278137207, |
|
"eval_loss": 0.2540464699268341, |
|
"eval_rewards/accuracies": 0.8231566548347473, |
|
"eval_rewards/chosen": -0.4996483027935028, |
|
"eval_rewards/margins": 3.072551727294922, |
|
"eval_rewards/rejected": -3.572199821472168, |
|
"eval_runtime": 220.4655, |
|
"eval_samples_per_second": 7.865, |
|
"eval_steps_per_second": 1.969, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9739710219258879, |
|
"grad_norm": 21.396529357952137, |
|
"learning_rate": 3.045062919821995e-07, |
|
"logits/chosen": 0.5096142292022705, |
|
"logits/rejected": 0.5509178638458252, |
|
"logps/chosen": -40.65134811401367, |
|
"logps/rejected": -64.13406372070312, |
|
"loss": 0.2407, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.25429394841194153, |
|
"rewards/margins": 3.5406899452209473, |
|
"rewards/rejected": -3.7949838638305664, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.9785869983331197, |
|
"grad_norm": 27.30197314549755, |
|
"learning_rate": 3.0253662263034925e-07, |
|
"logits/chosen": 0.5253940224647522, |
|
"logits/rejected": 0.5617537498474121, |
|
"logps/chosen": -44.63224792480469, |
|
"logps/rejected": -62.29665756225586, |
|
"loss": 0.2666, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.6128353476524353, |
|
"rewards/margins": 3.4666247367858887, |
|
"rewards/rejected": -4.079460144042969, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.9832029747403513, |
|
"grad_norm": 40.51282949087652, |
|
"learning_rate": 3.005635354944606e-07, |
|
"logits/chosen": 0.5502428412437439, |
|
"logits/rejected": 0.5616468787193298, |
|
"logps/chosen": -46.97676467895508, |
|
"logps/rejected": -46.36595153808594, |
|
"loss": 0.2894, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.7273317575454712, |
|
"rewards/margins": 2.6478114128112793, |
|
"rewards/rejected": -3.375143051147461, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.987818951147583, |
|
"grad_norm": 23.92512657865844, |
|
"learning_rate": 2.9858715893424504e-07, |
|
"logits/chosen": 0.5228149890899658, |
|
"logits/rejected": 0.5698718428611755, |
|
"logps/chosen": -40.91889953613281, |
|
"logps/rejected": -64.06893920898438, |
|
"loss": 0.1794, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.32393407821655273, |
|
"rewards/margins": 3.8048884868621826, |
|
"rewards/rejected": -4.128821849822998, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.9924349275548148, |
|
"grad_norm": 18.33017798245734, |
|
"learning_rate": 2.966076215234082e-07, |
|
"logits/chosen": 0.5833015441894531, |
|
"logits/rejected": 0.6151509881019592, |
|
"logps/chosen": -47.47243118286133, |
|
"logps/rejected": -64.26097869873047, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2687421441078186, |
|
"rewards/margins": 3.582411766052246, |
|
"rewards/rejected": -3.85115385055542, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9924349275548148, |
|
"eval_logits/chosen": 0.42911431193351746, |
|
"eval_logits/rejected": 0.45535048842430115, |
|
"eval_logps/chosen": -42.6432991027832, |
|
"eval_logps/rejected": -55.0967903137207, |
|
"eval_loss": 0.25298023223876953, |
|
"eval_rewards/accuracies": 0.8237327337265015, |
|
"eval_rewards/chosen": -0.4490084946155548, |
|
"eval_rewards/margins": 3.0802206993103027, |
|
"eval_rewards/rejected": -3.5292294025421143, |
|
"eval_runtime": 220.5016, |
|
"eval_samples_per_second": 7.864, |
|
"eval_steps_per_second": 1.968, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9970509039620464, |
|
"grad_norm": 24.845062608395242, |
|
"learning_rate": 2.94625052041286e-07, |
|
"logits/chosen": 0.529398500919342, |
|
"logits/rejected": 0.5461426377296448, |
|
"logps/chosen": -42.26673889160156, |
|
"logps/rejected": -52.43321228027344, |
|
"loss": 0.2582, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.38506922125816345, |
|
"rewards/margins": 2.947833299636841, |
|
"rewards/rejected": -3.332902431488037, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.001666880369278, |
|
"grad_norm": 14.705625802608846, |
|
"learning_rate": 2.926395794644665e-07, |
|
"logits/chosen": 0.5060461759567261, |
|
"logits/rejected": 0.5222041010856628, |
|
"logps/chosen": -45.8979606628418, |
|
"logps/rejected": -55.48097229003906, |
|
"loss": 0.1798, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.3213649392127991, |
|
"rewards/margins": 3.302720308303833, |
|
"rewards/rejected": -3.6240854263305664, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.0062828567765099, |
|
"grad_norm": 24.90302953634143, |
|
"learning_rate": 2.906513329583991e-07, |
|
"logits/chosen": 0.5120677351951599, |
|
"logits/rejected": 0.5406749844551086, |
|
"logps/chosen": -40.07225036621094, |
|
"logps/rejected": -54.882259368896484, |
|
"loss": 0.2186, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.5253066420555115, |
|
"rewards/margins": 3.1281352043151855, |
|
"rewards/rejected": -3.653441905975342, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.0108988331837414, |
|
"grad_norm": 20.006366802619794, |
|
"learning_rate": 2.886604418689921e-07, |
|
"logits/chosen": 0.48885577917099, |
|
"logits/rejected": 0.5327137112617493, |
|
"logps/chosen": -38.752708435058594, |
|
"logps/rejected": -66.8874740600586, |
|
"loss": 0.2705, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": -0.5506837368011475, |
|
"rewards/margins": 3.6388425827026367, |
|
"rewards/rejected": -4.189526557922363, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.0155148095909732, |
|
"grad_norm": 11.538422039384988, |
|
"learning_rate": 2.866670357141979e-07, |
|
"logits/chosen": 0.5471632480621338, |
|
"logits/rejected": 0.5706813931465149, |
|
"logps/chosen": -44.1706428527832, |
|
"logps/rejected": -54.80915832519531, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.5128348469734192, |
|
"rewards/margins": 3.5640437602996826, |
|
"rewards/rejected": -4.076879024505615, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0155148095909732, |
|
"eval_logits/chosen": 0.42714568972587585, |
|
"eval_logits/rejected": 0.4533489942550659, |
|
"eval_logps/chosen": -42.395565032958984, |
|
"eval_logps/rejected": -54.934104919433594, |
|
"eval_loss": 0.2539977729320526, |
|
"eval_rewards/accuracies": 0.8231566548347473, |
|
"eval_rewards/chosen": -0.3251444697380066, |
|
"eval_rewards/margins": 3.122741937637329, |
|
"eval_rewards/rejected": -3.4478864669799805, |
|
"eval_runtime": 220.3559, |
|
"eval_samples_per_second": 7.869, |
|
"eval_steps_per_second": 1.97, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.020130785998205, |
|
"grad_norm": 16.119320288131345, |
|
"learning_rate": 2.8467124417558737e-07, |
|
"logits/chosen": 0.5559278130531311, |
|
"logits/rejected": 0.5782606601715088, |
|
"logps/chosen": -43.08287048339844, |
|
"logps/rejected": -55.4886474609375, |
|
"loss": 0.2118, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.22590351104736328, |
|
"rewards/margins": 3.3553009033203125, |
|
"rewards/rejected": -3.581204414367676, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.0247467624054365, |
|
"grad_norm": 21.10014479926061, |
|
"learning_rate": 2.8267319708991253e-07, |
|
"logits/chosen": 0.5570061206817627, |
|
"logits/rejected": 0.5741885304450989, |
|
"logps/chosen": -46.57928466796875, |
|
"logps/rejected": -48.77629089355469, |
|
"loss": 0.2203, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.2255779653787613, |
|
"rewards/margins": 2.8583762645721436, |
|
"rewards/rejected": -3.083954334259033, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.0293627388126683, |
|
"grad_norm": 21.99323071947427, |
|
"learning_rate": 2.806730244406612e-07, |
|
"logits/chosen": 0.5444987416267395, |
|
"logits/rejected": 0.5731097459793091, |
|
"logps/chosen": -40.73080825805664, |
|
"logps/rejected": -52.80342102050781, |
|
"loss": 0.2407, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.2986847758293152, |
|
"rewards/margins": 3.0820257663726807, |
|
"rewards/rejected": -3.3807103633880615, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.0339787152199, |
|
"grad_norm": 17.17450683483707, |
|
"learning_rate": 2.786708563496001e-07, |
|
"logits/chosen": 0.5541989207267761, |
|
"logits/rejected": 0.5817456841468811, |
|
"logps/chosen": -45.73213195800781, |
|
"logps/rejected": -61.18666458129883, |
|
"loss": 0.1772, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.05669987201690674, |
|
"rewards/margins": 3.8165981769561768, |
|
"rewards/rejected": -3.873298168182373, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.0385946916271316, |
|
"grad_norm": 27.653708636239905, |
|
"learning_rate": 2.7666682306830994e-07, |
|
"logits/chosen": 0.5207394957542419, |
|
"logits/rejected": 0.5322983860969543, |
|
"logps/chosen": -41.09166717529297, |
|
"logps/rejected": -43.31468200683594, |
|
"loss": 0.2544, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.3381701707839966, |
|
"rewards/margins": 2.6456761360168457, |
|
"rewards/rejected": -2.9838459491729736, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.0385946916271316, |
|
"eval_logits/chosen": 0.43128177523612976, |
|
"eval_logits/rejected": 0.4573296308517456, |
|
"eval_logps/chosen": -42.16498565673828, |
|
"eval_logps/rejected": -54.75392150878906, |
|
"eval_loss": 0.2521970570087433, |
|
"eval_rewards/accuracies": 0.8248847723007202, |
|
"eval_rewards/chosen": -0.20985357463359833, |
|
"eval_rewards/margins": 3.147939920425415, |
|
"eval_rewards/rejected": -3.3577938079833984, |
|
"eval_runtime": 220.2887, |
|
"eval_samples_per_second": 7.871, |
|
"eval_steps_per_second": 1.97, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.0432106680343634, |
|
"grad_norm": 26.863807248353726, |
|
"learning_rate": 2.746610549697119e-07, |
|
"logits/chosen": 0.5497666001319885, |
|
"logits/rejected": 0.5746829509735107, |
|
"logps/chosen": -42.95619583129883, |
|
"logps/rejected": -57.17405700683594, |
|
"loss": 0.2279, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.33137860894203186, |
|
"rewards/margins": 3.0671894550323486, |
|
"rewards/rejected": -3.3985676765441895, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.0478266444415951, |
|
"grad_norm": 15.765922708965844, |
|
"learning_rate": 2.7265368253958615e-07, |
|
"logits/chosen": 0.5027904510498047, |
|
"logits/rejected": 0.5187773108482361, |
|
"logps/chosen": -40.01198959350586, |
|
"logps/rejected": -49.16390609741211, |
|
"loss": 0.1826, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.024355987086892128, |
|
"rewards/margins": 3.001004219055176, |
|
"rewards/rejected": -3.025360107421875, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.0524426208488267, |
|
"grad_norm": 13.117750938407347, |
|
"learning_rate": 2.706448363680831e-07, |
|
"logits/chosen": 0.5505272746086121, |
|
"logits/rejected": 0.592627763748169, |
|
"logps/chosen": -40.86323928833008, |
|
"logps/rejected": -65.0215072631836, |
|
"loss": 0.1182, |
|
"rewards/accuracies": 0.9722222089767456, |
|
"rewards/chosen": -0.19750367105007172, |
|
"rewards/margins": 4.092833995819092, |
|
"rewards/rejected": -4.290337562561035, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.0570585972560584, |
|
"grad_norm": 16.896591758231867, |
|
"learning_rate": 2.686346471412277e-07, |
|
"logits/chosen": 0.4872972071170807, |
|
"logits/rejected": 0.5277370810508728, |
|
"logps/chosen": -44.69199752807617, |
|
"logps/rejected": -65.82919311523438, |
|
"loss": 0.1481, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": -0.253704696893692, |
|
"rewards/margins": 3.8575947284698486, |
|
"rewards/rejected": -4.111299514770508, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.0616745736632902, |
|
"grad_norm": 20.974972760985903, |
|
"learning_rate": 2.6662324563241805e-07, |
|
"logits/chosen": 0.5082690119743347, |
|
"logits/rejected": 0.5304160118103027, |
|
"logps/chosen": -39.70173263549805, |
|
"logps/rejected": -50.749732971191406, |
|
"loss": 0.218, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.058096084743738174, |
|
"rewards/margins": 2.925325632095337, |
|
"rewards/rejected": -2.983421802520752, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.0616745736632902, |
|
"eval_logits/chosen": 0.42715081572532654, |
|
"eval_logits/rejected": 0.45357510447502136, |
|
"eval_logps/chosen": -41.917137145996094, |
|
"eval_logps/rejected": -54.64493179321289, |
|
"eval_loss": 0.2522634267807007, |
|
"eval_rewards/accuracies": 0.8231566548347473, |
|
"eval_rewards/chosen": -0.08592969179153442, |
|
"eval_rewards/margins": 3.217369556427002, |
|
"eval_rewards/rejected": -3.3032991886138916, |
|
"eval_runtime": 220.2922, |
|
"eval_samples_per_second": 7.871, |
|
"eval_steps_per_second": 1.97, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.0662905500705218, |
|
"grad_norm": 14.344965515087893, |
|
"learning_rate": 2.6461076269391713e-07, |
|
"logits/chosen": 0.5723965167999268, |
|
"logits/rejected": 0.6080074310302734, |
|
"logps/chosen": -47.22536087036133, |
|
"logps/rejected": -63.04933166503906, |
|
"loss": 0.1633, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08401741087436676, |
|
"rewards/margins": 4.024357318878174, |
|
"rewards/rejected": -4.10837459564209, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.0709065264777535, |
|
"grad_norm": 22.161377940303407, |
|
"learning_rate": 2.625973292483409e-07, |
|
"logits/chosen": 0.49575677514076233, |
|
"logits/rejected": 0.5175695419311523, |
|
"logps/chosen": -49.86793518066406, |
|
"logps/rejected": -61.0032958984375, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2437991052865982, |
|
"rewards/margins": 3.3475723266601562, |
|
"rewards/rejected": -3.5913712978363037, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.0755225028849853, |
|
"grad_norm": 9.157546830395537, |
|
"learning_rate": 2.6058307628014065e-07, |
|
"logits/chosen": 0.5648156404495239, |
|
"logits/rejected": 0.5903113484382629, |
|
"logps/chosen": -47.16014099121094, |
|
"logps/rejected": -58.00987243652344, |
|
"loss": 0.1681, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.20527897775173187, |
|
"rewards/margins": 3.885181427001953, |
|
"rewards/rejected": -4.090460777282715, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.0801384792922168, |
|
"grad_norm": 20.418800394750264, |
|
"learning_rate": 2.5856813482708217e-07, |
|
"logits/chosen": 0.5167273879051208, |
|
"logits/rejected": 0.5341954827308655, |
|
"logps/chosen": -44.03962707519531, |
|
"logps/rejected": -48.64061737060547, |
|
"loss": 0.205, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.06323742121458054, |
|
"rewards/margins": 3.104510545730591, |
|
"rewards/rejected": -3.041273355484009, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.0847544556994486, |
|
"grad_norm": 24.70628607742756, |
|
"learning_rate": 2.565526359717206e-07, |
|
"logits/chosen": 0.537581205368042, |
|
"logits/rejected": 0.5596475005149841, |
|
"logps/chosen": -37.46675109863281, |
|
"logps/rejected": -45.9968147277832, |
|
"loss": 0.3005, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.24194829165935516, |
|
"rewards/margins": 2.6193909645080566, |
|
"rewards/rejected": -2.8613390922546387, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0847544556994486, |
|
"eval_logits/chosen": 0.4362466037273407, |
|
"eval_logits/rejected": 0.4623866379261017, |
|
"eval_logps/chosen": -42.15773010253906, |
|
"eval_logps/rejected": -54.935401916503906, |
|
"eval_loss": 0.24963192641735077, |
|
"eval_rewards/accuracies": 0.8260368704795837, |
|
"eval_rewards/chosen": -0.20622780919075012, |
|
"eval_rewards/margins": 3.242306709289551, |
|
"eval_rewards/rejected": -3.4485342502593994, |
|
"eval_runtime": 220.4037, |
|
"eval_samples_per_second": 7.867, |
|
"eval_steps_per_second": 1.969, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0893704321066804, |
|
"grad_norm": 27.430779359112005, |
|
"learning_rate": 2.545367108328731e-07, |
|
"logits/chosen": 0.5652859807014465, |
|
"logits/rejected": 0.591205358505249, |
|
"logps/chosen": -43.71979904174805, |
|
"logps/rejected": -53.00830841064453, |
|
"loss": 0.2156, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.20083469152450562, |
|
"rewards/margins": 3.2087488174438477, |
|
"rewards/rejected": -3.409583330154419, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.0939864085139122, |
|
"grad_norm": 13.134510140867176, |
|
"learning_rate": 2.525204905570889e-07, |
|
"logits/chosen": 0.5791910290718079, |
|
"logits/rejected": 0.6038353443145752, |
|
"logps/chosen": -46.998390197753906, |
|
"logps/rejected": -59.18220520019531, |
|
"loss": 0.1707, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.05355483293533325, |
|
"rewards/margins": 3.5535666942596436, |
|
"rewards/rejected": -3.607121706008911, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.0986023849211437, |
|
"grad_norm": 19.90392742325827, |
|
"learning_rate": 2.505041063101171e-07, |
|
"logits/chosen": 0.5816848278045654, |
|
"logits/rejected": 0.6008831858634949, |
|
"logps/chosen": -47.19880676269531, |
|
"logps/rejected": -51.822105407714844, |
|
"loss": 0.2218, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.03883904218673706, |
|
"rewards/margins": 3.348583221435547, |
|
"rewards/rejected": -3.309744358062744, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.1032183613283755, |
|
"grad_norm": 17.00116980477646, |
|
"learning_rate": 2.4848768926837466e-07, |
|
"logits/chosen": 0.5338962078094482, |
|
"logits/rejected": 0.5906614065170288, |
|
"logps/chosen": -40.04157257080078, |
|
"logps/rejected": -76.84749603271484, |
|
"loss": 0.1893, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1601162701845169, |
|
"rewards/margins": 4.218037128448486, |
|
"rewards/rejected": -4.378152847290039, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.107834337735607, |
|
"grad_norm": 15.038557815597683, |
|
"learning_rate": 2.464713706104113e-07, |
|
"logits/chosen": 0.5352125763893127, |
|
"logits/rejected": 0.5612537264823914, |
|
"logps/chosen": -43.91660690307617, |
|
"logps/rejected": -56.44979476928711, |
|
"loss": 0.1633, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": -0.2793487310409546, |
|
"rewards/margins": 3.6175765991210938, |
|
"rewards/rejected": -3.896925210952759, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.107834337735607, |
|
"eval_logits/chosen": 0.43004509806632996, |
|
"eval_logits/rejected": 0.4563468098640442, |
|
"eval_logps/chosen": -42.171958923339844, |
|
"eval_logps/rejected": -54.986507415771484, |
|
"eval_loss": 0.24832715094089508, |
|
"eval_rewards/accuracies": 0.8271889686584473, |
|
"eval_rewards/chosen": -0.21334028244018555, |
|
"eval_rewards/margins": 3.2607483863830566, |
|
"eval_rewards/rejected": -3.474088668823242, |
|
"eval_runtime": 220.2251, |
|
"eval_samples_per_second": 7.874, |
|
"eval_steps_per_second": 1.971, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.1124503141428388, |
|
"grad_norm": 22.9744657106464, |
|
"learning_rate": 2.444552815083767e-07, |
|
"logits/chosen": 0.6254298686981201, |
|
"logits/rejected": 0.6373676061630249, |
|
"logps/chosen": -42.673282623291016, |
|
"logps/rejected": -45.563087463378906, |
|
"loss": 0.2114, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.057508740574121475, |
|
"rewards/margins": 3.0235791206359863, |
|
"rewards/rejected": -3.081087350845337, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.1170662905500706, |
|
"grad_norm": 17.674691508042564, |
|
"learning_rate": 2.4243955311948693e-07, |
|
"logits/chosen": 0.5245480537414551, |
|
"logits/rejected": 0.5648095011711121, |
|
"logps/chosen": -39.3298225402832, |
|
"logps/rejected": -61.31127166748047, |
|
"loss": 0.2236, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": -0.1908557116985321, |
|
"rewards/margins": 3.677870512008667, |
|
"rewards/rejected": -3.8687260150909424, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.1216822669573023, |
|
"grad_norm": 19.4717194397301, |
|
"learning_rate": 2.4042431657749115e-07, |
|
"logits/chosen": 0.585620105266571, |
|
"logits/rejected": 0.6345695853233337, |
|
"logps/chosen": -41.645267486572266, |
|
"logps/rejected": -72.78955078125, |
|
"loss": 0.1703, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.20467931032180786, |
|
"rewards/margins": 4.08174991607666, |
|
"rewards/rejected": -4.286429405212402, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.1262982433645339, |
|
"grad_norm": 30.909727917565508, |
|
"learning_rate": 2.384097029841419e-07, |
|
"logits/chosen": 0.4901224672794342, |
|
"logits/rejected": 0.5071887969970703, |
|
"logps/chosen": -43.30605697631836, |
|
"logps/rejected": -50.992618560791016, |
|
"loss": 0.2185, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.18728405237197876, |
|
"rewards/margins": 2.9479784965515137, |
|
"rewards/rejected": -3.1352624893188477, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.1309142197717656, |
|
"grad_norm": 16.93415094151409, |
|
"learning_rate": 2.3639584340066544e-07, |
|
"logits/chosen": 0.5211553573608398, |
|
"logits/rejected": 0.5518543124198914, |
|
"logps/chosen": -37.83938980102539, |
|
"logps/rejected": -53.91053009033203, |
|
"loss": 0.234, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.05988183990120888, |
|
"rewards/margins": 3.5345206260681152, |
|
"rewards/rejected": -3.4746387004852295, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.1309142197717656, |
|
"eval_logits/chosen": 0.43326738476753235, |
|
"eval_logits/rejected": 0.45958051085472107, |
|
"eval_logps/chosen": -41.84520721435547, |
|
"eval_logps/rejected": -54.6281852722168, |
|
"eval_loss": 0.24792973697185516, |
|
"eval_rewards/accuracies": 0.8220046162605286, |
|
"eval_rewards/chosen": -0.04996471852064133, |
|
"eval_rewards/margins": 3.244964361190796, |
|
"eval_rewards/rejected": -3.294929265975952, |
|
"eval_runtime": 220.3046, |
|
"eval_samples_per_second": 7.871, |
|
"eval_steps_per_second": 1.97, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.1355301961789972, |
|
"grad_norm": 16.790260075155444, |
|
"learning_rate": 2.3438286883923539e-07, |
|
"logits/chosen": 0.5881079435348511, |
|
"logits/rejected": 0.6105315685272217, |
|
"logps/chosen": -46.794837951660156, |
|
"logps/rejected": -53.43986511230469, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.08306831121444702, |
|
"rewards/margins": 3.1719002723693848, |
|
"rewards/rejected": -3.088831663131714, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.140146172586229, |
|
"grad_norm": 22.957641710400285, |
|
"learning_rate": 2.323709102544506e-07, |
|
"logits/chosen": 0.6002509593963623, |
|
"logits/rejected": 0.6072889566421509, |
|
"logps/chosen": -39.66600036621094, |
|
"logps/rejected": -41.07653045654297, |
|
"loss": 0.2857, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.20397840440273285, |
|
"rewards/margins": 2.4769766330718994, |
|
"rewards/rejected": -2.272998094558716, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.1447621489934607, |
|
"grad_norm": 27.504424003065566, |
|
"learning_rate": 2.3036009853481474e-07, |
|
"logits/chosen": 0.5301830768585205, |
|
"logits/rejected": 0.5608452558517456, |
|
"logps/chosen": -39.39542770385742, |
|
"logps/rejected": -58.36659622192383, |
|
"loss": 0.2681, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.2189822793006897, |
|
"rewards/margins": 3.4378933906555176, |
|
"rewards/rejected": -3.6568756103515625, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.1493781254006925, |
|
"grad_norm": 16.835368907101664, |
|
"learning_rate": 2.283505644942223e-07, |
|
"logits/chosen": 0.5190525054931641, |
|
"logits/rejected": 0.5493537783622742, |
|
"logps/chosen": -34.43808364868164, |
|
"logps/rejected": -54.84063720703125, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.13352231681346893, |
|
"rewards/margins": 3.440141201019287, |
|
"rewards/rejected": -3.3066186904907227, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.153994101807924, |
|
"grad_norm": 14.320814422051418, |
|
"learning_rate": 2.2634243886344781e-07, |
|
"logits/chosen": 0.5132643580436707, |
|
"logits/rejected": 0.5381724834442139, |
|
"logps/chosen": -41.94618225097656, |
|
"logps/rejected": -54.74879455566406, |
|
"loss": 0.243, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.1846380978822708, |
|
"rewards/margins": 3.523959159851074, |
|
"rewards/rejected": -3.3393211364746094, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.153994101807924, |
|
"eval_logits/chosen": 0.43241602182388306, |
|
"eval_logits/rejected": 0.45862025022506714, |
|
"eval_logps/chosen": -41.512245178222656, |
|
"eval_logps/rejected": -54.365325927734375, |
|
"eval_loss": 0.24479356408119202, |
|
"eval_rewards/accuracies": 0.8289170265197754, |
|
"eval_rewards/chosen": 0.11651827394962311, |
|
"eval_rewards/margins": 3.2800135612487793, |
|
"eval_rewards/rejected": -3.1634950637817383, |
|
"eval_runtime": 220.3257, |
|
"eval_samples_per_second": 7.87, |
|
"eval_steps_per_second": 1.97, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1586100782151558, |
|
"grad_norm": 17.24901468893502, |
|
"learning_rate": 2.2433585228164115e-07, |
|
"logits/chosen": 0.5386977791786194, |
|
"logits/rejected": 0.5774834156036377, |
|
"logps/chosen": -43.753910064697266, |
|
"logps/rejected": -65.60494232177734, |
|
"loss": 0.1918, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.19071653485298157, |
|
"rewards/margins": 4.159061908721924, |
|
"rewards/rejected": -3.9683446884155273, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.1632260546223874, |
|
"grad_norm": 22.994462305856853, |
|
"learning_rate": 2.2233093528782938e-07, |
|
"logits/chosen": 0.5429908037185669, |
|
"logits/rejected": 0.5663915872573853, |
|
"logps/chosen": -49.295047760009766, |
|
"logps/rejected": -58.83778381347656, |
|
"loss": 0.1741, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.27108439803123474, |
|
"rewards/margins": 3.4974775314331055, |
|
"rewards/rejected": -3.226392984390259, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.1678420310296191, |
|
"grad_norm": 19.749474882703815, |
|
"learning_rate": 2.2032781831242367e-07, |
|
"logits/chosen": 0.5360143184661865, |
|
"logits/rejected": 0.5641200542449951, |
|
"logps/chosen": -35.82609558105469, |
|
"logps/rejected": -44.779361724853516, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4115668535232544, |
|
"rewards/margins": 2.9376118183135986, |
|
"rewards/rejected": -2.526045083999634, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.172458007436851, |
|
"grad_norm": 29.881557534524536, |
|
"learning_rate": 2.183266316687347e-07, |
|
"logits/chosen": 0.5799429416656494, |
|
"logits/rejected": 0.5963388681411743, |
|
"logps/chosen": -42.11252975463867, |
|
"logps/rejected": -44.56486511230469, |
|
"loss": 0.2905, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.2770005762577057, |
|
"rewards/margins": 2.54060435295105, |
|
"rewards/rejected": -2.263603687286377, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.1770739838440827, |
|
"grad_norm": 11.72889590765659, |
|
"learning_rate": 2.16327505544495e-07, |
|
"logits/chosen": 0.5231108069419861, |
|
"logits/rejected": 0.5499060153961182, |
|
"logps/chosen": -43.436798095703125, |
|
"logps/rejected": -57.92034912109375, |
|
"loss": 0.1472, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.47280406951904297, |
|
"rewards/margins": 4.098244667053223, |
|
"rewards/rejected": -3.625440835952759, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.1770739838440827, |
|
"eval_logits/chosen": 0.43323588371276855, |
|
"eval_logits/rejected": 0.4594508111476898, |
|
"eval_logps/chosen": -41.14154815673828, |
|
"eval_logps/rejected": -54.075172424316406, |
|
"eval_loss": 0.247583270072937, |
|
"eval_rewards/accuracies": 0.828341007232666, |
|
"eval_rewards/chosen": 0.30186572670936584, |
|
"eval_rewards/margins": 3.3202853202819824, |
|
"eval_rewards/rejected": -3.0184197425842285, |
|
"eval_runtime": 220.3645, |
|
"eval_samples_per_second": 7.869, |
|
"eval_steps_per_second": 1.969, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.1816899602513142, |
|
"grad_norm": 19.02915371887465, |
|
"learning_rate": 2.143305699933892e-07, |
|
"logits/chosen": 0.5309435725212097, |
|
"logits/rejected": 0.5609121322631836, |
|
"logps/chosen": -39.10821533203125, |
|
"logps/rejected": -55.85133743286133, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.35923802852630615, |
|
"rewards/margins": 3.6412789821624756, |
|
"rewards/rejected": -3.282041549682617, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.186305936658546, |
|
"grad_norm": 18.184730820886717, |
|
"learning_rate": 2.1233595492659382e-07, |
|
"logits/chosen": 0.6312618851661682, |
|
"logits/rejected": 0.6453579068183899, |
|
"logps/chosen": -48.93413543701172, |
|
"logps/rejected": -50.58020782470703, |
|
"loss": 0.1701, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.28959882259368896, |
|
"rewards/margins": 3.4992854595184326, |
|
"rewards/rejected": -3.209686040878296, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.1909219130657775, |
|
"grad_norm": 21.115621604290848, |
|
"learning_rate": 2.1034379010432542e-07, |
|
"logits/chosen": 0.5738712549209595, |
|
"logits/rejected": 0.5990296006202698, |
|
"logps/chosen": -36.4149055480957, |
|
"logps/rejected": -47.95274353027344, |
|
"loss": 0.2192, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.35762646794319153, |
|
"rewards/margins": 3.1450395584106445, |
|
"rewards/rejected": -2.7874133586883545, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.1955378894730093, |
|
"grad_norm": 18.313049973835163, |
|
"learning_rate": 2.0835420512739957e-07, |
|
"logits/chosen": 0.48849010467529297, |
|
"logits/rejected": 0.5418619513511658, |
|
"logps/chosen": -39.52627182006836, |
|
"logps/rejected": -70.53701782226562, |
|
"loss": 0.1678, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.39579084515571594, |
|
"rewards/margins": 4.528857231140137, |
|
"rewards/rejected": -4.133066654205322, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.200153865880241, |
|
"grad_norm": 18.512425100692376, |
|
"learning_rate": 2.0636732942879917e-07, |
|
"logits/chosen": 0.5643823146820068, |
|
"logits/rejected": 0.5917804837226868, |
|
"logps/chosen": -43.44633483886719, |
|
"logps/rejected": -56.26163101196289, |
|
"loss": 0.166, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.33819130063056946, |
|
"rewards/margins": 3.693488121032715, |
|
"rewards/rejected": -3.3552963733673096, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.200153865880241, |
|
"eval_logits/chosen": 0.4335879981517792, |
|
"eval_logits/rejected": 0.45994046330451965, |
|
"eval_logps/chosen": -41.402774810791016, |
|
"eval_logps/rejected": -54.35234451293945, |
|
"eval_loss": 0.2449788749217987, |
|
"eval_rewards/accuracies": 0.8317972421646118, |
|
"eval_rewards/chosen": 0.1712525486946106, |
|
"eval_rewards/margins": 3.328258991241455, |
|
"eval_rewards/rejected": -3.1570065021514893, |
|
"eval_runtime": 220.2998, |
|
"eval_samples_per_second": 7.871, |
|
"eval_steps_per_second": 1.97, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.2047698422874729, |
|
"grad_norm": 11.696545134195471, |
|
"learning_rate": 2.0438329226525415e-07, |
|
"logits/chosen": 0.5642399787902832, |
|
"logits/rejected": 0.587860643863678, |
|
"logps/chosen": -41.212337493896484, |
|
"logps/rejected": -43.521636962890625, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.5518161058425903, |
|
"rewards/margins": 2.9677634239196777, |
|
"rewards/rejected": -2.415947675704956, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.2093858186947044, |
|
"grad_norm": 24.196902238001236, |
|
"learning_rate": 2.0240222270883288e-07, |
|
"logits/chosen": 0.5227870941162109, |
|
"logits/rejected": 0.5579611659049988, |
|
"logps/chosen": -44.49864196777344, |
|
"logps/rejected": -64.84123229980469, |
|
"loss": 0.2314, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.16053809225559235, |
|
"rewards/margins": 3.896054267883301, |
|
"rewards/rejected": -3.73551607131958, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.2140017951019362, |
|
"grad_norm": 12.971615376216704, |
|
"learning_rate": 2.0042424963854542e-07, |
|
"logits/chosen": 0.5063973665237427, |
|
"logits/rejected": 0.5544097423553467, |
|
"logps/chosen": -40.40736389160156, |
|
"logps/rejected": -70.9152603149414, |
|
"loss": 0.1526, |
|
"rewards/accuracies": 0.9444444179534912, |
|
"rewards/chosen": 0.3248124122619629, |
|
"rewards/margins": 4.234506607055664, |
|
"rewards/rejected": -3.9096946716308594, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.2186177715091677, |
|
"grad_norm": 14.0866861852398, |
|
"learning_rate": 1.9844950173195883e-07, |
|
"logits/chosen": 0.5182596445083618, |
|
"logits/rejected": 0.549498975276947, |
|
"logps/chosen": -39.39563751220703, |
|
"logps/rejected": -54.05485153198242, |
|
"loss": 0.1818, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.22824376821517944, |
|
"rewards/margins": 3.397740364074707, |
|
"rewards/rejected": -3.169497013092041, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.2232337479163995, |
|
"grad_norm": 13.76916365285817, |
|
"learning_rate": 1.964781074568265e-07, |
|
"logits/chosen": 0.5031299591064453, |
|
"logits/rejected": 0.5121724009513855, |
|
"logps/chosen": -41.18108367919922, |
|
"logps/rejected": -45.627994537353516, |
|
"loss": 0.1945, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.03019801713526249, |
|
"rewards/margins": 3.0934128761291504, |
|
"rewards/rejected": -3.0632145404815674, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.2232337479163995, |
|
"eval_logits/chosen": 0.43405523896217346, |
|
"eval_logits/rejected": 0.46039465069770813, |
|
"eval_logps/chosen": -41.60369110107422, |
|
"eval_logps/rejected": -54.51262664794922, |
|
"eval_loss": 0.24258121848106384, |
|
"eval_rewards/accuracies": 0.8335253596305847, |
|
"eval_rewards/chosen": 0.07079467922449112, |
|
"eval_rewards/margins": 3.3079416751861572, |
|
"eval_rewards/rejected": -3.2371468544006348, |
|
"eval_runtime": 220.2641, |
|
"eval_samples_per_second": 7.872, |
|
"eval_steps_per_second": 1.97, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.2278497243236313, |
|
"grad_norm": 16.411903473780164, |
|
"learning_rate": 1.9451019506273018e-07, |
|
"logits/chosen": 0.541588306427002, |
|
"logits/rejected": 0.5615941286087036, |
|
"logps/chosen": -36.563297271728516, |
|
"logps/rejected": -48.32072448730469, |
|
"loss": 0.2351, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.17822687327861786, |
|
"rewards/margins": 2.845065116882324, |
|
"rewards/rejected": -2.6668384075164795, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.232465700730863, |
|
"grad_norm": 13.467269631637619, |
|
"learning_rate": 1.9254589257273712e-07, |
|
"logits/chosen": 0.5137292146682739, |
|
"logits/rejected": 0.5505712032318115, |
|
"logps/chosen": -36.598384857177734, |
|
"logps/rejected": -57.48229217529297, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.19568167626857758, |
|
"rewards/margins": 4.128161907196045, |
|
"rewards/rejected": -3.9324798583984375, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.2370816771380946, |
|
"grad_norm": 24.645788661655104, |
|
"learning_rate": 1.9058532777507141e-07, |
|
"logits/chosen": 0.5294635891914368, |
|
"logits/rejected": 0.5472697615623474, |
|
"logps/chosen": -39.22220230102539, |
|
"logps/rejected": -49.91395950317383, |
|
"loss": 0.2172, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.11992057412862778, |
|
"rewards/margins": 3.224815845489502, |
|
"rewards/rejected": -3.1048953533172607, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.2416976535453264, |
|
"grad_norm": 18.291984511184836, |
|
"learning_rate": 1.886286282148002e-07, |
|
"logits/chosen": 0.5298857688903809, |
|
"logits/rejected": 0.5633623600006104, |
|
"logps/chosen": -41.294647216796875, |
|
"logps/rejected": -57.79304885864258, |
|
"loss": 0.2731, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.11145944148302078, |
|
"rewards/margins": 3.1801443099975586, |
|
"rewards/rejected": -3.2916040420532227, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.246313629952558, |
|
"grad_norm": 17.71916747448851, |
|
"learning_rate": 1.8667592118553693e-07, |
|
"logits/chosen": 0.5349301099777222, |
|
"logits/rejected": 0.5512058734893799, |
|
"logps/chosen": -43.72676467895508, |
|
"logps/rejected": -52.80296325683594, |
|
"loss": 0.2216, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.03689540922641754, |
|
"rewards/margins": 3.2271673679351807, |
|
"rewards/rejected": -3.2640628814697266, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.246313629952558, |
|
"eval_logits/chosen": 0.4325529932975769, |
|
"eval_logits/rejected": 0.45892781019210815, |
|
"eval_logps/chosen": -41.67875289916992, |
|
"eval_logps/rejected": -54.59620666503906, |
|
"eval_loss": 0.24205271899700165, |
|
"eval_rewards/accuracies": 0.8277649879455566, |
|
"eval_rewards/chosen": 0.03326287120580673, |
|
"eval_rewards/margins": 3.312199115753174, |
|
"eval_rewards/rejected": -3.2789359092712402, |
|
"eval_runtime": 220.1774, |
|
"eval_samples_per_second": 7.875, |
|
"eval_steps_per_second": 1.971, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.2509296063597897, |
|
"grad_norm": 15.1063531754732, |
|
"learning_rate": 1.8472733372115956e-07, |
|
"logits/chosen": 0.4958040416240692, |
|
"logits/rejected": 0.5259097814559937, |
|
"logps/chosen": -43.43186950683594, |
|
"logps/rejected": -60.27039337158203, |
|
"loss": 0.1823, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": -0.40173831582069397, |
|
"rewards/margins": 3.8025894165039062, |
|
"rewards/rejected": -4.2043280601501465, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.2555455827670214, |
|
"grad_norm": 23.60965925798032, |
|
"learning_rate": 1.8278299258754692e-07, |
|
"logits/chosen": 0.47050708532333374, |
|
"logits/rejected": 0.5154716968536377, |
|
"logps/chosen": -43.42805480957031, |
|
"logps/rejected": -71.56327056884766, |
|
"loss": 0.2284, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.35217729210853577, |
|
"rewards/margins": 4.311697483062744, |
|
"rewards/rejected": -4.663875102996826, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.2601615591742532, |
|
"grad_norm": 11.785150141913245, |
|
"learning_rate": 1.808430242743316e-07, |
|
"logits/chosen": 0.46195343136787415, |
|
"logits/rejected": 0.4784909784793854, |
|
"logps/chosen": -42.974945068359375, |
|
"logps/rejected": -54.21615219116211, |
|
"loss": 0.1867, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.31646448373794556, |
|
"rewards/margins": 3.5641021728515625, |
|
"rewards/rejected": -3.2476377487182617, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.2647775355814848, |
|
"grad_norm": 13.346160813344762, |
|
"learning_rate": 1.7890755498667104e-07, |
|
"logits/chosen": 0.5626040101051331, |
|
"logits/rejected": 0.5980097651481628, |
|
"logps/chosen": -36.59039306640625, |
|
"logps/rejected": -55.57601547241211, |
|
"loss": 0.182, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.17459021508693695, |
|
"rewards/margins": 3.451416015625, |
|
"rewards/rejected": -3.2768259048461914, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.2693935119887165, |
|
"grad_norm": 25.621843956328824, |
|
"learning_rate": 1.7697671063703756e-07, |
|
"logits/chosen": 0.5085393786430359, |
|
"logits/rejected": 0.5440909266471863, |
|
"logps/chosen": -39.27238464355469, |
|
"logps/rejected": -59.40525817871094, |
|
"loss": 0.2243, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.011964095756411552, |
|
"rewards/margins": 3.6004185676574707, |
|
"rewards/rejected": -3.588454246520996, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.2693935119887165, |
|
"eval_logits/chosen": 0.4355390965938568, |
|
"eval_logits/rejected": 0.46181005239486694, |
|
"eval_logps/chosen": -41.701602935791016, |
|
"eval_logps/rejected": -54.663360595703125, |
|
"eval_loss": 0.24010230600833893, |
|
"eval_rewards/accuracies": 0.8260368704795837, |
|
"eval_rewards/chosen": 0.0218377523124218, |
|
"eval_rewards/margins": 3.3343515396118164, |
|
"eval_rewards/rejected": -3.312513828277588, |
|
"eval_runtime": 220.234, |
|
"eval_samples_per_second": 7.873, |
|
"eval_steps_per_second": 1.971, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.274009488395948, |
|
"grad_norm": 29.85339571581757, |
|
"learning_rate": 1.750506168370267e-07, |
|
"logits/chosen": 0.5484946370124817, |
|
"logits/rejected": 0.5642725229263306, |
|
"logps/chosen": -40.738338470458984, |
|
"logps/rejected": -47.2222900390625, |
|
"loss": 0.2665, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.49148619174957275, |
|
"rewards/margins": 3.0378835201263428, |
|
"rewards/rejected": -2.5463972091674805, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.2786254648031798, |
|
"grad_norm": 11.606234417897845, |
|
"learning_rate": 1.7312939888918594e-07, |
|
"logits/chosen": 0.5540368556976318, |
|
"logits/rejected": 0.5830137729644775, |
|
"logps/chosen": -43.42100143432617, |
|
"logps/rejected": -63.07583999633789, |
|
"loss": 0.1529, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": 0.060752179473638535, |
|
"rewards/margins": 3.951368570327759, |
|
"rewards/rejected": -3.8906164169311523, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.2832414412104116, |
|
"grad_norm": 8.195981315855988, |
|
"learning_rate": 1.712131817788628e-07, |
|
"logits/chosen": 0.5598903298377991, |
|
"logits/rejected": 0.582931637763977, |
|
"logps/chosen": -39.05931854248047, |
|
"logps/rejected": -49.5858154296875, |
|
"loss": 0.2278, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.035774629563093185, |
|
"rewards/margins": 3.2900662422180176, |
|
"rewards/rejected": -3.325840950012207, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.2878574176176434, |
|
"grad_norm": 10.58953396876903, |
|
"learning_rate": 1.693020901660738e-07, |
|
"logits/chosen": 0.5586022138595581, |
|
"logits/rejected": 0.5835521221160889, |
|
"logps/chosen": -46.566070556640625, |
|
"logps/rejected": -56.1746940612793, |
|
"loss": 0.1347, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": 0.1323520541191101, |
|
"rewards/margins": 3.951080322265625, |
|
"rewards/rejected": -3.81872820854187, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.292473394024875, |
|
"grad_norm": 20.647672350132265, |
|
"learning_rate": 1.6739624837739518e-07, |
|
"logits/chosen": 0.4893258512020111, |
|
"logits/rejected": 0.5065658092498779, |
|
"logps/chosen": -46.70867919921875, |
|
"logps/rejected": -53.02800369262695, |
|
"loss": 0.2073, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.15436476469039917, |
|
"rewards/margins": 3.050819158554077, |
|
"rewards/rejected": -3.205183744430542, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.292473394024875, |
|
"eval_logits/chosen": 0.4335208237171173, |
|
"eval_logits/rejected": 0.45989227294921875, |
|
"eval_logps/chosen": -41.82432556152344, |
|
"eval_logps/rejected": -54.859825134277344, |
|
"eval_loss": 0.23924875259399414, |
|
"eval_rewards/accuracies": 0.8312212228775024, |
|
"eval_rewards/chosen": -0.03952277451753616, |
|
"eval_rewards/margins": 3.371224880218506, |
|
"eval_rewards/rejected": -3.410747766494751, |
|
"eval_runtime": 220.3082, |
|
"eval_samples_per_second": 7.871, |
|
"eval_steps_per_second": 1.97, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.2970893704321067, |
|
"grad_norm": 15.328848187023517, |
|
"learning_rate": 1.6549578039787434e-07, |
|
"logits/chosen": 0.5223647356033325, |
|
"logits/rejected": 0.5576710104942322, |
|
"logps/chosen": -43.448875427246094, |
|
"logps/rejected": -67.14339447021484, |
|
"loss": 0.2405, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.1683083474636078, |
|
"rewards/margins": 3.6626782417297363, |
|
"rewards/rejected": -3.830986499786377, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.3017053468393383, |
|
"grad_norm": 14.362719389125761, |
|
"learning_rate": 1.6360080986296384e-07, |
|
"logits/chosen": 0.5163556337356567, |
|
"logits/rejected": 0.5569749474525452, |
|
"logps/chosen": -37.78327941894531, |
|
"logps/rejected": -64.23339080810547, |
|
"loss": 0.186, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.047993943095207214, |
|
"rewards/margins": 4.109629154205322, |
|
"rewards/rejected": -4.157623291015625, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.30632132324657, |
|
"grad_norm": 8.849930925918736, |
|
"learning_rate": 1.6171146005047894e-07, |
|
"logits/chosen": 0.5622715353965759, |
|
"logits/rejected": 0.5891626477241516, |
|
"logps/chosen": -46.50107955932617, |
|
"logps/rejected": -63.37003707885742, |
|
"loss": 0.1689, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.1092919185757637, |
|
"rewards/margins": 4.0769548416137695, |
|
"rewards/rejected": -3.967662811279297, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.3109372996538018, |
|
"grad_norm": 16.110148125770678, |
|
"learning_rate": 1.5982785387257694e-07, |
|
"logits/chosen": 0.5649956464767456, |
|
"logits/rejected": 0.5782197117805481, |
|
"logps/chosen": -43.4311408996582, |
|
"logps/rejected": -49.03315734863281, |
|
"loss": 0.2002, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.15342091023921967, |
|
"rewards/margins": 2.909942150115967, |
|
"rewards/rejected": -3.0633630752563477, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.3155532760610336, |
|
"grad_norm": 23.725153045927403, |
|
"learning_rate": 1.5795011386776159e-07, |
|
"logits/chosen": 0.5103439688682556, |
|
"logits/rejected": 0.5300507545471191, |
|
"logps/chosen": -42.80021667480469, |
|
"logps/rejected": -47.7119255065918, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14677530527114868, |
|
"rewards/margins": 3.0557618141174316, |
|
"rewards/rejected": -3.2025370597839355, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.3155532760610336, |
|
"eval_logits/chosen": 0.43335986137390137, |
|
"eval_logits/rejected": 0.4598417580127716, |
|
"eval_logps/chosen": -41.851680755615234, |
|
"eval_logps/rejected": -54.97309112548828, |
|
"eval_loss": 0.23906731605529785, |
|
"eval_rewards/accuracies": 0.835829496383667, |
|
"eval_rewards/chosen": -0.05320117622613907, |
|
"eval_rewards/margins": 3.4141783714294434, |
|
"eval_rewards/rejected": -3.467379570007324, |
|
"eval_runtime": 220.3588, |
|
"eval_samples_per_second": 7.869, |
|
"eval_steps_per_second": 1.97, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.320169252468265, |
|
"grad_norm": 16.172756609459842, |
|
"learning_rate": 1.560783621929113e-07, |
|
"logits/chosen": 0.5175637006759644, |
|
"logits/rejected": 0.5229324102401733, |
|
"logps/chosen": -49.446102142333984, |
|
"logps/rejected": -55.164894104003906, |
|
"loss": 0.1869, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.08015252649784088, |
|
"rewards/margins": 3.3609066009521484, |
|
"rewards/rejected": -3.2807538509368896, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.3247852288754969, |
|
"grad_norm": 24.72268513177688, |
|
"learning_rate": 1.5421272061533177e-07, |
|
"logits/chosen": 0.5066720247268677, |
|
"logits/rejected": 0.5451788306236267, |
|
"logps/chosen": -37.343570709228516, |
|
"logps/rejected": -60.23046112060547, |
|
"loss": 0.2949, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.1486133188009262, |
|
"rewards/margins": 3.3898818492889404, |
|
"rewards/rejected": -3.2412681579589844, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.3294012052827284, |
|
"grad_norm": 18.734543272703554, |
|
"learning_rate": 1.5235331050483513e-07, |
|
"logits/chosen": 0.5524860620498657, |
|
"logits/rejected": 0.5772072672843933, |
|
"logps/chosen": -43.33749771118164, |
|
"logps/rejected": -56.5976676940918, |
|
"loss": 0.2367, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.3428743779659271, |
|
"rewards/margins": 3.3112895488739014, |
|
"rewards/rejected": -3.6541638374328613, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.3340171816899602, |
|
"grad_norm": 15.636365920242639, |
|
"learning_rate": 1.5050025282584327e-07, |
|
"logits/chosen": 0.5805926322937012, |
|
"logits/rejected": 0.6090676188468933, |
|
"logps/chosen": -49.13417434692383, |
|
"logps/rejected": -64.1076431274414, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": -0.08167193830013275, |
|
"rewards/margins": 3.955726146697998, |
|
"rewards/rejected": -4.037397861480713, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.338633158097192, |
|
"grad_norm": 15.524132351808905, |
|
"learning_rate": 1.4865366812951921e-07, |
|
"logits/chosen": 0.598872721195221, |
|
"logits/rejected": 0.62497878074646, |
|
"logps/chosen": -36.58146667480469, |
|
"logps/rejected": -46.25484085083008, |
|
"loss": 0.1893, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.01747778430581093, |
|
"rewards/margins": 3.4903595447540283, |
|
"rewards/rejected": -3.5078377723693848, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.338633158097192, |
|
"eval_logits/chosen": 0.4342789053916931, |
|
"eval_logits/rejected": 0.46078288555145264, |
|
"eval_logps/chosen": -42.1205940246582, |
|
"eval_logps/rejected": -55.25835418701172, |
|
"eval_loss": 0.2389531433582306, |
|
"eval_rewards/accuracies": 0.8352534770965576, |
|
"eval_rewards/chosen": -0.18765874207019806, |
|
"eval_rewards/margins": 3.4223523139953613, |
|
"eval_rewards/rejected": -3.610011339187622, |
|
"eval_runtime": 220.361, |
|
"eval_samples_per_second": 7.869, |
|
"eval_steps_per_second": 1.969, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.3432491345044237, |
|
"grad_norm": 22.418640332294185, |
|
"learning_rate": 1.4681367654592446e-07, |
|
"logits/chosen": 0.583182692527771, |
|
"logits/rejected": 0.596510112285614, |
|
"logps/chosen": -45.08745574951172, |
|
"logps/rejected": -52.57502746582031, |
|
"loss": 0.1635, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.24202129244804382, |
|
"rewards/margins": 3.0601682662963867, |
|
"rewards/rejected": -3.302189826965332, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.3478651109116553, |
|
"grad_norm": 16.477398466397805, |
|
"learning_rate": 1.4498039777620353e-07, |
|
"logits/chosen": 0.5257098078727722, |
|
"logits/rejected": 0.5561378598213196, |
|
"logps/chosen": -49.92831039428711, |
|
"logps/rejected": -66.70814514160156, |
|
"loss": 0.1983, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.07970259338617325, |
|
"rewards/margins": 4.159069538116455, |
|
"rewards/rejected": -4.079366683959961, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.352481087318887, |
|
"grad_norm": 21.638282072644653, |
|
"learning_rate": 1.4315395108479728e-07, |
|
"logits/chosen": 0.5448426008224487, |
|
"logits/rejected": 0.5733739733695984, |
|
"logps/chosen": -42.567203521728516, |
|
"logps/rejected": -59.23841094970703, |
|
"loss": 0.1872, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.3566977083683014, |
|
"rewards/margins": 3.441741943359375, |
|
"rewards/rejected": -3.7984399795532227, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.3570970637261186, |
|
"grad_norm": 22.386629994354788, |
|
"learning_rate": 1.4133445529168365e-07, |
|
"logits/chosen": 0.5482079982757568, |
|
"logits/rejected": 0.5674624443054199, |
|
"logps/chosen": -47.31834030151367, |
|
"logps/rejected": -59.47747802734375, |
|
"loss": 0.1735, |
|
"rewards/accuracies": 0.9444444179534912, |
|
"rewards/chosen": -0.25350263714790344, |
|
"rewards/margins": 3.711785316467285, |
|
"rewards/rejected": -3.965287923812866, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.3617130401333504, |
|
"grad_norm": 14.716672759245373, |
|
"learning_rate": 1.395220287646483e-07, |
|
"logits/chosen": 0.5413531064987183, |
|
"logits/rejected": 0.5619943141937256, |
|
"logps/chosen": -45.74396514892578, |
|
"logps/rejected": -54.50990295410156, |
|
"loss": 0.1609, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": -0.3855374753475189, |
|
"rewards/margins": 3.439289093017578, |
|
"rewards/rejected": -3.82482647895813, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.3617130401333504, |
|
"eval_logits/chosen": 0.43462061882019043, |
|
"eval_logits/rejected": 0.461076945066452, |
|
"eval_logps/chosen": -42.448509216308594, |
|
"eval_logps/rejected": -55.58904266357422, |
|
"eval_loss": 0.2393806427717209, |
|
"eval_rewards/accuracies": 0.8317972421646118, |
|
"eval_rewards/chosen": -0.3516136407852173, |
|
"eval_rewards/margins": 3.4237425327301025, |
|
"eval_rewards/rejected": -3.7753562927246094, |
|
"eval_runtime": 220.4141, |
|
"eval_samples_per_second": 7.867, |
|
"eval_steps_per_second": 1.969, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.3663290165405821, |
|
"grad_norm": 21.200823940085225, |
|
"learning_rate": 1.377167894115837e-07, |
|
"logits/chosen": 0.562565803527832, |
|
"logits/rejected": 0.6183031797409058, |
|
"logps/chosen": -38.32450866699219, |
|
"logps/rejected": -68.53689575195312, |
|
"loss": 0.179, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.12118612229824066, |
|
"rewards/margins": 4.04473876953125, |
|
"rewards/rejected": -4.165925025939941, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.370944992947814, |
|
"grad_norm": 13.082922810935031, |
|
"learning_rate": 1.3591885467281877e-07, |
|
"logits/chosen": 0.4695725440979004, |
|
"logits/rejected": 0.4965362548828125, |
|
"logps/chosen": -39.13195037841797, |
|
"logps/rejected": -58.23176574707031, |
|
"loss": 0.1861, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.30562350153923035, |
|
"rewards/margins": 3.781522035598755, |
|
"rewards/rejected": -4.0871453285217285, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.3755609693550455, |
|
"grad_norm": 34.97692684836387, |
|
"learning_rate": 1.3412834151347896e-07, |
|
"logits/chosen": 0.5469548106193542, |
|
"logits/rejected": 0.5717971324920654, |
|
"logps/chosen": -44.02994155883789, |
|
"logps/rejected": -57.28227996826172, |
|
"loss": 0.2084, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.3421705365180969, |
|
"rewards/margins": 3.692906379699707, |
|
"rewards/rejected": -4.035076141357422, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.3801769457622772, |
|
"grad_norm": 14.254996050777464, |
|
"learning_rate": 1.323453664158769e-07, |
|
"logits/chosen": 0.5193799138069153, |
|
"logits/rejected": 0.5635771155357361, |
|
"logps/chosen": -40.06482696533203, |
|
"logps/rejected": -67.0745620727539, |
|
"loss": 0.2322, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.5795915126800537, |
|
"rewards/margins": 3.6668989658355713, |
|
"rewards/rejected": -4.246490001678467, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.3847929221695088, |
|
"grad_norm": 18.46063830068681, |
|
"learning_rate": 1.3057004537193422e-07, |
|
"logits/chosen": 0.5273723602294922, |
|
"logits/rejected": 0.5402401685714722, |
|
"logps/chosen": -45.491241455078125, |
|
"logps/rejected": -53.827972412109375, |
|
"loss": 0.185, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.32591530680656433, |
|
"rewards/margins": 3.758335590362549, |
|
"rewards/rejected": -4.084251403808594, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.3847929221695088, |
|
"eval_logits/chosen": 0.4299531877040863, |
|
"eval_logits/rejected": 0.45649805665016174, |
|
"eval_logps/chosen": -42.37248992919922, |
|
"eval_logps/rejected": -55.565975189208984, |
|
"eval_loss": 0.23996217548847198, |
|
"eval_rewards/accuracies": 0.8300691246986389, |
|
"eval_rewards/chosen": -0.31360533833503723, |
|
"eval_rewards/margins": 3.450216054916382, |
|
"eval_rewards/rejected": -3.7638211250305176, |
|
"eval_runtime": 220.4449, |
|
"eval_samples_per_second": 7.866, |
|
"eval_steps_per_second": 1.969, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.3894088985767405, |
|
"grad_norm": 24.193490725343704, |
|
"learning_rate": 1.2880249387563662e-07, |
|
"logits/chosen": 0.5480252504348755, |
|
"logits/rejected": 0.5805102586746216, |
|
"logps/chosen": -43.4918098449707, |
|
"logps/rejected": -62.1549072265625, |
|
"loss": 0.1713, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": -0.4662397801876068, |
|
"rewards/margins": 3.974961280822754, |
|
"rewards/rejected": -4.441201210021973, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.3940248749839723, |
|
"grad_norm": 8.975682909766576, |
|
"learning_rate": 1.2704282691551938e-07, |
|
"logits/chosen": 0.45732539892196655, |
|
"logits/rejected": 0.5041163563728333, |
|
"logps/chosen": -40.32965850830078, |
|
"logps/rejected": -67.52854919433594, |
|
"loss": 0.1754, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.014653444290161133, |
|
"rewards/margins": 4.295289993286133, |
|
"rewards/rejected": -4.280636787414551, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.398640851391204, |
|
"grad_norm": 27.018968489026342, |
|
"learning_rate": 1.2529115896718714e-07, |
|
"logits/chosen": 0.5242836475372314, |
|
"logits/rejected": 0.5399221777915955, |
|
"logps/chosen": -45.72035217285156, |
|
"logps/rejected": -52.612548828125, |
|
"loss": 0.2076, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.37530067563056946, |
|
"rewards/margins": 3.2071659564971924, |
|
"rewards/rejected": -3.5824666023254395, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.4032568277984356, |
|
"grad_norm": 13.414881670063712, |
|
"learning_rate": 1.2354760398586708e-07, |
|
"logits/chosen": 0.5383539199829102, |
|
"logits/rejected": 0.5773718953132629, |
|
"logps/chosen": -48.75130081176758, |
|
"logps/rejected": -72.36872863769531, |
|
"loss": 0.1511, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.44930893182754517, |
|
"rewards/margins": 4.512818336486816, |
|
"rewards/rejected": -4.962126731872559, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.4078728042056674, |
|
"grad_norm": 7.330900567316457, |
|
"learning_rate": 1.2181227539899468e-07, |
|
"logits/chosen": 0.5381309986114502, |
|
"logits/rejected": 0.5586973428726196, |
|
"logps/chosen": -45.09908676147461, |
|
"logps/rejected": -58.20050811767578, |
|
"loss": 0.1744, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.2882673442363739, |
|
"rewards/margins": 3.7085728645324707, |
|
"rewards/rejected": -3.996840238571167, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.4078728042056674, |
|
"eval_logits/chosen": 0.4304519295692444, |
|
"eval_logits/rejected": 0.45695292949676514, |
|
"eval_logps/chosen": -42.44300842285156, |
|
"eval_logps/rejected": -55.6538200378418, |
|
"eval_loss": 0.238841712474823, |
|
"eval_rewards/accuracies": 0.8352534770965576, |
|
"eval_rewards/chosen": -0.34886524081230164, |
|
"eval_rewards/margins": 3.4588773250579834, |
|
"eval_rewards/rejected": -3.8077423572540283, |
|
"eval_runtime": 220.5308, |
|
"eval_samples_per_second": 7.863, |
|
"eval_steps_per_second": 1.968, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.412488780612899, |
|
"grad_norm": 10.681757170937171, |
|
"learning_rate": 1.2008528609883557e-07, |
|
"logits/chosen": 0.5007774233818054, |
|
"logits/rejected": 0.5296944379806519, |
|
"logps/chosen": -47.22381591796875, |
|
"logps/rejected": -64.06365966796875, |
|
"loss": 0.1531, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": -0.030609939247369766, |
|
"rewards/margins": 4.320724010467529, |
|
"rewards/rejected": -4.351334571838379, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.4171047570201307, |
|
"grad_norm": 10.655182313924602, |
|
"learning_rate": 1.1836674843514042e-07, |
|
"logits/chosen": 0.5347999930381775, |
|
"logits/rejected": 0.564474880695343, |
|
"logps/chosen": -37.77484893798828, |
|
"logps/rejected": -54.86954879760742, |
|
"loss": 0.175, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.38236376643180847, |
|
"rewards/margins": 3.763371706008911, |
|
"rewards/rejected": -4.145735263824463, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.4217207334273625, |
|
"grad_norm": 4.808937007878847, |
|
"learning_rate": 1.1665677420783671e-07, |
|
"logits/chosen": 0.5504859089851379, |
|
"logits/rejected": 0.5750877261161804, |
|
"logps/chosen": -43.14183807373047, |
|
"logps/rejected": -53.28805160522461, |
|
"loss": 0.1417, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.12460337579250336, |
|
"rewards/margins": 3.7694272994995117, |
|
"rewards/rejected": -3.894031047821045, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.4263367098345943, |
|
"grad_norm": 25.84566759360446, |
|
"learning_rate": 1.149554746597553e-07, |
|
"logits/chosen": 0.5723487734794617, |
|
"logits/rejected": 0.6003535389900208, |
|
"logps/chosen": -45.33318328857422, |
|
"logps/rejected": -59.90052795410156, |
|
"loss": 0.262, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.3526383936405182, |
|
"rewards/margins": 3.843003988265991, |
|
"rewards/rejected": -4.195642471313477, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.4309526862418258, |
|
"grad_norm": 16.545628594299828, |
|
"learning_rate": 1.1326296046939333e-07, |
|
"logits/chosen": 0.5338951945304871, |
|
"logits/rejected": 0.5544497966766357, |
|
"logps/chosen": -39.78907775878906, |
|
"logps/rejected": -49.23013687133789, |
|
"loss": 0.2511, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.12468406558036804, |
|
"rewards/margins": 3.2776834964752197, |
|
"rewards/rejected": -3.402367353439331, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.4309526862418258, |
|
"eval_logits/chosen": 0.43395209312438965, |
|
"eval_logits/rejected": 0.46030664443969727, |
|
"eval_logps/chosen": -42.41356658935547, |
|
"eval_logps/rejected": -55.699623107910156, |
|
"eval_loss": 0.23819313943386078, |
|
"eval_rewards/accuracies": 0.8317972421646118, |
|
"eval_rewards/chosen": -0.3341463804244995, |
|
"eval_rewards/margins": 3.4965004920959473, |
|
"eval_rewards/rejected": -3.8306467533111572, |
|
"eval_runtime": 220.4261, |
|
"eval_samples_per_second": 7.867, |
|
"eval_steps_per_second": 1.969, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.4355686626490576, |
|
"grad_norm": 18.451086465748666, |
|
"learning_rate": 1.1157934174371413e-07, |
|
"logits/chosen": 0.497620165348053, |
|
"logits/rejected": 0.5271977782249451, |
|
"logps/chosen": -44.88563919067383, |
|
"logps/rejected": -63.52084732055664, |
|
"loss": 0.1973, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.4545660614967346, |
|
"rewards/margins": 4.014831066131592, |
|
"rewards/rejected": -4.469396591186523, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.4401846390562894, |
|
"grad_norm": 15.41826391561629, |
|
"learning_rate": 1.0990472801098419e-07, |
|
"logits/chosen": 0.49964290857315063, |
|
"logits/rejected": 0.5341427326202393, |
|
"logps/chosen": -39.38306427001953, |
|
"logps/rejected": -59.41951370239258, |
|
"loss": 0.1465, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": -0.07668253034353256, |
|
"rewards/margins": 4.010004043579102, |
|
"rewards/rejected": -4.086687088012695, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.444800615463521, |
|
"grad_norm": 13.657128245878823, |
|
"learning_rate": 1.0823922821364795e-07, |
|
"logits/chosen": 0.5488825440406799, |
|
"logits/rejected": 0.5648425221443176, |
|
"logps/chosen": -49.72515869140625, |
|
"logps/rejected": -57.29216766357422, |
|
"loss": 0.1844, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.15428660809993744, |
|
"rewards/margins": 3.7048492431640625, |
|
"rewards/rejected": -3.859135627746582, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.4494165918707527, |
|
"grad_norm": 17.171702939592354, |
|
"learning_rate": 1.0658295070124026e-07, |
|
"logits/chosen": 0.5274313688278198, |
|
"logits/rejected": 0.540188729763031, |
|
"logps/chosen": -47.955406188964844, |
|
"logps/rejected": -54.03617477416992, |
|
"loss": 0.2187, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.16990727186203003, |
|
"rewards/margins": 3.60162091255188, |
|
"rewards/rejected": -3.7715280055999756, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.4540325682779844, |
|
"grad_norm": 25.795693399142227, |
|
"learning_rate": 1.0493600322333762e-07, |
|
"logits/chosen": 0.5215524435043335, |
|
"logits/rejected": 0.5590708255767822, |
|
"logps/chosen": -44.3021354675293, |
|
"logps/rejected": -73.55774688720703, |
|
"loss": 0.141, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.3967975080013275, |
|
"rewards/margins": 4.7301740646362305, |
|
"rewards/rejected": -5.12697172164917, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.4540325682779844, |
|
"eval_logits/chosen": 0.43174034357070923, |
|
"eval_logits/rejected": 0.4582732319831848, |
|
"eval_logps/chosen": -42.194610595703125, |
|
"eval_logps/rejected": -55.55934524536133, |
|
"eval_loss": 0.23693177103996277, |
|
"eval_rewards/accuracies": 0.8317972421646118, |
|
"eval_rewards/chosen": -0.22466643154621124, |
|
"eval_rewards/margins": 3.535839080810547, |
|
"eval_rewards/rejected": -3.7605059146881104, |
|
"eval_runtime": 220.3801, |
|
"eval_samples_per_second": 7.868, |
|
"eval_steps_per_second": 1.969, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.458648544685216, |
|
"grad_norm": 14.475820972948407, |
|
"learning_rate": 1.0329849292254883e-07, |
|
"logits/chosen": 0.596792995929718, |
|
"logits/rejected": 0.624647855758667, |
|
"logps/chosen": -45.63186264038086, |
|
"logps/rejected": -62.25794982910156, |
|
"loss": 0.1936, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.2872418463230133, |
|
"rewards/margins": 3.9080302715301514, |
|
"rewards/rejected": -4.195271968841553, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.4632645210924478, |
|
"grad_norm": 26.862980766739724, |
|
"learning_rate": 1.0167052632754458e-07, |
|
"logits/chosen": 0.5725838541984558, |
|
"logits/rejected": 0.5932745337486267, |
|
"logps/chosen": -41.20800018310547, |
|
"logps/rejected": -51.21732711791992, |
|
"loss": 0.227, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.39695149660110474, |
|
"rewards/margins": 2.928715229034424, |
|
"rewards/rejected": -3.325666666030884, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.4678804974996795, |
|
"grad_norm": 13.962052681918495, |
|
"learning_rate": 1.0005220934612713e-07, |
|
"logits/chosen": 0.6229636669158936, |
|
"logits/rejected": 0.6402004361152649, |
|
"logps/chosen": -46.95052719116211, |
|
"logps/rejected": -53.86199951171875, |
|
"loss": 0.1824, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.3401051461696625, |
|
"rewards/margins": 3.6175549030303955, |
|
"rewards/rejected": -3.95766019821167, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.472496473906911, |
|
"grad_norm": 9.092245687630806, |
|
"learning_rate": 9.844364725834056e-08, |
|
"logits/chosen": 0.48213544487953186, |
|
"logits/rejected": 0.5316063761711121, |
|
"logps/chosen": -45.23646545410156, |
|
"logps/rejected": -75.49991607666016, |
|
"loss": 0.0997, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": -0.1606331765651703, |
|
"rewards/margins": 5.202739238739014, |
|
"rewards/rejected": -5.363372802734375, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.4771124503141428, |
|
"grad_norm": 18.96340702396886, |
|
"learning_rate": 9.68449447096217e-08, |
|
"logits/chosen": 0.4373500943183899, |
|
"logits/rejected": 0.4579113721847534, |
|
"logps/chosen": -39.44499588012695, |
|
"logps/rejected": -51.54633712768555, |
|
"loss": 0.3299, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": -0.2867163419723511, |
|
"rewards/margins": 3.076793670654297, |
|
"rewards/rejected": -3.3635098934173584, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.4771124503141428, |
|
"eval_logits/chosen": 0.4346330463886261, |
|
"eval_logits/rejected": 0.461146742105484, |
|
"eval_logps/chosen": -42.071449279785156, |
|
"eval_logps/rejected": -55.46683883666992, |
|
"eval_loss": 0.23784740269184113, |
|
"eval_rewards/accuracies": 0.835829496383667, |
|
"eval_rewards/chosen": -0.16308562457561493, |
|
"eval_rewards/margins": 3.551164388656616, |
|
"eval_rewards/rejected": -3.714250087738037, |
|
"eval_runtime": 220.3881, |
|
"eval_samples_per_second": 7.868, |
|
"eval_steps_per_second": 1.969, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.4817284267213746, |
|
"grad_norm": 22.570461867090884, |
|
"learning_rate": 9.525620570399259e-08, |
|
"logits/chosen": 0.5038811564445496, |
|
"logits/rejected": 0.5432533025741577, |
|
"logps/chosen": -44.41080856323242, |
|
"logps/rejected": -65.23593139648438, |
|
"loss": 0.1275, |
|
"rewards/accuracies": 0.9444444179534912, |
|
"rewards/chosen": -0.2485545426607132, |
|
"rewards/margins": 4.013004779815674, |
|
"rewards/rejected": -4.261559009552002, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.4863444031286062, |
|
"grad_norm": 11.127499049370783, |
|
"learning_rate": 9.36775335972943e-08, |
|
"logits/chosen": 0.4518318772315979, |
|
"logits/rejected": 0.531367838382721, |
|
"logps/chosen": -39.415767669677734, |
|
"logps/rejected": -98.71846771240234, |
|
"loss": 0.1566, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.000497970322612673, |
|
"rewards/margins": 6.575231075286865, |
|
"rewards/rejected": -6.575727939605713, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.490960379535838, |
|
"grad_norm": 24.53509661266678, |
|
"learning_rate": 9.210903109046284e-08, |
|
"logits/chosen": 0.46663856506347656, |
|
"logits/rejected": 0.5147727727890015, |
|
"logps/chosen": -43.30581283569336, |
|
"logps/rejected": -63.16206741333008, |
|
"loss": 0.1684, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.5338683128356934, |
|
"rewards/margins": 4.3571882247924805, |
|
"rewards/rejected": -4.89105749130249, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.4955763559430697, |
|
"grad_norm": 11.303027411423997, |
|
"learning_rate": 9.05508002228485e-08, |
|
"logits/chosen": 0.529050350189209, |
|
"logits/rejected": 0.5628350377082825, |
|
"logps/chosen": -38.363826751708984, |
|
"logps/rejected": -53.06625747680664, |
|
"loss": 0.2071, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.031818799674510956, |
|
"rewards/margins": 3.961611032485962, |
|
"rewards/rejected": -3.929792642593384, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.5001923323503012, |
|
"grad_norm": 10.500286558923209, |
|
"learning_rate": 8.900294236557707e-08, |
|
"logits/chosen": 0.49337685108184814, |
|
"logits/rejected": 0.5243138074874878, |
|
"logps/chosen": -37.17765808105469, |
|
"logps/rejected": -49.10523986816406, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.008912450633943081, |
|
"rewards/margins": 3.240175485610962, |
|
"rewards/rejected": -3.2312631607055664, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.5001923323503012, |
|
"eval_logits/chosen": 0.4313080310821533, |
|
"eval_logits/rejected": 0.45790737867355347, |
|
"eval_logps/chosen": -42.17680740356445, |
|
"eval_logps/rejected": -55.66178894042969, |
|
"eval_loss": 0.2398524433374405, |
|
"eval_rewards/accuracies": 0.8306451439857483, |
|
"eval_rewards/chosen": -0.21576282382011414, |
|
"eval_rewards/margins": 3.59596586227417, |
|
"eval_rewards/rejected": -3.8117284774780273, |
|
"eval_runtime": 220.4293, |
|
"eval_samples_per_second": 7.866, |
|
"eval_steps_per_second": 1.969, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.504808308757533, |
|
"grad_norm": 21.390880404408534, |
|
"learning_rate": 8.746555821495561e-08, |
|
"logits/chosen": 0.4801899492740631, |
|
"logits/rejected": 0.5136987566947937, |
|
"logps/chosen": -43.907596588134766, |
|
"logps/rejected": -62.06863021850586, |
|
"loss": 0.1972, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21802374720573425, |
|
"rewards/margins": 4.019637584686279, |
|
"rewards/rejected": -4.237661361694336, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.5094242851647648, |
|
"grad_norm": 17.814740010117944, |
|
"learning_rate": 8.593874778592122e-08, |
|
"logits/chosen": 0.4772498309612274, |
|
"logits/rejected": 0.5082363486289978, |
|
"logps/chosen": -36.85258483886719, |
|
"logps/rejected": -49.34876251220703, |
|
"loss": 0.1537, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.038329627364873886, |
|
"rewards/margins": 3.5393142700195312, |
|
"rewards/rejected": -3.577643394470215, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.5140402615719966, |
|
"grad_norm": 24.684686325904988, |
|
"learning_rate": 8.442261040553472e-08, |
|
"logits/chosen": 0.5512763857841492, |
|
"logits/rejected": 0.5618037581443787, |
|
"logps/chosen": -44.694515228271484, |
|
"logps/rejected": -49.48525619506836, |
|
"loss": 0.1683, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.0919620469212532, |
|
"rewards/margins": 3.498401403427124, |
|
"rewards/rejected": -3.406439781188965, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.518656237979228, |
|
"grad_norm": 21.50701378180569, |
|
"learning_rate": 8.291724470651903e-08, |
|
"logits/chosen": 0.49069249629974365, |
|
"logits/rejected": 0.5210825800895691, |
|
"logps/chosen": -44.639766693115234, |
|
"logps/rejected": -57.28916549682617, |
|
"loss": 0.2335, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.4066303074359894, |
|
"rewards/margins": 3.4069387912750244, |
|
"rewards/rejected": -3.813568592071533, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.5232722143864597, |
|
"grad_norm": 11.082339838552715, |
|
"learning_rate": 8.14227486208423e-08, |
|
"logits/chosen": 0.4665941596031189, |
|
"logits/rejected": 0.4930134415626526, |
|
"logps/chosen": -37.94073486328125, |
|
"logps/rejected": -53.0433464050293, |
|
"loss": 0.1797, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.09727773815393448, |
|
"rewards/margins": 3.91404128074646, |
|
"rewards/rejected": -3.8167638778686523, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.5232722143864597, |
|
"eval_logits/chosen": 0.43500614166259766, |
|
"eval_logits/rejected": 0.4616233706474304, |
|
"eval_logps/chosen": -42.075767517089844, |
|
"eval_logps/rejected": -55.58706283569336, |
|
"eval_loss": 0.2391819953918457, |
|
"eval_rewards/accuracies": 0.8306451439857483, |
|
"eval_rewards/chosen": -0.1652439683675766, |
|
"eval_rewards/margins": 3.609118938446045, |
|
"eval_rewards/rejected": -3.774362802505493, |
|
"eval_runtime": 220.4966, |
|
"eval_samples_per_second": 7.864, |
|
"eval_steps_per_second": 1.968, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.5278881907936914, |
|
"grad_norm": 17.884909353386927, |
|
"learning_rate": 7.993921937334716e-08, |
|
"logits/chosen": 0.5584304332733154, |
|
"logits/rejected": 0.5700749754905701, |
|
"logps/chosen": -41.323944091796875, |
|
"logps/rejected": -49.892147064208984, |
|
"loss": 0.2096, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.27705806493759155, |
|
"rewards/margins": 3.536667823791504, |
|
"rewards/rejected": -3.813725709915161, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.5325041672009232, |
|
"grad_norm": 6.982953174746173, |
|
"learning_rate": 7.846675347542578e-08, |
|
"logits/chosen": 0.5807335376739502, |
|
"logits/rejected": 0.6132792234420776, |
|
"logps/chosen": -37.81986999511719, |
|
"logps/rejected": -49.71797180175781, |
|
"loss": 0.1272, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.3861154019832611, |
|
"rewards/margins": 4.170031547546387, |
|
"rewards/rejected": -3.783916473388672, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.537120143608155, |
|
"grad_norm": 18.18022469520284, |
|
"learning_rate": 7.700544671874079e-08, |
|
"logits/chosen": 0.6006969213485718, |
|
"logits/rejected": 0.6162829995155334, |
|
"logps/chosen": -47.33814239501953, |
|
"logps/rejected": -52.70623016357422, |
|
"loss": 0.1962, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.2818297743797302, |
|
"rewards/margins": 3.495248317718506, |
|
"rewards/rejected": -3.7770779132843018, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.5417361200153867, |
|
"grad_norm": 17.752568042598934, |
|
"learning_rate": 7.555539416899437e-08, |
|
"logits/chosen": 0.5043608546257019, |
|
"logits/rejected": 0.535383939743042, |
|
"logps/chosen": -37.40916442871094, |
|
"logps/rejected": -52.42148971557617, |
|
"loss": 0.2323, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.4006814658641815, |
|
"rewards/margins": 3.385708808898926, |
|
"rewards/rejected": -3.7863900661468506, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.5463520964226183, |
|
"grad_norm": 14.165329854797266, |
|
"learning_rate": 7.41166901597429e-08, |
|
"logits/chosen": 0.5081818699836731, |
|
"logits/rejected": 0.5341579914093018, |
|
"logps/chosen": -42.154205322265625, |
|
"logps/rejected": -55.97992706298828, |
|
"loss": 0.1774, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": -0.05981425940990448, |
|
"rewards/margins": 3.988154172897339, |
|
"rewards/rejected": -4.047967910766602, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.5463520964226183, |
|
"eval_logits/chosen": 0.4372006952762604, |
|
"eval_logits/rejected": 0.46362602710723877, |
|
"eval_logps/chosen": -42.13774490356445, |
|
"eval_logps/rejected": -55.63636779785156, |
|
"eval_loss": 0.23786574602127075, |
|
"eval_rewards/accuracies": 0.8329492807388306, |
|
"eval_rewards/chosen": -0.19623348116874695, |
|
"eval_rewards/margins": 3.602783203125, |
|
"eval_rewards/rejected": -3.7990164756774902, |
|
"eval_runtime": 220.5205, |
|
"eval_samples_per_second": 7.863, |
|
"eval_steps_per_second": 1.968, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.5509680728298498, |
|
"grad_norm": 22.84931762442886, |
|
"learning_rate": 7.268942828626046e-08, |
|
"logits/chosen": 0.5015777349472046, |
|
"logits/rejected": 0.5260412096977234, |
|
"logps/chosen": -39.39936828613281, |
|
"logps/rejected": -50.80826950073242, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.02117648348212242, |
|
"rewards/margins": 3.6156790256500244, |
|
"rewards/rejected": -3.6368556022644043, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.5555840492370816, |
|
"grad_norm": 10.729660784502734, |
|
"learning_rate": 7.127370139945018e-08, |
|
"logits/chosen": 0.5064399242401123, |
|
"logits/rejected": 0.542765736579895, |
|
"logps/chosen": -41.118350982666016, |
|
"logps/rejected": -57.55162048339844, |
|
"loss": 0.1581, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": -0.18698811531066895, |
|
"rewards/margins": 4.028824806213379, |
|
"rewards/rejected": -4.215813159942627, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.5602000256443134, |
|
"grad_norm": 12.758336439580667, |
|
"learning_rate": 6.986960159980326e-08, |
|
"logits/chosen": 0.5471921563148499, |
|
"logits/rejected": 0.5656020045280457, |
|
"logps/chosen": -44.28984069824219, |
|
"logps/rejected": -53.67868423461914, |
|
"loss": 0.1621, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": -0.007483018562197685, |
|
"rewards/margins": 3.514232873916626, |
|
"rewards/rejected": -3.5217158794403076, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.5648160020515451, |
|
"grad_norm": 25.743372698631337, |
|
"learning_rate": 6.847722023140776e-08, |
|
"logits/chosen": 0.5099420547485352, |
|
"logits/rejected": 0.5306479930877686, |
|
"logps/chosen": -38.24551773071289, |
|
"logps/rejected": -46.37004470825195, |
|
"loss": 0.2453, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -0.13890628516674042, |
|
"rewards/margins": 3.242166757583618, |
|
"rewards/rejected": -3.381072998046875, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.569431978458777, |
|
"grad_norm": 29.001544411683714, |
|
"learning_rate": 6.709664787600616e-08, |
|
"logits/chosen": 0.5341071486473083, |
|
"logits/rejected": 0.549387514591217, |
|
"logps/chosen": -38.39107131958008, |
|
"logps/rejected": -45.22284698486328, |
|
"loss": 0.2519, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": -0.32282909750938416, |
|
"rewards/margins": 2.876624822616577, |
|
"rewards/rejected": -3.1994540691375732, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.569431978458777, |
|
"eval_logits/chosen": 0.4367799460887909, |
|
"eval_logits/rejected": 0.46335569024086, |
|
"eval_logps/chosen": -42.14803695678711, |
|
"eval_logps/rejected": -55.68684005737305, |
|
"eval_loss": 0.23701736330986023, |
|
"eval_rewards/accuracies": 0.8335253596305847, |
|
"eval_rewards/chosen": -0.20137952268123627, |
|
"eval_rewards/margins": 3.622871160507202, |
|
"eval_rewards/rejected": -3.8242506980895996, |
|
"eval_runtime": 220.405, |
|
"eval_samples_per_second": 7.867, |
|
"eval_steps_per_second": 1.969, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.5740479548660085, |
|
"grad_norm": 26.57226192590101, |
|
"learning_rate": 6.572797434710219e-08, |
|
"logits/chosen": 0.47764989733695984, |
|
"logits/rejected": 0.5231152772903442, |
|
"logps/chosen": -39.2479362487793, |
|
"logps/rejected": -67.22251892089844, |
|
"loss": 0.1985, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.03589929640293121, |
|
"rewards/margins": 4.406409740447998, |
|
"rewards/rejected": -4.370510578155518, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.57866393127324, |
|
"grad_norm": 7.8158043752344115, |
|
"learning_rate": 6.437128868411856e-08, |
|
"logits/chosen": 0.5327097177505493, |
|
"logits/rejected": 0.5473262071609497, |
|
"logps/chosen": -38.83921813964844, |
|
"logps/rejected": -47.30848693847656, |
|
"loss": 0.212, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.002615167060866952, |
|
"rewards/margins": 3.3942179679870605, |
|
"rewards/rejected": -3.3916027545928955, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.5832799076804718, |
|
"grad_norm": 11.10012939486401, |
|
"learning_rate": 6.302667914660384e-08, |
|
"logits/chosen": 0.5219799280166626, |
|
"logits/rejected": 0.55839604139328, |
|
"logps/chosen": -37.46578598022461, |
|
"logps/rejected": -54.46531295776367, |
|
"loss": 0.2233, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.02391706593334675, |
|
"rewards/margins": 3.7034101486206055, |
|
"rewards/rejected": -3.679492950439453, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.5878958840877035, |
|
"grad_norm": 19.67549763311113, |
|
"learning_rate": 6.169423320849112e-08, |
|
"logits/chosen": 0.5211795568466187, |
|
"logits/rejected": 0.5298517346382141, |
|
"logps/chosen": -45.8150520324707, |
|
"logps/rejected": -47.33256149291992, |
|
"loss": 0.2021, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2716074287891388, |
|
"rewards/margins": 3.559727191925049, |
|
"rewards/rejected": -3.831334352493286, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.5925118604949353, |
|
"grad_norm": 15.711220514951888, |
|
"learning_rate": 6.037403755240748e-08, |
|
"logits/chosen": 0.5544189810752869, |
|
"logits/rejected": 0.5787670612335205, |
|
"logps/chosen": -45.216304779052734, |
|
"logps/rejected": -59.76258850097656, |
|
"loss": 0.1572, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": -0.14509858191013336, |
|
"rewards/margins": 3.88366436958313, |
|
"rewards/rejected": -4.0287628173828125, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.5925118604949353, |
|
"eval_logits/chosen": 0.43276646733283997, |
|
"eval_logits/rejected": 0.45934849977493286, |
|
"eval_logps/chosen": -42.20445251464844, |
|
"eval_logps/rejected": -55.753753662109375, |
|
"eval_loss": 0.23724210262298584, |
|
"eval_rewards/accuracies": 0.8317972421646118, |
|
"eval_rewards/chosen": -0.2295861542224884, |
|
"eval_rewards/margins": 3.6281206607818604, |
|
"eval_rewards/rejected": -3.8577067852020264, |
|
"eval_runtime": 220.4833, |
|
"eval_samples_per_second": 7.865, |
|
"eval_steps_per_second": 1.968, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.597127836902167, |
|
"grad_norm": 14.487508826565733, |
|
"learning_rate": 5.9066178064034326e-08, |
|
"logits/chosen": 0.4430210590362549, |
|
"logits/rejected": 0.4965353012084961, |
|
"logps/chosen": -33.27760696411133, |
|
"logps/rejected": -71.74127197265625, |
|
"loss": 0.2328, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2861379384994507, |
|
"rewards/margins": 4.55012321472168, |
|
"rewards/rejected": -4.836262226104736, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.6017438133093986, |
|
"grad_norm": 23.580990452467088, |
|
"learning_rate": 5.777073982652064e-08, |
|
"logits/chosen": 0.5170236825942993, |
|
"logits/rejected": 0.5521243214607239, |
|
"logps/chosen": -35.71030044555664, |
|
"logps/rejected": -52.74575424194336, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": -0.3935600519180298, |
|
"rewards/margins": 3.574741840362549, |
|
"rewards/rejected": -3.96830153465271, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.6063597897166302, |
|
"grad_norm": 13.54068941517088, |
|
"learning_rate": 5.6487807114947325e-08, |
|
"logits/chosen": 0.551853358745575, |
|
"logits/rejected": 0.5928479433059692, |
|
"logps/chosen": -42.63957214355469, |
|
"logps/rejected": -70.68295288085938, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": -0.2251981645822525, |
|
"rewards/margins": 4.277625560760498, |
|
"rewards/rejected": -4.502823352813721, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.610975766123862, |
|
"grad_norm": 27.742044897151906, |
|
"learning_rate": 5.521746339084532e-08, |
|
"logits/chosen": 0.5765677094459534, |
|
"logits/rejected": 0.5921374559402466, |
|
"logps/chosen": -47.175655364990234, |
|
"logps/rejected": -58.09642028808594, |
|
"loss": 0.2516, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": -0.3188338875770569, |
|
"rewards/margins": 3.57645583152771, |
|
"rewards/rejected": -3.895289897918701, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.6155917425310937, |
|
"grad_norm": 13.652878320465026, |
|
"learning_rate": 5.39597912967652e-08, |
|
"logits/chosen": 0.5359885692596436, |
|
"logits/rejected": 0.575743556022644, |
|
"logps/chosen": -38.843807220458984, |
|
"logps/rejected": -61.49338150024414, |
|
"loss": 0.1886, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -0.01514108944684267, |
|
"rewards/margins": 4.108646392822266, |
|
"rewards/rejected": -4.1237874031066895, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.6155917425310937, |
|
"eval_logits/chosen": 0.43191081285476685, |
|
"eval_logits/rejected": 0.4585791528224945, |
|
"eval_logps/chosen": -42.20844268798828, |
|
"eval_logps/rejected": -55.773094177246094, |
|
"eval_loss": 0.23592650890350342, |
|
"eval_rewards/accuracies": 0.8364055156707764, |
|
"eval_rewards/chosen": -0.23158276081085205, |
|
"eval_rewards/margins": 3.635798692703247, |
|
"eval_rewards/rejected": -3.8673815727233887, |
|
"eval_runtime": 220.5019, |
|
"eval_samples_per_second": 7.864, |
|
"eval_steps_per_second": 1.968, |
|
"step": 700 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 866, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|