{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 11451, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.022172949002217297, "grad_norm": NaN, "learning_rate": 1.9512195121951218e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5637989044189453, "logits/rejected": -2.5734333992004395, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.8527, "nll_loss": 1.088592767715454, "rewards/accuracies": 0.512499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10 }, { "epoch": 0.04434589800443459, "grad_norm": NaN, "learning_rate": 3.9024390243902435e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5235965251922607, "logits/rejected": -2.5694117546081543, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.6216, "nll_loss": 1.042040467262268, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 20 }, { "epoch": 0.06651884700665188, "grad_norm": 0.7492002248764038, "learning_rate": 5.853658536585366e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.439809560775757, "logits/rejected": -2.4339804649353027, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3856, "nll_loss": 1.040069818496704, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 30 }, { "epoch": 0.08869179600886919, "grad_norm": NaN, "learning_rate": 7.804878048780487e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5076301097869873, "logits/rejected": -2.538536310195923, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5238, "nll_loss": 1.0452662706375122, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 40 }, { "epoch": 0.11086474501108648, "grad_norm": NaN, "learning_rate": 7.999071182730533e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5563275814056396, "logits/rejected": -2.5710463523864746, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.7136, "nll_loss": 0.9837453961372375, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 50 }, { "epoch": 0.13303769401330376, "grad_norm": NaN, "learning_rate": 7.995861010152277e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.422978162765503, "logits/rejected": -2.497622013092041, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4346, "nll_loss": 1.1125876903533936, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 60 }, { "epoch": 0.15521064301552107, "grad_norm": NaN, "learning_rate": 7.990359855463492e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4739160537719727, "logits/rejected": -2.5015368461608887, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.7054, "nll_loss": 1.0281422138214111, "rewards/accuracies": 0.5, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 70 }, { "epoch": 0.17738359201773837, "grad_norm": NaN, "learning_rate": 7.982570872689543e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5495190620422363, "logits/rejected": -2.5661299228668213, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.9447, "nll_loss": 1.0821787118911743, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 80 }, { "epoch": 0.19955654101995565, "grad_norm": NaN, "learning_rate": 7.972498527556375e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.539638042449951, "logits/rejected": -2.565432071685791, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4908, "nll_loss": 1.0412414073944092, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 90 }, { "epoch": 0.22172949002217296, "grad_norm": NaN, "learning_rate": 7.960148594930148e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5148587226867676, "logits/rejected": -2.531787395477295, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3802, "nll_loss": 1.0808457136154175, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 100 }, { "epoch": 0.24390243902439024, "grad_norm": NaN, "learning_rate": 7.945528155506268e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4936861991882324, "logits/rejected": -2.5372276306152344, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4052, "nll_loss": 1.0503720045089722, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 110 }, { "epoch": 0.2660753880266075, "grad_norm": NaN, "learning_rate": 7.928645591749765e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5514473915100098, "logits/rejected": -2.5984888076782227, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.6079, "nll_loss": 1.0542423725128174, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 120 }, { "epoch": 0.28824833702882485, "grad_norm": NaN, "learning_rate": 7.909510583089285e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.513597011566162, "logits/rejected": -2.509930372238159, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4307, "nll_loss": 0.9876394271850586, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 130 }, { "epoch": 0.31042128603104213, "grad_norm": 0.5560552477836609, "learning_rate": 7.888134100367517e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5325217247009277, "logits/rejected": -2.5615830421447754, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5637, "nll_loss": 1.090905785560608, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 140 }, { "epoch": 0.3325942350332594, "grad_norm": NaN, "learning_rate": 7.864528399551163e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5349364280700684, "logits/rejected": -2.555640459060669, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2739, "nll_loss": 0.984653115272522, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 150 }, { "epoch": 0.35476718403547675, "grad_norm": 0.49410519003868103, "learning_rate": 7.83870701470413e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.579796075820923, "logits/rejected": -2.6130154132843018, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.6816, "nll_loss": 0.9771108627319336, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 160 }, { "epoch": 0.376940133037694, "grad_norm": NaN, "learning_rate": 7.810684750227926e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5355019569396973, "logits/rejected": -2.5664353370666504, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4045, "nll_loss": 0.9972349405288696, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 170 }, { "epoch": 0.3991130820399113, "grad_norm": NaN, "learning_rate": 7.780477672373715e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.610077381134033, "logits/rejected": -2.6662240028381348, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3857, "nll_loss": 0.9627612829208374, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 180 }, { "epoch": 0.4212860310421286, "grad_norm": NaN, "learning_rate": 7.748103100030918e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5606350898742676, "logits/rejected": -2.599088430404663, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.7501, "nll_loss": 0.9361652135848999, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 190 }, { "epoch": 0.4434589800443459, "grad_norm": NaN, "learning_rate": 7.713579594797617e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5864617824554443, "logits/rejected": -2.6177988052368164, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5321, "nll_loss": 0.9839666485786438, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 200 }, { "epoch": 0.4656319290465632, "grad_norm": NaN, "learning_rate": 7.676926950338484e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5560379028320312, "logits/rejected": -2.6088876724243164, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4582, "nll_loss": 0.9833539724349976, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 210 }, { "epoch": 0.4878048780487805, "grad_norm": NaN, "learning_rate": 7.638166181036278e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6120612621307373, "logits/rejected": -2.648507595062256, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.7413, "nll_loss": 0.9990829229354858, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 220 }, { "epoch": 0.5099778270509978, "grad_norm": NaN, "learning_rate": 7.597319509943522e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5437393188476562, "logits/rejected": -2.5923430919647217, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3477, "nll_loss": 0.9254695177078247, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 230 }, { "epoch": 0.532150776053215, "grad_norm": NaN, "learning_rate": 7.554410356041128e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5858166217803955, "logits/rejected": -2.645839214324951, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.8364, "nll_loss": 0.9360870122909546, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 240 }, { "epoch": 0.5543237250554324, "grad_norm": NaN, "learning_rate": 7.509463320811409e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.603842258453369, "logits/rejected": -2.65669584274292, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.9066, "nll_loss": 0.8704744577407837, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 250 }, { "epoch": 0.5764966740576497, "grad_norm": NaN, "learning_rate": 7.462504174133093e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5409696102142334, "logits/rejected": -2.58134388923645, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4614, "nll_loss": 0.8922187685966492, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 260 }, { "epoch": 0.5986696230598669, "grad_norm": NaN, "learning_rate": 7.413559839506442e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6056294441223145, "logits/rejected": -2.59102725982666, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4101, "nll_loss": 0.9519271850585938, "rewards/accuracies": 0.512499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 270 }, { "epoch": 0.6208425720620843, "grad_norm": NaN, "learning_rate": 7.362658378616977e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.518075942993164, "logits/rejected": -2.5656614303588867, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.7219, "nll_loss": 0.9626830816268921, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 280 }, { "epoch": 0.6430155210643016, "grad_norm": NaN, "learning_rate": 7.309828975246615e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.478224277496338, "logits/rejected": -2.4818527698516846, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4698, "nll_loss": 0.9482153654098511, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 290 }, { "epoch": 0.6651884700665188, "grad_norm": NaN, "learning_rate": 7.255101918541482e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5367703437805176, "logits/rejected": -2.618227005004883, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3779, "nll_loss": 0.9249935150146484, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 300 }, { "epoch": 0.6873614190687362, "grad_norm": NaN, "learning_rate": 7.198508585645966e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.498709201812744, "logits/rejected": -2.5306169986724854, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4247, "nll_loss": 0.9907130002975464, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 310 }, { "epoch": 0.7095343680709535, "grad_norm": 0.5340009331703186, "learning_rate": 7.140081423712985e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.576976776123047, "logits/rejected": -2.5837340354919434, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3265, "nll_loss": 0.9744553565979004, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 320 }, { "epoch": 0.7317073170731707, "grad_norm": NaN, "learning_rate": 7.079853931300778e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5235087871551514, "logits/rejected": -2.537680149078369, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3516, "nll_loss": 0.9415532350540161, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 330 }, { "epoch": 0.753880266075388, "grad_norm": NaN, "learning_rate": 7.017860639166877e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.561636447906494, "logits/rejected": -2.6029891967773438, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.0444, "nll_loss": 0.87201327085495, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 340 }, { "epoch": 0.7760532150776053, "grad_norm": NaN, "learning_rate": 6.95413709047029e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6087021827697754, "logits/rejected": -2.631678342819214, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.241, "nll_loss": 0.9579289555549622, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 350 }, { "epoch": 0.7982261640798226, "grad_norm": 0.6355442404747009, "learning_rate": 6.888719820393224e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5300655364990234, "logits/rejected": -2.538252592086792, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1996, "nll_loss": 0.9421980977058411, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 360 }, { "epoch": 0.8203991130820399, "grad_norm": NaN, "learning_rate": 6.821646335194051e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4321510791778564, "logits/rejected": -2.488974094390869, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5151, "nll_loss": 0.9701131582260132, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 370 }, { "epoch": 0.8425720620842572, "grad_norm": NaN, "learning_rate": 6.752955090703516e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5075130462646484, "logits/rejected": -2.576809883117676, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.6486, "nll_loss": 0.8745073080062866, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 380 }, { "epoch": 0.8647450110864745, "grad_norm": NaN, "learning_rate": 6.682685470276513e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.609452247619629, "logits/rejected": -2.6642494201660156, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.6961, "nll_loss": 0.8854317665100098, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 390 }, { "epoch": 0.8869179600886918, "grad_norm": 0.49902427196502686, "learning_rate": 6.610877762212086e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5146374702453613, "logits/rejected": -2.529111385345459, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2123, "nll_loss": 0.9021833539009094, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 400 }, { "epoch": 0.9090909090909091, "grad_norm": NaN, "learning_rate": 6.537573136654582e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5666518211364746, "logits/rejected": -2.594228744506836, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4251, "nll_loss": 0.9093640446662903, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 410 }, { "epoch": 0.9312638580931264, "grad_norm": NaN, "learning_rate": 6.462813621989207e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.551781177520752, "logits/rejected": -2.5560076236724854, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3258, "nll_loss": 0.9460923075675964, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 420 }, { "epoch": 0.9534368070953437, "grad_norm": NaN, "learning_rate": 6.386642080745528e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5597198009490967, "logits/rejected": -2.5898067951202393, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2546, "nll_loss": 0.9564792513847351, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 430 }, { "epoch": 0.975609756097561, "grad_norm": 0.4311073124408722, "learning_rate": 6.30910218502272e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6047396659851074, "logits/rejected": -2.6136045455932617, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3434, "nll_loss": 0.96406489610672, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 440 }, { "epoch": 0.9977827050997783, "grad_norm": NaN, "learning_rate": 6.230238391450653e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.598021984100342, "logits/rejected": -2.635017156600952, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5232, "nll_loss": 0.9541953802108765, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 450 }, { "epoch": 1.0199556541019956, "grad_norm": NaN, "learning_rate": 6.150095915701193e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.572929859161377, "logits/rejected": -2.6330859661102295, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4393, "nll_loss": 0.9072533845901489, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 460 }, { "epoch": 1.042128603104213, "grad_norm": NaN, "learning_rate": 6.06872070656429e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.659424066543579, "logits/rejected": -2.665045976638794, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2597, "nll_loss": 0.9222477674484253, "rewards/accuracies": 0.512499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 470 }, { "epoch": 1.06430155210643, "grad_norm": NaN, "learning_rate": 5.986159419603766e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5539708137512207, "logits/rejected": -2.578723430633545, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5468, "nll_loss": 0.9003454446792603, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 480 }, { "epoch": 1.0864745011086474, "grad_norm": NaN, "learning_rate": 5.902459390407861e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5614101886749268, "logits/rejected": -2.61173677444458, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4411, "nll_loss": 0.9182124137878418, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 490 }, { "epoch": 1.1086474501108647, "grad_norm": 0.7572781443595886, "learning_rate": 5.817668607449911e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.621619939804077, "logits/rejected": -2.688084363937378, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2006, "nll_loss": 0.9091912508010864, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 500 }, { "epoch": 1.130820399113082, "grad_norm": NaN, "learning_rate": 5.731835684574692e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.59326171875, "logits/rejected": -2.6192498207092285, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.392, "nll_loss": 0.8921745419502258, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 510 }, { "epoch": 1.1529933481152994, "grad_norm": NaN, "learning_rate": 5.645009833126218e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.570634365081787, "logits/rejected": -2.5929832458496094, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5643, "nll_loss": 0.9609503746032715, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 520 }, { "epoch": 1.1751662971175167, "grad_norm": NaN, "learning_rate": 5.557240833732967e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5438590049743652, "logits/rejected": -2.586803913116455, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4816, "nll_loss": 0.8677853345870972, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 530 }, { "epoch": 1.1973392461197339, "grad_norm": NaN, "learning_rate": 5.46857900776672e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5750033855438232, "logits/rejected": -2.618818521499634, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3517, "nll_loss": 0.8420497179031372, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 540 }, { "epoch": 1.2195121951219512, "grad_norm": NaN, "learning_rate": 5.3790751884913605e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.576446771621704, "logits/rejected": -2.5843629837036133, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5026, "nll_loss": 0.8621179461479187, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 550 }, { "epoch": 1.2416851441241685, "grad_norm": NaN, "learning_rate": 5.288780691918196e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.569793939590454, "logits/rejected": -2.656283140182495, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3906, "nll_loss": 1.0533077716827393, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 560 }, { "epoch": 1.2638580931263859, "grad_norm": NaN, "learning_rate": 5.197747287384502e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.586745500564575, "logits/rejected": -2.6093835830688477, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5754, "nll_loss": 0.9478577375411987, "rewards/accuracies": 0.5, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 570 }, { "epoch": 1.2860310421286032, "grad_norm": NaN, "learning_rate": 5.106027167872141e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5553150177001953, "logits/rejected": -2.610725164413452, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4337, "nll_loss": 0.8642798662185669, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 580 }, { "epoch": 1.3082039911308203, "grad_norm": NaN, "learning_rate": 5.013672920083319e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.579271078109741, "logits/rejected": -2.635814666748047, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2762, "nll_loss": 0.9140083193778992, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 590 }, { "epoch": 1.3303769401330376, "grad_norm": NaN, "learning_rate": 4.920737494290572e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5220165252685547, "logits/rejected": -2.551558256149292, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.6071, "nll_loss": 0.9066599011421204, "rewards/accuracies": 0.512499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 600 }, { "epoch": 1.352549889135255, "grad_norm": NaN, "learning_rate": 4.827274173978333e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.560403347015381, "logits/rejected": -2.639423131942749, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2709, "nll_loss": 0.8913120031356812, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 610 }, { "epoch": 1.3747228381374723, "grad_norm": NaN, "learning_rate": 4.733336545293438e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.616939067840576, "logits/rejected": -2.650655746459961, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4557, "nll_loss": 0.9315293431282043, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 620 }, { "epoch": 1.3968957871396896, "grad_norm": 0.6212561130523682, "learning_rate": 4.638978466322108e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.531559944152832, "logits/rejected": -2.5307424068450928, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.9584, "nll_loss": 0.9440839886665344, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 630 }, { "epoch": 1.4190687361419068, "grad_norm": NaN, "learning_rate": 4.5442540362110285e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.546278476715088, "logits/rejected": -2.5663809776306152, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4305, "nll_loss": 0.8402501940727234, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 640 }, { "epoch": 1.441241685144124, "grad_norm": 0.784209132194519, "learning_rate": 4.449217564150197e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.547091484069824, "logits/rejected": -2.6290016174316406, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1191, "nll_loss": 0.9092384576797485, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 650 }, { "epoch": 1.4634146341463414, "grad_norm": NaN, "learning_rate": 4.353923538235369e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.60884952545166, "logits/rejected": -2.640436887741089, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2808, "nll_loss": 0.872983455657959, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 660 }, { "epoch": 1.4855875831485588, "grad_norm": NaN, "learning_rate": 4.2584265942279114e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5404248237609863, "logits/rejected": -2.5652973651885986, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5307, "nll_loss": 0.8896579742431641, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 670 }, { "epoch": 1.507760532150776, "grad_norm": NaN, "learning_rate": 4.162781484230005e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.554001569747925, "logits/rejected": -2.5924289226531982, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5124, "nll_loss": 0.9706124067306519, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 680 }, { "epoch": 1.5299334811529932, "grad_norm": NaN, "learning_rate": 4.067043045293142e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6452507972717285, "logits/rejected": -2.6547276973724365, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3232, "nll_loss": 0.9081094861030579, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 690 }, { "epoch": 1.5521064301552108, "grad_norm": NaN, "learning_rate": 3.971266167977914e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5739452838897705, "logits/rejected": -2.6191649436950684, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1834, "nll_loss": 0.8611727952957153, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 700 }, { "epoch": 1.5742793791574279, "grad_norm": 0.7607054710388184, "learning_rate": 3.875505764883128e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5171356201171875, "logits/rejected": -2.5558667182922363, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3944, "nll_loss": 0.9226005673408508, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 710 }, { "epoch": 1.5964523281596452, "grad_norm": NaN, "learning_rate": 3.7798167391622746e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4788334369659424, "logits/rejected": -2.556370735168457, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2499, "nll_loss": 0.8890296816825867, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 720 }, { "epoch": 1.6186252771618626, "grad_norm": 0.5698145031929016, "learning_rate": 3.684253953045438e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.59308123588562, "logits/rejected": -2.5706467628479004, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3174, "nll_loss": 0.8769053220748901, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 730 }, { "epoch": 1.6407982261640797, "grad_norm": NaN, "learning_rate": 3.588872196384632e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.515533924102783, "logits/rejected": -2.539431571960449, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4826, "nll_loss": 0.9430069923400879, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 740 }, { "epoch": 1.6629711751662972, "grad_norm": 1.2568813562393188, "learning_rate": 3.493726155240664e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6210389137268066, "logits/rejected": -2.6431758403778076, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2068, "nll_loss": 0.9099509119987488, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 750 }, { "epoch": 1.6851441241685143, "grad_norm": NaN, "learning_rate": 3.3988703805294946e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.572796106338501, "logits/rejected": -2.6425790786743164, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3102, "nll_loss": 0.9549806714057922, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 760 }, { "epoch": 1.7073170731707317, "grad_norm": 0.7996054887771606, "learning_rate": 3.3043592567460748e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5201961994171143, "logits/rejected": -2.552128553390503, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5766, "nll_loss": 0.9187144041061401, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 770 }, { "epoch": 1.729490022172949, "grad_norm": NaN, "learning_rate": 3.2102469707836174e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5832736492156982, "logits/rejected": -2.6100733280181885, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1691, "nll_loss": 0.8904333114624023, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 780 }, { "epoch": 1.7516629711751663, "grad_norm": NaN, "learning_rate": 3.1165874808661342e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.599666118621826, "logits/rejected": -2.62807297706604, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4813, "nll_loss": 0.8979522585868835, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 790 }, { "epoch": 1.7738359201773837, "grad_norm": NaN, "learning_rate": 3.0234344856121086e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6034371852874756, "logits/rejected": -2.6399123668670654, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2004, "nll_loss": 0.9016131162643433, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 800 }, { "epoch": 1.7960088691796008, "grad_norm": NaN, "learning_rate": 2.9308413932469805e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5875086784362793, "logits/rejected": -2.5995872020721436, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2187, "nll_loss": 0.895309567451477, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 810 }, { "epoch": 1.8181818181818183, "grad_norm": NaN, "learning_rate": 2.8388612909821512e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.545532703399658, "logits/rejected": -2.5655813217163086, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4789, "nll_loss": 0.8789796829223633, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 820 }, { "epoch": 1.8403547671840355, "grad_norm": NaN, "learning_rate": 2.7475469145780162e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.634237766265869, "logits/rejected": -2.6819849014282227, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 2.0414, "nll_loss": 0.8710411190986633, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 830 }, { "epoch": 1.8625277161862528, "grad_norm": NaN, "learning_rate": 2.6569506181085155e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.569899559020996, "logits/rejected": -2.627354145050049, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1485, "nll_loss": 0.8969793319702148, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 840 }, { "epoch": 1.8847006651884701, "grad_norm": NaN, "learning_rate": 2.5671243439445098e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5224175453186035, "logits/rejected": -2.521833896636963, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5147, "nll_loss": 1.0038148164749146, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 850 }, { "epoch": 1.9068736141906872, "grad_norm": NaN, "learning_rate": 2.4781195929731997e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5353684425354004, "logits/rejected": -2.554072141647339, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2603, "nll_loss": 0.8757489323616028, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 860 }, { "epoch": 1.9290465631929048, "grad_norm": NaN, "learning_rate": 2.3899873950706803e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6149215698242188, "logits/rejected": -2.663322687149048, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3348, "nll_loss": 0.8890805244445801, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 870 }, { "epoch": 1.951219512195122, "grad_norm": NaN, "learning_rate": 2.3027782798445205e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5764241218566895, "logits/rejected": -2.5978610515594482, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.255, "nll_loss": 0.9303677678108215, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 880 }, { "epoch": 1.9733924611973392, "grad_norm": NaN, "learning_rate": 2.216542247663192e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5386908054351807, "logits/rejected": -2.5890002250671387, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.228, "nll_loss": 0.880374550819397, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 890 }, { "epoch": 1.9955654101995566, "grad_norm": NaN, "learning_rate": 2.1313287409889075e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6515865325927734, "logits/rejected": -2.698002338409424, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4501, "nll_loss": 0.8617309331893921, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 900 }, { "epoch": 2.0177383592017737, "grad_norm": NaN, "learning_rate": 2.0471866160303494e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.576596975326538, "logits/rejected": -2.634167194366455, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.658, "nll_loss": 0.9685592651367188, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 910 }, { "epoch": 2.0399113082039912, "grad_norm": NaN, "learning_rate": 1.9641641147314996e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.556303024291992, "logits/rejected": -2.6210145950317383, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.6006, "nll_loss": 0.9573662877082825, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 920 }, { "epoch": 2.0620842572062084, "grad_norm": NaN, "learning_rate": 1.88230883711267e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6028056144714355, "logits/rejected": -2.648607015609741, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3477, "nll_loss": 0.8656437993049622, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 930 }, { "epoch": 2.084257206208426, "grad_norm": NaN, "learning_rate": 1.8016677139795635e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5672965049743652, "logits/rejected": -2.5980045795440674, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.404, "nll_loss": 0.8821004033088684, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 940 }, { "epoch": 2.106430155210643, "grad_norm": NaN, "learning_rate": 1.7222869800160197e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.567884683609009, "logits/rejected": -2.6413073539733887, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5904, "nll_loss": 0.8698426485061646, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 950 }, { "epoch": 2.12860310421286, "grad_norm": NaN, "learning_rate": 1.6442121472758776e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5741963386535645, "logits/rejected": -2.6005654335021973, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.7035, "nll_loss": 0.8915897607803345, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 960 }, { "epoch": 2.1507760532150777, "grad_norm": NaN, "learning_rate": 1.5674879790891504e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5121145248413086, "logits/rejected": -2.5986428260803223, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.429, "nll_loss": 0.8993185758590698, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 970 }, { "epoch": 2.172949002217295, "grad_norm": NaN, "learning_rate": 1.4921584643974772e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5978806018829346, "logits/rejected": -2.6225571632385254, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.6976, "nll_loss": 0.8835185766220093, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 980 }, { "epoch": 2.1951219512195124, "grad_norm": NaN, "learning_rate": 1.4182667925335472e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5312979221343994, "logits/rejected": -2.619032382965088, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2105, "nll_loss": 0.8996240496635437, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 990 }, { "epoch": 2.2172949002217295, "grad_norm": NaN, "learning_rate": 1.3458553284589852e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.586155414581299, "logits/rejected": -2.641432523727417, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4627, "nll_loss": 0.8832103610038757, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1000 }, { "epoch": 2.2394678492239466, "grad_norm": NaN, "learning_rate": 1.2749655884748788e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.649963617324829, "logits/rejected": -2.721240758895874, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.9071, "nll_loss": 0.8600472211837769, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1010 }, { "epoch": 2.261640798226164, "grad_norm": NaN, "learning_rate": 1.205638216418864e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6075966358184814, "logits/rejected": -2.6658735275268555, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3184, "nll_loss": 0.9156390428543091, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1020 }, { "epoch": 2.2838137472283813, "grad_norm": NaN, "learning_rate": 1.1379129603624472e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5418643951416016, "logits/rejected": -2.5810935497283936, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3225, "nll_loss": 0.9440028071403503, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1030 }, { "epoch": 2.305986696230599, "grad_norm": 0.6439170241355896, "learning_rate": 1.0718286498218834e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5944840908050537, "logits/rejected": -2.6368420124053955, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3297, "nll_loss": 0.8718999028205872, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1040 }, { "epoch": 2.328159645232816, "grad_norm": NaN, "learning_rate": 1.0074231734957184e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5355989933013916, "logits/rejected": -2.5640969276428223, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1733, "nll_loss": 0.8988415002822876, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1050 }, { "epoch": 2.3503325942350335, "grad_norm": NaN, "learning_rate": 9.447334575417189e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.605184555053711, "logits/rejected": -2.6584599018096924, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4157, "nll_loss": 0.8994711637496948, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1060 }, { "epoch": 2.3725055432372506, "grad_norm": NaN, "learning_rate": 8.837954444056825e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.521874189376831, "logits/rejected": -2.5487990379333496, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.504, "nll_loss": 0.8935952186584473, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1070 }, { "epoch": 2.3946784922394677, "grad_norm": 0.6280531883239746, "learning_rate": 8.246440722142325e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.612034320831299, "logits/rejected": -2.640134811401367, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1654, "nll_loss": 0.9181571006774902, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1080 }, { "epoch": 2.4168514412416853, "grad_norm": NaN, "learning_rate": 7.67313254743438e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6642110347747803, "logits/rejected": -2.6616063117980957, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3256, "nll_loss": 0.9149085283279419, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1090 }, { "epoch": 2.4390243902439024, "grad_norm": NaN, "learning_rate": 7.118358619747322e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5517077445983887, "logits/rejected": -2.5954713821411133, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3177, "nll_loss": 0.9105826616287231, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1100 }, { "epoch": 2.4611973392461195, "grad_norm": NaN, "learning_rate": 6.582437012492725e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6221938133239746, "logits/rejected": -2.6449408531188965, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.387, "nll_loss": 0.891291618347168, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1110 }, { "epoch": 2.483370288248337, "grad_norm": NaN, "learning_rate": 6.065674990315623e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5601844787597656, "logits/rejected": -2.58024263381958, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2113, "nll_loss": 0.8098522424697876, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1120 }, { "epoch": 2.505543237250554, "grad_norm": NaN, "learning_rate": 5.568368832927742e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5909504890441895, "logits/rejected": -2.596946954727173, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3932, "nll_loss": 0.9140310287475586, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1130 }, { "epoch": 2.5277161862527717, "grad_norm": NaN, "learning_rate": 5.090803665238872e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5009472370147705, "logits/rejected": -2.5562808513641357, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2941, "nll_loss": 0.8312563896179199, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1140 }, { "epoch": 2.549889135254989, "grad_norm": NaN, "learning_rate": 4.633253293883679e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6316781044006348, "logits/rejected": -2.635262966156006, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4821, "nll_loss": 0.8716381192207336, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1150 }, { "epoch": 2.5720620842572064, "grad_norm": NaN, "learning_rate": 4.19598005023774e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6068694591522217, "logits/rejected": -2.650502920150757, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5451, "nll_loss": 0.9065351486206055, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1160 }, { "epoch": 2.5942350332594235, "grad_norm": NaN, "learning_rate": 3.7792346400128183e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.579982280731201, "logits/rejected": -2.643984317779541, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3209, "nll_loss": 0.8348051905632019, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1170 }, { "epoch": 2.6164079822616406, "grad_norm": NaN, "learning_rate": 3.3832559995175116e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5105443000793457, "logits/rejected": -2.57656192779541, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4288, "nll_loss": 0.997296154499054, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1180 }, { "epoch": 2.638580931263858, "grad_norm": NaN, "learning_rate": 3.0082711586658336e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5682618618011475, "logits/rejected": -2.6190028190612793, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3811, "nll_loss": 0.840679943561554, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1190 }, { "epoch": 2.6607538802660753, "grad_norm": NaN, "learning_rate": 2.654495110812136e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.534421920776367, "logits/rejected": -2.5627925395965576, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4835, "nll_loss": 0.9845026135444641, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1200 }, { "epoch": 2.682926829268293, "grad_norm": NaN, "learning_rate": 2.3221306894870962e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.597075939178467, "logits/rejected": -2.605375051498413, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.9092, "nll_loss": 0.8531631231307983, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1210 }, { "epoch": 2.70509977827051, "grad_norm": NaN, "learning_rate": 2.0113684521053663e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5460121631622314, "logits/rejected": -2.6030726432800293, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5883, "nll_loss": 0.8551692962646484, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1220 }, { "epoch": 2.7272727272727275, "grad_norm": NaN, "learning_rate": 1.722386570711647e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.559178352355957, "logits/rejected": -2.601423740386963, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5836, "nll_loss": 0.8850408792495728, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1230 }, { "epoch": 2.7494456762749446, "grad_norm": 0.6112498641014099, "learning_rate": 1.455350729827698e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.579529285430908, "logits/rejected": -2.5950798988342285, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3608, "nll_loss": 0.8255780339241028, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1240 }, { "epoch": 2.7716186252771617, "grad_norm": NaN, "learning_rate": 1.2104140314590194e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.525857448577881, "logits/rejected": -2.587132215499878, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5238, "nll_loss": 0.9289102554321289, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1250 }, { "epoch": 2.7937915742793793, "grad_norm": NaN, "learning_rate": 9.877169073155167e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.533351182937622, "logits/rejected": -2.584251880645752, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4373, "nll_loss": 0.9016596674919128, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1260 }, { "epoch": 2.8159645232815964, "grad_norm": NaN, "learning_rate": 7.873870382965364e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5861144065856934, "logits/rejected": -2.612295150756836, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.5352, "nll_loss": 0.8941007852554321, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1270 }, { "epoch": 2.8381374722838135, "grad_norm": 0.6927753686904907, "learning_rate": 6.095392812864863e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6299588680267334, "logits/rejected": -2.654484272003174, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1739, "nll_loss": 0.9128586053848267, "rewards/accuracies": 0.5, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1280 }, { "epoch": 2.860310421286031, "grad_norm": 0.6138539910316467, "learning_rate": 4.5427560330289824e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5274271965026855, "logits/rejected": -2.5881905555725098, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.0278, "nll_loss": 0.8638602495193481, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1290 }, { "epoch": 2.882483370288248, "grad_norm": NaN, "learning_rate": 3.216850230348145e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6004536151885986, "logits/rejected": -2.639181613922119, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3678, "nll_loss": 0.9433631896972656, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1300 }, { "epoch": 2.9046563192904657, "grad_norm": NaN, "learning_rate": 2.1184355980488067e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.6405842304229736, "logits/rejected": -2.632291316986084, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.7057, "nll_loss": 0.9545801281929016, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1310 }, { "epoch": 2.926829268292683, "grad_norm": NaN, "learning_rate": 1.2481418998456118e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.552712917327881, "logits/rejected": -2.603447437286377, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1937, "nll_loss": 0.885978102684021, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1320 }, { "epoch": 2.9490022172949004, "grad_norm": NaN, "learning_rate": 6.064681088730151e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.49711275100708, "logits/rejected": -2.5226452350616455, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2368, "nll_loss": 0.8924044370651245, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1330 }, { "epoch": 2.9711751662971175, "grad_norm": NaN, "learning_rate": 1.9378212160501285e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.568042755126953, "logits/rejected": -2.608767032623291, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4421, "nll_loss": 0.8176005482673645, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1340 }, { "epoch": 2.9933481152993346, "grad_norm": NaN, "learning_rate": 1.0320546925512985e-10, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.606173276901245, "logits/rejected": -2.6094250679016113, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.9638, "nll_loss": 0.9152927398681641, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1350 }, { "epoch": 0.356300759758973, "grad_norm": 1.5070972442626953, "learning_rate": 7.835966423238558e-06, "log_odds_chosen": 0.16467048227787018, "log_odds_ratio": -0.678921103477478, "logits/chosen": -2.1744472980499268, "logits/rejected": -2.181506872177124, "logps/chosen": -0.9224382638931274, "logps/rejected": -1.0073015689849854, "loss": 1.4751, "nll_loss": 1.2840995788574219, "rewards/accuracies": 0.6071428656578064, "rewards/chosen": -0.2767315208911896, "rewards/margins": 0.02545899525284767, "rewards/rejected": -0.30219051241874695, "step": 1360 }, { "epoch": 0.35892061828661254, "grad_norm": 3.159428596496582, "learning_rate": 7.83274432822734e-06, "log_odds_chosen": 0.13781918585300446, "log_odds_ratio": -0.7075511813163757, "logits/chosen": -2.3369295597076416, "logits/rejected": -2.2583727836608887, "logps/chosen": -0.8473695516586304, "logps/rejected": -0.9524151086807251, "loss": 1.4039, "nll_loss": 1.2262517213821411, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.2542109191417694, "rewards/margins": 0.03151360899209976, "rewards/rejected": -0.28572455048561096, "step": 1370 }, { "epoch": 0.36154047681425205, "grad_norm": 0.8486490249633789, "learning_rate": 7.829491570137134e-06, "log_odds_chosen": 0.07154488563537598, "log_odds_ratio": -0.7181152701377869, "logits/chosen": -2.306084156036377, "logits/rejected": -2.192629098892212, "logps/chosen": -0.8765948414802551, "logps/rejected": -0.9365623593330383, "loss": 1.358, "nll_loss": 1.1578160524368286, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2629784643650055, "rewards/margins": 0.017990300431847572, "rewards/rejected": -0.2809687554836273, "step": 1380 }, { "epoch": 0.3641603353418915, "grad_norm": 1.097153902053833, "learning_rate": 7.826208174990959e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2871549129486084, "logits/rejected": -2.125932216644287, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.4065, "nll_loss": 1.2210099697113037, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1390 }, { "epoch": 0.36678019386953103, "grad_norm": 0.5998280644416809, "learning_rate": 7.822894169056939e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2990994453430176, "logits/rejected": -2.188930034637451, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2988, "nll_loss": 0.9502847790718079, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1400 }, { "epoch": 0.36940005239717055, "grad_norm": 0.8084430694580078, "learning_rate": 7.819549578848095e-06, "log_odds_chosen": 0.21356813609600067, "log_odds_ratio": -0.6818668246269226, "logits/chosen": -2.3163061141967773, "logits/rejected": -2.188969850540161, "logps/chosen": -0.8551802635192871, "logps/rejected": -0.9751632809638977, "loss": 1.3441, "nll_loss": 1.1822631359100342, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2565540671348572, "rewards/margins": 0.03599492460489273, "rewards/rejected": -0.2925489544868469, "step": 1410 }, { "epoch": 0.37201991092481007, "grad_norm": 0.7295518517494202, "learning_rate": 7.816174431122129e-06, "log_odds_chosen": 0.2810767590999603, "log_odds_ratio": -0.656787097454071, "logits/chosen": -2.418583631515503, "logits/rejected": -2.311044692993164, "logps/chosen": -0.8355219960212708, "logps/rejected": -0.9859644174575806, "loss": 1.3275, "nll_loss": 1.164785385131836, "rewards/accuracies": 0.625, "rewards/chosen": -0.2506566047668457, "rewards/margins": 0.045132704079151154, "rewards/rejected": -0.29578930139541626, "step": 1420 }, { "epoch": 0.3746397694524496, "grad_norm": 1.0470610857009888, "learning_rate": 7.812768752881212e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2903048992156982, "logits/rejected": -2.1959969997406006, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3977, "nll_loss": 1.1932132244110107, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1430 }, { "epoch": 0.37725962798008905, "grad_norm": 0.8782858848571777, "learning_rate": 7.809332571371769e-06, "log_odds_chosen": 0.10299164056777954, "log_odds_ratio": -0.7256948947906494, "logits/chosen": -2.290497064590454, "logits/rejected": -2.2419228553771973, "logps/chosen": -0.8990342020988464, "logps/rejected": -0.9536194801330566, "loss": 1.3818, "nll_loss": 1.1981843709945679, "rewards/accuracies": 0.5, "rewards/chosen": -0.2697102725505829, "rewards/margins": 0.01637556590139866, "rewards/rejected": -0.2860858738422394, "step": 1440 }, { "epoch": 0.37987948650772857, "grad_norm": 0.7405297756195068, "learning_rate": 7.805865914084262e-06, "log_odds_chosen": 0.15694861114025116, "log_odds_ratio": -0.7073497772216797, "logits/chosen": -2.3941407203674316, "logits/rejected": -2.3704845905303955, "logps/chosen": -0.8153379559516907, "logps/rejected": -0.9288307428359985, "loss": 1.4063, "nll_loss": 1.122910499572754, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24460136890411377, "rewards/margins": 0.03404783457517624, "rewards/rejected": -0.2786491811275482, "step": 1450 }, { "epoch": 0.3824993450353681, "grad_norm": NaN, "learning_rate": 7.802368808752964e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3435275554656982, "logits/rejected": -2.2503085136413574, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3784, "nll_loss": 1.1327253580093384, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1460 }, { "epoch": 0.3851192035630076, "grad_norm": 0.7310872673988342, "learning_rate": 7.798841283355746e-06, "log_odds_chosen": 0.17410822212696075, "log_odds_ratio": -0.6637142896652222, "logits/chosen": -2.29927134513855, "logits/rejected": -2.259204864501953, "logps/chosen": -0.8322680592536926, "logps/rejected": -0.9205790758132935, "loss": 1.2635, "nll_loss": 1.0119085311889648, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24968044459819794, "rewards/margins": 0.02649330161511898, "rewards/rejected": -0.27617374062538147, "step": 1470 }, { "epoch": 0.3877390620906471, "grad_norm": 1.2615474462509155, "learning_rate": 7.795283366113843e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1989307403564453, "logits/rejected": -2.1296334266662598, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.381, "nll_loss": 1.134813666343689, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1480 }, { "epoch": 0.3903589206182866, "grad_norm": 0.9343477487564087, "learning_rate": 7.791695085491638e-06, "log_odds_chosen": 0.2836722135543823, "log_odds_ratio": -0.6585140824317932, "logits/chosen": -2.162867307662964, "logits/rejected": -2.0868241786956787, "logps/chosen": -0.7990925908088684, "logps/rejected": -0.9676051139831543, "loss": 1.4294, "nll_loss": 1.1541612148284912, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23972778022289276, "rewards/margins": 0.05055375024676323, "rewards/rejected": -0.2902815341949463, "step": 1490 }, { "epoch": 0.3929787791459261, "grad_norm": 1.0779283046722412, "learning_rate": 7.788076470196432e-06, "log_odds_chosen": 0.407390296459198, "log_odds_ratio": -0.5998691916465759, "logits/chosen": -2.1911914348602295, "logits/rejected": -2.0499963760375977, "logps/chosen": -0.7325464487075806, "logps/rejected": -0.9589724540710449, "loss": 1.338, "nll_loss": 1.1581240892410278, "rewards/accuracies": 0.625, "rewards/chosen": -0.21976391971111298, "rewards/margins": 0.06792781502008438, "rewards/rejected": -0.28769174218177795, "step": 1500 }, { "epoch": 0.3955986376735656, "grad_norm": 0.5823186635971069, "learning_rate": 7.784427549178204e-06, "log_odds_chosen": 0.173260897397995, "log_odds_ratio": -0.6867095232009888, "logits/chosen": -2.2463114261627197, "logits/rejected": -2.2329888343811035, "logps/chosen": -0.8379928469657898, "logps/rejected": -0.9333888292312622, "loss": 1.3539, "nll_loss": 1.133705735206604, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.25139787793159485, "rewards/margins": 0.028618773445487022, "rewards/rejected": -0.2800166606903076, "step": 1510 }, { "epoch": 0.39821849620120514, "grad_norm": 0.8613591194152832, "learning_rate": 7.780748351629395e-06, "log_odds_chosen": 0.21467141807079315, "log_odds_ratio": -0.6655287742614746, "logits/chosen": -2.1774816513061523, "logits/rejected": -2.1081528663635254, "logps/chosen": -0.7823084592819214, "logps/rejected": -0.9093402028083801, "loss": 1.329, "nll_loss": 1.0647414922714233, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2346925288438797, "rewards/margins": 0.038109537214040756, "rewards/rejected": -0.27280208468437195, "step": 1520 }, { "epoch": 0.40083835472884466, "grad_norm": 2.3185458183288574, "learning_rate": 7.777038906984665e-06, "log_odds_chosen": 0.33879223465919495, "log_odds_ratio": -0.6267626881599426, "logits/chosen": -2.325411319732666, "logits/rejected": -2.253406047821045, "logps/chosen": -0.7435046434402466, "logps/rejected": -0.9339975118637085, "loss": 1.2532, "nll_loss": 1.0884990692138672, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22305139899253845, "rewards/margins": 0.05714789032936096, "rewards/rejected": -0.2801992893218994, "step": 1530 }, { "epoch": 0.4034582132564842, "grad_norm": 2.0251095294952393, "learning_rate": 7.773299244920657e-06, "log_odds_chosen": 0.2629839777946472, "log_odds_ratio": -0.7064835429191589, "logits/chosen": -2.2713868618011475, "logits/rejected": -2.154143810272217, "logps/chosen": -0.7290531992912292, "logps/rejected": -0.8958428502082825, "loss": 1.2453, "nll_loss": 1.0896470546722412, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21871595084667206, "rewards/margins": 0.05003691837191582, "rewards/rejected": -0.2687528729438782, "step": 1540 }, { "epoch": 0.40607807178412364, "grad_norm": 1.3158693313598633, "learning_rate": 7.769529395355766e-06, "log_odds_chosen": 0.07123173773288727, "log_odds_ratio": -0.7279552221298218, "logits/chosen": -2.1936962604522705, "logits/rejected": -2.081378936767578, "logps/chosen": -0.870353102684021, "logps/rejected": -0.9185281991958618, "loss": 1.3, "nll_loss": 1.0599675178527832, "rewards/accuracies": 0.5, "rewards/chosen": -0.2611059546470642, "rewards/margins": 0.014452511444687843, "rewards/rejected": -0.2755584418773651, "step": 1550 }, { "epoch": 0.40869793031176316, "grad_norm": 1.2023017406463623, "learning_rate": 7.765729388449896e-06, "log_odds_chosen": 0.19455137848854065, "log_odds_ratio": -0.7161202430725098, "logits/chosen": -2.2802798748016357, "logits/rejected": -2.285790205001831, "logps/chosen": -0.7755097150802612, "logps/rejected": -0.8909648656845093, "loss": 1.3287, "nll_loss": 1.0911790132522583, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2326529324054718, "rewards/margins": 0.034636545926332474, "rewards/rejected": -0.2672894597053528, "step": 1560 }, { "epoch": 0.4113177888394027, "grad_norm": 0.8103194832801819, "learning_rate": 7.761899254604214e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.131821870803833, "logits/rejected": -2.092191219329834, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3414, "nll_loss": 1.011042594909668, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1570 }, { "epoch": 0.4139376473670422, "grad_norm": 1.4886573553085327, "learning_rate": 7.758039024460917e-06, "log_odds_chosen": 0.235622838139534, "log_odds_ratio": -0.6792353391647339, "logits/chosen": -2.2703051567077637, "logits/rejected": -2.2016983032226562, "logps/chosen": -0.7831603288650513, "logps/rejected": -0.9362146258354187, "loss": 1.2752, "nll_loss": 1.0483226776123047, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23494811356067657, "rewards/margins": 0.045916296541690826, "rewards/rejected": -0.2808643877506256, "step": 1580 }, { "epoch": 0.4165575058946817, "grad_norm": 0.665324330329895, "learning_rate": 7.754148728902977e-06, "log_odds_chosen": 0.2642481327056885, "log_odds_ratio": -0.6594840288162231, "logits/chosen": -2.189133882522583, "logits/rejected": -2.1034750938415527, "logps/chosen": -0.7848230004310608, "logps/rejected": -0.9393364191055298, "loss": 1.3546, "nll_loss": 1.104046106338501, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23544688522815704, "rewards/margins": 0.046354059129953384, "rewards/rejected": -0.2818009555339813, "step": 1590 }, { "epoch": 0.4191773644223212, "grad_norm": 0.7179826498031616, "learning_rate": 7.7502283990539e-06, "log_odds_chosen": 0.19448122382164001, "log_odds_ratio": -0.6666055917739868, "logits/chosen": -2.1263155937194824, "logits/rejected": -2.044039726257324, "logps/chosen": -0.8908664584159851, "logps/rejected": -1.0337872505187988, "loss": 1.3569, "nll_loss": 1.1761796474456787, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.26725995540618896, "rewards/margins": 0.042876236140728, "rewards/rejected": -0.31013616919517517, "step": 1600 }, { "epoch": 0.4217972229499607, "grad_norm": 0.6757217645645142, "learning_rate": 7.746278066277476e-06, "log_odds_chosen": 0.4056762158870697, "log_odds_ratio": -0.6558108329772949, "logits/chosen": -2.213034152984619, "logits/rejected": -2.1029086112976074, "logps/chosen": -0.7628439664840698, "logps/rejected": -0.9843906164169312, "loss": 1.3261, "nll_loss": 1.1515486240386963, "rewards/accuracies": 0.5625, "rewards/chosen": -0.22885319590568542, "rewards/margins": 0.06646402925252914, "rewards/rejected": -0.29531723260879517, "step": 1610 }, { "epoch": 0.4244170814776002, "grad_norm": 0.84027099609375, "learning_rate": 7.742297762177523e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.146639585494995, "logits/rejected": -1.9969524145126343, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3331, "nll_loss": 1.0941113233566284, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1620 }, { "epoch": 0.42703694000523973, "grad_norm": 0.8461483120918274, "learning_rate": 7.738287518597646e-06, "log_odds_chosen": 0.24490182101726532, "log_odds_ratio": -0.6744768619537354, "logits/chosen": -2.1876320838928223, "logits/rejected": -2.106388568878174, "logps/chosen": -0.7402501106262207, "logps/rejected": -0.8526729345321655, "loss": 1.2823, "nll_loss": 1.0333292484283447, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22207501530647278, "rewards/margins": 0.03372686356306076, "rewards/rejected": -0.25580188632011414, "step": 1630 }, { "epoch": 0.42965679853287925, "grad_norm": 0.8110608458518982, "learning_rate": 7.734247367620966e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1225366592407227, "logits/rejected": -2.066610813140869, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3049, "nll_loss": 1.1132264137268066, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1640 }, { "epoch": 0.4322766570605187, "grad_norm": 1.3219563961029053, "learning_rate": 7.73017734156988e-06, "log_odds_chosen": 0.37222856283187866, "log_odds_ratio": -0.6275134086608887, "logits/chosen": -2.180270195007324, "logits/rejected": -2.104553699493408, "logps/chosen": -0.7384976148605347, "logps/rejected": -0.9277009963989258, "loss": 1.2701, "nll_loss": 1.0812407732009888, "rewards/accuracies": 0.625, "rewards/chosen": -0.22154930233955383, "rewards/margins": 0.056761015206575394, "rewards/rejected": -0.27831029891967773, "step": 1650 }, { "epoch": 0.43489651558815823, "grad_norm": 0.8220859169960022, "learning_rate": 7.726077473005784e-06, "log_odds_chosen": 0.25185585021972656, "log_odds_ratio": -0.6601060628890991, "logits/chosen": -2.1891627311706543, "logits/rejected": -2.080193281173706, "logps/chosen": -0.7745991349220276, "logps/rejected": -0.9133998155593872, "loss": 1.241, "nll_loss": 1.07802414894104, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.232379749417305, "rewards/margins": 0.04164021462202072, "rewards/rejected": -0.2740199565887451, "step": 1660 }, { "epoch": 0.43751637411579775, "grad_norm": 0.7861508727073669, "learning_rate": 7.721947794728836e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.155824661254883, "logits/rejected": -2.0749428272247314, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.277, "nll_loss": 1.0315476655960083, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1670 }, { "epoch": 0.44013623264343726, "grad_norm": 0.7692843675613403, "learning_rate": 7.717788339777671e-06, "log_odds_chosen": 0.14557471871376038, "log_odds_ratio": -0.7391952276229858, "logits/chosen": -2.1996002197265625, "logits/rejected": -2.089608907699585, "logps/chosen": -0.8423151969909668, "logps/rejected": -0.93895423412323, "loss": 1.3819, "nll_loss": 1.1533706188201904, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.25269457697868347, "rewards/margins": 0.02899169921875, "rewards/rejected": -0.28168630599975586, "step": 1680 }, { "epoch": 0.4427560911710768, "grad_norm": 1.050455927848816, "learning_rate": 7.713599141429148e-06, "log_odds_chosen": 0.18557021021842957, "log_odds_ratio": -0.6638679504394531, "logits/chosen": -2.193223476409912, "logits/rejected": -2.0895392894744873, "logps/chosen": -0.8287395238876343, "logps/rejected": -0.9351475834846497, "loss": 1.2532, "nll_loss": 1.076210379600525, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.248621866106987, "rewards/margins": 0.031922388821840286, "rewards/rejected": -0.2805442810058594, "step": 1690 }, { "epoch": 0.44537594969871624, "grad_norm": 0.7139078378677368, "learning_rate": 7.709380233198084e-06, "log_odds_chosen": 0.1657506227493286, "log_odds_ratio": -0.7026845216751099, "logits/chosen": -2.186840534210205, "logits/rejected": -2.044227123260498, "logps/chosen": -0.8488050699234009, "logps/rejected": -0.95036381483078, "loss": 1.3314, "nll_loss": 1.0827560424804688, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2546415328979492, "rewards/margins": 0.03046763502061367, "rewards/rejected": -0.28510919213294983, "step": 1700 }, { "epoch": 0.44799580822635576, "grad_norm": 1.0684787034988403, "learning_rate": 7.70513164883698e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.0887954235076904, "logits/rejected": -1.9752638339996338, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2763, "nll_loss": 1.1006641387939453, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1710 }, { "epoch": 0.4506156667539953, "grad_norm": 0.9487037062644958, "learning_rate": 7.700853422335761e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1088550090789795, "logits/rejected": -2.038099765777588, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3765, "nll_loss": 1.1307182312011719, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1720 }, { "epoch": 0.4532355252816348, "grad_norm": 0.7048947811126709, "learning_rate": 7.696545587921492e-06, "log_odds_chosen": 0.2919122874736786, "log_odds_ratio": -0.6339688897132874, "logits/chosen": -2.1001009941101074, "logits/rejected": -2.0026204586029053, "logps/chosen": -0.7748144865036011, "logps/rejected": -0.9189624786376953, "loss": 1.2934, "nll_loss": 1.181532621383667, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23244433104991913, "rewards/margins": 0.04324441775679588, "rewards/rejected": -0.2756887674331665, "step": 1730 }, { "epoch": 0.4558553838092743, "grad_norm": 1.297353982925415, "learning_rate": 7.692208180058113e-06, "log_odds_chosen": 0.5148166418075562, "log_odds_ratio": -0.5838104486465454, "logits/chosen": -2.208028793334961, "logits/rejected": -2.0533347129821777, "logps/chosen": -0.7364177703857422, "logps/rejected": -1.0378844738006592, "loss": 1.3752, "nll_loss": 1.1920849084854126, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22092533111572266, "rewards/margins": 0.09044001996517181, "rewards/rejected": -0.3113653063774109, "step": 1740 }, { "epoch": 0.4584752423369138, "grad_norm": 0.9961971044540405, "learning_rate": 7.687841233446158e-06, "log_odds_chosen": 0.22683505713939667, "log_odds_ratio": -0.6922428011894226, "logits/chosen": -2.1968588829040527, "logits/rejected": -2.082923412322998, "logps/chosen": -0.8106549978256226, "logps/rejected": -0.9458268880844116, "loss": 1.2595, "nll_loss": 1.0203992128372192, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2431965172290802, "rewards/margins": 0.0405515618622303, "rewards/rejected": -0.2837480902671814, "step": 1750 }, { "epoch": 0.4610951008645533, "grad_norm": 0.9128112196922302, "learning_rate": 7.683444783022481e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1307501792907715, "logits/rejected": -2.077415943145752, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3076, "nll_loss": 1.0451977252960205, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1760 }, { "epoch": 0.4637149593921928, "grad_norm": 0.9377247095108032, "learning_rate": 7.679018863959977e-06, "log_odds_chosen": 0.26452043652534485, "log_odds_ratio": -0.6454588174819946, "logits/chosen": -2.2334842681884766, "logits/rejected": -2.1440517902374268, "logps/chosen": -0.7835454940795898, "logps/rejected": -0.9287274479866028, "loss": 1.32, "nll_loss": 1.1055638790130615, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23506367206573486, "rewards/margins": 0.04355456680059433, "rewards/rejected": -0.2786182165145874, "step": 1770 }, { "epoch": 0.46633481791983233, "grad_norm": 1.1383013725280762, "learning_rate": 7.674563511667296e-06, "log_odds_chosen": 0.2754136025905609, "log_odds_ratio": -0.663358747959137, "logits/chosen": -2.2680716514587402, "logits/rejected": -2.2700283527374268, "logps/chosen": -0.7558166980743408, "logps/rejected": -0.8934957385063171, "loss": 1.2857, "nll_loss": 1.0963268280029297, "rewards/accuracies": 0.5, "rewards/chosen": -0.22674500942230225, "rewards/margins": 0.041303712874650955, "rewards/rejected": -0.2680487036705017, "step": 1780 }, { "epoch": 0.46895467644747185, "grad_norm": 0.7890952825546265, "learning_rate": 7.67007876178856e-06, "log_odds_chosen": 0.27597224712371826, "log_odds_ratio": -0.6576776504516602, "logits/chosen": -2.2900586128234863, "logits/rejected": -2.2074086666107178, "logps/chosen": -0.7823407053947449, "logps/rejected": -0.9427968263626099, "loss": 1.2974, "nll_loss": 1.1813403367996216, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23470225930213928, "rewards/margins": 0.048136819154024124, "rewards/rejected": -0.2828390300273895, "step": 1790 }, { "epoch": 0.47157453497511137, "grad_norm": 0.9115896224975586, "learning_rate": 7.66556465020309e-06, "log_odds_chosen": 0.2854171097278595, "log_odds_ratio": -0.6494393348693848, "logits/chosen": -2.2592923641204834, "logits/rejected": -2.0683884620666504, "logps/chosen": -0.8232611417770386, "logps/rejected": -1.0141000747680664, "loss": 1.248, "nll_loss": 1.0168474912643433, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24697837233543396, "rewards/margins": 0.05725168064236641, "rewards/rejected": -0.3042300343513489, "step": 1800 }, { "epoch": 0.47419439350275083, "grad_norm": 0.6727674603462219, "learning_rate": 7.661021213025097e-06, "log_odds_chosen": 0.1448836326599121, "log_odds_ratio": -0.709470272064209, "logits/chosen": -2.222550868988037, "logits/rejected": -2.183189868927002, "logps/chosen": -0.8002765774726868, "logps/rejected": -0.908728301525116, "loss": 1.4198, "nll_loss": 1.1429588794708252, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24008294939994812, "rewards/margins": 0.032535530626773834, "rewards/rejected": -0.27261847257614136, "step": 1810 }, { "epoch": 0.47681425203039035, "grad_norm": 1.3036495447158813, "learning_rate": 7.656448486603416e-06, "log_odds_chosen": 0.29390355944633484, "log_odds_ratio": -0.6591013669967651, "logits/chosen": -2.1585395336151123, "logits/rejected": -2.0369210243225098, "logps/chosen": -0.7730036973953247, "logps/rejected": -0.9560281038284302, "loss": 1.2894, "nll_loss": 1.0614018440246582, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.23190109431743622, "rewards/margins": 0.054907333105802536, "rewards/rejected": -0.28680843114852905, "step": 1820 }, { "epoch": 0.47943411055802987, "grad_norm": 0.619870662689209, "learning_rate": 7.651846507521198e-06, "log_odds_chosen": 0.1994878649711609, "log_odds_ratio": -0.6725788712501526, "logits/chosen": -2.157137870788574, "logits/rejected": -2.0323843955993652, "logps/chosen": -0.8131291270256042, "logps/rejected": -0.9139816164970398, "loss": 1.2805, "nll_loss": 1.064143180847168, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.24393872916698456, "rewards/margins": 0.03025573492050171, "rewards/rejected": -0.27419447898864746, "step": 1830 }, { "epoch": 0.4820539690856694, "grad_norm": 0.8471915125846863, "learning_rate": 7.647215312595623e-06, "log_odds_chosen": 0.34192129969596863, "log_odds_ratio": -0.6533671617507935, "logits/chosen": -2.236917018890381, "logits/rejected": -2.1792304515838623, "logps/chosen": -0.7844781875610352, "logps/rejected": -0.9492262601852417, "loss": 1.3063, "nll_loss": 1.0647456645965576, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23534348607063293, "rewards/margins": 0.04942439869046211, "rewards/rejected": -0.28476789593696594, "step": 1840 }, { "epoch": 0.4846738276133089, "grad_norm": NaN, "learning_rate": 7.642554938877612e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1569488048553467, "logits/rejected": -2.0734241008758545, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3391, "nll_loss": 1.0577176809310913, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1850 }, { "epoch": 0.48729368614094837, "grad_norm": 0.5546465516090393, "learning_rate": 7.63786542365152e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1749930381774902, "logits/rejected": -2.16654372215271, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3613, "nll_loss": 1.168358564376831, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1860 }, { "epoch": 0.4899135446685879, "grad_norm": 0.5946412086486816, "learning_rate": 7.633146804434848e-06, "log_odds_chosen": 0.3680673837661743, "log_odds_ratio": -0.6370216608047485, "logits/chosen": -2.2312488555908203, "logits/rejected": -2.1326515674591064, "logps/chosen": -0.7894002199172974, "logps/rejected": -1.0060811042785645, "loss": 1.1942, "nll_loss": 1.0459545850753784, "rewards/accuracies": 0.625, "rewards/chosen": -0.2368200719356537, "rewards/margins": 0.06500427424907684, "rewards/rejected": -0.3018243610858917, "step": 1870 }, { "epoch": 0.4925334031962274, "grad_norm": 0.6534931063652039, "learning_rate": 7.628399118977931e-06, "log_odds_chosen": 0.35880246758461, "log_odds_ratio": -0.6358337998390198, "logits/chosen": -2.197322368621826, "logits/rejected": -2.1643221378326416, "logps/chosen": -0.7515470385551453, "logps/rejected": -0.9479736089706421, "loss": 1.252, "nll_loss": 1.0549204349517822, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2254641354084015, "rewards/margins": 0.0589279904961586, "rewards/rejected": -0.2843921482563019, "step": 1880 }, { "epoch": 0.4951532617238669, "grad_norm": 0.7467595934867859, "learning_rate": 7.623622405263646e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.135007619857788, "logits/rejected": -2.132744550704956, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3585, "nll_loss": 1.1310532093048096, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1890 }, { "epoch": 0.49777312025150644, "grad_norm": 0.649025022983551, "learning_rate": 7.6188167015071035e-06, "log_odds_chosen": 0.3301432132720947, "log_odds_ratio": -0.6751290559768677, "logits/chosen": -2.1907875537872314, "logits/rejected": -2.1237707138061523, "logps/chosen": -0.790390133857727, "logps/rejected": -1.018385648727417, "loss": 1.3404, "nll_loss": 1.108107328414917, "rewards/accuracies": 0.625, "rewards/chosen": -0.23711705207824707, "rewards/margins": 0.06839870661497116, "rewards/rejected": -0.30551576614379883, "step": 1900 }, { "epoch": 0.5003929787791459, "grad_norm": 1.0946447849273682, "learning_rate": 7.613982046155344e-06, "log_odds_chosen": 0.11785908043384552, "log_odds_ratio": -0.7169533967971802, "logits/chosen": -2.2313244342803955, "logits/rejected": -2.167520523071289, "logps/chosen": -0.8033254742622375, "logps/rejected": -0.8701337575912476, "loss": 1.2805, "nll_loss": 1.085548758506775, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.24099767208099365, "rewards/margins": 0.020042452961206436, "rewards/rejected": -0.2610401511192322, "step": 1910 }, { "epoch": 0.5030128373067855, "grad_norm": 0.8028205037117004, "learning_rate": 7.609118477887029e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1612930297851562, "logits/rejected": -2.139380693435669, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2447, "nll_loss": 1.0496000051498413, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1920 }, { "epoch": 0.5056326958344249, "grad_norm": 0.5566911101341248, "learning_rate": 7.60422603561213e-06, "log_odds_chosen": 0.3029788136482239, "log_odds_ratio": -0.6570523977279663, "logits/chosen": -2.261167049407959, "logits/rejected": -2.0912437438964844, "logps/chosen": -0.7508211135864258, "logps/rejected": -0.9543059468269348, "loss": 1.2526, "nll_loss": 0.9856220483779907, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2252463400363922, "rewards/margins": 0.06104546785354614, "rewards/rejected": -0.28629180788993835, "step": 1930 }, { "epoch": 0.5082525543620644, "grad_norm": 0.9057849645614624, "learning_rate": 7.599304758471623e-06, "log_odds_chosen": 0.2874334454536438, "log_odds_ratio": -0.6394225358963013, "logits/chosen": -2.2187225818634033, "logits/rejected": -2.0726983547210693, "logps/chosen": -0.7682667970657349, "logps/rejected": -0.9517143368721008, "loss": 1.2777, "nll_loss": 1.0369327068328857, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.23048004508018494, "rewards/margins": 0.05503424257040024, "rewards/rejected": -0.28551429510116577, "step": 1940 }, { "epoch": 0.510872412889704, "grad_norm": 0.6973705887794495, "learning_rate": 7.594354685837166e-06, "log_odds_chosen": 0.012250863015651703, "log_odds_ratio": -0.7658689618110657, "logits/chosen": -2.2530770301818848, "logits/rejected": -2.1975150108337402, "logps/chosen": -0.874448299407959, "logps/rejected": -0.8741192817687988, "loss": 1.2952, "nll_loss": 1.1212711334228516, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2623344659805298, "rewards/margins": -9.867735207080841e-05, "rewards/rejected": -0.2622358202934265, "step": 1950 }, { "epoch": 0.5134922714173434, "grad_norm": 0.6821538805961609, "learning_rate": 7.589375857310794e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1999828815460205, "logits/rejected": -2.0714235305786133, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2916, "nll_loss": 1.032511591911316, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1960 }, { "epoch": 0.516112129944983, "grad_norm": 0.6255263686180115, "learning_rate": 7.584368312724596e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1596908569335938, "logits/rejected": -2.1005470752716064, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2495, "nll_loss": 1.1217178106307983, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 1970 }, { "epoch": 0.5187319884726225, "grad_norm": 0.8915054798126221, "learning_rate": 7.579332092140395e-06, "log_odds_chosen": 0.3993709087371826, "log_odds_ratio": -0.6062116622924805, "logits/chosen": -2.268876552581787, "logits/rejected": -2.1685822010040283, "logps/chosen": -0.7471445798873901, "logps/rejected": -0.9877832531929016, "loss": 1.2545, "nll_loss": 0.9963588714599609, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2241433560848236, "rewards/margins": 0.07219160348176956, "rewards/rejected": -0.2963349521160126, "step": 1980 }, { "epoch": 0.5213518470002619, "grad_norm": 0.668416440486908, "learning_rate": 7.574267235849436e-06, "log_odds_chosen": 0.23820717632770538, "log_odds_ratio": -0.686004102230072, "logits/chosen": -2.1191599369049072, "logits/rejected": -2.0223026275634766, "logps/chosen": -0.7987849116325378, "logps/rejected": -0.9834505319595337, "loss": 1.3089, "nll_loss": 1.0545966625213623, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.23963549733161926, "rewards/margins": 0.055399663746356964, "rewards/rejected": -0.295035183429718, "step": 1990 }, { "epoch": 0.5239717055279015, "grad_norm": 0.9887790083885193, "learning_rate": 7.569173784372054e-06, "log_odds_chosen": 0.4983958303928375, "log_odds_ratio": -0.5740706920623779, "logits/chosen": -2.246269941329956, "logits/rejected": -2.0993967056274414, "logps/chosen": -0.7058591246604919, "logps/rejected": -0.9787705540657043, "loss": 1.2308, "nll_loss": 0.9880681037902832, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21175774931907654, "rewards/margins": 0.08187338709831238, "rewards/rejected": -0.2936311364173889, "step": 2000 }, { "epoch": 0.526591564055541, "grad_norm": 0.8224198818206787, "learning_rate": 7.564051778457354e-06, "log_odds_chosen": 0.730120062828064, "log_odds_ratio": -0.5243743658065796, "logits/chosen": -2.184593915939331, "logits/rejected": -2.0078964233398438, "logps/chosen": -0.777365505695343, "logps/rejected": -1.2488415241241455, "loss": 1.2138, "nll_loss": 1.088826298713684, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.23320968449115753, "rewards/margins": 0.14144277572631836, "rewards/rejected": -0.3746524453163147, "step": 2010 }, { "epoch": 0.5292114225831805, "grad_norm": 1.0857088565826416, "learning_rate": 7.558901259082888e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1109869480133057, "logits/rejected": -2.0452141761779785, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3192, "nll_loss": 1.0062416791915894, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2020 }, { "epoch": 0.53183128111082, "grad_norm": 0.7711729407310486, "learning_rate": 7.553722267454322e-06, "log_odds_chosen": 0.34288468956947327, "log_odds_ratio": -0.6621919274330139, "logits/chosen": -2.131227731704712, "logits/rejected": -2.0925912857055664, "logps/chosen": -0.7417663335800171, "logps/rejected": -0.9669727087020874, "loss": 1.2962, "nll_loss": 1.061499834060669, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.22252991795539856, "rewards/margins": 0.06756193190813065, "rewards/rejected": -0.2900918126106262, "step": 2030 }, { "epoch": 0.5344511396384595, "grad_norm": 0.9361077547073364, "learning_rate": 7.548514845005106e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.245638847351074, "logits/rejected": -2.1568429470062256, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3658, "nll_loss": 1.180989146232605, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2040 }, { "epoch": 0.537070998166099, "grad_norm": 0.9165865778923035, "learning_rate": 7.543279033396149e-06, "log_odds_chosen": 0.2553882598876953, "log_odds_ratio": -0.6828151941299438, "logits/chosen": -2.2795186042785645, "logits/rejected": -2.1708366870880127, "logps/chosen": -0.8418358564376831, "logps/rejected": -1.020721673965454, "loss": 1.2995, "nll_loss": 1.0512328147888184, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.25255078077316284, "rewards/margins": 0.05366574600338936, "rewards/rejected": -0.3062165081501007, "step": 2050 }, { "epoch": 0.5396908566937385, "grad_norm": 0.5067943930625916, "learning_rate": 7.538014874515479e-06, "log_odds_chosen": 0.22995737195014954, "log_odds_ratio": -0.6568772196769714, "logits/chosen": -2.3087832927703857, "logits/rejected": -2.1556599140167236, "logps/chosen": -0.8139178156852722, "logps/rejected": -0.9787559509277344, "loss": 1.335, "nll_loss": 1.0977249145507812, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24417535960674286, "rewards/margins": 0.049451421946287155, "rewards/rejected": -0.2936267852783203, "step": 2060 }, { "epoch": 0.5423107152213781, "grad_norm": 0.8534427881240845, "learning_rate": 7.532722410477912e-06, "log_odds_chosen": 0.3925303816795349, "log_odds_ratio": -0.6230870485305786, "logits/chosen": -2.222318172454834, "logits/rejected": -2.103269100189209, "logps/chosen": -0.8367436528205872, "logps/rejected": -1.085686445236206, "loss": 1.3094, "nll_loss": 1.0748655796051025, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2510230839252472, "rewards/margins": 0.07468287646770477, "rewards/rejected": -0.32570594549179077, "step": 2070 }, { "epoch": 0.5449305737490175, "grad_norm": 0.5659139156341553, "learning_rate": 7.527401683624712e-06, "log_odds_chosen": 0.49753817915916443, "log_odds_ratio": -0.5961757898330688, "logits/chosen": -2.126559019088745, "logits/rejected": -2.0896658897399902, "logps/chosen": -0.7640675902366638, "logps/rejected": -1.0833499431610107, "loss": 1.2383, "nll_loss": 0.9940149188041687, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22922030091285706, "rewards/margins": 0.09578472375869751, "rewards/rejected": -0.3250049948692322, "step": 2080 }, { "epoch": 0.547550432276657, "grad_norm": 0.9395691752433777, "learning_rate": 7.522052736523255e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2049221992492676, "logits/rejected": -2.1126251220703125, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3033, "nll_loss": 1.069281816482544, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2090 }, { "epoch": 0.5501702908042966, "grad_norm": 0.6644283533096313, "learning_rate": 7.5166756119666875e-06, "log_odds_chosen": 0.33624404668807983, "log_odds_ratio": -0.6594878435134888, "logits/chosen": -2.1170880794525146, "logits/rejected": -2.05601167678833, "logps/chosen": -0.7550711631774902, "logps/rejected": -0.9273549914360046, "loss": 1.2792, "nll_loss": 1.0367519855499268, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.22652137279510498, "rewards/margins": 0.05168512463569641, "rewards/rejected": -0.2782064974308014, "step": 2100 }, { "epoch": 0.552790149331936, "grad_norm": 1.0046262741088867, "learning_rate": 7.511270352973584e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.235132932662964, "logits/rejected": -2.091721296310425, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2639, "nll_loss": 1.0734943151474, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2110 }, { "epoch": 0.5554100078595756, "grad_norm": 0.7452945709228516, "learning_rate": 7.5058370027876e-06, "log_odds_chosen": 0.4744284749031067, "log_odds_ratio": -0.5885844826698303, "logits/chosen": -2.3577826023101807, "logits/rejected": -2.1653759479522705, "logps/chosen": -0.7202708125114441, "logps/rejected": -0.9829386472702026, "loss": 1.2334, "nll_loss": 1.0034105777740479, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21608126163482666, "rewards/margins": 0.07880035787820816, "rewards/rejected": -0.2948816418647766, "step": 2120 }, { "epoch": 0.5580298663872151, "grad_norm": 0.6816550493240356, "learning_rate": 7.500375604877133e-06, "log_odds_chosen": 0.40567436814308167, "log_odds_ratio": -0.6176926493644714, "logits/chosen": -2.340628147125244, "logits/rejected": -2.2888550758361816, "logps/chosen": -0.7054851651191711, "logps/rejected": -0.9284421801567078, "loss": 1.2241, "nll_loss": 0.9680972099304199, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21164552867412567, "rewards/margins": 0.06688711792230606, "rewards/rejected": -0.2785326838493347, "step": 2130 }, { "epoch": 0.5606497249148547, "grad_norm": 0.6294760704040527, "learning_rate": 7.4948862029349666e-06, "log_odds_chosen": 0.27154138684272766, "log_odds_ratio": -0.6608646512031555, "logits/chosen": -2.3504977226257324, "logits/rejected": -2.2641069889068604, "logps/chosen": -0.753447413444519, "logps/rejected": -0.9167672395706177, "loss": 1.2646, "nll_loss": 1.0325427055358887, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2260342389345169, "rewards/margins": 0.048995934426784515, "rewards/rejected": -0.2750301659107208, "step": 2140 }, { "epoch": 0.5632695834424941, "grad_norm": 0.6009808778762817, "learning_rate": 7.489368840877928e-06, "log_odds_chosen": 0.5156446695327759, "log_odds_ratio": -0.6383460760116577, "logits/chosen": -2.236543893814087, "logits/rejected": -2.0773768424987793, "logps/chosen": -0.7878815531730652, "logps/rejected": -1.146390676498413, "loss": 1.2458, "nll_loss": 1.025183081626892, "rewards/accuracies": 0.625, "rewards/chosen": -0.236364483833313, "rewards/margins": 0.10755269229412079, "rewards/rejected": -0.3439171612262726, "step": 2150 }, { "epoch": 0.5658894419701336, "grad_norm": 1.0105847120285034, "learning_rate": 7.4838235628465315e-06, "log_odds_chosen": 0.4232221245765686, "log_odds_ratio": -0.6551628112792969, "logits/chosen": -2.291649103164673, "logits/rejected": -2.211902141571045, "logps/chosen": -0.7405408620834351, "logps/rejected": -1.0014972686767578, "loss": 1.2949, "nll_loss": 1.047706961631775, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22216227650642395, "rewards/margins": 0.07828692346811295, "rewards/rejected": -0.3004491925239563, "step": 2160 }, { "epoch": 0.5685093004977732, "grad_norm": 1.017638087272644, "learning_rate": 7.478250413204628e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.24541974067688, "logits/rejected": -2.2050092220306396, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3121, "nll_loss": 1.0911709070205688, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2170 }, { "epoch": 0.5711291590254126, "grad_norm": 1.9565269947052002, "learning_rate": 7.472649436539051e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3028979301452637, "logits/rejected": -2.162764072418213, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2336, "nll_loss": 1.0390911102294922, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2180 }, { "epoch": 0.5737490175530522, "grad_norm": 1.0505093336105347, "learning_rate": 7.467020677659256e-06, "log_odds_chosen": 0.3742752969264984, "log_odds_ratio": -0.6339282393455505, "logits/chosen": -2.3230414390563965, "logits/rejected": -2.2359557151794434, "logps/chosen": -0.7908411026000977, "logps/rejected": -1.0411726236343384, "loss": 1.28, "nll_loss": 1.0830051898956299, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2372523546218872, "rewards/margins": 0.07509948313236237, "rewards/rejected": -0.3123518228530884, "step": 2190 }, { "epoch": 0.5763688760806917, "grad_norm": 0.9210163354873657, "learning_rate": 7.461364181596964e-06, "log_odds_chosen": 0.03896327689290047, "log_odds_ratio": -0.7938626408576965, "logits/chosen": -2.2267916202545166, "logits/rejected": -2.2394258975982666, "logps/chosen": -0.7566267848014832, "logps/rejected": -0.7844875454902649, "loss": 1.3462, "nll_loss": 1.0972793102264404, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2269880324602127, "rewards/margins": 0.008358245715498924, "rewards/rejected": -0.23534627258777618, "step": 2200 }, { "epoch": 0.5789887346083311, "grad_norm": 0.658789873123169, "learning_rate": 7.455679993605805e-06, "log_odds_chosen": 0.19465026259422302, "log_odds_ratio": -0.694733738899231, "logits/chosen": -2.3600332736968994, "logits/rejected": -2.2505123615264893, "logps/chosen": -0.7592724561691284, "logps/rejected": -0.8699545860290527, "loss": 1.2958, "nll_loss": 1.0229685306549072, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2277817726135254, "rewards/margins": 0.03320460766553879, "rewards/rejected": -0.2609863877296448, "step": 2210 }, { "epoch": 0.5816085931359707, "grad_norm": 0.680808961391449, "learning_rate": 7.449968159160949e-06, "log_odds_chosen": 0.2870968282222748, "log_odds_ratio": -0.6675760746002197, "logits/chosen": -2.248018980026245, "logits/rejected": -2.1817219257354736, "logps/chosen": -0.808517336845398, "logps/rejected": -0.9757534265518188, "loss": 1.2963, "nll_loss": 1.0884374380111694, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2425551861524582, "rewards/margins": 0.05017079785466194, "rewards/rejected": -0.2927260100841522, "step": 2220 }, { "epoch": 0.5842284516636101, "grad_norm": 0.5747780799865723, "learning_rate": 7.444228723958747e-06, "log_odds_chosen": 0.4211820662021637, "log_odds_ratio": -0.6097627282142639, "logits/chosen": -2.145867347717285, "logits/rejected": -2.1219398975372314, "logps/chosen": -0.7492374181747437, "logps/rejected": -1.0155950784683228, "loss": 1.2992, "nll_loss": 1.131514549255371, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22477123141288757, "rewards/margins": 0.07990728318691254, "rewards/rejected": -0.3046785295009613, "step": 2230 }, { "epoch": 0.5868483101912497, "grad_norm": 1.127907633781433, "learning_rate": 7.438461733916367e-06, "log_odds_chosen": 0.32157209515571594, "log_odds_ratio": -0.6565490961074829, "logits/chosen": -2.2401270866394043, "logits/rejected": -2.094183921813965, "logps/chosen": -0.7808433771133423, "logps/rejected": -0.9899951219558716, "loss": 1.2267, "nll_loss": 1.0375850200653076, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.23425301909446716, "rewards/margins": 0.06274549663066864, "rewards/rejected": -0.296998530626297, "step": 2240 }, { "epoch": 0.5894681687188892, "grad_norm": 0.6365547776222229, "learning_rate": 7.432667235171417e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2861392498016357, "logits/rejected": -2.2535648345947266, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3725, "nll_loss": 1.0584983825683594, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2250 }, { "epoch": 0.5920880272465286, "grad_norm": 0.5650036334991455, "learning_rate": 7.42684527408159e-06, "log_odds_chosen": 0.2913205623626709, "log_odds_ratio": -0.6465460658073425, "logits/chosen": -2.2655625343322754, "logits/rejected": -2.1316330432891846, "logps/chosen": -0.7703976631164551, "logps/rejected": -0.9624114036560059, "loss": 1.2947, "nll_loss": 1.139532208442688, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2311192750930786, "rewards/margins": 0.057604141533374786, "rewards/rejected": -0.2887234091758728, "step": 2260 }, { "epoch": 0.5947078857741682, "grad_norm": 0.5025174021720886, "learning_rate": 7.420995897224282e-06, "log_odds_chosen": 0.21648594737052917, "log_odds_ratio": -0.6536962389945984, "logits/chosen": -2.2929017543792725, "logits/rejected": -2.2222514152526855, "logps/chosen": -0.8068785667419434, "logps/rejected": -0.9505699872970581, "loss": 1.345, "nll_loss": 1.099893569946289, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.24206361174583435, "rewards/margins": 0.04310744255781174, "rewards/rejected": -0.2851710021495819, "step": 2270 }, { "epoch": 0.5973277443018077, "grad_norm": 1.2897135019302368, "learning_rate": 7.415119151396221e-06, "log_odds_chosen": 0.15070763230323792, "log_odds_ratio": -0.6982909440994263, "logits/chosen": -2.284419059753418, "logits/rejected": -2.2173399925231934, "logps/chosen": -0.8226763010025024, "logps/rejected": -0.9135896563529968, "loss": 1.2823, "nll_loss": 1.1719170808792114, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24680288136005402, "rewards/margins": 0.027274031192064285, "rewards/rejected": -0.274076908826828, "step": 2280 }, { "epoch": 0.5999476028294473, "grad_norm": 0.7253257036209106, "learning_rate": 7.409215083613102e-06, "log_odds_chosen": 0.4557565748691559, "log_odds_ratio": -0.5840985178947449, "logits/chosen": -2.3577868938446045, "logits/rejected": -2.2489075660705566, "logps/chosen": -0.7491756677627563, "logps/rejected": -1.0174922943115234, "loss": 1.231, "nll_loss": 1.039825439453125, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22475270926952362, "rewards/margins": 0.08049501478672028, "rewards/rejected": -0.3052476942539215, "step": 2290 }, { "epoch": 0.6025674613570867, "grad_norm": 0.7056682109832764, "learning_rate": 7.403283741109196e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.239004135131836, "logits/rejected": -2.20566987991333, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2772, "nll_loss": 1.0783637762069702, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2300 }, { "epoch": 0.6051873198847262, "grad_norm": 0.5374972820281982, "learning_rate": 7.397325171336985e-06, "log_odds_chosen": 0.37783634662628174, "log_odds_ratio": -0.6228850483894348, "logits/chosen": -2.346026659011841, "logits/rejected": -2.3104848861694336, "logps/chosen": -0.7539309859275818, "logps/rejected": -0.9441367983818054, "loss": 1.2938, "nll_loss": 1.0617027282714844, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2261793166399002, "rewards/margins": 0.057061709463596344, "rewards/rejected": -0.28324103355407715, "step": 2310 }, { "epoch": 0.6078071784123658, "grad_norm": 0.7347215414047241, "learning_rate": 7.391339421966774e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.255875587463379, "logits/rejected": -2.25917387008667, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3142, "nll_loss": 1.0913206338882446, "rewards/accuracies": 0.512499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2320 }, { "epoch": 0.6104270369400052, "grad_norm": 1.3080955743789673, "learning_rate": 7.3853265408863145e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3684592247009277, "logits/rejected": -2.3009555339813232, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.416, "nll_loss": 1.1959171295166016, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2330 }, { "epoch": 0.6130468954676448, "grad_norm": 0.7657895088195801, "learning_rate": 7.379286576200418e-06, "log_odds_chosen": 0.3927902281284332, "log_odds_ratio": -0.6415016651153564, "logits/chosen": -2.374662160873413, "logits/rejected": -2.3016607761383057, "logps/chosen": -0.7947167158126831, "logps/rejected": -1.0554046630859375, "loss": 1.2619, "nll_loss": 1.111592173576355, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23841504752635956, "rewards/margins": 0.07820636034011841, "rewards/rejected": -0.3166213631629944, "step": 2340 }, { "epoch": 0.6156667539952843, "grad_norm": 0.5421836376190186, "learning_rate": 7.373219576230576e-06, "log_odds_chosen": 0.0835968628525734, "log_odds_ratio": -0.7544440031051636, "logits/chosen": -2.3562092781066895, "logits/rejected": -2.2856314182281494, "logps/chosen": -0.7969850301742554, "logps/rejected": -0.8622144460678101, "loss": 1.2657, "nll_loss": 1.0536034107208252, "rewards/accuracies": 0.5, "rewards/chosen": -0.2390955239534378, "rewards/margins": 0.01956883631646633, "rewards/rejected": -0.2586643397808075, "step": 2350 }, { "epoch": 0.6182866125229237, "grad_norm": 0.8315609097480774, "learning_rate": 7.367125589514566e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2541520595550537, "logits/rejected": -2.153322696685791, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2611, "nll_loss": 1.0685862302780151, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2360 }, { "epoch": 0.6209064710505633, "grad_norm": 1.1386735439300537, "learning_rate": 7.361004664806071e-06, "log_odds_chosen": 0.15305542945861816, "log_odds_ratio": -0.7249768972396851, "logits/chosen": -2.1520450115203857, "logits/rejected": -2.1850452423095703, "logps/chosen": -0.792477548122406, "logps/rejected": -0.8838480114936829, "loss": 1.2816, "nll_loss": 1.0832180976867676, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.2377432882785797, "rewards/margins": 0.027411162853240967, "rewards/rejected": -0.2651544213294983, "step": 2370 }, { "epoch": 0.6235263295782028, "grad_norm": 0.6053383350372314, "learning_rate": 7.354856851074282e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.271695375442505, "logits/rejected": -2.2154130935668945, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2774, "nll_loss": 1.0565390586853027, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2380 }, { "epoch": 0.6261461881058423, "grad_norm": 0.4593925476074219, "learning_rate": 7.348682197503515e-06, "log_odds_chosen": 0.17132264375686646, "log_odds_ratio": -0.7304762601852417, "logits/chosen": -2.339852809906006, "logits/rejected": -2.2735748291015625, "logps/chosen": -0.8469673991203308, "logps/rejected": -0.9550887942314148, "loss": 1.3215, "nll_loss": 1.0765211582183838, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.25409018993377686, "rewards/margins": 0.03243647515773773, "rewards/rejected": -0.2865266799926758, "step": 2390 }, { "epoch": 0.6287660466334818, "grad_norm": 0.6238325834274292, "learning_rate": 7.342480753492808e-06, "log_odds_chosen": 0.2794187664985657, "log_odds_ratio": -0.6386472582817078, "logits/chosen": -2.276552200317383, "logits/rejected": -2.2163491249084473, "logps/chosen": -0.8052932024002075, "logps/rejected": -0.9754471778869629, "loss": 1.2637, "nll_loss": 1.0496490001678467, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24158796668052673, "rewards/margins": 0.051046185195446014, "rewards/rejected": -0.29263415932655334, "step": 2400 }, { "epoch": 0.6313859051611213, "grad_norm": 0.920269787311554, "learning_rate": 7.3362525686555335e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2791836261749268, "logits/rejected": -2.1544668674468994, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2972, "nll_loss": 1.0136305093765259, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2410 }, { "epoch": 0.6340057636887608, "grad_norm": 1.168060541152954, "learning_rate": 7.329997692818997e-06, "log_odds_chosen": 0.29985612630844116, "log_odds_ratio": -0.6534333825111389, "logits/chosen": -2.2244980335235596, "logits/rejected": -2.1145637035369873, "logps/chosen": -0.767120361328125, "logps/rejected": -0.9483343958854675, "loss": 1.267, "nll_loss": 1.09038245677948, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.23013611137866974, "rewards/margins": 0.05436421558260918, "rewards/rejected": -0.2845003306865692, "step": 2420 }, { "epoch": 0.6366256222164003, "grad_norm": NaN, "learning_rate": 7.323716176024041e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2432172298431396, "logits/rejected": -2.163698673248291, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3215, "nll_loss": 1.138155460357666, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2430 }, { "epoch": 0.6392454807440399, "grad_norm": 1.0263786315917969, "learning_rate": 7.31740806852464e-06, "log_odds_chosen": 0.3928252160549164, "log_odds_ratio": -0.6428135633468628, "logits/chosen": -2.214393138885498, "logits/rejected": -2.13381290435791, "logps/chosen": -0.7372905015945435, "logps/rejected": -0.9613118171691895, "loss": 1.3115, "nll_loss": 1.0688308477401733, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22118715941905975, "rewards/margins": 0.06720642745494843, "rewards/rejected": -0.2883935868740082, "step": 2440 }, { "epoch": 0.6418653392716793, "grad_norm": 0.7318268418312073, "learning_rate": 7.311073420787508e-06, "log_odds_chosen": 0.23766593635082245, "log_odds_ratio": -0.6869194507598877, "logits/chosen": -2.210160493850708, "logits/rejected": -2.1306357383728027, "logps/chosen": -0.8341716527938843, "logps/rejected": -0.9821904301643372, "loss": 1.3243, "nll_loss": 1.0683224201202393, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.25025150179862976, "rewards/margins": 0.04440561681985855, "rewards/rejected": -0.2946571409702301, "step": 2450 }, { "epoch": 0.6444851977993188, "grad_norm": 0.9901526570320129, "learning_rate": 7.304712283491682e-06, "log_odds_chosen": 0.33675986528396606, "log_odds_ratio": -0.6387494802474976, "logits/chosen": -2.2591147422790527, "logits/rejected": -2.1586108207702637, "logps/chosen": -0.8248133659362793, "logps/rejected": -1.041301965713501, "loss": 1.2632, "nll_loss": 1.0973416566848755, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2474440336227417, "rewards/margins": 0.06494656205177307, "rewards/rejected": -0.31239053606987, "step": 2460 }, { "epoch": 0.6471050563269584, "grad_norm": 0.71580570936203, "learning_rate": 7.298324707528128e-06, "log_odds_chosen": 0.2328287810087204, "log_odds_ratio": -0.7043330669403076, "logits/chosen": -2.2296154499053955, "logits/rejected": -2.1293795108795166, "logps/chosen": -0.7819975018501282, "logps/rejected": -0.919158935546875, "loss": 1.2657, "nll_loss": 1.0321539640426636, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2345992624759674, "rewards/margins": 0.04114843159914017, "rewards/rejected": -0.27574771642684937, "step": 2470 }, { "epoch": 0.6497249148545978, "grad_norm": 0.590401828289032, "learning_rate": 7.291910743999327e-06, "log_odds_chosen": 0.21885737776756287, "log_odds_ratio": -0.6903527975082397, "logits/chosen": -2.163498878479004, "logits/rejected": -2.1917812824249268, "logps/chosen": -0.868248462677002, "logps/rejected": -1.0025335550308228, "loss": 1.2335, "nll_loss": 1.058502435684204, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2604745328426361, "rewards/margins": 0.04028552025556564, "rewards/rejected": -0.30076009035110474, "step": 2480 }, { "epoch": 0.6523447733822374, "grad_norm": 0.5178573131561279, "learning_rate": 7.28547044421887e-06, "log_odds_chosen": 0.2557637691497803, "log_odds_ratio": -0.6888936758041382, "logits/chosen": -2.238238573074341, "logits/rejected": -2.190157651901245, "logps/chosen": -0.8402193784713745, "logps/rejected": -0.9892901182174683, "loss": 1.2931, "nll_loss": 1.0994813442230225, "rewards/accuracies": 0.625, "rewards/chosen": -0.25206583738327026, "rewards/margins": 0.04472119361162186, "rewards/rejected": -0.2967870533466339, "step": 2490 }, { "epoch": 0.6549646319098769, "grad_norm": 1.2576723098754883, "learning_rate": 7.279003859711042e-06, "log_odds_chosen": 0.40503090620040894, "log_odds_ratio": -0.619393527507782, "logits/chosen": -2.1863226890563965, "logits/rejected": -2.1457390785217285, "logps/chosen": -0.774633526802063, "logps/rejected": -1.0127094984054565, "loss": 1.2585, "nll_loss": 1.1200616359710693, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23239007592201233, "rewards/margins": 0.07142280042171478, "rewards/rejected": -0.3038128912448883, "step": 2500 }, { "epoch": 0.6575844904375163, "grad_norm": 1.4892168045043945, "learning_rate": 7.2725110422104185e-06, "log_odds_chosen": 0.23780755698680878, "log_odds_ratio": -0.7130314111709595, "logits/chosen": -2.2438855171203613, "logits/rejected": -2.0703585147857666, "logps/chosen": -0.8102817535400391, "logps/rejected": -0.9559137225151062, "loss": 1.2876, "nll_loss": 1.1036754846572876, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.24308452010154724, "rewards/margins": 0.04368959739804268, "rewards/rejected": -0.2867741286754608, "step": 2510 }, { "epoch": 0.6602043489651559, "grad_norm": 0.7082468867301941, "learning_rate": 7.265992043661444e-06, "log_odds_chosen": 0.399004191160202, "log_odds_ratio": -0.617197573184967, "logits/chosen": -2.1709508895874023, "logits/rejected": -2.0352561473846436, "logps/chosen": -0.771338939666748, "logps/rejected": -1.019679307937622, "loss": 1.2824, "nll_loss": 1.0527714490890503, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2314017117023468, "rewards/margins": 0.07450208067893982, "rewards/rejected": -0.30590376257896423, "step": 2520 }, { "epoch": 0.6628242074927954, "grad_norm": 0.5823565125465393, "learning_rate": 7.259446916218019e-06, "log_odds_chosen": 0.2624393701553345, "log_odds_ratio": -0.6526986360549927, "logits/chosen": -2.1676461696624756, "logits/rejected": -2.1462416648864746, "logps/chosen": -0.7459732294082642, "logps/rejected": -0.8959965705871582, "loss": 1.2776, "nll_loss": 1.0215675830841064, "rewards/accuracies": 0.5625, "rewards/chosen": -0.22379200160503387, "rewards/margins": 0.04500698298215866, "rewards/rejected": -0.26879897713661194, "step": 2530 }, { "epoch": 0.6654440660204349, "grad_norm": 0.798294723033905, "learning_rate": 7.252875712243082e-06, "log_odds_chosen": 0.46093541383743286, "log_odds_ratio": -0.585874617099762, "logits/chosen": -2.045391321182251, "logits/rejected": -2.042501926422119, "logps/chosen": -0.7316650152206421, "logps/rejected": -0.9661180377006531, "loss": 1.2401, "nll_loss": 1.0619885921478271, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21949954330921173, "rewards/margins": 0.07033590972423553, "rewards/rejected": -0.28983545303344727, "step": 2540 }, { "epoch": 0.6680639245480744, "grad_norm": 0.9163401126861572, "learning_rate": 7.246278484308194e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1355414390563965, "logits/rejected": -2.0553345680236816, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3127, "nll_loss": 1.0776865482330322, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2550 }, { "epoch": 0.6706837830757139, "grad_norm": 0.8939431309700012, "learning_rate": 7.239655285193112e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1532928943634033, "logits/rejected": -2.0679402351379395, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2405, "nll_loss": 1.1460297107696533, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2560 }, { "epoch": 0.6733036416033534, "grad_norm": 0.8097273111343384, "learning_rate": 7.233006167885375e-06, "log_odds_chosen": 0.3317798972129822, "log_odds_ratio": -0.6523692607879639, "logits/chosen": -2.0989041328430176, "logits/rejected": -2.0460550785064697, "logps/chosen": -0.7197028398513794, "logps/rejected": -0.898587703704834, "loss": 1.2775, "nll_loss": 1.055712103843689, "rewards/accuracies": 0.625, "rewards/chosen": -0.215910866856575, "rewards/margins": 0.05366547778248787, "rewards/rejected": -0.2695763409137726, "step": 2570 }, { "epoch": 0.6759235001309929, "grad_norm": 0.6048458218574524, "learning_rate": 7.226331185579869e-06, "log_odds_chosen": 0.49049004912376404, "log_odds_ratio": -0.5751563310623169, "logits/chosen": -2.189640522003174, "logits/rejected": -2.050835132598877, "logps/chosen": -0.7604073882102966, "logps/rejected": -1.080538034439087, "loss": 1.3127, "nll_loss": 1.0872504711151123, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22812223434448242, "rewards/margins": 0.09603922069072723, "rewards/rejected": -0.32416146993637085, "step": 2580 }, { "epoch": 0.6785433586586325, "grad_norm": 1.0940250158309937, "learning_rate": 7.2196303916784125e-06, "log_odds_chosen": 0.21755337715148926, "log_odds_ratio": -0.7021976709365845, "logits/chosen": -2.2397446632385254, "logits/rejected": -2.134626626968384, "logps/chosen": -0.8208645582199097, "logps/rejected": -0.9672040939331055, "loss": 1.2727, "nll_loss": 1.0403534173965454, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.246259406208992, "rewards/margins": 0.04390185698866844, "rewards/rejected": -0.29016128182411194, "step": 2590 }, { "epoch": 0.6811632171862719, "grad_norm": 1.1411422491073608, "learning_rate": 7.212903839789321e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.237382173538208, "logits/rejected": -2.1155903339385986, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3726, "nll_loss": 1.137587308883667, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2600 }, { "epoch": 0.6837830757139115, "grad_norm": 0.7393993139266968, "learning_rate": 7.206151583726983e-06, "log_odds_chosen": 0.3169768750667572, "log_odds_ratio": -0.6450271010398865, "logits/chosen": -2.0800976753234863, "logits/rejected": -2.062063217163086, "logps/chosen": -0.7997614145278931, "logps/rejected": -1.0275766849517822, "loss": 1.2282, "nll_loss": 1.0264945030212402, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.23992840945720673, "rewards/margins": 0.0683446153998375, "rewards/rejected": -0.3082730174064636, "step": 2610 }, { "epoch": 0.686402934241551, "grad_norm": 1.237419605255127, "learning_rate": 7.199373677511429e-06, "log_odds_chosen": 0.4066476821899414, "log_odds_ratio": -0.6371482610702515, "logits/chosen": -2.0903244018554688, "logits/rejected": -1.9512255191802979, "logps/chosen": -0.8107932209968567, "logps/rejected": -1.0624643564224243, "loss": 1.2531, "nll_loss": 1.0793176889419556, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24323797225952148, "rewards/margins": 0.07550130039453506, "rewards/rejected": -0.31873929500579834, "step": 2620 }, { "epoch": 0.6890227927691904, "grad_norm": 1.1348999738693237, "learning_rate": 7.192570175367896e-06, "log_odds_chosen": 0.23723673820495605, "log_odds_ratio": -0.692613422870636, "logits/chosen": -2.1553401947021484, "logits/rejected": -2.1098575592041016, "logps/chosen": -0.7742843627929688, "logps/rejected": -0.9194076657295227, "loss": 1.2278, "nll_loss": 0.9754388928413391, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23228535056114197, "rewards/margins": 0.04353698343038559, "rewards/rejected": -0.27582231163978577, "step": 2630 }, { "epoch": 0.69164265129683, "grad_norm": 0.568951427936554, "learning_rate": 7.185741131726398e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1578760147094727, "logits/rejected": -2.0186927318573, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3132, "nll_loss": 1.1213527917861938, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2640 }, { "epoch": 0.6942625098244695, "grad_norm": 0.8528425693511963, "learning_rate": 7.178886601221288e-06, "log_odds_chosen": 0.43714895844459534, "log_odds_ratio": -0.6062057614326477, "logits/chosen": -2.1266090869903564, "logits/rejected": -1.972529411315918, "logps/chosen": -0.7341925501823425, "logps/rejected": -1.0070372819900513, "loss": 1.2007, "nll_loss": 1.0572203397750854, "rewards/accuracies": 0.625, "rewards/chosen": -0.22025778889656067, "rewards/margins": 0.08185340464115143, "rewards/rejected": -0.3021112084388733, "step": 2650 }, { "epoch": 0.696882368352109, "grad_norm": 0.7478863596916199, "learning_rate": 7.172006638690818e-06, "log_odds_chosen": 0.23426392674446106, "log_odds_ratio": -0.7009468674659729, "logits/chosen": -2.269324779510498, "logits/rejected": -2.2378697395324707, "logps/chosen": -0.8009575009346008, "logps/rejected": -0.9451401829719543, "loss": 1.2668, "nll_loss": 1.0990660190582275, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.24028727412223816, "rewards/margins": 0.04325477406382561, "rewards/rejected": -0.28354206681251526, "step": 2660 }, { "epoch": 0.6995022268797485, "grad_norm": 0.536769449710846, "learning_rate": 7.165101299176709e-06, "log_odds_chosen": 0.3421301543712616, "log_odds_ratio": -0.6537696123123169, "logits/chosen": -2.102100133895874, "logits/rejected": -2.023209571838379, "logps/chosen": -0.8150397539138794, "logps/rejected": -1.0280616283416748, "loss": 1.2252, "nll_loss": 1.0245137214660645, "rewards/accuracies": 0.625, "rewards/chosen": -0.24451196193695068, "rewards/margins": 0.06390657275915146, "rewards/rejected": -0.30841854214668274, "step": 2670 }, { "epoch": 0.702122085407388, "grad_norm": 1.5541636943817139, "learning_rate": 7.158170637923701e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.199897527694702, "logits/rejected": -2.126243829727173, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1875, "nll_loss": 0.9682501554489136, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2680 }, { "epoch": 0.7047419439350275, "grad_norm": 0.6678760051727295, "learning_rate": 7.15121471037912e-06, "log_odds_chosen": 0.3198024332523346, "log_odds_ratio": -0.631301999092102, "logits/chosen": -2.1863677501678467, "logits/rejected": -1.9945367574691772, "logps/chosen": -0.7599000334739685, "logps/rejected": -0.9678766131401062, "loss": 1.1794, "nll_loss": 0.9643503427505493, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22797003388404846, "rewards/margins": 0.06239297240972519, "rewards/rejected": -0.29036301374435425, "step": 2690 }, { "epoch": 0.707361802462667, "grad_norm": 0.6105390191078186, "learning_rate": 7.144233572192424e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1922237873077393, "logits/rejected": -2.1277713775634766, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2495, "nll_loss": 1.0547616481781006, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2700 }, { "epoch": 0.7099816609903066, "grad_norm": 1.4134238958358765, "learning_rate": 7.137227279214769e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.073819637298584, "logits/rejected": -2.028144121170044, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3426, "nll_loss": 1.156713843345642, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2710 }, { "epoch": 0.712601519517946, "grad_norm": 0.7173327207565308, "learning_rate": 7.1301958874985525e-06, "log_odds_chosen": 0.3696121275424957, "log_odds_ratio": -0.6350809335708618, "logits/chosen": -2.2135560512542725, "logits/rejected": -2.123964548110962, "logps/chosen": -0.8260348439216614, "logps/rejected": -1.0520808696746826, "loss": 1.261, "nll_loss": 1.0257538557052612, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2478104531764984, "rewards/margins": 0.06781382858753204, "rewards/rejected": -0.31562429666519165, "step": 2720 }, { "epoch": 0.7152213780455855, "grad_norm": 1.0755146741867065, "learning_rate": 7.12313945329697e-06, "log_odds_chosen": 0.7183526754379272, "log_odds_ratio": -0.5720943212509155, "logits/chosen": -2.1377110481262207, "logits/rejected": -2.0320024490356445, "logps/chosen": -0.8404228091239929, "logps/rejected": -1.363778829574585, "loss": 1.2238, "nll_loss": 1.092319130897522, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.25212687253952026, "rewards/margins": 0.15700681507587433, "rewards/rejected": -0.4091336727142334, "step": 2730 }, { "epoch": 0.7178412365732251, "grad_norm": 0.4474810063838959, "learning_rate": 7.116058033063568e-06, "log_odds_chosen": 0.4478398263454437, "log_odds_ratio": -0.6137696504592896, "logits/chosen": -2.277543544769287, "logits/rejected": -2.1322669982910156, "logps/chosen": -0.7880067825317383, "logps/rejected": -1.0826122760772705, "loss": 1.3098, "nll_loss": 1.0818217992782593, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23640206456184387, "rewards/margins": 0.08838163316249847, "rewards/rejected": -0.32478365302085876, "step": 2740 }, { "epoch": 0.7204610951008645, "grad_norm": 0.6360846161842346, "learning_rate": 7.108951683451783e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.37385892868042, "logits/rejected": -2.233447551727295, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.293, "nll_loss": 1.0701863765716553, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2750 }, { "epoch": 0.7230809536285041, "grad_norm": 0.7214239835739136, "learning_rate": 7.101820461314499e-06, "log_odds_chosen": 0.4084698557853699, "log_odds_ratio": -0.6077283620834351, "logits/chosen": -2.3347229957580566, "logits/rejected": -2.215712785720825, "logps/chosen": -0.7788197994232178, "logps/rejected": -1.024890661239624, "loss": 1.3186, "nll_loss": 1.1246103048324585, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23364591598510742, "rewards/margins": 0.07382124662399292, "rewards/rejected": -0.30746716260910034, "step": 2760 }, { "epoch": 0.7257008121561436, "grad_norm": NaN, "learning_rate": 7.094664423703584e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1620326042175293, "logits/rejected": -2.0330188274383545, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2016, "nll_loss": 0.9939059019088745, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2770 }, { "epoch": 0.728320670683783, "grad_norm": 1.1359010934829712, "learning_rate": 7.0874836278694365e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2081775665283203, "logits/rejected": -2.1510608196258545, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3287, "nll_loss": 1.1743004322052002, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2780 }, { "epoch": 0.7309405292114226, "grad_norm": 0.8056393265724182, "learning_rate": 7.080278131260532e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1479156017303467, "logits/rejected": -2.048037052154541, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2808, "nll_loss": 1.0827562808990479, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2790 }, { "epoch": 0.7335603877390621, "grad_norm": 0.7724719047546387, "learning_rate": 7.073047991522957e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1938867568969727, "logits/rejected": -2.1530394554138184, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2485, "nll_loss": 1.021594762802124, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2800 }, { "epoch": 0.7361802462667016, "grad_norm": 1.1115247011184692, "learning_rate": 7.06579326649995e-06, "log_odds_chosen": 0.5211392641067505, "log_odds_ratio": -0.5728268027305603, "logits/chosen": -2.147111654281616, "logits/rejected": -2.0510988235473633, "logps/chosen": -0.8054038882255554, "logps/rejected": -1.1425273418426514, "loss": 1.243, "nll_loss": 1.1077337265014648, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2416212111711502, "rewards/margins": 0.10113699734210968, "rewards/rejected": -0.3427582383155823, "step": 2810 }, { "epoch": 0.7388001047943411, "grad_norm": 0.5888007879257202, "learning_rate": 7.0585140142314395e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1483044624328613, "logits/rejected": -2.0383048057556152, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2897, "nll_loss": 1.072026252746582, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2820 }, { "epoch": 0.7414199633219806, "grad_norm": 0.6710845828056335, "learning_rate": 7.051210292953578e-06, "log_odds_chosen": 0.3103925883769989, "log_odds_ratio": -0.6611201167106628, "logits/chosen": -2.1971492767333984, "logits/rejected": -2.1729156970977783, "logps/chosen": -0.7257711887359619, "logps/rejected": -0.9491456151008606, "loss": 1.2806, "nll_loss": 1.094168782234192, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.21773135662078857, "rewards/margins": 0.06701229512691498, "rewards/rejected": -0.28474366664886475, "step": 2830 }, { "epoch": 0.7440398218496201, "grad_norm": 0.8307505249977112, "learning_rate": 7.043882161098279e-06, "log_odds_chosen": 0.4076002538204193, "log_odds_ratio": -0.6226305365562439, "logits/chosen": -2.1630561351776123, "logits/rejected": -2.1046409606933594, "logps/chosen": -0.7399248480796814, "logps/rejected": -0.9798358678817749, "loss": 1.2645, "nll_loss": 1.0215128660202026, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22197744250297546, "rewards/margins": 0.07197330892086029, "rewards/rejected": -0.29395079612731934, "step": 2840 }, { "epoch": 0.7466596803772596, "grad_norm": 0.7144160270690918, "learning_rate": 7.0365296772927476e-06, "log_odds_chosen": 0.3930348753929138, "log_odds_ratio": -0.608148992061615, "logits/chosen": -2.2506258487701416, "logits/rejected": -2.163642644882202, "logps/chosen": -0.775261402130127, "logps/rejected": -0.9980617761611938, "loss": 1.2437, "nll_loss": 1.0367671251296997, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23257842659950256, "rewards/margins": 0.06684008985757828, "rewards/rejected": -0.29941850900650024, "step": 2850 }, { "epoch": 0.7492795389048992, "grad_norm": 0.5483514666557312, "learning_rate": 7.029152900359011e-06, "log_odds_chosen": 0.13476970791816711, "log_odds_ratio": -0.7270932197570801, "logits/chosen": -2.2263050079345703, "logits/rejected": -2.1049838066101074, "logps/chosen": -0.8486529588699341, "logps/rejected": -0.9229358434677124, "loss": 1.2986, "nll_loss": 1.1287142038345337, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.25459587574005127, "rewards/margins": 0.022284861654043198, "rewards/rejected": -0.27688074111938477, "step": 2860 }, { "epoch": 0.7518993974325386, "grad_norm": 0.5478702187538147, "learning_rate": 7.021751889313448e-06, "log_odds_chosen": 0.2945244312286377, "log_odds_ratio": -0.6724186539649963, "logits/chosen": -2.1276438236236572, "logits/rejected": -2.0883290767669678, "logps/chosen": -0.8223506212234497, "logps/rejected": -1.028502106666565, "loss": 1.2573, "nll_loss": 1.0236998796463013, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.24670517444610596, "rewards/margins": 0.061845459043979645, "rewards/rejected": -0.3085506558418274, "step": 2870 }, { "epoch": 0.7545192559601781, "grad_norm": 0.620324969291687, "learning_rate": 7.014326703366318e-06, "log_odds_chosen": 0.37890589237213135, "log_odds_ratio": -0.5998955965042114, "logits/chosen": -2.1689436435699463, "logits/rejected": -2.095350503921509, "logps/chosen": -0.7338879108428955, "logps/rejected": -0.9649111032485962, "loss": 1.292, "nll_loss": 1.046656608581543, "rewards/accuracies": 0.625, "rewards/chosen": -0.2201664000749588, "rewards/margins": 0.06930693984031677, "rewards/rejected": -0.28947335481643677, "step": 2880 }, { "epoch": 0.7571391144878177, "grad_norm": 0.7871511578559875, "learning_rate": 7.006877401921289e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.101745128631592, "logits/rejected": -2.0622658729553223, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3341, "nll_loss": 1.1133625507354736, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2890 }, { "epoch": 0.7597589730154571, "grad_norm": 0.7853126525878906, "learning_rate": 6.999404044574956e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.095571994781494, "logits/rejected": -2.073544979095459, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3085, "nll_loss": 1.1572355031967163, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2900 }, { "epoch": 0.7623788315430967, "grad_norm": 0.6674336791038513, "learning_rate": 6.991906691116369e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.248521327972412, "logits/rejected": -2.1992533206939697, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3087, "nll_loss": 1.0622496604919434, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2910 }, { "epoch": 0.7649986900707362, "grad_norm": 0.5655173063278198, "learning_rate": 6.984385401526559e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.150892496109009, "logits/rejected": -1.9822124242782593, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2568, "nll_loss": 1.0187671184539795, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2920 }, { "epoch": 0.7676185485983756, "grad_norm": 0.8378030061721802, "learning_rate": 6.976840235978044e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.125096082687378, "logits/rejected": -2.1032614707946777, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3051, "nll_loss": 1.0320090055465698, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 2930 }, { "epoch": 0.7702384071260152, "grad_norm": 0.5712536573410034, "learning_rate": 6.969271254834366e-06, "log_odds_chosen": 0.32749080657958984, "log_odds_ratio": -0.6465135812759399, "logits/chosen": -2.2995567321777344, "logits/rejected": -2.100559949874878, "logps/chosen": -0.7840683460235596, "logps/rejected": -0.991683840751648, "loss": 1.2887, "nll_loss": 1.1132055521011353, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2352205216884613, "rewards/margins": 0.06228462606668472, "rewards/rejected": -0.2975051701068878, "step": 2940 }, { "epoch": 0.7728582656536547, "grad_norm": 1.2187914848327637, "learning_rate": 6.961678518649591e-06, "log_odds_chosen": 0.1670421063899994, "log_odds_ratio": -0.7025924324989319, "logits/chosen": -2.0532732009887695, "logits/rejected": -2.005733013153076, "logps/chosen": -0.8278334736824036, "logps/rejected": -0.9536125063896179, "loss": 1.271, "nll_loss": 1.0712836980819702, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.24835005402565002, "rewards/margins": 0.03773371875286102, "rewards/rejected": -0.28608375787734985, "step": 2950 }, { "epoch": 0.7754781241812942, "grad_norm": 0.5995035767555237, "learning_rate": 6.95406208816784e-06, "log_odds_chosen": 0.2661210298538208, "log_odds_ratio": -0.6747698187828064, "logits/chosen": -2.112772226333618, "logits/rejected": -2.024566650390625, "logps/chosen": -0.8226250410079956, "logps/rejected": -0.9694890975952148, "loss": 1.337, "nll_loss": 1.0853242874145508, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24678751826286316, "rewards/margins": 0.04405922442674637, "rewards/rejected": -0.29084673523902893, "step": 2960 }, { "epoch": 0.7780979827089337, "grad_norm": 0.6277843713760376, "learning_rate": 6.9464220243227885e-06, "log_odds_chosen": 0.09279575198888779, "log_odds_ratio": -0.7602270841598511, "logits/chosen": -2.239936351776123, "logits/rejected": -2.0854809284210205, "logps/chosen": -0.8698814511299133, "logps/rejected": -0.9617762565612793, "loss": 1.2329, "nll_loss": 1.0387821197509766, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.26096442341804504, "rewards/margins": 0.02756846882402897, "rewards/rejected": -0.28853291273117065, "step": 2970 }, { "epoch": 0.7807178412365732, "grad_norm": 0.5950387716293335, "learning_rate": 6.938758388237189e-06, "log_odds_chosen": 0.17784437537193298, "log_odds_ratio": -0.7037127614021301, "logits/chosen": -2.2328972816467285, "logits/rejected": -2.1269688606262207, "logps/chosen": -0.8187738656997681, "logps/rejected": -0.925631046295166, "loss": 1.297, "nll_loss": 1.0960203409194946, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.245632141828537, "rewards/margins": 0.032057177275419235, "rewards/rejected": -0.2776893377304077, "step": 2980 }, { "epoch": 0.7833376997642127, "grad_norm": 0.8385928273200989, "learning_rate": 6.931071241222378e-06, "log_odds_chosen": 0.31650716066360474, "log_odds_ratio": -0.6479363441467285, "logits/chosen": -2.2020020484924316, "logits/rejected": -2.154996156692505, "logps/chosen": -0.7813807129859924, "logps/rejected": -0.9673460125923157, "loss": 1.2688, "nll_loss": 1.0483282804489136, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2344142198562622, "rewards/margins": 0.055789582431316376, "rewards/rejected": -0.2902038097381592, "step": 2990 }, { "epoch": 0.7859575582918522, "grad_norm": 0.7713876962661743, "learning_rate": 6.923360644777787e-06, "log_odds_chosen": 0.27009299397468567, "log_odds_ratio": -0.6855179071426392, "logits/chosen": -2.2240679264068604, "logits/rejected": -2.09279727935791, "logps/chosen": -0.8316240310668945, "logps/rejected": -1.0465376377105713, "loss": 1.2186, "nll_loss": 1.0708683729171753, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24948720633983612, "rewards/margins": 0.06447408348321915, "rewards/rejected": -0.31396132707595825, "step": 3000 }, { "epoch": 0.7885774168194918, "grad_norm": 1.1684746742248535, "learning_rate": 6.915626660590451e-06, "log_odds_chosen": 0.5478761196136475, "log_odds_ratio": -0.5586941242218018, "logits/chosen": -2.2233002185821533, "logits/rejected": -2.0840015411376953, "logps/chosen": -0.7935158014297485, "logps/rejected": -1.127759337425232, "loss": 1.2508, "nll_loss": 1.0928322076797485, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.23805475234985352, "rewards/margins": 0.10027307271957397, "rewards/rejected": -0.3383278250694275, "step": 3010 }, { "epoch": 0.7911972753471312, "grad_norm": 0.9368971586227417, "learning_rate": 6.9078693505345095e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2127246856689453, "logits/rejected": -2.166398525238037, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3428, "nll_loss": 1.1240220069885254, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3020 }, { "epoch": 0.7938171338747707, "grad_norm": 0.8324739933013916, "learning_rate": 6.900088776670721e-06, "log_odds_chosen": 0.18721231818199158, "log_odds_ratio": -0.7051528692245483, "logits/chosen": -2.182648181915283, "logits/rejected": -1.984972357749939, "logps/chosen": -0.863318920135498, "logps/rejected": -0.9999431371688843, "loss": 1.2526, "nll_loss": 1.0552375316619873, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2589956820011139, "rewards/margins": 0.04098723456263542, "rewards/rejected": -0.2999829351902008, "step": 3030 }, { "epoch": 0.7964369924024103, "grad_norm": 0.6330258846282959, "learning_rate": 6.892285001245957e-06, "log_odds_chosen": 0.24604663252830505, "log_odds_ratio": -0.6671556234359741, "logits/chosen": -2.2969837188720703, "logits/rejected": -2.1538138389587402, "logps/chosen": -0.8543354868888855, "logps/rejected": -1.0042922496795654, "loss": 1.2582, "nll_loss": 1.05300772190094, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2563006579875946, "rewards/margins": 0.044987019151449203, "rewards/rejected": -0.3012877106666565, "step": 3040 }, { "epoch": 0.7990568509300497, "grad_norm": 0.5441903471946716, "learning_rate": 6.884458086692712e-06, "log_odds_chosen": 0.26793450117111206, "log_odds_ratio": -0.6500241756439209, "logits/chosen": -2.129385471343994, "logits/rejected": -2.1506495475769043, "logps/chosen": -0.7567280530929565, "logps/rejected": -0.9095877408981323, "loss": 1.2475, "nll_loss": 1.0213663578033447, "rewards/accuracies": 0.625, "rewards/chosen": -0.22701840102672577, "rewards/margins": 0.04585794731974602, "rewards/rejected": -0.2728763222694397, "step": 3050 }, { "epoch": 0.8016767094576893, "grad_norm": 0.9691625237464905, "learning_rate": 6.876608095628597e-06, "log_odds_chosen": 0.3041144013404846, "log_odds_ratio": -0.6534818410873413, "logits/chosen": -2.151218891143799, "logits/rejected": -2.1326375007629395, "logps/chosen": -0.8030401468276978, "logps/rejected": -0.9914613962173462, "loss": 1.3052, "nll_loss": 1.1234928369522095, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2409120500087738, "rewards/margins": 0.056526392698287964, "rewards/rejected": -0.29743844270706177, "step": 3060 }, { "epoch": 0.8042965679853288, "grad_norm": 0.8772619962692261, "learning_rate": 6.868735090855845e-06, "log_odds_chosen": 0.25347211956977844, "log_odds_ratio": -0.6752365827560425, "logits/chosen": -2.251408100128174, "logits/rejected": -2.221590757369995, "logps/chosen": -0.7676174640655518, "logps/rejected": -0.9122737646102905, "loss": 1.2451, "nll_loss": 1.0795937776565552, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23028525710105896, "rewards/margins": 0.043396878987550735, "rewards/rejected": -0.2736821472644806, "step": 3070 }, { "epoch": 0.8069164265129684, "grad_norm": 0.7910062074661255, "learning_rate": 6.860839135360802e-06, "log_odds_chosen": 0.28677815198898315, "log_odds_ratio": -0.6730754971504211, "logits/chosen": -2.229288101196289, "logits/rejected": -2.0682153701782227, "logps/chosen": -0.7965772747993469, "logps/rejected": -0.9596049189567566, "loss": 1.1515, "nll_loss": 0.9608367681503296, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23897318542003632, "rewards/margins": 0.0489082895219326, "rewards/rejected": -0.287881463766098, "step": 3080 }, { "epoch": 0.8095362850406078, "grad_norm": 1.2048920392990112, "learning_rate": 6.852920292313429e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.112882137298584, "logits/rejected": -2.1212258338928223, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2654, "nll_loss": 1.0765750408172607, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3090 }, { "epoch": 0.8121561435682473, "grad_norm": 0.8459360599517822, "learning_rate": 6.844978625066794e-06, "log_odds_chosen": 0.2506456673145294, "log_odds_ratio": -0.6783725023269653, "logits/chosen": -2.1630568504333496, "logits/rejected": -2.090458631515503, "logps/chosen": -0.8152025938034058, "logps/rejected": -0.9976935386657715, "loss": 1.1804, "nll_loss": 1.0005921125411987, "rewards/accuracies": 0.625, "rewards/chosen": -0.24456080794334412, "rewards/margins": 0.054747261106967926, "rewards/rejected": -0.29930803179740906, "step": 3100 }, { "epoch": 0.8147760020958869, "grad_norm": 0.8095239400863647, "learning_rate": 6.837014197156564e-06, "log_odds_chosen": 0.47977280616760254, "log_odds_ratio": -0.6109746694564819, "logits/chosen": -2.2128024101257324, "logits/rejected": -2.127735137939453, "logps/chosen": -0.744666576385498, "logps/rejected": -1.0179359912872314, "loss": 1.2252, "nll_loss": 1.0396313667297363, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22339999675750732, "rewards/margins": 0.08198080211877823, "rewards/rejected": -0.30538076162338257, "step": 3110 }, { "epoch": 0.8173958606235263, "grad_norm": 0.7466575503349304, "learning_rate": 6.8290270723004995e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.261514186859131, "logits/rejected": -2.204908609390259, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2483, "nll_loss": 1.0346410274505615, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3120 }, { "epoch": 0.8200157191511659, "grad_norm": 0.8273678421974182, "learning_rate": 6.8210173143979415e-06, "log_odds_chosen": 0.42329850792884827, "log_odds_ratio": -0.6244557499885559, "logits/chosen": -2.227433443069458, "logits/rejected": -2.154644250869751, "logps/chosen": -0.7812697291374207, "logps/rejected": -1.08664870262146, "loss": 1.2156, "nll_loss": 1.0455844402313232, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23438093066215515, "rewards/margins": 0.09161369502544403, "rewards/rejected": -0.3259946405887604, "step": 3130 }, { "epoch": 0.8226355776788054, "grad_norm": 0.7020566463470459, "learning_rate": 6.812984987529303e-06, "log_odds_chosen": 0.40156760811805725, "log_odds_ratio": -0.6424612402915955, "logits/chosen": -2.142141342163086, "logits/rejected": -2.104396104812622, "logps/chosen": -0.7818731069564819, "logps/rejected": -1.0185701847076416, "loss": 1.2791, "nll_loss": 1.0681931972503662, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.234561949968338, "rewards/margins": 0.0710090771317482, "rewards/rejected": -0.3055710196495056, "step": 3140 }, { "epoch": 0.8252554362064448, "grad_norm": 0.7978582978248596, "learning_rate": 6.804930155955556e-06, "log_odds_chosen": 0.2946697175502777, "log_odds_ratio": -0.6227195858955383, "logits/chosen": -2.226771593093872, "logits/rejected": -2.128952980041504, "logps/chosen": -0.750542938709259, "logps/rejected": -0.903497576713562, "loss": 1.2, "nll_loss": 1.0200318098068237, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22516289353370667, "rewards/margins": 0.04588640481233597, "rewards/rejected": -0.27104929089546204, "step": 3150 }, { "epoch": 0.8278752947340844, "grad_norm": 0.6247879862785339, "learning_rate": 6.796852884117715e-06, "log_odds_chosen": 0.43241414427757263, "log_odds_ratio": -0.6538288593292236, "logits/chosen": -2.1597695350646973, "logits/rejected": -2.141784191131592, "logps/chosen": -0.8206120729446411, "logps/rejected": -1.1073694229125977, "loss": 1.2967, "nll_loss": 1.110215425491333, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2461836040019989, "rewards/margins": 0.08602719008922577, "rewards/rejected": -0.33221083879470825, "step": 3160 }, { "epoch": 0.8304951532617239, "grad_norm": 0.8438325524330139, "learning_rate": 6.788753236636325e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2946648597717285, "logits/rejected": -2.1189956665039062, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2791, "nll_loss": 1.0987119674682617, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3170 }, { "epoch": 0.8331150117893634, "grad_norm": 0.6306805610656738, "learning_rate": 6.780631278310942e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2421727180480957, "logits/rejected": -2.2239952087402344, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3106, "nll_loss": 1.0618656873703003, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3180 }, { "epoch": 0.8357348703170029, "grad_norm": 0.6500200033187866, "learning_rate": 6.772487074119615e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.0922045707702637, "logits/rejected": -1.9939416646957397, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3194, "nll_loss": 1.041007399559021, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3190 }, { "epoch": 0.8383547288446423, "grad_norm": 0.8225128054618835, "learning_rate": 6.764320689218367e-06, "log_odds_chosen": 0.4958614408969879, "log_odds_ratio": -0.617427408695221, "logits/chosen": -2.1428723335266113, "logits/rejected": -2.13554310798645, "logps/chosen": -0.7122236490249634, "logps/rejected": -0.9816004037857056, "loss": 1.2076, "nll_loss": 0.950714111328125, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2136671245098114, "rewards/margins": 0.08081299066543579, "rewards/rejected": -0.2944801449775696, "step": 3200 }, { "epoch": 0.8409745873722819, "grad_norm": 0.7864217758178711, "learning_rate": 6.7561321889406715e-06, "log_odds_chosen": 0.3534198999404907, "log_odds_ratio": -0.641982913017273, "logits/chosen": -2.1395926475524902, "logits/rejected": -2.0922439098358154, "logps/chosen": -0.7750155329704285, "logps/rejected": -0.9938211441040039, "loss": 1.2396, "nll_loss": 1.1144806146621704, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2325046807527542, "rewards/margins": 0.06564168632030487, "rewards/rejected": -0.2981463670730591, "step": 3210 }, { "epoch": 0.8435944458999214, "grad_norm": 0.5436039566993713, "learning_rate": 6.7479216387969345e-06, "log_odds_chosen": 0.4018774628639221, "log_odds_ratio": -0.6372645497322083, "logits/chosen": -2.158130168914795, "logits/rejected": -2.1389412879943848, "logps/chosen": -0.8250292539596558, "logps/rejected": -1.089007019996643, "loss": 1.2522, "nll_loss": 1.0285905599594116, "rewards/accuracies": 0.5625, "rewards/chosen": -0.24750879406929016, "rewards/margins": 0.07919331640005112, "rewards/rejected": -0.3267021179199219, "step": 3220 }, { "epoch": 0.846214304427561, "grad_norm": 0.95215904712677, "learning_rate": 6.739689104473966e-06, "log_odds_chosen": 0.3724967837333679, "log_odds_ratio": -0.6314784288406372, "logits/chosen": -2.210779905319214, "logits/rejected": -2.199453830718994, "logps/chosen": -0.800619900226593, "logps/rejected": -1.0477778911590576, "loss": 1.2489, "nll_loss": 1.0949738025665283, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24018597602844238, "rewards/margins": 0.07414741069078445, "rewards/rejected": -0.31433337926864624, "step": 3230 }, { "epoch": 0.8488341629552004, "grad_norm": 0.5786678791046143, "learning_rate": 6.7314346518344546e-06, "log_odds_chosen": 0.2267800271511078, "log_odds_ratio": -0.6825129985809326, "logits/chosen": -2.316218614578247, "logits/rejected": -2.2454750537872314, "logps/chosen": -0.8672484159469604, "logps/rejected": -0.9972507357597351, "loss": 1.3357, "nll_loss": 1.1359622478485107, "rewards/accuracies": 0.625, "rewards/chosen": -0.26017457246780396, "rewards/margins": 0.03900068998336792, "rewards/rejected": -0.2991752028465271, "step": 3240 }, { "epoch": 0.8514540214828399, "grad_norm": 0.5737934708595276, "learning_rate": 6.723158346916444e-06, "log_odds_chosen": 0.19101275503635406, "log_odds_ratio": -0.7254603505134583, "logits/chosen": -2.225320339202881, "logits/rejected": -2.1805214881896973, "logps/chosen": -0.7957597970962524, "logps/rejected": -0.908596396446228, "loss": 1.2882, "nll_loss": 1.0750505924224854, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23872795701026917, "rewards/margins": 0.03385098651051521, "rewards/rejected": -0.2725789248943329, "step": 3250 }, { "epoch": 0.8540738800104795, "grad_norm": 0.6615453958511353, "learning_rate": 6.714860255932798e-06, "log_odds_chosen": 0.37908267974853516, "log_odds_ratio": -0.6760109066963196, "logits/chosen": -2.2373995780944824, "logits/rejected": -2.1702606678009033, "logps/chosen": -0.8603512048721313, "logps/rejected": -1.069373369216919, "loss": 1.2303, "nll_loss": 1.0732070207595825, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2581053674221039, "rewards/margins": 0.0627066045999527, "rewards/rejected": -0.32081201672554016, "step": 3260 }, { "epoch": 0.8566937385381189, "grad_norm": 0.7862983345985413, "learning_rate": 6.706540445270684e-06, "log_odds_chosen": 0.4689900875091553, "log_odds_ratio": -0.6314810514450073, "logits/chosen": -2.304373025894165, "logits/rejected": -2.2608327865600586, "logps/chosen": -0.8024113774299622, "logps/rejected": -1.0898149013519287, "loss": 1.3284, "nll_loss": 1.1199365854263306, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.24072346091270447, "rewards/margins": 0.08622101694345474, "rewards/rejected": -0.3269444704055786, "step": 3270 }, { "epoch": 0.8593135970657585, "grad_norm": 0.7702196836471558, "learning_rate": 6.698198981491023e-06, "log_odds_chosen": 0.4366222321987152, "log_odds_ratio": -0.5884053111076355, "logits/chosen": -2.20082426071167, "logits/rejected": -2.150367498397827, "logps/chosen": -0.753293514251709, "logps/rejected": -1.007750153541565, "loss": 1.2557, "nll_loss": 1.0518124103546143, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22598806023597717, "rewards/margins": 0.07633697986602783, "rewards/rejected": -0.3023250699043274, "step": 3280 }, { "epoch": 0.861933455593398, "grad_norm": 0.6288425326347351, "learning_rate": 6.689835931327977e-06, "log_odds_chosen": 0.39261871576309204, "log_odds_ratio": -0.6504190564155579, "logits/chosen": -2.1958439350128174, "logits/rejected": -2.1917357444763184, "logps/chosen": -0.7236720323562622, "logps/rejected": -0.9510132074356079, "loss": 1.2169, "nll_loss": 1.0487489700317383, "rewards/accuracies": 0.625, "rewards/chosen": -0.21710161864757538, "rewards/margins": 0.06820230931043625, "rewards/rejected": -0.28530392050743103, "step": 3290 }, { "epoch": 0.8645533141210374, "grad_norm": 1.003694772720337, "learning_rate": 6.681451361688397e-06, "log_odds_chosen": 0.3558635115623474, "log_odds_ratio": -0.6442310214042664, "logits/chosen": -2.2488136291503906, "logits/rejected": -2.163747787475586, "logps/chosen": -0.7784761190414429, "logps/rejected": -1.020261287689209, "loss": 1.1823, "nll_loss": 0.9901485443115234, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23354284465312958, "rewards/margins": 0.07253555208444595, "rewards/rejected": -0.30607837438583374, "step": 3300 }, { "epoch": 0.867173172648677, "grad_norm": 0.6935679912567139, "learning_rate": 6.673045339651303e-06, "log_odds_chosen": 0.40724238753318787, "log_odds_ratio": -0.6429778337478638, "logits/chosen": -2.2061409950256348, "logits/rejected": -2.127814769744873, "logps/chosen": -0.7680079340934753, "logps/rejected": -0.9793314933776855, "loss": 1.2496, "nll_loss": 1.1112148761749268, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2304023951292038, "rewards/margins": 0.06339707970619202, "rewards/rejected": -0.293799489736557, "step": 3310 }, { "epoch": 0.8697930311763165, "grad_norm": 0.7396456003189087, "learning_rate": 6.664617932467333e-06, "log_odds_chosen": 0.27828970551490784, "log_odds_ratio": -0.6437123417854309, "logits/chosen": -2.250906467437744, "logits/rejected": -2.1394481658935547, "logps/chosen": -0.7782624363899231, "logps/rejected": -0.956400990486145, "loss": 1.1804, "nll_loss": 1.0189125537872314, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23347875475883484, "rewards/margins": 0.05344155430793762, "rewards/rejected": -0.28692030906677246, "step": 3320 }, { "epoch": 0.872412889703956, "grad_norm": 0.5018510222434998, "learning_rate": 6.656169207558221e-06, "log_odds_chosen": 0.21465472877025604, "log_odds_ratio": -0.7300101518630981, "logits/chosen": -2.282186985015869, "logits/rejected": -2.280038833618164, "logps/chosen": -0.8806089162826538, "logps/rejected": -1.0704500675201416, "loss": 1.3246, "nll_loss": 1.2006666660308838, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2641827166080475, "rewards/margins": 0.0569523349404335, "rewards/rejected": -0.3211350440979004, "step": 3330 }, { "epoch": 0.8750327482315955, "grad_norm": 0.6447619795799255, "learning_rate": 6.647699232516241e-06, "log_odds_chosen": 0.3628096282482147, "log_odds_ratio": -0.648274838924408, "logits/chosen": -2.401176929473877, "logits/rejected": -2.244321346282959, "logps/chosen": -0.7767874002456665, "logps/rejected": -1.0501941442489624, "loss": 1.1502, "nll_loss": 0.9910930395126343, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23303620517253876, "rewards/margins": 0.08202206343412399, "rewards/rejected": -0.31505829095840454, "step": 3340 }, { "epoch": 0.877652606759235, "grad_norm": 0.5813132524490356, "learning_rate": 6.6392080751036816e-06, "log_odds_chosen": 0.43994221091270447, "log_odds_ratio": -0.6281770467758179, "logits/chosen": -2.2883613109588623, "logits/rejected": -2.151966094970703, "logps/chosen": -0.7786010503768921, "logps/rejected": -1.0633666515350342, "loss": 1.2258, "nll_loss": 1.0202409029006958, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2335803508758545, "rewards/margins": 0.08542965352535248, "rewards/rejected": -0.31901001930236816, "step": 3350 }, { "epoch": 0.8802724652868745, "grad_norm": 0.7948014736175537, "learning_rate": 6.63069580325229e-06, "log_odds_chosen": 0.28727591037750244, "log_odds_ratio": -0.6295253038406372, "logits/chosen": -2.2538743019104004, "logits/rejected": -2.217115879058838, "logps/chosen": -0.7720192670822144, "logps/rejected": -0.9491912126541138, "loss": 1.29, "nll_loss": 1.0846030712127686, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23160576820373535, "rewards/margins": 0.05315159633755684, "rewards/rejected": -0.2847573757171631, "step": 3360 }, { "epoch": 0.882892323814514, "grad_norm": 1.1898905038833618, "learning_rate": 6.622162485062739e-06, "log_odds_chosen": 0.5656839609146118, "log_odds_ratio": -0.6008988618850708, "logits/chosen": -2.1415390968322754, "logits/rejected": -2.072024345397949, "logps/chosen": -0.7751294374465942, "logps/rejected": -1.1483380794525146, "loss": 1.2611, "nll_loss": 1.0258206129074097, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2325388491153717, "rewards/margins": 0.11196257174015045, "rewards/rejected": -0.34450146555900574, "step": 3370 }, { "epoch": 0.8855121823421536, "grad_norm": 0.5382871627807617, "learning_rate": 6.61360818880408e-06, "log_odds_chosen": 0.15128861367702484, "log_odds_ratio": -0.6858547329902649, "logits/chosen": -2.282588481903076, "logits/rejected": -2.261298656463623, "logps/chosen": -0.8165601491928101, "logps/rejected": -0.8988736867904663, "loss": 1.1906, "nll_loss": 0.9416168332099915, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24496805667877197, "rewards/margins": 0.02469407208263874, "rewards/rejected": -0.26966217160224915, "step": 3380 }, { "epoch": 0.888132040869793, "grad_norm": 0.7031036615371704, "learning_rate": 6.6050329829131895e-06, "log_odds_chosen": 0.4688243269920349, "log_odds_ratio": -0.6155135035514832, "logits/chosen": -2.2125720977783203, "logits/rejected": -2.131483316421509, "logps/chosen": -0.7161384224891663, "logps/rejected": -0.999444305896759, "loss": 1.2088, "nll_loss": 1.0456256866455078, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21484152972698212, "rewards/margins": 0.08499175310134888, "rewards/rejected": -0.2998332977294922, "step": 3390 }, { "epoch": 0.8907518993974325, "grad_norm": 0.804205596446991, "learning_rate": 6.596436935994231e-06, "log_odds_chosen": 0.3887774348258972, "log_odds_ratio": -0.642585277557373, "logits/chosen": -2.290648937225342, "logits/rejected": -2.2178561687469482, "logps/chosen": -0.7612495422363281, "logps/rejected": -1.005303978919983, "loss": 1.1948, "nll_loss": 1.0630762577056885, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22837483882904053, "rewards/margins": 0.07321634888648987, "rewards/rejected": -0.3015912175178528, "step": 3400 }, { "epoch": 0.8933717579250721, "grad_norm": 0.7042835354804993, "learning_rate": 6.587820116818102e-06, "log_odds_chosen": 0.3457682728767395, "log_odds_ratio": -0.6287668943405151, "logits/chosen": -2.135673761367798, "logits/rejected": -2.0596063137054443, "logps/chosen": -0.8340846300125122, "logps/rejected": -1.051815390586853, "loss": 1.2547, "nll_loss": 1.0832328796386719, "rewards/accuracies": 0.625, "rewards/chosen": -0.25022539496421814, "rewards/margins": 0.0653192549943924, "rewards/rejected": -0.31554466485977173, "step": 3410 }, { "epoch": 0.8959916164527115, "grad_norm": 0.6093992590904236, "learning_rate": 6.579182594321883e-06, "log_odds_chosen": 0.45073190331459045, "log_odds_ratio": -0.6153164505958557, "logits/chosen": -2.3242080211639404, "logits/rejected": -2.1881542205810547, "logps/chosen": -0.7014079093933105, "logps/rejected": -0.988603413105011, "loss": 1.2258, "nll_loss": 0.9943335652351379, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21042239665985107, "rewards/margins": 0.0861586332321167, "rewards/rejected": -0.2965810298919678, "step": 3420 }, { "epoch": 0.8986114749803511, "grad_norm": 0.5893613696098328, "learning_rate": 6.5705244376082896e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.311530351638794, "logits/rejected": -2.1480250358581543, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.35, "nll_loss": 1.1489099264144897, "rewards/accuracies": 0.512499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3430 }, { "epoch": 0.9012313335079906, "grad_norm": 0.9981038570404053, "learning_rate": 6.561845715945113e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2001287937164307, "logits/rejected": -2.109483003616333, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.224, "nll_loss": 1.081761121749878, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3440 }, { "epoch": 0.90385119203563, "grad_norm": 1.1899179220199585, "learning_rate": 6.553146498764673e-06, "log_odds_chosen": 0.4923027455806732, "log_odds_ratio": -0.6032063364982605, "logits/chosen": -2.372786045074463, "logits/rejected": -2.290383815765381, "logps/chosen": -0.7255181670188904, "logps/rejected": -0.9897600412368774, "loss": 1.242, "nll_loss": 1.0289896726608276, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2176554650068283, "rewards/margins": 0.07927256077528, "rewards/rejected": -0.2969280183315277, "step": 3450 }, { "epoch": 0.9064710505632696, "grad_norm": 0.9532584547996521, "learning_rate": 6.544426855663261e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.218233823776245, "logits/rejected": -2.172614574432373, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1941, "nll_loss": 0.9494379162788391, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3460 }, { "epoch": 0.9090909090909091, "grad_norm": 0.675166666507721, "learning_rate": 6.535686856400578e-06, "log_odds_chosen": 0.3828073740005493, "log_odds_ratio": -0.6082161068916321, "logits/chosen": -2.295574426651001, "logits/rejected": -2.125378370285034, "logps/chosen": -0.7820349931716919, "logps/rejected": -1.0106667280197144, "loss": 1.2481, "nll_loss": 0.9841006994247437, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.23461051285266876, "rewards/margins": 0.06858952343463898, "rewards/rejected": -0.30320003628730774, "step": 3470 }, { "epoch": 0.9117107676185486, "grad_norm": 0.5884809494018555, "learning_rate": 6.526926570899182e-06, "log_odds_chosen": 0.1575748473405838, "log_odds_ratio": -0.7290948033332825, "logits/chosen": -2.3222103118896484, "logits/rejected": -2.252857208251953, "logps/chosen": -0.7994164228439331, "logps/rejected": -0.9172335863113403, "loss": 1.2789, "nll_loss": 0.9737574458122253, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.23982493579387665, "rewards/margins": 0.0353451631963253, "rewards/rejected": -0.27517011761665344, "step": 3480 }, { "epoch": 0.9143306261461881, "grad_norm": 0.5838411450386047, "learning_rate": 6.518146069243928e-06, "log_odds_chosen": 0.5788034200668335, "log_odds_ratio": -0.5474847555160522, "logits/chosen": -2.289735794067383, "logits/rejected": -2.13700795173645, "logps/chosen": -0.7176246047019958, "logps/rejected": -1.069835901260376, "loss": 1.1828, "nll_loss": 0.9975638389587402, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21528737246990204, "rewards/margins": 0.10566340386867523, "rewards/rejected": -0.32095080614089966, "step": 3490 }, { "epoch": 0.9169504846738276, "grad_norm": 0.6071087718009949, "learning_rate": 6.509345421681404e-06, "log_odds_chosen": 0.3012910485267639, "log_odds_ratio": -0.627751886844635, "logits/chosen": -2.2642569541931152, "logits/rejected": -2.2112367153167725, "logps/chosen": -0.7751448750495911, "logps/rejected": -0.9667026400566101, "loss": 1.2554, "nll_loss": 1.0542640686035156, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2325434684753418, "rewards/margins": 0.05746736004948616, "rewards/rejected": -0.29001086950302124, "step": 3500 }, { "epoch": 0.9195703432014671, "grad_norm": 0.700552761554718, "learning_rate": 6.500524698619373e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2243270874023438, "logits/rejected": -2.192295551300049, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2013, "nll_loss": 1.099815845489502, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3510 }, { "epoch": 0.9221902017291066, "grad_norm": 0.7178587317466736, "learning_rate": 6.4916839706262065e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.109621524810791, "logits/rejected": -2.0778555870056152, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3494, "nll_loss": 1.0909357070922852, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3520 }, { "epoch": 0.9248100602567462, "grad_norm": 0.8290575742721558, "learning_rate": 6.482823308430323e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.166016101837158, "logits/rejected": -2.0696825981140137, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2595, "nll_loss": 1.048041820526123, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3530 }, { "epoch": 0.9274299187843856, "grad_norm": 1.1387983560562134, "learning_rate": 6.473942782919618e-06, "log_odds_chosen": 0.13087137043476105, "log_odds_ratio": -0.7509936094284058, "logits/chosen": -2.2618415355682373, "logits/rejected": -2.172558546066284, "logps/chosen": -0.826370358467102, "logps/rejected": -0.9089586138725281, "loss": 1.2749, "nll_loss": 1.0707132816314697, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.24791112542152405, "rewards/margins": 0.02477649226784706, "rewards/rejected": -0.2726876139640808, "step": 3540 }, { "epoch": 0.9300497773120252, "grad_norm": 0.61870276927948, "learning_rate": 6.4650424651409e-06, "log_odds_chosen": 0.3501388430595398, "log_odds_ratio": -0.6480156183242798, "logits/chosen": -2.1623668670654297, "logits/rejected": -2.225841999053955, "logps/chosen": -0.7172938585281372, "logps/rejected": -0.9010885953903198, "loss": 1.2658, "nll_loss": 1.0366438627243042, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2151881754398346, "rewards/margins": 0.05513841658830643, "rewards/rejected": -0.2703265845775604, "step": 3550 }, { "epoch": 0.9326696358396647, "grad_norm": 0.602613091468811, "learning_rate": 6.456122426299325e-06, "log_odds_chosen": 0.4221953749656677, "log_odds_ratio": -0.6846648454666138, "logits/chosen": -2.2409651279449463, "logits/rejected": -2.2080750465393066, "logps/chosen": -0.789202868938446, "logps/rejected": -1.0500257015228271, "loss": 1.3131, "nll_loss": 1.1938931941986084, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.23676088452339172, "rewards/margins": 0.07824688404798508, "rewards/rejected": -0.315007746219635, "step": 3560 }, { "epoch": 0.9352894943673041, "grad_norm": 0.944076418876648, "learning_rate": 6.447182737757819e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.208246946334839, "logits/rejected": -2.110275983810425, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3455, "nll_loss": 1.1080548763275146, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3570 }, { "epoch": 0.9379093528949437, "grad_norm": 0.6484692096710205, "learning_rate": 6.438223471036513e-06, "log_odds_chosen": 0.14046882092952728, "log_odds_ratio": -0.7138088941574097, "logits/chosen": -2.316216230392456, "logits/rejected": -2.2925333976745605, "logps/chosen": -0.7886157035827637, "logps/rejected": -0.8752482533454895, "loss": 1.2373, "nll_loss": 1.0316179990768433, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.23658473789691925, "rewards/margins": 0.025989780202507973, "rewards/rejected": -0.2625744938850403, "step": 3580 }, { "epoch": 0.9405292114225832, "grad_norm": 1.3446333408355713, "learning_rate": 6.429244697812167e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2853825092315674, "logits/rejected": -2.2034144401550293, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.259, "nll_loss": 1.0007083415985107, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3590 }, { "epoch": 0.9431490699502227, "grad_norm": 1.1501189470291138, "learning_rate": 6.4202464899176055e-06, "log_odds_chosen": 0.31998586654663086, "log_odds_ratio": -0.6589009165763855, "logits/chosen": -2.287815570831299, "logits/rejected": -2.2191479206085205, "logps/chosen": -0.7409970760345459, "logps/rejected": -0.9406083226203918, "loss": 1.2226, "nll_loss": 1.0692561864852905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22229912877082825, "rewards/margins": 0.05988338589668274, "rewards/rejected": -0.282182514667511, "step": 3600 }, { "epoch": 0.9457689284778622, "grad_norm": 0.9715876579284668, "learning_rate": 6.4112289193411265e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2311463356018066, "logits/rejected": -2.1804516315460205, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3031, "nll_loss": 1.0543556213378906, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3610 }, { "epoch": 0.9483887870055017, "grad_norm": 0.6495265364646912, "learning_rate": 6.402192058225944e-06, "log_odds_chosen": 0.13607683777809143, "log_odds_ratio": -0.7345439195632935, "logits/chosen": -2.1918234825134277, "logits/rejected": -2.2382254600524902, "logps/chosen": -0.8182328343391418, "logps/rejected": -0.8725131154060364, "loss": 1.2901, "nll_loss": 1.0918344259262085, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2454698383808136, "rewards/margins": 0.016284089535474777, "rewards/rejected": -0.26175394654273987, "step": 3620 }, { "epoch": 0.9510086455331412, "grad_norm": 0.5618166327476501, "learning_rate": 6.393135978869594e-06, "log_odds_chosen": 0.13218867778778076, "log_odds_ratio": -0.7189925909042358, "logits/chosen": -2.2147817611694336, "logits/rejected": -2.11883544921875, "logps/chosen": -0.7445226907730103, "logps/rejected": -0.8496515154838562, "loss": 1.3703, "nll_loss": 1.1340827941894531, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.22335676848888397, "rewards/margins": 0.0315387062728405, "rewards/rejected": -0.25489550828933716, "step": 3630 }, { "epoch": 0.9536285040607807, "grad_norm": 0.6808521151542664, "learning_rate": 6.384060753723371e-06, "log_odds_chosen": 0.3427991271018982, "log_odds_ratio": -0.6193419098854065, "logits/chosen": -2.174104690551758, "logits/rejected": -2.093768358230591, "logps/chosen": -0.7680143117904663, "logps/rejected": -0.9898279905319214, "loss": 1.302, "nll_loss": 1.0458976030349731, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.230404332280159, "rewards/margins": 0.06654410064220428, "rewards/rejected": -0.2969484329223633, "step": 3640 }, { "epoch": 0.9562483625884203, "grad_norm": 0.7768465876579285, "learning_rate": 6.374966455391737e-06, "log_odds_chosen": 0.2215617150068283, "log_odds_ratio": -0.7079781293869019, "logits/chosen": -2.19539737701416, "logits/rejected": -2.1382100582122803, "logps/chosen": -0.8588269948959351, "logps/rejected": -0.9985498189926147, "loss": 1.3399, "nll_loss": 1.214458703994751, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.2576481103897095, "rewards/margins": 0.04191683605313301, "rewards/rejected": -0.2995649576187134, "step": 3650 }, { "epoch": 0.9588682211160597, "grad_norm": 0.812852680683136, "learning_rate": 6.365853156631748e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.145325183868408, "logits/rejected": -2.084348201751709, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3242, "nll_loss": 1.0940518379211426, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3660 }, { "epoch": 0.9614880796436992, "grad_norm": 0.8383850455284119, "learning_rate": 6.356720930352466e-06, "log_odds_chosen": 0.38319218158721924, "log_odds_ratio": -0.6252017617225647, "logits/chosen": -2.1991477012634277, "logits/rejected": -2.059462308883667, "logps/chosen": -0.7634745240211487, "logps/rejected": -0.9688547253608704, "loss": 1.2458, "nll_loss": 0.9911584854125977, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.22904236614704132, "rewards/margins": 0.0616140142083168, "rewards/rejected": -0.2906563878059387, "step": 3670 }, { "epoch": 0.9641079381713388, "grad_norm": 0.7521118521690369, "learning_rate": 6.34756984961438e-06, "log_odds_chosen": 0.38214513659477234, "log_odds_ratio": -0.6032508611679077, "logits/chosen": -2.238010883331299, "logits/rejected": -2.1206400394439697, "logps/chosen": -0.7213320136070251, "logps/rejected": -0.9409394264221191, "loss": 1.2898, "nll_loss": 1.0594193935394287, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21639962494373322, "rewards/margins": 0.06588224321603775, "rewards/rejected": -0.2822818160057068, "step": 3680 }, { "epoch": 0.9667277966989782, "grad_norm": 0.9524658918380737, "learning_rate": 6.338399987628822e-06, "log_odds_chosen": 0.4660716950893402, "log_odds_ratio": -0.6000378727912903, "logits/chosen": -2.3594729900360107, "logits/rejected": -2.22713041305542, "logps/chosen": -0.724315345287323, "logps/rejected": -1.0040605068206787, "loss": 1.1691, "nll_loss": 0.9722328186035156, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2172946184873581, "rewards/margins": 0.08392354846000671, "rewards/rejected": -0.301218181848526, "step": 3690 }, { "epoch": 0.9693476552266178, "grad_norm": 0.9523927569389343, "learning_rate": 6.329211417757377e-06, "log_odds_chosen": 0.3134787678718567, "log_odds_ratio": -0.6222403645515442, "logits/chosen": -2.3230435848236084, "logits/rejected": -2.2177884578704834, "logps/chosen": -0.7493599653244019, "logps/rejected": -0.9036420583724976, "loss": 1.1885, "nll_loss": 0.9806810617446899, "rewards/accuracies": 0.625, "rewards/chosen": -0.224808007478714, "rewards/margins": 0.04628462344408035, "rewards/rejected": -0.27109265327453613, "step": 3700 }, { "epoch": 0.9719675137542573, "grad_norm": 0.6331028938293457, "learning_rate": 6.320004213511299e-06, "log_odds_chosen": 0.3374965190887451, "log_odds_ratio": -0.6669653058052063, "logits/chosen": -2.262331247329712, "logits/rejected": -2.2593014240264893, "logps/chosen": -0.7897607088088989, "logps/rejected": -1.0056719779968262, "loss": 1.2359, "nll_loss": 1.0917088985443115, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.23692819476127625, "rewards/margins": 0.06477342545986176, "rewards/rejected": -0.3017016649246216, "step": 3710 }, { "epoch": 0.9745873722818967, "grad_norm": 0.7359500527381897, "learning_rate": 6.310778448550924e-06, "log_odds_chosen": 0.2746438980102539, "log_odds_ratio": -0.6137323379516602, "logits/chosen": -2.289092540740967, "logits/rejected": -2.0905239582061768, "logps/chosen": -0.803347110748291, "logps/rejected": -0.9682496786117554, "loss": 1.2312, "nll_loss": 1.100529670715332, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.24100413918495178, "rewards/margins": 0.049470800906419754, "rewards/rejected": -0.29047495126724243, "step": 3720 }, { "epoch": 0.9772072308095363, "grad_norm": 0.5508049726486206, "learning_rate": 6.301534196685077e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.202450752258301, "logits/rejected": -2.172144651412964, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.332, "nll_loss": 1.0336596965789795, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3730 }, { "epoch": 0.9798270893371758, "grad_norm": NaN, "learning_rate": 6.2922715318704865e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.205749750137329, "logits/rejected": -2.1636455059051514, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2529, "nll_loss": 1.0434378385543823, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3740 }, { "epoch": 0.9824469478648153, "grad_norm": 0.6512846350669861, "learning_rate": 6.2829905282111875e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1463780403137207, "logits/rejected": -2.123952627182007, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3045, "nll_loss": 1.016488790512085, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3750 }, { "epoch": 0.9850668063924548, "grad_norm": 1.1669460535049438, "learning_rate": 6.273691259957931e-06, "log_odds_chosen": 0.3959721624851227, "log_odds_ratio": -0.6428759694099426, "logits/chosen": -2.2631819248199463, "logits/rejected": -2.231206178665161, "logps/chosen": -0.7727817893028259, "logps/rejected": -1.0451576709747314, "loss": 1.2636, "nll_loss": 1.0664043426513672, "rewards/accuracies": 0.625, "rewards/chosen": -0.2318345606327057, "rewards/margins": 0.08171277493238449, "rewards/rejected": -0.3135473132133484, "step": 3760 }, { "epoch": 0.9876866649200943, "grad_norm": 0.7675594687461853, "learning_rate": 6.264373801507592e-06, "log_odds_chosen": 0.12575078010559082, "log_odds_ratio": -0.7430700659751892, "logits/chosen": -2.3263494968414307, "logits/rejected": -2.306441068649292, "logps/chosen": -0.823257327079773, "logps/rejected": -0.9126752018928528, "loss": 1.2424, "nll_loss": 1.1051504611968994, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.24697721004486084, "rewards/margins": 0.026825379580259323, "rewards/rejected": -0.27380257844924927, "step": 3770 }, { "epoch": 0.9903065234477338, "grad_norm": 0.7688103914260864, "learning_rate": 6.25503822740257e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2846951484680176, "logits/rejected": -2.22051739692688, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2889, "nll_loss": 1.048423409461975, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3780 }, { "epoch": 0.9929263819753733, "grad_norm": 0.5170724987983704, "learning_rate": 6.245684612330199e-06, "log_odds_chosen": 0.27964678406715393, "log_odds_ratio": -0.6406950354576111, "logits/chosen": -2.269103527069092, "logits/rejected": -2.1621804237365723, "logps/chosen": -0.7854992151260376, "logps/rejected": -0.9603395462036133, "loss": 1.2436, "nll_loss": 0.9992278814315796, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23564977943897247, "rewards/margins": 0.05245209485292435, "rewards/rejected": -0.2881019115447998, "step": 3790 }, { "epoch": 0.9955462405030129, "grad_norm": 0.7574141621589661, "learning_rate": 6.2363130311221416e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2087361812591553, "logits/rejected": -2.2159552574157715, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2629, "nll_loss": 1.0843480825424194, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3800 }, { "epoch": 0.9981660990306523, "grad_norm": 0.9244164824485779, "learning_rate": 6.226923558753798e-06, "log_odds_chosen": 0.37149226665496826, "log_odds_ratio": -0.6405940055847168, "logits/chosen": -2.2013072967529297, "logits/rejected": -2.0514369010925293, "logps/chosen": -0.865227222442627, "logps/rejected": -1.133719563484192, "loss": 1.2286, "nll_loss": 1.0517619848251343, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.25956815481185913, "rewards/margins": 0.08054772764444351, "rewards/rejected": -0.3401159346103668, "step": 3810 }, { "epoch": 1.0007859575582918, "grad_norm": 1.1609869003295898, "learning_rate": 6.217516270343699e-06, "log_odds_chosen": 0.49234017729759216, "log_odds_ratio": -0.5747582316398621, "logits/chosen": -2.254106044769287, "logits/rejected": -2.1651575565338135, "logps/chosen": -0.7415367960929871, "logps/rejected": -1.048278570175171, "loss": 1.2285, "nll_loss": 1.0747876167297363, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2224610596895218, "rewards/margins": 0.09202253818511963, "rewards/rejected": -0.3144835829734802, "step": 3820 }, { "epoch": 1.0034058160859314, "grad_norm": 0.8137723207473755, "learning_rate": 6.2080912411529155e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.299896240234375, "logits/rejected": -2.2599215507507324, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1638, "nll_loss": 0.9832426905632019, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3830 }, { "epoch": 1.006025674613571, "grad_norm": 0.6269726753234863, "learning_rate": 6.1986485465844425e-06, "log_odds_chosen": 0.23022398352622986, "log_odds_ratio": -0.6390150785446167, "logits/chosen": -2.2656867504119873, "logits/rejected": -2.2446932792663574, "logps/chosen": -0.755323052406311, "logps/rejected": -0.8821438550949097, "loss": 1.1899, "nll_loss": 0.9932019114494324, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2265969216823578, "rewards/margins": 0.03804624825716019, "rewards/rejected": -0.26464319229125977, "step": 3840 }, { "epoch": 1.0086455331412103, "grad_norm": 0.5766107439994812, "learning_rate": 6.189188262182609e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.209562301635742, "logits/rejected": -2.162344217300415, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1721, "nll_loss": 1.0388972759246826, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3850 }, { "epoch": 1.0112653916688499, "grad_norm": 1.1309465169906616, "learning_rate": 6.179710463632465e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.211894989013672, "logits/rejected": -2.2190732955932617, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3074, "nll_loss": 1.1205438375473022, "rewards/accuracies": 0.5, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3860 }, { "epoch": 1.0138852501964895, "grad_norm": 0.7367751598358154, "learning_rate": 6.170215226759177e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.304166078567505, "logits/rejected": -2.2721710205078125, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2192, "nll_loss": 1.0439809560775757, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3870 }, { "epoch": 1.0165051087241288, "grad_norm": 0.6073639988899231, "learning_rate": 6.1607026275274276e-06, "log_odds_chosen": 0.26344627141952515, "log_odds_ratio": -0.6818192601203918, "logits/chosen": -2.242924213409424, "logits/rejected": -2.124089002609253, "logps/chosen": -0.8242942690849304, "logps/rejected": -1.0117872953414917, "loss": 1.2402, "nll_loss": 1.0579249858856201, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2472883015871048, "rewards/margins": 0.05624788999557495, "rewards/rejected": -0.30353614687919617, "step": 3880 }, { "epoch": 1.0191249672517684, "grad_norm": 0.9972704648971558, "learning_rate": 6.1511727420408025e-06, "log_odds_chosen": 0.2906353771686554, "log_odds_ratio": -0.6684931516647339, "logits/chosen": -2.3371388912200928, "logits/rejected": -2.3116233348846436, "logps/chosen": -0.7626818418502808, "logps/rejected": -0.9552086591720581, "loss": 1.1489, "nll_loss": 0.9105151891708374, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2288045883178711, "rewards/margins": 0.05775803327560425, "rewards/rejected": -0.28656262159347534, "step": 3890 }, { "epoch": 1.021744825779408, "grad_norm": 0.6762953996658325, "learning_rate": 6.1416256465411785e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2466282844543457, "logits/rejected": -2.1685664653778076, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1858, "nll_loss": 0.9925233125686646, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3900 }, { "epoch": 1.0243646843070475, "grad_norm": 1.751596450805664, "learning_rate": 6.132061417408121e-06, "log_odds_chosen": 0.40927061438560486, "log_odds_ratio": -0.6206954717636108, "logits/chosen": -2.0897347927093506, "logits/rejected": -2.1209442615509033, "logps/chosen": -0.7750033140182495, "logps/rejected": -1.023240089416504, "loss": 1.2239, "nll_loss": 1.0789676904678345, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23250100016593933, "rewards/margins": 0.07447101920843124, "rewards/rejected": -0.30697202682495117, "step": 3910 }, { "epoch": 1.0269845428346869, "grad_norm": 0.7024646997451782, "learning_rate": 6.12248013115827e-06, "log_odds_chosen": 0.20329897105693817, "log_odds_ratio": -0.7032625079154968, "logits/chosen": -2.185547351837158, "logits/rejected": -2.1096887588500977, "logps/chosen": -0.8211889266967773, "logps/rejected": -0.9668930172920227, "loss": 1.1921, "nll_loss": 0.9816325902938843, "rewards/accuracies": 0.5625, "rewards/chosen": -0.24635669589042664, "rewards/margins": 0.04371120035648346, "rewards/rejected": -0.2900679111480713, "step": 3920 }, { "epoch": 1.0296044013623264, "grad_norm": 0.4858560264110565, "learning_rate": 6.112881864444725e-06, "log_odds_chosen": 0.37001222372055054, "log_odds_ratio": -0.6327062249183655, "logits/chosen": -2.0800650119781494, "logits/rejected": -1.9994436502456665, "logps/chosen": -0.8098918795585632, "logps/rejected": -1.0603526830673218, "loss": 1.2073, "nll_loss": 1.0339288711547852, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2429676353931427, "rewards/margins": 0.07513822615146637, "rewards/rejected": -0.3181058168411255, "step": 3930 }, { "epoch": 1.032224259889966, "grad_norm": 0.8571949601173401, "learning_rate": 6.103266694056436e-06, "log_odds_chosen": 0.8310745358467102, "log_odds_ratio": -0.5013384222984314, "logits/chosen": -2.0710763931274414, "logits/rejected": -2.0228476524353027, "logps/chosen": -0.6643949151039124, "logps/rejected": -1.1636669635772705, "loss": 1.1797, "nll_loss": 0.9972573518753052, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.19931845366954803, "rewards/margins": 0.14978156983852386, "rewards/rejected": -0.3491000533103943, "step": 3940 }, { "epoch": 1.0348441184176054, "grad_norm": 0.7730270028114319, "learning_rate": 6.093634696917587e-06, "log_odds_chosen": 0.6772661209106445, "log_odds_ratio": -0.5415030717849731, "logits/chosen": -2.2686429023742676, "logits/rejected": -2.110778570175171, "logps/chosen": -0.704410195350647, "logps/rejected": -1.128185510635376, "loss": 1.2395, "nll_loss": 1.0496928691864014, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.21132305264472961, "rewards/margins": 0.1271326243877411, "rewards/rejected": -0.3384556770324707, "step": 3950 }, { "epoch": 1.037463976945245, "grad_norm": 2.381632089614868, "learning_rate": 6.08398595008698e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1682333946228027, "logits/rejected": -2.0311310291290283, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2433, "nll_loss": 1.0430829524993896, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3960 }, { "epoch": 1.0400838354728845, "grad_norm": 1.080214500427246, "learning_rate": 6.07432053075742e-06, "log_odds_chosen": 0.5614861249923706, "log_odds_ratio": -0.5446239709854126, "logits/chosen": -2.082615852355957, "logits/rejected": -2.0798325538635254, "logps/chosen": -0.778605043888092, "logps/rejected": -1.1638044118881226, "loss": 1.1853, "nll_loss": 1.0095101594924927, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2335815131664276, "rewards/margins": 0.11555981636047363, "rewards/rejected": -0.34914129972457886, "step": 3970 }, { "epoch": 1.0427036940005239, "grad_norm": NaN, "learning_rate": 6.064638516255098e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.146432876586914, "logits/rejected": -2.0870683193206787, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2635, "nll_loss": 1.023985505104065, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 3980 }, { "epoch": 1.0453235525281634, "grad_norm": 0.524878978729248, "learning_rate": 6.05493998403897e-06, "log_odds_chosen": 0.5698356628417969, "log_odds_ratio": -0.5837051272392273, "logits/chosen": -2.2179770469665527, "logits/rejected": -2.0783069133758545, "logps/chosen": -0.7515365481376648, "logps/rejected": -1.1224766969680786, "loss": 1.2047, "nll_loss": 1.0146328210830688, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2254609763622284, "rewards/margins": 0.11128203570842743, "rewards/rejected": -0.336743026971817, "step": 3990 }, { "epoch": 1.047943411055803, "grad_norm": 0.5263944268226624, "learning_rate": 6.045225011700139e-06, "log_odds_chosen": 0.29967954754829407, "log_odds_ratio": -0.6702970266342163, "logits/chosen": -2.3318514823913574, "logits/rejected": -2.1519522666931152, "logps/chosen": -0.7933050394058228, "logps/rejected": -0.9470657110214233, "loss": 1.2486, "nll_loss": 1.0257657766342163, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.23799148201942444, "rewards/margins": 0.04612823575735092, "rewards/rejected": -0.28411975502967834, "step": 4000 }, { "epoch": 1.0505632695834426, "grad_norm": 0.8442757725715637, "learning_rate": 6.035493676961233e-06, "log_odds_chosen": 0.5000320076942444, "log_odds_ratio": -0.5822146534919739, "logits/chosen": -2.2249915599823, "logits/rejected": -2.077233076095581, "logps/chosen": -0.7924088835716248, "logps/rejected": -1.1025866270065308, "loss": 1.2303, "nll_loss": 1.0445876121520996, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23772267997264862, "rewards/margins": 0.09305335581302643, "rewards/rejected": -0.33077603578567505, "step": 4010 }, { "epoch": 1.053183128111082, "grad_norm": 1.4713609218597412, "learning_rate": 6.025746057675785e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2404959201812744, "logits/rejected": -2.148362636566162, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2785, "nll_loss": 1.1696465015411377, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4020 }, { "epoch": 1.0558029866387215, "grad_norm": 0.6620559692382812, "learning_rate": 6.015982231827611e-06, "log_odds_chosen": 0.5713348984718323, "log_odds_ratio": -0.5462216138839722, "logits/chosen": -2.300175428390503, "logits/rejected": -2.1027109622955322, "logps/chosen": -0.7111223936080933, "logps/rejected": -1.0485832691192627, "loss": 1.1712, "nll_loss": 0.9871209859848022, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2133367508649826, "rewards/margins": 0.10123821347951889, "rewards/rejected": -0.3145749270915985, "step": 4030 }, { "epoch": 1.058422845166361, "grad_norm": 1.2737563848495483, "learning_rate": 6.006202277530178e-06, "log_odds_chosen": 0.5905517339706421, "log_odds_ratio": -0.5607999563217163, "logits/chosen": -2.2002670764923096, "logits/rejected": -2.2103161811828613, "logps/chosen": -0.6973376870155334, "logps/rejected": -1.0304601192474365, "loss": 1.2087, "nll_loss": 0.9917594194412231, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20920129120349884, "rewards/margins": 0.09993673115968704, "rewards/rejected": -0.3091380298137665, "step": 4040 }, { "epoch": 1.0610427036940004, "grad_norm": NaN, "learning_rate": 5.996406273025992e-06, "log_odds_chosen": 0.5038422346115112, "log_odds_ratio": -0.6051856875419617, "logits/chosen": -2.128378391265869, "logits/rejected": -2.068403720855713, "logps/chosen": -0.7250930070877075, "logps/rejected": -1.042755365371704, "loss": 1.2657, "nll_loss": 1.0501794815063477, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21752789616584778, "rewards/margins": 0.09529872238636017, "rewards/rejected": -0.31282660365104675, "step": 4050 }, { "epoch": 1.06366256222164, "grad_norm": 0.6503753066062927, "learning_rate": 5.986594296685962e-06, "log_odds_chosen": 0.3813335597515106, "log_odds_ratio": -0.6175588965415955, "logits/chosen": -2.194093942642212, "logits/rejected": -2.0389435291290283, "logps/chosen": -0.756267249584198, "logps/rejected": -1.007966160774231, "loss": 1.1661, "nll_loss": 0.9986007809638977, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22688022255897522, "rewards/margins": 0.07550962269306183, "rewards/rejected": -0.30238988995552063, "step": 4060 }, { "epoch": 1.0662824207492796, "grad_norm": 0.5784167647361755, "learning_rate": 5.976766427008777e-06, "log_odds_chosen": 0.40300631523132324, "log_odds_ratio": -0.5883700251579285, "logits/chosen": -2.2458789348602295, "logits/rejected": -2.0684919357299805, "logps/chosen": -0.830001950263977, "logps/rejected": -1.0631030797958374, "loss": 1.2212, "nll_loss": 0.9799604415893555, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.24900059401988983, "rewards/margins": 0.06993035972118378, "rewards/rejected": -0.3189309239387512, "step": 4070 }, { "epoch": 1.068902279276919, "grad_norm": 0.8229634165763855, "learning_rate": 5.966922742620278e-06, "log_odds_chosen": 0.44295749068260193, "log_odds_ratio": -0.6001716256141663, "logits/chosen": -2.168051242828369, "logits/rejected": -2.0387887954711914, "logps/chosen": -0.7586795687675476, "logps/rejected": -1.0304973125457764, "loss": 1.2168, "nll_loss": 1.0081427097320557, "rewards/accuracies": 0.625, "rewards/chosen": -0.22760391235351562, "rewards/margins": 0.08154533803462982, "rewards/rejected": -0.30914920568466187, "step": 4080 }, { "epoch": 1.0715221378045585, "grad_norm": 0.7874696254730225, "learning_rate": 5.9570633222728274e-06, "log_odds_chosen": 0.4077785611152649, "log_odds_ratio": -0.6445068120956421, "logits/chosen": -2.116905927658081, "logits/rejected": -2.076551914215088, "logps/chosen": -0.783328652381897, "logps/rejected": -0.9880490303039551, "loss": 1.2619, "nll_loss": 1.0689337253570557, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.23499858379364014, "rewards/margins": 0.0614161416888237, "rewards/rejected": -0.29641473293304443, "step": 4090 }, { "epoch": 1.074141996332198, "grad_norm": 0.7716543078422546, "learning_rate": 5.94718824484468e-06, "log_odds_chosen": 0.10392202436923981, "log_odds_ratio": -0.76170814037323, "logits/chosen": -2.022914409637451, "logits/rejected": -2.0615439414978027, "logps/chosen": -0.8132250905036926, "logps/rejected": -0.8591440916061401, "loss": 1.2762, "nll_loss": 1.1307119131088257, "rewards/accuracies": 0.5, "rewards/chosen": -0.24396753311157227, "rewards/margins": 0.013775733299553394, "rewards/rejected": -0.2577432692050934, "step": 4100 }, { "epoch": 1.0767618548598377, "grad_norm": 1.149641513824463, "learning_rate": 5.937297589339354e-06, "log_odds_chosen": 0.5257401466369629, "log_odds_ratio": -0.5456224679946899, "logits/chosen": -2.181577205657959, "logits/rejected": -2.0659561157226562, "logps/chosen": -0.6877787113189697, "logps/rejected": -1.0051343441009521, "loss": 1.199, "nll_loss": 0.9848003387451172, "rewards/accuracies": 0.75, "rewards/chosen": -0.20633363723754883, "rewards/margins": 0.09520672261714935, "rewards/rejected": -0.3015403151512146, "step": 4110 }, { "epoch": 1.079381713387477, "grad_norm": 0.5483066439628601, "learning_rate": 5.927391434884993e-06, "log_odds_chosen": 0.5342003703117371, "log_odds_ratio": -0.5766080021858215, "logits/chosen": -2.14675235748291, "logits/rejected": -2.039607524871826, "logps/chosen": -0.7202420830726624, "logps/rejected": -1.0381380319595337, "loss": 1.264, "nll_loss": 1.0587743520736694, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21607264876365662, "rewards/margins": 0.09536879509687424, "rewards/rejected": -0.31144142150878906, "step": 4120 }, { "epoch": 1.0820015719151166, "grad_norm": 0.7426726222038269, "learning_rate": 5.917469860733739e-06, "log_odds_chosen": 0.503982424736023, "log_odds_ratio": -0.5942285060882568, "logits/chosen": -2.219217300415039, "logits/rejected": -2.06247878074646, "logps/chosen": -0.8039132356643677, "logps/rejected": -1.1335679292678833, "loss": 1.1991, "nll_loss": 1.0187681913375854, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.24117398262023926, "rewards/margins": 0.09889640659093857, "rewards/rejected": -0.34007036685943604, "step": 4130 }, { "epoch": 1.0846214304427562, "grad_norm": 1.2329041957855225, "learning_rate": 5.907532946261096e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1475722789764404, "logits/rejected": -2.1263856887817383, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2878, "nll_loss": 1.0421867370605469, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4140 }, { "epoch": 1.0872412889703955, "grad_norm": 0.6671878695487976, "learning_rate": 5.897580770965294e-06, "log_odds_chosen": 0.5044934749603271, "log_odds_ratio": -0.5522691607475281, "logits/chosen": -2.285893678665161, "logits/rejected": -2.1222424507141113, "logps/chosen": -0.7412294149398804, "logps/rejected": -1.0358654260635376, "loss": 1.2001, "nll_loss": 0.9936666488647461, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.22236883640289307, "rewards/margins": 0.0883907824754715, "rewards/rejected": -0.31075963377952576, "step": 4150 }, { "epoch": 1.089861147498035, "grad_norm": 0.6541994214057922, "learning_rate": 5.887613414466658e-06, "log_odds_chosen": 0.17003217339515686, "log_odds_ratio": -0.7056115865707397, "logits/chosen": -2.203944444656372, "logits/rejected": -2.0913166999816895, "logps/chosen": -0.8618839383125305, "logps/rejected": -0.9436081647872925, "loss": 1.2564, "nll_loss": 1.1096385717391968, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.25856518745422363, "rewards/margins": 0.02451728656888008, "rewards/rejected": -0.2830824553966522, "step": 4160 }, { "epoch": 1.0924810060256747, "grad_norm": 0.8766489624977112, "learning_rate": 5.877630956506961e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.134378433227539, "logits/rejected": -1.9621633291244507, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2182, "nll_loss": 0.9833464622497559, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4170 }, { "epoch": 1.0951008645533142, "grad_norm": 1.4179552793502808, "learning_rate": 5.867633476948796e-06, "log_odds_chosen": 0.34401580691337585, "log_odds_ratio": -0.6191283464431763, "logits/chosen": -2.183779716491699, "logits/rejected": -2.124058485031128, "logps/chosen": -0.7591277360916138, "logps/rejected": -0.9776920080184937, "loss": 1.142, "nll_loss": 0.9311462640762329, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22773833572864532, "rewards/margins": 0.06556928902864456, "rewards/rejected": -0.2933076322078705, "step": 4180 }, { "epoch": 1.0977207230809536, "grad_norm": 0.6019245386123657, "learning_rate": 5.857621055774931e-06, "log_odds_chosen": 0.49188870191574097, "log_odds_ratio": -0.6093038320541382, "logits/chosen": -2.1112422943115234, "logits/rejected": -2.0183138847351074, "logps/chosen": -0.7880024313926697, "logps/rejected": -1.1322263479232788, "loss": 1.178, "nll_loss": 0.9552238583564758, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23640072345733643, "rewards/margins": 0.10326719284057617, "rewards/rejected": -0.339667946100235, "step": 4190 }, { "epoch": 1.1003405816085932, "grad_norm": 0.8325327634811401, "learning_rate": 5.8475937730876746e-06, "log_odds_chosen": 0.29659682512283325, "log_odds_ratio": -0.7026544809341431, "logits/chosen": -2.1116385459899902, "logits/rejected": -2.03653883934021, "logps/chosen": -0.8402560353279114, "logps/rejected": -1.0096443891525269, "loss": 1.2816, "nll_loss": 1.0465644598007202, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.25207680463790894, "rewards/margins": 0.050816506147384644, "rewards/rejected": -0.30289334058761597, "step": 4200 }, { "epoch": 1.1029604401362327, "grad_norm": 0.8154863119125366, "learning_rate": 5.8375517091082295e-06, "log_odds_chosen": 0.3984401226043701, "log_odds_ratio": -0.6513702869415283, "logits/chosen": -2.1439931392669678, "logits/rejected": -2.0900416374206543, "logps/chosen": -0.8751657605171204, "logps/rejected": -1.1659042835235596, "loss": 1.3066, "nll_loss": 1.1050077676773071, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2625497281551361, "rewards/margins": 0.08722160011529922, "rewards/rejected": -0.34977132081985474, "step": 4210 }, { "epoch": 1.105580298663872, "grad_norm": 1.8498574495315552, "learning_rate": 5.8274949441760515e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.225738525390625, "logits/rejected": -2.1462182998657227, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2336, "nll_loss": 1.09915292263031, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4220 }, { "epoch": 1.1082001571915117, "grad_norm": 0.6563227772712708, "learning_rate": 5.817423558748212e-06, "log_odds_chosen": 0.509792685508728, "log_odds_ratio": -0.6335816383361816, "logits/chosen": -2.2947661876678467, "logits/rejected": -2.2037901878356934, "logps/chosen": -0.8475666046142578, "logps/rejected": -1.1563843488693237, "loss": 1.2194, "nll_loss": 1.0263891220092773, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2542700171470642, "rewards/margins": 0.09264533221721649, "rewards/rejected": -0.3469153642654419, "step": 4230 }, { "epoch": 1.1108200157191512, "grad_norm": 0.5780437588691711, "learning_rate": 5.807337633398749e-06, "log_odds_chosen": 0.27395445108413696, "log_odds_ratio": -0.6978157162666321, "logits/chosen": -2.158517837524414, "logits/rejected": -2.114022731781006, "logps/chosen": -0.7341379523277283, "logps/rejected": -0.9148346781730652, "loss": 1.1785, "nll_loss": 0.9897924661636353, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22024139761924744, "rewards/margins": 0.054209016263484955, "rewards/rejected": -0.274450421333313, "step": 4240 }, { "epoch": 1.1134398742467906, "grad_norm": 0.9192869663238525, "learning_rate": 5.797237248818024e-06, "log_odds_chosen": 0.384788453578949, "log_odds_ratio": -0.6233862638473511, "logits/chosen": -2.1199538707733154, "logits/rejected": -2.087918281555176, "logps/chosen": -0.8231413960456848, "logps/rejected": -1.0741630792617798, "loss": 1.1487, "nll_loss": 1.045141339302063, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2469424456357956, "rewards/margins": 0.0753064900636673, "rewards/rejected": -0.3222489058971405, "step": 4250 }, { "epoch": 1.1160597327744302, "grad_norm": 0.6544294953346252, "learning_rate": 5.787122485812077e-06, "log_odds_chosen": 0.7356141805648804, "log_odds_ratio": -0.5337044596672058, "logits/chosen": -2.2020184993743896, "logits/rejected": -2.1514484882354736, "logps/chosen": -0.6761825680732727, "logps/rejected": -1.1245512962341309, "loss": 1.1631, "nll_loss": 1.018638253211975, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.20285475254058838, "rewards/margins": 0.13451066613197327, "rewards/rejected": -0.33736541867256165, "step": 4260 }, { "epoch": 1.1186795913020697, "grad_norm": NaN, "learning_rate": 5.7769934253019775e-06, "log_odds_chosen": 0.15335743129253387, "log_odds_ratio": -0.6996569633483887, "logits/chosen": -2.2010855674743652, "logits/rejected": -2.2433183193206787, "logps/chosen": -0.854087233543396, "logps/rejected": -0.9291955828666687, "loss": 1.2728, "nll_loss": 1.0275825262069702, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.25622621178627014, "rewards/margins": 0.022532489150762558, "rewards/rejected": -0.278758704662323, "step": 4270 }, { "epoch": 1.121299449829709, "grad_norm": 0.6840103268623352, "learning_rate": 5.766850148323184e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.166538953781128, "logits/rejected": -2.0720064640045166, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2659, "nll_loss": 1.0969786643981934, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4280 }, { "epoch": 1.1239193083573487, "grad_norm": 1.0011624097824097, "learning_rate": 5.756692736024884e-06, "log_odds_chosen": 0.4415491223335266, "log_odds_ratio": -0.601821780204773, "logits/chosen": -2.201594352722168, "logits/rejected": -2.1413183212280273, "logps/chosen": -0.6856690645217896, "logps/rejected": -0.9649276733398438, "loss": 1.1491, "nll_loss": 0.9242115020751953, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20570075511932373, "rewards/margins": 0.08377757668495178, "rewards/rejected": -0.2894783616065979, "step": 4290 }, { "epoch": 1.1265391668849882, "grad_norm": 0.8497743606567383, "learning_rate": 5.74652126966936e-06, "log_odds_chosen": 0.5068636536598206, "log_odds_ratio": -0.5752713084220886, "logits/chosen": -2.2251133918762207, "logits/rejected": -2.038505792617798, "logps/chosen": -0.7721516489982605, "logps/rejected": -1.0666340589523315, "loss": 1.1214, "nll_loss": 0.9732298851013184, "rewards/accuracies": 0.75, "rewards/chosen": -0.23164550960063934, "rewards/margins": 0.08834470063447952, "rewards/rejected": -0.31999021768569946, "step": 4300 }, { "epoch": 1.1291590254126278, "grad_norm": 1.2796540260314941, "learning_rate": 5.736335830631323e-06, "log_odds_chosen": 0.595421552658081, "log_odds_ratio": -0.5772111415863037, "logits/chosen": -2.159956693649292, "logits/rejected": -2.0973076820373535, "logps/chosen": -0.7004303932189941, "logps/rejected": -1.0651299953460693, "loss": 1.1808, "nll_loss": 0.9537744522094727, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.21012911200523376, "rewards/margins": 0.10940992832183838, "rewards/rejected": -0.31953901052474976, "step": 4310 }, { "epoch": 1.1317788839402672, "grad_norm": NaN, "learning_rate": 5.726136500397275e-06, "log_odds_chosen": 0.4614637494087219, "log_odds_ratio": -0.6108739376068115, "logits/chosen": -2.1499109268188477, "logits/rejected": -2.1379308700561523, "logps/chosen": -0.6630758047103882, "logps/rejected": -0.9627348184585571, "loss": 1.2078, "nll_loss": 1.0147100687026978, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1989227533340454, "rewards/margins": 0.08989767730236053, "rewards/rejected": -0.28882044553756714, "step": 4320 }, { "epoch": 1.1343987424679067, "grad_norm": 0.5608808994293213, "learning_rate": 5.71592336056485e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1599581241607666, "logits/rejected": -2.0783767700195312, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2543, "nll_loss": 1.0856053829193115, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4330 }, { "epoch": 1.1370186009955463, "grad_norm": 0.9036433100700378, "learning_rate": 5.705696492842162e-06, "log_odds_chosen": 0.4976620674133301, "log_odds_ratio": -0.6041185855865479, "logits/chosen": -2.1566379070281982, "logits/rejected": -2.12960147857666, "logps/chosen": -0.7742776274681091, "logps/rejected": -1.1046762466430664, "loss": 1.2415, "nll_loss": 1.085137128829956, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2322833091020584, "rewards/margins": 0.09911959618330002, "rewards/rejected": -0.3314029276371002, "step": 4340 }, { "epoch": 1.1396384595231857, "grad_norm": 0.5706554651260376, "learning_rate": 5.695455979047154e-06, "log_odds_chosen": 0.5171149969100952, "log_odds_ratio": -0.5648395419120789, "logits/chosen": -2.15861177444458, "logits/rejected": -2.0888702869415283, "logps/chosen": -0.7626606225967407, "logps/rejected": -1.0750781297683716, "loss": 1.1996, "nll_loss": 1.0683215856552124, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22879819571971893, "rewards/margins": 0.09372520446777344, "rewards/rejected": -0.32252341508865356, "step": 4350 }, { "epoch": 1.1422583180508252, "grad_norm": 2.083329916000366, "learning_rate": 5.685201901106939e-06, "log_odds_chosen": 0.6199439167976379, "log_odds_ratio": -0.5523152351379395, "logits/chosen": -2.146747350692749, "logits/rejected": -1.9911495447158813, "logps/chosen": -0.7076459527015686, "logps/rejected": -1.0835096836090088, "loss": 1.1669, "nll_loss": 0.9675666689872742, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21229378879070282, "rewards/margins": 0.11275909096002579, "rewards/rejected": -0.3250528872013092, "step": 4360 }, { "epoch": 1.1448781765784648, "grad_norm": 0.7301594018936157, "learning_rate": 5.674934341057151e-06, "log_odds_chosen": 0.6096700429916382, "log_odds_ratio": -0.5456306338310242, "logits/chosen": -2.2767937183380127, "logits/rejected": -2.1964430809020996, "logps/chosen": -0.7679567337036133, "logps/rejected": -1.1160123348236084, "loss": 1.1945, "nll_loss": 1.0861451625823975, "rewards/accuracies": 0.625, "rewards/chosen": -0.23038701713085175, "rewards/margins": 0.10441669076681137, "rewards/rejected": -0.3348037302494049, "step": 4370 }, { "epoch": 1.1474980351061044, "grad_norm": 1.0318119525909424, "learning_rate": 5.66465338104128e-06, "log_odds_chosen": 0.5163030028343201, "log_odds_ratio": -0.6089088320732117, "logits/chosen": -2.221351385116577, "logits/rejected": -2.190001964569092, "logps/chosen": -0.7547196745872498, "logps/rejected": -1.0689661502838135, "loss": 1.2015, "nll_loss": 1.0428102016448975, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22641590237617493, "rewards/margins": 0.09427394717931747, "rewards/rejected": -0.320689857006073, "step": 4380 }, { "epoch": 1.1501178936337437, "grad_norm": 0.7385018467903137, "learning_rate": 5.654359103310022e-06, "log_odds_chosen": 0.6267518997192383, "log_odds_ratio": -0.555157482624054, "logits/chosen": -2.143590211868286, "logits/rejected": -2.069866895675659, "logps/chosen": -0.6334356665611267, "logps/rejected": -1.0031400918960571, "loss": 1.1755, "nll_loss": 0.9535596966743469, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19003073871135712, "rewards/margins": 0.1109112948179245, "rewards/rejected": -0.30094200372695923, "step": 4390 }, { "epoch": 1.1527377521613833, "grad_norm": 1.0142706632614136, "learning_rate": 5.6440515902206206e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.202237367630005, "logits/rejected": -2.1017959117889404, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2252, "nll_loss": 0.9758127927780151, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4400 }, { "epoch": 1.1553576106890229, "grad_norm": 0.5689259767532349, "learning_rate": 5.633730924236204e-06, "log_odds_chosen": 0.7088979482650757, "log_odds_ratio": -0.5544286966323853, "logits/chosen": -2.194206714630127, "logits/rejected": -2.127917528152466, "logps/chosen": -0.6782454252243042, "logps/rejected": -1.0992326736450195, "loss": 1.1865, "nll_loss": 1.0016109943389893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20347361266613007, "rewards/margins": 0.12629619240760803, "rewards/rejected": -0.3297698497772217, "step": 4410 }, { "epoch": 1.1579774692166622, "grad_norm": 0.6148982644081116, "learning_rate": 5.623397187925127e-06, "log_odds_chosen": 0.5620762705802917, "log_odds_ratio": -0.5760869979858398, "logits/chosen": -2.191058874130249, "logits/rejected": -2.074683666229248, "logps/chosen": -0.7417292594909668, "logps/rejected": -1.0597374439239502, "loss": 1.1232, "nll_loss": 0.9176362752914429, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22251877188682556, "rewards/margins": 0.09540250152349472, "rewards/rejected": -0.3179212808609009, "step": 4420 }, { "epoch": 1.1605973277443018, "grad_norm": 0.5461212992668152, "learning_rate": 5.613050463960312e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.0644755363464355, "logits/rejected": -2.057598114013672, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1839, "nll_loss": 1.0013986825942993, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4430 }, { "epoch": 1.1632171862719414, "grad_norm": 1.233856201171875, "learning_rate": 5.602690835118586e-06, "log_odds_chosen": 0.5128128528594971, "log_odds_ratio": -0.5618588924407959, "logits/chosen": -2.1942710876464844, "logits/rejected": -2.1401619911193848, "logps/chosen": -0.6601070761680603, "logps/rejected": -0.9148343205451965, "loss": 1.1991, "nll_loss": 1.0277049541473389, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19803211092948914, "rewards/margins": 0.07641817629337311, "rewards/rejected": -0.27445030212402344, "step": 4440 }, { "epoch": 1.1658370447995807, "grad_norm": 0.7636669874191284, "learning_rate": 5.592318384280017e-06, "log_odds_chosen": 0.2992534339427948, "log_odds_ratio": -0.656128466129303, "logits/chosen": -2.273115634918213, "logits/rejected": -2.27303409576416, "logps/chosen": -0.7537247538566589, "logps/rejected": -0.8874446749687195, "loss": 1.1891, "nll_loss": 1.008082389831543, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22611741721630096, "rewards/margins": 0.040115971118211746, "rewards/rejected": -0.2662333846092224, "step": 4450 }, { "epoch": 1.1684569033272203, "grad_norm": 0.750213086605072, "learning_rate": 5.581933194427258e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3001513481140137, "logits/rejected": -2.1162426471710205, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1811, "nll_loss": 0.9987217783927917, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4460 }, { "epoch": 1.1710767618548599, "grad_norm": 0.8625192046165466, "learning_rate": 5.571535348644871e-06, "log_odds_chosen": 0.6081840395927429, "log_odds_ratio": -0.5844097137451172, "logits/chosen": -2.2348897457122803, "logits/rejected": -2.1039540767669678, "logps/chosen": -0.7398802638053894, "logps/rejected": -1.1396759748458862, "loss": 1.1676, "nll_loss": 0.9621046781539917, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22196409106254578, "rewards/margins": 0.11993874609470367, "rewards/rejected": -0.34190285205841064, "step": 4470 }, { "epoch": 1.1736966203824992, "grad_norm": 0.8553658127784729, "learning_rate": 5.561124930118675e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.264207363128662, "logits/rejected": -2.2318942546844482, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2371, "nll_loss": 1.0146753787994385, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4480 }, { "epoch": 1.1763164789101388, "grad_norm": 0.6145127415657043, "learning_rate": 5.5507020221350696e-06, "log_odds_chosen": 0.45706287026405334, "log_odds_ratio": -0.6214126348495483, "logits/chosen": -2.2555148601531982, "logits/rejected": -2.076838493347168, "logps/chosen": -0.7861701846122742, "logps/rejected": -1.0875070095062256, "loss": 1.2297, "nll_loss": 0.9897290468215942, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23585107922554016, "rewards/margins": 0.0904010534286499, "rewards/rejected": -0.32625213265419006, "step": 4490 }, { "epoch": 1.1789363374377784, "grad_norm": 0.8549245595932007, "learning_rate": 5.54026670808038e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.198408842086792, "logits/rejected": -2.0811421871185303, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1525, "nll_loss": 0.961644172668457, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4500 }, { "epoch": 1.181556195965418, "grad_norm": 0.9001950621604919, "learning_rate": 5.529819071440176e-06, "log_odds_chosen": 0.49879759550094604, "log_odds_ratio": -0.6109659075737, "logits/chosen": -2.2001471519470215, "logits/rejected": -2.150583267211914, "logps/chosen": -0.7624453902244568, "logps/rejected": -1.1123197078704834, "loss": 1.1445, "nll_loss": 0.940385639667511, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.228733628988266, "rewards/margins": 0.1049622893333435, "rewards/rejected": -0.3336959481239319, "step": 4510 }, { "epoch": 1.1841760544930573, "grad_norm": 5.100362300872803, "learning_rate": 5.519359195798619e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2807350158691406, "logits/rejected": -2.119257926940918, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3187, "nll_loss": 1.0498327016830444, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4520 }, { "epoch": 1.1867959130206969, "grad_norm": 1.0939064025878906, "learning_rate": 5.50888716483778e-06, "log_odds_chosen": 0.5804306864738464, "log_odds_ratio": -0.5650111436843872, "logits/chosen": -2.2719502449035645, "logits/rejected": -2.20320463180542, "logps/chosen": -0.723966121673584, "logps/rejected": -1.07760751247406, "loss": 1.1694, "nll_loss": 0.9599002599716187, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21718986332416534, "rewards/margins": 0.1060924082994461, "rewards/rejected": -0.32328224182128906, "step": 4530 }, { "epoch": 1.1894157715483364, "grad_norm": 0.8288666009902954, "learning_rate": 5.49840306233698e-06, "log_odds_chosen": 0.7947500944137573, "log_odds_ratio": -0.5464813113212585, "logits/chosen": -2.2509570121765137, "logits/rejected": -2.107905626296997, "logps/chosen": -0.817412257194519, "logps/rejected": -1.3070552349090576, "loss": 1.134, "nll_loss": 1.0326296091079712, "rewards/accuracies": 0.6875, "rewards/chosen": -0.24522367119789124, "rewards/margins": 0.14689292013645172, "rewards/rejected": -0.39211660623550415, "step": 4540 }, { "epoch": 1.192035630075976, "grad_norm": 0.8320391774177551, "learning_rate": 5.487906972172115e-06, "log_odds_chosen": 0.6579881906509399, "log_odds_ratio": -0.5683633089065552, "logits/chosen": -2.1729767322540283, "logits/rejected": -2.039681911468506, "logps/chosen": -0.753968358039856, "logps/rejected": -1.089351773262024, "loss": 1.2416, "nll_loss": 1.0231292247772217, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22619052231311798, "rewards/margins": 0.1006150096654892, "rewards/rejected": -0.3268055319786072, "step": 4550 }, { "epoch": 1.1946554886036154, "grad_norm": 6.0025954246521, "learning_rate": 5.477398978314985e-06, "log_odds_chosen": 0.35641974210739136, "log_odds_ratio": -0.6696723699569702, "logits/chosen": -2.2432117462158203, "logits/rejected": -2.237640142440796, "logps/chosen": -0.8173462152481079, "logps/rejected": -1.034088373184204, "loss": 1.2956, "nll_loss": 1.1147581338882446, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.2452038824558258, "rewards/margins": 0.06502262502908707, "rewards/rejected": -0.3102265000343323, "step": 4560 }, { "epoch": 1.197275347131255, "grad_norm": 0.8572553396224976, "learning_rate": 5.466879164832623e-06, "log_odds_chosen": 0.5371285676956177, "log_odds_ratio": -0.6154212951660156, "logits/chosen": -2.221435070037842, "logits/rejected": -2.1366376876831055, "logps/chosen": -0.7789465188980103, "logps/rejected": -1.0916316509246826, "loss": 1.2357, "nll_loss": 1.1134682893753052, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2336839735507965, "rewards/margins": 0.09380555897951126, "rewards/rejected": -0.3274895250797272, "step": 4570 }, { "epoch": 1.1998952056588945, "grad_norm": 0.649990439414978, "learning_rate": 5.456347615886623e-06, "log_odds_chosen": 0.4370390474796295, "log_odds_ratio": -0.5851489305496216, "logits/chosen": -2.3460276126861572, "logits/rejected": -2.339559555053711, "logps/chosen": -0.725236713886261, "logps/rejected": -0.9841029047966003, "loss": 1.2237, "nll_loss": 1.0405529737472534, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21757102012634277, "rewards/margins": 0.07765985280275345, "rewards/rejected": -0.2952309250831604, "step": 4580 }, { "epoch": 1.2025150641865339, "grad_norm": 0.8032832145690918, "learning_rate": 5.445804415732465e-06, "log_odds_chosen": 0.5654562711715698, "log_odds_ratio": -0.5666981339454651, "logits/chosen": -2.371126174926758, "logits/rejected": -2.27974009513855, "logps/chosen": -0.7760007381439209, "logps/rejected": -1.1086640357971191, "loss": 1.1957, "nll_loss": 1.036826252937317, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23280024528503418, "rewards/margins": 0.09979898482561111, "rewards/rejected": -0.3325992226600647, "step": 4590 }, { "epoch": 1.2051349227141734, "grad_norm": 0.6946600079536438, "learning_rate": 5.435249648718845e-06, "log_odds_chosen": 0.36225420236587524, "log_odds_ratio": -0.6761174201965332, "logits/chosen": -2.299126386642456, "logits/rejected": -2.2368223667144775, "logps/chosen": -0.7831307649612427, "logps/rejected": -1.0296188592910767, "loss": 1.2592, "nll_loss": 1.0291225910186768, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23493924736976624, "rewards/margins": 0.07394643872976303, "rewards/rejected": -0.3088856637477875, "step": 4600 }, { "epoch": 1.207754781241813, "grad_norm": 3.4581382274627686, "learning_rate": 5.4246833992869955e-06, "log_odds_chosen": 0.47069159150123596, "log_odds_ratio": -0.5814048051834106, "logits/chosen": -2.3368303775787354, "logits/rejected": -2.2810065746307373, "logps/chosen": -0.7481335401535034, "logps/rejected": -1.020504117012024, "loss": 1.2114, "nll_loss": 1.0135027170181274, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2244400680065155, "rewards/margins": 0.08171116560697556, "rewards/rejected": -0.30615124106407166, "step": 4610 }, { "epoch": 1.2103746397694524, "grad_norm": 0.9120251536369324, "learning_rate": 5.414105751970011e-06, "log_odds_chosen": 0.31580883264541626, "log_odds_ratio": -0.6648154854774475, "logits/chosen": -2.3392817974090576, "logits/rejected": -2.26995587348938, "logps/chosen": -0.7548210620880127, "logps/rejected": -0.928641140460968, "loss": 1.2043, "nll_loss": 1.048303484916687, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22644634544849396, "rewards/margins": 0.052146025002002716, "rewards/rejected": -0.2785923480987549, "step": 4620 }, { "epoch": 1.212994498297092, "grad_norm": 0.7799502611160278, "learning_rate": 5.403516791392172e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.169452667236328, "logits/rejected": -2.1369123458862305, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1102, "nll_loss": 1.0016441345214844, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4630 }, { "epoch": 1.2156143568247315, "grad_norm": 2.788393020629883, "learning_rate": 5.39291660226827e-06, "log_odds_chosen": 0.2608758509159088, "log_odds_ratio": -0.6833235621452332, "logits/chosen": -2.316744804382324, "logits/rejected": -2.311548948287964, "logps/chosen": -0.765727698802948, "logps/rejected": -0.876560389995575, "loss": 1.2316, "nll_loss": 1.0258175134658813, "rewards/accuracies": 0.625, "rewards/chosen": -0.22971835732460022, "rewards/margins": 0.03324980288743973, "rewards/rejected": -0.26296812295913696, "step": 4640 }, { "epoch": 1.2182342153523709, "grad_norm": 0.8829882144927979, "learning_rate": 5.38230526940293e-06, "log_odds_chosen": 0.7884406447410583, "log_odds_ratio": -0.5182503461837769, "logits/chosen": -2.304915189743042, "logits/rejected": -2.1821165084838867, "logps/chosen": -0.7145798802375793, "logps/rejected": -1.22406005859375, "loss": 1.1909, "nll_loss": 1.0599157810211182, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.21437397599220276, "rewards/margins": 0.15284407138824463, "rewards/rejected": -0.367218017578125, "step": 4650 }, { "epoch": 1.2208540738800104, "grad_norm": 0.5903418064117432, "learning_rate": 5.371682877689925e-06, "log_odds_chosen": 0.64000004529953, "log_odds_ratio": -0.5256044268608093, "logits/chosen": -2.2222208976745605, "logits/rejected": -2.1384360790252686, "logps/chosen": -0.7520798444747925, "logps/rejected": -1.1560475826263428, "loss": 1.2653, "nll_loss": 1.0503709316253662, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2256239652633667, "rewards/margins": 0.12119030952453613, "rewards/rejected": -0.34681424498558044, "step": 4660 }, { "epoch": 1.22347393240765, "grad_norm": 0.9091732501983643, "learning_rate": 5.361049512111505e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2442493438720703, "logits/rejected": -2.238497018814087, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3074, "nll_loss": 1.1275999546051025, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4670 }, { "epoch": 1.2260937909352896, "grad_norm": 0.9587445259094238, "learning_rate": 5.350405257737715e-06, "log_odds_chosen": 0.663695752620697, "log_odds_ratio": -0.5566957592964172, "logits/chosen": -2.261946201324463, "logits/rejected": -2.2284061908721924, "logps/chosen": -0.7028164267539978, "logps/rejected": -1.0594313144683838, "loss": 1.2409, "nll_loss": 1.0589364767074585, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.210844948887825, "rewards/margins": 0.1069844588637352, "rewards/rejected": -0.3178294003009796, "step": 4680 }, { "epoch": 1.228713649462929, "grad_norm": 0.7679633498191833, "learning_rate": 5.339750199725714e-06, "log_odds_chosen": 0.37978872656822205, "log_odds_ratio": -0.6333969831466675, "logits/chosen": -2.2118563652038574, "logits/rejected": -2.1550440788269043, "logps/chosen": -0.7647315859794617, "logps/rejected": -1.0188699960708618, "loss": 1.2062, "nll_loss": 0.9702175259590149, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2294194996356964, "rewards/margins": 0.07624147832393646, "rewards/rejected": -0.3056609630584717, "step": 4690 }, { "epoch": 1.2313335079905685, "grad_norm": 1.3534033298492432, "learning_rate": 5.329084423319089e-06, "log_odds_chosen": 0.6458851099014282, "log_odds_ratio": -0.5549395084381104, "logits/chosen": -2.328274965286255, "logits/rejected": -2.2693893909454346, "logps/chosen": -0.7748360633850098, "logps/rejected": -1.1211254596710205, "loss": 1.2086, "nll_loss": 1.0478445291519165, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23245087265968323, "rewards/margins": 0.10388679802417755, "rewards/rejected": -0.3363376259803772, "step": 4700 }, { "epoch": 1.233953366518208, "grad_norm": NaN, "learning_rate": 5.318408013847182e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3330655097961426, "logits/rejected": -2.2584781646728516, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3004, "nll_loss": 1.1013375520706177, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4710 }, { "epoch": 1.2365732250458474, "grad_norm": 0.9647149443626404, "learning_rate": 5.307721056724399e-06, "log_odds_chosen": 0.4450560212135315, "log_odds_ratio": -0.6558130383491516, "logits/chosen": -2.3764123916625977, "logits/rejected": -2.311568260192871, "logps/chosen": -0.7864806652069092, "logps/rejected": -1.039116382598877, "loss": 1.1606, "nll_loss": 0.9932456016540527, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23594419658184052, "rewards/margins": 0.07579073309898376, "rewards/rejected": -0.3117349445819855, "step": 4720 }, { "epoch": 1.239193083573487, "grad_norm": 0.5681069493293762, "learning_rate": 5.2970236374495335e-06, "log_odds_chosen": 0.6598880887031555, "log_odds_ratio": -0.5590001940727234, "logits/chosen": -2.387953996658325, "logits/rejected": -2.293609142303467, "logps/chosen": -0.7634426951408386, "logps/rejected": -1.1560559272766113, "loss": 1.2283, "nll_loss": 1.0877689123153687, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.22903279960155487, "rewards/margins": 0.11778400093317032, "rewards/rejected": -0.3468167781829834, "step": 4730 }, { "epoch": 1.2418129421011266, "grad_norm": 1.1110118627548218, "learning_rate": 5.286315841605078e-06, "log_odds_chosen": 0.5250371694564819, "log_odds_ratio": -0.58372962474823, "logits/chosen": -2.2952144145965576, "logits/rejected": -2.248246908187866, "logps/chosen": -0.795624315738678, "logps/rejected": -1.0814110040664673, "loss": 1.2189, "nll_loss": 1.1381975412368774, "rewards/accuracies": 0.6875, "rewards/chosen": -0.23868732154369354, "rewards/margins": 0.08573600649833679, "rewards/rejected": -0.32442331314086914, "step": 4740 }, { "epoch": 1.2444328006287662, "grad_norm": 2.2186660766601562, "learning_rate": 5.275597754856539e-06, "log_odds_chosen": 0.34842389822006226, "log_odds_ratio": -0.6409770250320435, "logits/chosen": -2.4112536907196045, "logits/rejected": -2.331148147583008, "logps/chosen": -0.7966256141662598, "logps/rejected": -1.0280308723449707, "loss": 1.2336, "nll_loss": 1.009595274925232, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23898771405220032, "rewards/margins": 0.06942159682512283, "rewards/rejected": -0.3084092438220978, "step": 4750 }, { "epoch": 1.2470526591564055, "grad_norm": 0.7730907797813416, "learning_rate": 5.2648694629517565e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3275115489959717, "logits/rejected": -2.180114269256592, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1393, "nll_loss": 0.8951295614242554, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4760 }, { "epoch": 1.249672517684045, "grad_norm": 0.8178501129150391, "learning_rate": 5.254131051720212e-06, "log_odds_chosen": 0.5744642019271851, "log_odds_ratio": -0.5903124809265137, "logits/chosen": -2.284144163131714, "logits/rejected": -2.2599081993103027, "logps/chosen": -0.7336593866348267, "logps/rejected": -1.0689289569854736, "loss": 1.1637, "nll_loss": 0.9736175537109375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22009782493114471, "rewards/margins": 0.1005808562040329, "rewards/rejected": -0.3206787109375, "step": 4770 }, { "epoch": 1.2522923762116847, "grad_norm": 1.9074848890304565, "learning_rate": 5.243382607072346e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3268842697143555, "logits/rejected": -2.239356517791748, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2032, "nll_loss": 0.9109100103378296, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4780 }, { "epoch": 1.254912234739324, "grad_norm": 1.2775012254714966, "learning_rate": 5.2326242149988675e-06, "log_odds_chosen": 0.38393455743789673, "log_odds_ratio": -0.6468745470046997, "logits/chosen": -2.2799997329711914, "logits/rejected": -2.2906551361083984, "logps/chosen": -0.7340123057365417, "logps/rejected": -0.9778379201889038, "loss": 1.1602, "nll_loss": 0.9625921249389648, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2202036827802658, "rewards/margins": 0.07314766943454742, "rewards/rejected": -0.29335135221481323, "step": 4790 }, { "epoch": 1.2575320932669636, "grad_norm": NaN, "learning_rate": 5.2218559615700715e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.270491123199463, "logits/rejected": -2.22379994392395, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2555, "nll_loss": 1.0630104541778564, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4800 }, { "epoch": 1.2601519517946032, "grad_norm": 0.869377076625824, "learning_rate": 5.211077932935143e-06, "log_odds_chosen": 0.6087290048599243, "log_odds_ratio": -0.5622527003288269, "logits/chosen": -2.366708993911743, "logits/rejected": -2.23652720451355, "logps/chosen": -0.8133349418640137, "logps/rejected": -1.1779882907867432, "loss": 1.2126, "nll_loss": 1.0561307668685913, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24400050938129425, "rewards/margins": 0.10939594358205795, "rewards/rejected": -0.3533964455127716, "step": 4810 }, { "epoch": 1.2627718103222425, "grad_norm": 1.1645615100860596, "learning_rate": 5.200290215321472e-06, "log_odds_chosen": 0.3839130103588104, "log_odds_ratio": -0.6257196664810181, "logits/chosen": -2.346999406814575, "logits/rejected": -2.3805789947509766, "logps/chosen": -0.8189098238945007, "logps/rejected": -1.0416443347930908, "loss": 1.3199, "nll_loss": 1.205476999282837, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.24567297101020813, "rewards/margins": 0.06682037562131882, "rewards/rejected": -0.31249332427978516, "step": 4820 }, { "epoch": 1.265391668849882, "grad_norm": 1.592443585395813, "learning_rate": 5.1894928950339645e-06, "log_odds_chosen": 0.5637732744216919, "log_odds_ratio": -0.5837487578392029, "logits/chosen": -2.3725080490112305, "logits/rejected": -2.265498638153076, "logps/chosen": -0.8184593915939331, "logps/rejected": -1.1717450618743896, "loss": 1.2163, "nll_loss": 1.011283040046692, "rewards/accuracies": 0.6875, "rewards/chosen": -0.24553783237934113, "rewards/margins": 0.10598573833703995, "rewards/rejected": -0.35152357816696167, "step": 4830 }, { "epoch": 1.2680115273775217, "grad_norm": 0.7641615867614746, "learning_rate": 5.178686058454352e-06, "log_odds_chosen": 0.4931333065032959, "log_odds_ratio": -0.6536533832550049, "logits/chosen": -2.2803072929382324, "logits/rejected": -2.261302947998047, "logps/chosen": -0.8164812326431274, "logps/rejected": -1.0958176851272583, "loss": 1.2135, "nll_loss": 1.0308202505111694, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.24494437873363495, "rewards/margins": 0.08380093425512314, "rewards/rejected": -0.3287453055381775, "step": 4840 }, { "epoch": 1.270631385905161, "grad_norm": 1.0792655944824219, "learning_rate": 5.167869792040496e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4156057834625244, "logits/rejected": -2.3951292037963867, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2539, "nll_loss": 1.0638929605484009, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4850 }, { "epoch": 1.2732512444328006, "grad_norm": 1.1128162145614624, "learning_rate": 5.1570441823257e-06, "log_odds_chosen": 0.4428636431694031, "log_odds_ratio": -0.6104263067245483, "logits/chosen": -2.395709753036499, "logits/rejected": -2.305656909942627, "logps/chosen": -0.7273253202438354, "logps/rejected": -1.0087066888809204, "loss": 1.1954, "nll_loss": 1.0329511165618896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21819761395454407, "rewards/margins": 0.08441440761089325, "rewards/rejected": -0.3026120066642761, "step": 4860 }, { "epoch": 1.2758711029604402, "grad_norm": 0.7986030578613281, "learning_rate": 5.146209315918019e-06, "log_odds_chosen": 0.5525490045547485, "log_odds_ratio": -0.5958794951438904, "logits/chosen": -2.413449764251709, "logits/rejected": -2.307947874069214, "logps/chosen": -0.7440060973167419, "logps/rejected": -1.1095563173294067, "loss": 1.2767, "nll_loss": 1.1226774454116821, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22320182621479034, "rewards/margins": 0.10966508090496063, "rewards/rejected": -0.33286693692207336, "step": 4870 }, { "epoch": 1.2784909614880797, "grad_norm": 0.7782097458839417, "learning_rate": 5.135365279499562e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3509130477905273, "logits/rejected": -2.301701784133911, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2007, "nll_loss": 1.0632917881011963, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4880 }, { "epoch": 1.281110820015719, "grad_norm": NaN, "learning_rate": 5.124512159825802e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4330737590789795, "logits/rejected": -2.3533096313476562, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1891, "nll_loss": 0.9533231854438782, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4890 }, { "epoch": 1.2837306785433586, "grad_norm": 0.7802425622940063, "learning_rate": 5.113650043724879e-06, "log_odds_chosen": 0.5719162821769714, "log_odds_ratio": -0.5822450518608093, "logits/chosen": -2.504112720489502, "logits/rejected": -2.3719191551208496, "logps/chosen": -0.7639566659927368, "logps/rejected": -1.1320428848266602, "loss": 1.1744, "nll_loss": 0.9373259544372559, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2291869819164276, "rewards/margins": 0.11042588949203491, "rewards/rejected": -0.33961284160614014, "step": 4900 }, { "epoch": 1.2863505370709982, "grad_norm": 1.0334900617599487, "learning_rate": 5.10277901809691e-06, "log_odds_chosen": 0.8272826075553894, "log_odds_ratio": -0.5615550875663757, "logits/chosen": -2.2939743995666504, "logits/rejected": -2.2322840690612793, "logps/chosen": -0.7636197805404663, "logps/rejected": -1.2942662239074707, "loss": 1.2065, "nll_loss": 1.0263195037841797, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22908595204353333, "rewards/margins": 0.15919391810894012, "rewards/rejected": -0.38827985525131226, "step": 4910 }, { "epoch": 1.2889703955986378, "grad_norm": 0.7886615991592407, "learning_rate": 5.091899169913289e-06, "log_odds_chosen": 0.4004344940185547, "log_odds_ratio": -0.6735684871673584, "logits/chosen": -2.3876309394836426, "logits/rejected": -2.284850597381592, "logps/chosen": -0.8398586511611938, "logps/rejected": -1.1257551908493042, "loss": 1.2524, "nll_loss": 1.0556118488311768, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.25195759534835815, "rewards/margins": 0.08576899766921997, "rewards/rejected": -0.33772653341293335, "step": 4920 }, { "epoch": 1.2915902541262771, "grad_norm": 1.8681901693344116, "learning_rate": 5.081010586215992e-06, "log_odds_chosen": 0.541029155254364, "log_odds_ratio": -0.5896049737930298, "logits/chosen": -2.3980846405029297, "logits/rejected": -2.3472893238067627, "logps/chosen": -0.7228965163230896, "logps/rejected": -1.0433108806610107, "loss": 1.2099, "nll_loss": 1.025068998336792, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21686896681785583, "rewards/margins": 0.09612435102462769, "rewards/rejected": -0.3129933178424835, "step": 4930 }, { "epoch": 1.2942101126539167, "grad_norm": 0.7173250317573547, "learning_rate": 5.0701133541168845e-06, "log_odds_chosen": 0.7148399949073792, "log_odds_ratio": -0.5588162541389465, "logits/chosen": -2.3845438957214355, "logits/rejected": -2.3768117427825928, "logps/chosen": -0.6516094207763672, "logps/rejected": -1.0806598663330078, "loss": 1.1569, "nll_loss": 1.0293605327606201, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19548282027244568, "rewards/margins": 0.1287151575088501, "rewards/rejected": -0.3241979479789734, "step": 4940 }, { "epoch": 1.2968299711815563, "grad_norm": 2.5794811248779297, "learning_rate": 5.059207560797017e-06, "log_odds_chosen": 0.4904839098453522, "log_odds_ratio": -0.6668694615364075, "logits/chosen": -2.2457096576690674, "logits/rejected": -2.2289137840270996, "logps/chosen": -0.8089202046394348, "logps/rejected": -1.204458475112915, "loss": 1.2014, "nll_loss": 1.005413293838501, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24267606437206268, "rewards/margins": 0.11866150796413422, "rewards/rejected": -0.3613375425338745, "step": 4950 }, { "epoch": 1.2994498297091956, "grad_norm": 4.102689743041992, "learning_rate": 5.048293293505938e-06, "log_odds_chosen": 0.6797679662704468, "log_odds_ratio": -0.5920431017875671, "logits/chosen": -2.414154291152954, "logits/rejected": -2.3150312900543213, "logps/chosen": -0.7271084189414978, "logps/rejected": -1.1320843696594238, "loss": 1.191, "nll_loss": 0.9279645681381226, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21813254058361053, "rewards/margins": 0.12149278819561005, "rewards/rejected": -0.3396252989768982, "step": 4960 }, { "epoch": 1.3020696882368352, "grad_norm": 0.9171664118766785, "learning_rate": 5.037370639560987e-06, "log_odds_chosen": 0.2917039096355438, "log_odds_ratio": -0.6932202577590942, "logits/chosen": -2.3796908855438232, "logits/rejected": -2.357335329055786, "logps/chosen": -0.7811436653137207, "logps/rejected": -0.9214129447937012, "loss": 1.2275, "nll_loss": 1.0404516458511353, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.23434312641620636, "rewards/margins": 0.04208080843091011, "rewards/rejected": -0.2764239013195038, "step": 4970 }, { "epoch": 1.3046895467644748, "grad_norm": 0.5840489268302917, "learning_rate": 5.026439686346599e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2720789909362793, "logits/rejected": -2.2027018070220947, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2019, "nll_loss": 0.9869183301925659, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 4980 }, { "epoch": 1.3073094052921141, "grad_norm": 1.670447587966919, "learning_rate": 5.0155005213136065e-06, "log_odds_chosen": 0.2529212534427643, "log_odds_ratio": -0.6684775948524475, "logits/chosen": -2.4062445163726807, "logits/rejected": -2.335999011993408, "logps/chosen": -0.7672814130783081, "logps/rejected": -0.9098614454269409, "loss": 1.2375, "nll_loss": 1.0832791328430176, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2301844358444214, "rewards/margins": 0.04277399182319641, "rewards/rejected": -0.2729584276676178, "step": 4990 }, { "epoch": 1.3099292638197537, "grad_norm": 0.9187710881233215, "learning_rate": 5.0045532319785405e-06, "log_odds_chosen": 0.5238833427429199, "log_odds_ratio": -0.5820587873458862, "logits/chosen": -2.38010573387146, "logits/rejected": -2.351256847381592, "logps/chosen": -0.7336803674697876, "logps/rejected": -1.0468286275863647, "loss": 1.1396, "nll_loss": 0.9888582229614258, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22010409832000732, "rewards/margins": 0.0939444974064827, "rewards/rejected": -0.3140485882759094, "step": 5000 }, { "epoch": 1.3125491223473933, "grad_norm": 0.8632611036300659, "learning_rate": 4.993597905922925e-06, "log_odds_chosen": 0.29155150055885315, "log_odds_ratio": -0.6565324068069458, "logits/chosen": -2.493464946746826, "logits/rejected": -2.418168067932129, "logps/chosen": -0.7110509276390076, "logps/rejected": -0.8991442918777466, "loss": 1.2034, "nll_loss": 1.0190273523330688, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21331527829170227, "rewards/margins": 0.05642802268266678, "rewards/rejected": -0.26974329352378845, "step": 5010 }, { "epoch": 1.3151689808750326, "grad_norm": 0.6655256152153015, "learning_rate": 4.982634630792584e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.403351068496704, "logits/rejected": -2.348081111907959, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3461, "nll_loss": 1.0430594682693481, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5020 }, { "epoch": 1.3177888394026722, "grad_norm": 0.6591448783874512, "learning_rate": 4.971663494296934e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3719589710235596, "logits/rejected": -2.346503973007202, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1845, "nll_loss": 0.9714627265930176, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5030 }, { "epoch": 1.3204086979303118, "grad_norm": 0.7419347763061523, "learning_rate": 4.960684584208286e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3715498447418213, "logits/rejected": -2.322359561920166, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2173, "nll_loss": 1.0355503559112549, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5040 }, { "epoch": 1.3230285564579511, "grad_norm": 1.7828229665756226, "learning_rate": 4.94969798836114e-06, "log_odds_chosen": 0.4413382411003113, "log_odds_ratio": -0.6082134246826172, "logits/chosen": -2.454434394836426, "logits/rejected": -2.330362558364868, "logps/chosen": -0.7726138234138489, "logps/rejected": -1.0426418781280518, "loss": 1.177, "nll_loss": 1.0249308347702026, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2317841351032257, "rewards/margins": 0.08100844919681549, "rewards/rejected": -0.31279256939888, "step": 5050 }, { "epoch": 1.3256484149855907, "grad_norm": 0.7485870718955994, "learning_rate": 4.938703794651486e-06, "log_odds_chosen": 0.4538070559501648, "log_odds_ratio": -0.6045533418655396, "logits/chosen": -2.4150519371032715, "logits/rejected": -2.3022396564483643, "logps/chosen": -0.7443751096725464, "logps/rejected": -0.995866596698761, "loss": 1.1276, "nll_loss": 0.9599580764770508, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22331254184246063, "rewards/margins": 0.07544741779565811, "rewards/rejected": -0.29875996708869934, "step": 5060 }, { "epoch": 1.3282682735132303, "grad_norm": 1.1943587064743042, "learning_rate": 4.927702091036101e-06, "log_odds_chosen": 0.4153831899166107, "log_odds_ratio": -0.6166917085647583, "logits/chosen": -2.41345477104187, "logits/rejected": -2.342224597930908, "logps/chosen": -0.8483909368515015, "logps/rejected": -1.0995229482650757, "loss": 1.2123, "nll_loss": 1.0378968715667725, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2545172870159149, "rewards/margins": 0.07533960789442062, "rewards/rejected": -0.3298569321632385, "step": 5070 }, { "epoch": 1.3308881320408699, "grad_norm": 1.3192931413650513, "learning_rate": 4.91669296553184e-06, "log_odds_chosen": 0.45083361864089966, "log_odds_ratio": -0.6285390257835388, "logits/chosen": -2.3325870037078857, "logits/rejected": -2.3389909267425537, "logps/chosen": -0.733669638633728, "logps/rejected": -1.0234605073928833, "loss": 1.1741, "nll_loss": 0.9990497827529907, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22010087966918945, "rewards/margins": 0.08693723380565643, "rewards/rejected": -0.3070381283760071, "step": 5080 }, { "epoch": 1.3335079905685094, "grad_norm": 1.1604743003845215, "learning_rate": 4.905676506214935e-06, "log_odds_chosen": 0.4265998303890228, "log_odds_ratio": -0.6188628077507019, "logits/chosen": -2.411616802215576, "logits/rejected": -2.3870112895965576, "logps/chosen": -0.7369780540466309, "logps/rejected": -1.0224689245224, "loss": 1.1863, "nll_loss": 1.0739386081695557, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22109341621398926, "rewards/margins": 0.08564727753400803, "rewards/rejected": -0.3067407011985779, "step": 5090 }, { "epoch": 1.3361278490961488, "grad_norm": 1.0418484210968018, "learning_rate": 4.894652801220294e-06, "log_odds_chosen": 0.44457611441612244, "log_odds_ratio": -0.5927441120147705, "logits/chosen": -2.421163320541382, "logits/rejected": -2.375669002532959, "logps/chosen": -0.6784693002700806, "logps/rejected": -0.9187573194503784, "loss": 1.1945, "nll_loss": 0.9535526037216187, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20354080200195312, "rewards/margins": 0.07208643853664398, "rewards/rejected": -0.2756272256374359, "step": 5100 }, { "epoch": 1.3387477076237884, "grad_norm": 0.9982894062995911, "learning_rate": 4.883621938740791e-06, "log_odds_chosen": 0.4082639813423157, "log_odds_ratio": -0.6205071806907654, "logits/chosen": -2.252592086791992, "logits/rejected": -2.1758408546447754, "logps/chosen": -0.7578132748603821, "logps/rejected": -1.0165596008300781, "loss": 1.1992, "nll_loss": 1.092667818069458, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22734399139881134, "rewards/margins": 0.07762391865253448, "rewards/rejected": -0.30496788024902344, "step": 5110 }, { "epoch": 1.341367566151428, "grad_norm": 0.8193521499633789, "learning_rate": 4.872584007026561e-06, "log_odds_chosen": 0.30825406312942505, "log_odds_ratio": -0.6907910704612732, "logits/chosen": -2.355109691619873, "logits/rejected": -2.3015522956848145, "logps/chosen": -0.8386304974555969, "logps/rejected": -1.0290942192077637, "loss": 1.2152, "nll_loss": 1.0270957946777344, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2515891492366791, "rewards/margins": 0.05713914707303047, "rewards/rejected": -0.30872827768325806, "step": 5120 }, { "epoch": 1.3439874246790673, "grad_norm": 1.088674783706665, "learning_rate": 4.861539094384296e-06, "log_odds_chosen": 0.6136468648910522, "log_odds_ratio": -0.5839930772781372, "logits/chosen": -2.3591606616973877, "logits/rejected": -2.28766131401062, "logps/chosen": -0.6774436831474304, "logps/rejected": -0.9499239921569824, "loss": 1.1979, "nll_loss": 1.0161521434783936, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20323312282562256, "rewards/margins": 0.08174407482147217, "rewards/rejected": -0.28497716784477234, "step": 5130 }, { "epoch": 1.3466072832067069, "grad_norm": 0.7477260828018188, "learning_rate": 4.850487289176539e-06, "log_odds_chosen": 0.12135878950357437, "log_odds_ratio": -0.7513008117675781, "logits/chosen": -2.3980236053466797, "logits/rejected": -2.2986035346984863, "logps/chosen": -0.8211213946342468, "logps/rejected": -0.9170609712600708, "loss": 1.2617, "nll_loss": 1.0957401990890503, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2463364154100418, "rewards/margins": 0.028781885281205177, "rewards/rejected": -0.27511829137802124, "step": 5140 }, { "epoch": 1.3492271417343464, "grad_norm": 0.6150688529014587, "learning_rate": 4.839428679820971e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.351151943206787, "logits/rejected": -2.251274585723877, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1769, "nll_loss": 1.0152794122695923, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5150 }, { "epoch": 1.3518470002619858, "grad_norm": 0.5077309608459473, "learning_rate": 4.828363354789713e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4220705032348633, "logits/rejected": -2.303199052810669, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1666, "nll_loss": 0.9483256340026855, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5160 }, { "epoch": 1.3544668587896254, "grad_norm": 0.8797613382339478, "learning_rate": 4.81729140260861e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.375478744506836, "logits/rejected": -2.276717185974121, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.186, "nll_loss": 1.0448063611984253, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5170 }, { "epoch": 1.357086717317265, "grad_norm": 0.6219307780265808, "learning_rate": 4.806212911856528e-06, "log_odds_chosen": 0.25291362404823303, "log_odds_ratio": -0.6814008951187134, "logits/chosen": -2.3588204383850098, "logits/rejected": -2.2607219219207764, "logps/chosen": -0.8502522706985474, "logps/rejected": -1.0065758228302002, "loss": 1.1971, "nll_loss": 1.0833252668380737, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.25507569313049316, "rewards/margins": 0.04689708352088928, "rewards/rejected": -0.30197277665138245, "step": 5180 }, { "epoch": 1.3597065758449043, "grad_norm": 0.6994002461433411, "learning_rate": 4.795127971164644e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3516035079956055, "logits/rejected": -2.2990856170654297, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1178, "nll_loss": 0.9671450853347778, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5190 }, { "epoch": 1.3623264343725439, "grad_norm": 0.7307136058807373, "learning_rate": 4.7840366692157336e-06, "log_odds_chosen": 0.41218847036361694, "log_odds_ratio": -0.6296796798706055, "logits/chosen": -2.3130667209625244, "logits/rejected": -2.293635129928589, "logps/chosen": -0.7203344106674194, "logps/rejected": -0.9972794651985168, "loss": 1.1869, "nll_loss": 0.9703718423843384, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21610033512115479, "rewards/margins": 0.08308352530002594, "rewards/rejected": -0.29918384552001953, "step": 5200 }, { "epoch": 1.3649462929001834, "grad_norm": 0.5832682251930237, "learning_rate": 4.7729390947434665e-06, "log_odds_chosen": 0.33324000239372253, "log_odds_ratio": -0.6413792371749878, "logits/chosen": -2.351470470428467, "logits/rejected": -2.3119454383850098, "logps/chosen": -0.8638683557510376, "logps/rejected": -1.0406765937805176, "loss": 1.1694, "nll_loss": 0.9603753089904785, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.25916048884391785, "rewards/margins": 0.05304250121116638, "rewards/rejected": -0.31220299005508423, "step": 5210 }, { "epoch": 1.3675661514278228, "grad_norm": 1.0776324272155762, "learning_rate": 4.761835336531695e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4412894248962402, "logits/rejected": -2.379896640777588, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.187, "nll_loss": 0.9484588503837585, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5220 }, { "epoch": 1.3701860099554624, "grad_norm": 0.9076582193374634, "learning_rate": 4.750725483413741e-06, "log_odds_chosen": 0.3344917893409729, "log_odds_ratio": -0.6866899728775024, "logits/chosen": -2.4220659732818604, "logits/rejected": -2.3664746284484863, "logps/chosen": -0.7685586214065552, "logps/rejected": -0.9658347964286804, "loss": 1.1836, "nll_loss": 0.9696909189224243, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.23056760430335999, "rewards/margins": 0.059182871133089066, "rewards/rejected": -0.28975048661231995, "step": 5230 }, { "epoch": 1.372805868483102, "grad_norm": 0.9014420509338379, "learning_rate": 4.739609624271689e-06, "log_odds_chosen": 0.4967070519924164, "log_odds_ratio": -0.6187809705734253, "logits/chosen": -2.3426151275634766, "logits/rejected": -2.365074872970581, "logps/chosen": -0.7324684858322144, "logps/rejected": -1.0295459032058716, "loss": 1.2049, "nll_loss": 0.9761987924575806, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21974053978919983, "rewards/margins": 0.08912321925163269, "rewards/rejected": -0.3088637888431549, "step": 5240 }, { "epoch": 1.3754257270107413, "grad_norm": 0.7322718501091003, "learning_rate": 4.728487848035674e-06, "log_odds_chosen": 0.5654800534248352, "log_odds_ratio": -0.5738216638565063, "logits/chosen": -2.3457882404327393, "logits/rejected": -2.2357258796691895, "logps/chosen": -0.7347289323806763, "logps/rejected": -1.033896803855896, "loss": 1.2551, "nll_loss": 1.0589892864227295, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22041866183280945, "rewards/margins": 0.08975042402744293, "rewards/rejected": -0.3101691007614136, "step": 5250 }, { "epoch": 1.3780455855383809, "grad_norm": 0.5969277024269104, "learning_rate": 4.717360243683167e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.377880334854126, "logits/rejected": -2.30635666847229, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2156, "nll_loss": 0.9952245950698853, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5260 }, { "epoch": 1.3806654440660204, "grad_norm": 0.9886066913604736, "learning_rate": 4.706226900238268e-06, "log_odds_chosen": 0.3602708876132965, "log_odds_ratio": -0.6383979320526123, "logits/chosen": -2.3027966022491455, "logits/rejected": -2.2895915508270264, "logps/chosen": -0.7345737814903259, "logps/rejected": -0.9452807307243347, "loss": 1.196, "nll_loss": 0.9818413853645325, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22037215530872345, "rewards/margins": 0.063212089240551, "rewards/rejected": -0.28358420729637146, "step": 5270 }, { "epoch": 1.38328530259366, "grad_norm": 0.7160565257072449, "learning_rate": 4.6950879067709915e-06, "log_odds_chosen": 0.6268895268440247, "log_odds_ratio": -0.5668733716011047, "logits/chosen": -2.412714719772339, "logits/rejected": -2.3347463607788086, "logps/chosen": -0.669111967086792, "logps/rejected": -1.055177092552185, "loss": 1.1777, "nll_loss": 0.9561313390731812, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.20073361694812775, "rewards/margins": 0.1158195286989212, "rewards/rejected": -0.31655317544937134, "step": 5280 }, { "epoch": 1.3859051611212996, "grad_norm": 0.6353239417076111, "learning_rate": 4.683943352396551e-06, "log_odds_chosen": 0.4478158950805664, "log_odds_ratio": -0.629090428352356, "logits/chosen": -2.428103446960449, "logits/rejected": -2.384150981903076, "logps/chosen": -0.7417416572570801, "logps/rejected": -1.0799376964569092, "loss": 1.2186, "nll_loss": 0.988239586353302, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22252249717712402, "rewards/margins": 0.10145878791809082, "rewards/rejected": -0.32398131489753723, "step": 5290 }, { "epoch": 1.388525019648939, "grad_norm": 0.7798917889595032, "learning_rate": 4.672793326274652e-06, "log_odds_chosen": 0.3211117386817932, "log_odds_ratio": -0.6918748021125793, "logits/chosen": -2.3091633319854736, "logits/rejected": -2.2042551040649414, "logps/chosen": -0.7912590503692627, "logps/rejected": -0.9811212420463562, "loss": 1.2057, "nll_loss": 0.9917973279953003, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23737771809101105, "rewards/margins": 0.05695866420865059, "rewards/rejected": -0.29433637857437134, "step": 5300 }, { "epoch": 1.3911448781765785, "grad_norm": 0.8665565252304077, "learning_rate": 4.661637917608774e-06, "log_odds_chosen": 0.3676202893257141, "log_odds_ratio": -0.6537615060806274, "logits/chosen": -2.3075671195983887, "logits/rejected": -2.295673370361328, "logps/chosen": -0.7706180214881897, "logps/rejected": -1.0245699882507324, "loss": 1.0908, "nll_loss": 0.9073265194892883, "rewards/accuracies": 0.625, "rewards/chosen": -0.23118539154529572, "rewards/margins": 0.07618560642004013, "rewards/rejected": -0.30737099051475525, "step": 5310 }, { "epoch": 1.393764736704218, "grad_norm": 0.9400366544723511, "learning_rate": 4.650477215645461e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.379730701446533, "logits/rejected": -2.3162615299224854, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2484, "nll_loss": 0.9844146966934204, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5320 }, { "epoch": 1.3963845952318574, "grad_norm": 1.6560202836990356, "learning_rate": 4.639311309673599e-06, "log_odds_chosen": 0.5555107593536377, "log_odds_ratio": -0.6333471536636353, "logits/chosen": -2.411482572555542, "logits/rejected": -2.362051486968994, "logps/chosen": -0.8317302465438843, "logps/rejected": -1.1824783086776733, "loss": 1.2446, "nll_loss": 1.0923302173614502, "rewards/accuracies": 0.625, "rewards/chosen": -0.24951907992362976, "rewards/margins": 0.10522445291280746, "rewards/rejected": -0.3547435402870178, "step": 5330 }, { "epoch": 1.399004453759497, "grad_norm": 1.1855988502502441, "learning_rate": 4.628140289023713e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.479907512664795, "logits/rejected": -2.39785099029541, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2359, "nll_loss": 1.0314867496490479, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5340 }, { "epoch": 1.4016243122871366, "grad_norm": 1.123755931854248, "learning_rate": 4.616964243067245e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.36859393119812, "logits/rejected": -2.3603310585021973, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1804, "nll_loss": 0.9799089431762695, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5350 }, { "epoch": 1.404244170814776, "grad_norm": 0.7306629419326782, "learning_rate": 4.605783261215843e-06, "log_odds_chosen": 0.6471961736679077, "log_odds_ratio": -0.5967639088630676, "logits/chosen": -2.3650448322296143, "logits/rejected": -2.3321611881256104, "logps/chosen": -0.6963730454444885, "logps/rejected": -1.114411473274231, "loss": 1.1858, "nll_loss": 0.9783366918563843, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2089119255542755, "rewards/margins": 0.1254115253686905, "rewards/rejected": -0.3343234658241272, "step": 5360 }, { "epoch": 1.4068640293424155, "grad_norm": 1.5102404356002808, "learning_rate": 4.594597432920641e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3171491622924805, "logits/rejected": -2.2824034690856934, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2078, "nll_loss": 1.0284786224365234, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5370 }, { "epoch": 1.409483887870055, "grad_norm": 1.3747023344039917, "learning_rate": 4.5834068476715455e-06, "log_odds_chosen": 0.5361596941947937, "log_odds_ratio": -0.6014043092727661, "logits/chosen": -2.4222617149353027, "logits/rejected": -2.3578083515167236, "logps/chosen": -0.7548558712005615, "logps/rejected": -1.08682119846344, "loss": 1.2237, "nll_loss": 1.0596224069595337, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22645679116249084, "rewards/margins": 0.09958957135677338, "rewards/rejected": -0.32604631781578064, "step": 5380 }, { "epoch": 1.4121037463976944, "grad_norm": 0.7205027341842651, "learning_rate": 4.572211594996521e-06, "log_odds_chosen": 0.633956789970398, "log_odds_ratio": -0.5438359975814819, "logits/chosen": -2.3987784385681152, "logits/rejected": -2.3798859119415283, "logps/chosen": -0.6384294033050537, "logps/rejected": -0.9765987396240234, "loss": 1.1883, "nll_loss": 0.9298230409622192, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1915288269519806, "rewards/margins": 0.10145078599452972, "rewards/rejected": -0.2929796576499939, "step": 5390 }, { "epoch": 1.414723604925334, "grad_norm": 0.9667037725448608, "learning_rate": 4.5610117644608765e-06, "log_odds_chosen": 0.572964072227478, "log_odds_ratio": -0.5569775700569153, "logits/chosen": -2.4394562244415283, "logits/rejected": -2.3883721828460693, "logps/chosen": -0.6916719675064087, "logps/rejected": -0.9760059118270874, "loss": 1.1566, "nll_loss": 1.0035834312438965, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2075015753507614, "rewards/margins": 0.08530023694038391, "rewards/rejected": -0.29280179738998413, "step": 5400 }, { "epoch": 1.4173434634529736, "grad_norm": 1.420699954032898, "learning_rate": 4.549807445666538e-06, "log_odds_chosen": 0.5470725297927856, "log_odds_ratio": -0.5531865954399109, "logits/chosen": -2.466904878616333, "logits/rejected": -2.4200520515441895, "logps/chosen": -0.6810835003852844, "logps/rejected": -0.9925681948661804, "loss": 1.1808, "nll_loss": 0.9337202310562134, "rewards/accuracies": 0.6875, "rewards/chosen": -0.20432503521442413, "rewards/margins": 0.09344540536403656, "rewards/rejected": -0.2977704405784607, "step": 5410 }, { "epoch": 1.419963321980613, "grad_norm": 1.0672879219055176, "learning_rate": 4.538598728251345e-06, "log_odds_chosen": 0.4975447654724121, "log_odds_ratio": -0.5491037964820862, "logits/chosen": -2.459238052368164, "logits/rejected": -2.3649611473083496, "logps/chosen": -0.7308903932571411, "logps/rejected": -1.0443768501281738, "loss": 1.1381, "nll_loss": 0.9317666888237, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2192670851945877, "rewards/margins": 0.09404599666595459, "rewards/rejected": -0.3133130669593811, "step": 5420 }, { "epoch": 1.4225831805082525, "grad_norm": 3.8588361740112305, "learning_rate": 4.527385701888324e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3039016723632812, "logits/rejected": -2.2287468910217285, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2004, "nll_loss": 1.0419814586639404, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5430 }, { "epoch": 1.425203039035892, "grad_norm": 1.453376054763794, "learning_rate": 4.516168456284977e-06, "log_odds_chosen": 0.36558419466018677, "log_odds_ratio": -0.6865782141685486, "logits/chosen": -2.4386606216430664, "logits/rejected": -2.3766369819641113, "logps/chosen": -0.7987833023071289, "logps/rejected": -1.0606192350387573, "loss": 1.1467, "nll_loss": 0.9902628064155579, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23963499069213867, "rewards/margins": 0.07855082303285599, "rewards/rejected": -0.31818580627441406, "step": 5440 }, { "epoch": 1.4278228975635316, "grad_norm": 0.5360792279243469, "learning_rate": 4.504947081182556e-06, "log_odds_chosen": 0.31183040142059326, "log_odds_ratio": -0.6487476229667664, "logits/chosen": -2.479891300201416, "logits/rejected": -2.461791753768921, "logps/chosen": -0.7525569796562195, "logps/rejected": -0.9501042366027832, "loss": 1.2063, "nll_loss": 1.0353929996490479, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2257671058177948, "rewards/margins": 0.0592641644179821, "rewards/rejected": -0.2850313186645508, "step": 5450 }, { "epoch": 1.430442756091171, "grad_norm": 0.6310731172561646, "learning_rate": 4.493721666355357e-06, "log_odds_chosen": 0.35815805196762085, "log_odds_ratio": -0.6353496313095093, "logits/chosen": -2.4350433349609375, "logits/rejected": -2.3494415283203125, "logps/chosen": -0.7694178819656372, "logps/rejected": -1.0187819004058838, "loss": 1.2094, "nll_loss": 0.9950467348098755, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23082534968852997, "rewards/margins": 0.07480920851230621, "rewards/rejected": -0.3056345582008362, "step": 5460 }, { "epoch": 1.4330626146188106, "grad_norm": 0.988227128982544, "learning_rate": 4.4824923016099906e-06, "log_odds_chosen": 0.33712267875671387, "log_odds_ratio": -0.6348481774330139, "logits/chosen": -2.418428897857666, "logits/rejected": -2.375114917755127, "logps/chosen": -0.7938346862792969, "logps/rejected": -1.0025193691253662, "loss": 1.1838, "nll_loss": 0.9749778509140015, "rewards/accuracies": 0.625, "rewards/chosen": -0.23815038800239563, "rewards/margins": 0.06260543316602707, "rewards/rejected": -0.3007558286190033, "step": 5470 }, { "epoch": 1.4356824731464501, "grad_norm": 1.0108747482299805, "learning_rate": 4.471259076784669e-06, "log_odds_chosen": 0.6326293349266052, "log_odds_ratio": -0.5306640863418579, "logits/chosen": -2.4605326652526855, "logits/rejected": -2.3757853507995605, "logps/chosen": -0.7296367883682251, "logps/rejected": -1.1208875179290771, "loss": 1.1451, "nll_loss": 0.9719759225845337, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2188909947872162, "rewards/margins": 0.11737523972988129, "rewards/rejected": -0.33626627922058105, "step": 5480 }, { "epoch": 1.4383023316740897, "grad_norm": 0.6918076872825623, "learning_rate": 4.460022081748485e-06, "log_odds_chosen": 0.5613770484924316, "log_odds_ratio": -0.5693602561950684, "logits/chosen": -2.4230449199676514, "logits/rejected": -2.316225528717041, "logps/chosen": -0.7198591232299805, "logps/rejected": -1.052952527999878, "loss": 1.1357, "nll_loss": 0.9428300857543945, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21595771610736847, "rewards/margins": 0.09992805123329163, "rewards/rejected": -0.3158857822418213, "step": 5490 }, { "epoch": 1.440922190201729, "grad_norm": 1.0350478887557983, "learning_rate": 4.448781406400698e-06, "log_odds_chosen": 0.3961525857448578, "log_odds_ratio": -0.644041895866394, "logits/chosen": -2.462289333343506, "logits/rejected": -2.3162975311279297, "logps/chosen": -0.7792466282844543, "logps/rejected": -1.0287293195724487, "loss": 1.1965, "nll_loss": 1.0171387195587158, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23377402126789093, "rewards/margins": 0.07484479248523712, "rewards/rejected": -0.30861884355545044, "step": 5500 }, { "epoch": 1.4435420487293686, "grad_norm": 1.44649338722229, "learning_rate": 4.437537140670004e-06, "log_odds_chosen": 0.49053525924682617, "log_odds_ratio": -0.6044145822525024, "logits/chosen": -2.4378318786621094, "logits/rejected": -2.3786139488220215, "logps/chosen": -0.7799569964408875, "logps/rejected": -1.133111834526062, "loss": 1.1106, "nll_loss": 0.9515721201896667, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23398712277412415, "rewards/margins": 0.10594646632671356, "rewards/rejected": -0.3399336040019989, "step": 5510 }, { "epoch": 1.4461619072570082, "grad_norm": 0.979152262210846, "learning_rate": 4.426289374513832e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3976664543151855, "logits/rejected": -2.3414340019226074, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2453, "nll_loss": 1.0173914432525635, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5520 }, { "epoch": 1.4487817657846476, "grad_norm": 2.1504385471343994, "learning_rate": 4.415038197917608e-06, "log_odds_chosen": 0.47776904702186584, "log_odds_ratio": -0.6402390003204346, "logits/chosen": -2.324899196624756, "logits/rejected": -2.2806315422058105, "logps/chosen": -0.8371583223342896, "logps/rejected": -1.1308166980743408, "loss": 1.1855, "nll_loss": 0.9977825284004211, "rewards/accuracies": 0.625, "rewards/chosen": -0.25114747881889343, "rewards/margins": 0.08809753507375717, "rewards/rejected": -0.3392450511455536, "step": 5530 }, { "epoch": 1.4514016243122871, "grad_norm": 0.5285787582397461, "learning_rate": 4.403783700894047e-06, "log_odds_chosen": 0.6117449998855591, "log_odds_ratio": -0.5794439315795898, "logits/chosen": -2.3999905586242676, "logits/rejected": -2.297729015350342, "logps/chosen": -0.8059555292129517, "logps/rejected": -1.1755410432815552, "loss": 1.1004, "nll_loss": 0.895393967628479, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2417866438627243, "rewards/margins": 0.11087566614151001, "rewards/rejected": -0.3526623547077179, "step": 5540 }, { "epoch": 1.4540214828399267, "grad_norm": 0.7233635783195496, "learning_rate": 4.392525973482426e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.441760301589966, "logits/rejected": -2.3925576210021973, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2669, "nll_loss": 1.0321805477142334, "rewards/accuracies": 0.550000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5550 }, { "epoch": 1.456641341367566, "grad_norm": 1.0763764381408691, "learning_rate": 4.381265105747867e-06, "log_odds_chosen": 0.4290178418159485, "log_odds_ratio": -0.6179078817367554, "logits/chosen": -2.388598918914795, "logits/rejected": -2.294349193572998, "logps/chosen": -0.8460661768913269, "logps/rejected": -1.0927733182907104, "loss": 1.2201, "nll_loss": 1.013191819190979, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.25381988286972046, "rewards/margins": 0.07401217520236969, "rewards/rejected": -0.32783204317092896, "step": 5560 }, { "epoch": 1.4592611998952056, "grad_norm": 0.8960614204406738, "learning_rate": 4.370001187780615e-06, "log_odds_chosen": 0.5532227754592896, "log_odds_ratio": -0.5581238865852356, "logits/chosen": -2.3765923976898193, "logits/rejected": -2.363312244415283, "logps/chosen": -0.8298677206039429, "logps/rejected": -1.209708333015442, "loss": 1.238, "nll_loss": 1.1678897142410278, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.24896030128002167, "rewards/margins": 0.11395217478275299, "rewards/rejected": -0.36291250586509705, "step": 5570 }, { "epoch": 1.4618810584228452, "grad_norm": 0.898713231086731, "learning_rate": 4.3587343096953185e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4093635082244873, "logits/rejected": -2.371655225753784, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2847, "nll_loss": 1.0583226680755615, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5580 }, { "epoch": 1.4645009169504846, "grad_norm": 1.5386309623718262, "learning_rate": 4.347464561630307e-06, "log_odds_chosen": 0.41168349981307983, "log_odds_ratio": -0.6472896337509155, "logits/chosen": -2.4589626789093018, "logits/rejected": -2.398488759994507, "logps/chosen": -0.7656463384628296, "logps/rejected": -1.0515060424804688, "loss": 1.16, "nll_loss": 1.0152032375335693, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2296939194202423, "rewards/margins": 0.08575791865587234, "rewards/rejected": -0.31545183062553406, "step": 5590 }, { "epoch": 1.4671207754781241, "grad_norm": 1.380110263824463, "learning_rate": 4.336192033746872e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.421870708465576, "logits/rejected": -2.3027217388153076, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1496, "nll_loss": 0.9063361287117004, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5600 }, { "epoch": 1.4697406340057637, "grad_norm": NaN, "learning_rate": 4.324916816228542e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.425846576690674, "logits/rejected": -2.3967223167419434, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2666, "nll_loss": 1.088600516319275, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5610 }, { "epoch": 1.472360492533403, "grad_norm": 0.7306493520736694, "learning_rate": 4.3136389992803656e-06, "log_odds_chosen": 0.4805302619934082, "log_odds_ratio": -0.6202538013458252, "logits/chosen": -2.3172154426574707, "logits/rejected": -2.3074936866760254, "logps/chosen": -0.8308989405632019, "logps/rejected": -1.1549360752105713, "loss": 1.1946, "nll_loss": 1.074549913406372, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.24926972389221191, "rewards/margins": 0.09721114486455917, "rewards/rejected": -0.3464808464050293, "step": 5620 }, { "epoch": 1.4749803510610426, "grad_norm": NaN, "learning_rate": 4.302358673128187e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3672308921813965, "logits/rejected": -2.3078296184539795, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2326, "nll_loss": 1.0475478172302246, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5630 }, { "epoch": 1.4776002095886822, "grad_norm": 0.7869731783866882, "learning_rate": 4.291075928017925e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4357738494873047, "logits/rejected": -2.3945627212524414, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2667, "nll_loss": 1.1048412322998047, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5640 }, { "epoch": 1.4802200681163218, "grad_norm": 0.9301585555076599, "learning_rate": 4.279790854214848e-06, "log_odds_chosen": 0.3675153851509094, "log_odds_ratio": -0.6176114082336426, "logits/chosen": -2.521925687789917, "logits/rejected": -2.4610800743103027, "logps/chosen": -0.8269756436347961, "logps/rejected": -1.0206220149993896, "loss": 1.2744, "nll_loss": 1.0129854679107666, "rewards/accuracies": 0.625, "rewards/chosen": -0.24809269607067108, "rewards/margins": 0.05809388309717178, "rewards/rejected": -0.30618658661842346, "step": 5650 }, { "epoch": 1.4828399266439614, "grad_norm": 0.5557612776756287, "learning_rate": 4.26850354200286e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.5029406547546387, "logits/rejected": -2.3773818016052246, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2174, "nll_loss": 1.0448089838027954, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5660 }, { "epoch": 1.4854597851716007, "grad_norm": 0.7769845724105835, "learning_rate": 4.2572140816837684e-06, "log_odds_chosen": 0.3781079947948456, "log_odds_ratio": -0.6224623322486877, "logits/chosen": -2.4692859649658203, "logits/rejected": -2.4003944396972656, "logps/chosen": -0.769267201423645, "logps/rejected": -1.0152894258499146, "loss": 1.209, "nll_loss": 1.0059512853622437, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23078016936779022, "rewards/margins": 0.07380668818950653, "rewards/rejected": -0.30458688735961914, "step": 5670 }, { "epoch": 1.4880796436992403, "grad_norm": 0.7134237885475159, "learning_rate": 4.245922563576569e-06, "log_odds_chosen": 0.5156056880950928, "log_odds_ratio": -0.5861083269119263, "logits/chosen": -2.474405288696289, "logits/rejected": -2.482937812805176, "logps/chosen": -0.7390599846839905, "logps/rejected": -1.04417085647583, "loss": 1.2037, "nll_loss": 1.044568419456482, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22171799838542938, "rewards/margins": 0.09153326600790024, "rewards/rejected": -0.3132512867450714, "step": 5680 }, { "epoch": 1.4906995022268799, "grad_norm": 0.8303751945495605, "learning_rate": 4.234629078016715e-06, "log_odds_chosen": 0.43730849027633667, "log_odds_ratio": -0.604202151298523, "logits/chosen": -2.4935295581817627, "logits/rejected": -2.3025600910186768, "logps/chosen": -0.7190378904342651, "logps/rejected": -0.9985675811767578, "loss": 1.1457, "nll_loss": 0.9067157506942749, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21571138501167297, "rewards/margins": 0.08385889232158661, "rewards/rejected": -0.2995702624320984, "step": 5690 }, { "epoch": 1.4933193607545192, "grad_norm": 1.086159110069275, "learning_rate": 4.22333371535541e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.443654775619507, "logits/rejected": -2.39729380607605, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2464, "nll_loss": 1.028681755065918, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5700 }, { "epoch": 1.4959392192821588, "grad_norm": 0.8281998634338379, "learning_rate": 4.212036565958864e-06, "log_odds_chosen": 0.3561393618583679, "log_odds_ratio": -0.6336366534233093, "logits/chosen": -2.4466185569763184, "logits/rejected": -2.427351474761963, "logps/chosen": -0.7092923521995544, "logps/rejected": -0.9020012617111206, "loss": 1.1497, "nll_loss": 0.9456235766410828, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2127877175807953, "rewards/margins": 0.05781267210841179, "rewards/rejected": -0.2706003785133362, "step": 5710 }, { "epoch": 1.4985590778097984, "grad_norm": 0.657654345035553, "learning_rate": 4.200737720207587e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.416606903076172, "logits/rejected": -2.3697075843811035, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2361, "nll_loss": 1.0386137962341309, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5720 }, { "epoch": 1.5011789363374377, "grad_norm": 0.5235869288444519, "learning_rate": 4.189437268495663e-06, "log_odds_chosen": 0.6813857555389404, "log_odds_ratio": -0.5091375112533569, "logits/chosen": -2.4099841117858887, "logits/rejected": -2.311011791229248, "logps/chosen": -0.6939648389816284, "logps/rejected": -1.0519921779632568, "loss": 1.1713, "nll_loss": 1.021719217300415, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.208189457654953, "rewards/margins": 0.10740820318460464, "rewards/rejected": -0.31559768319129944, "step": 5730 }, { "epoch": 1.5037987948650773, "grad_norm": 0.7007103562355042, "learning_rate": 4.178135301230017e-06, "log_odds_chosen": 0.4555930197238922, "log_odds_ratio": -0.6286441087722778, "logits/chosen": -2.397502899169922, "logits/rejected": -2.332331895828247, "logps/chosen": -0.7889333963394165, "logps/rejected": -1.0123064517974854, "loss": 1.1707, "nll_loss": 1.072936773300171, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2366800308227539, "rewards/margins": 0.06701190769672394, "rewards/rejected": -0.30369195342063904, "step": 5740 }, { "epoch": 1.5064186533927169, "grad_norm": 0.7393710613250732, "learning_rate": 4.166831908829703e-06, "log_odds_chosen": 0.48530226945877075, "log_odds_ratio": -0.5630481839179993, "logits/chosen": -2.4751031398773193, "logits/rejected": -2.3955414295196533, "logps/chosen": -0.7348467111587524, "logps/rejected": -1.0008395910263062, "loss": 1.1279, "nll_loss": 0.9472218751907349, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22045402228832245, "rewards/margins": 0.07979784905910492, "rewards/rejected": -0.30025190114974976, "step": 5750 }, { "epoch": 1.5090385119203562, "grad_norm": 0.9899107813835144, "learning_rate": 4.155527181725178e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.45513916015625, "logits/rejected": -2.4146530628204346, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2746, "nll_loss": 1.0815412998199463, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5760 }, { "epoch": 1.5116583704479958, "grad_norm": 1.4502196311950684, "learning_rate": 4.144221210357575e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.364431858062744, "logits/rejected": -2.306673526763916, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3256, "nll_loss": 1.0396257638931274, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5770 }, { "epoch": 1.5142782289756354, "grad_norm": NaN, "learning_rate": 4.1329140851779794e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3533847332000732, "logits/rejected": -2.2847626209259033, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2315, "nll_loss": 1.001805305480957, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5780 }, { "epoch": 1.5168980875032747, "grad_norm": 0.9413073658943176, "learning_rate": 4.121605896646711e-06, "log_odds_chosen": 0.33956173062324524, "log_odds_ratio": -0.6491349935531616, "logits/chosen": -2.3733296394348145, "logits/rejected": -2.3022263050079346, "logps/chosen": -0.7486525774002075, "logps/rejected": -0.9595144987106323, "loss": 1.1876, "nll_loss": 1.0022284984588623, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2245957851409912, "rewards/margins": 0.0632585734128952, "rewards/rejected": -0.2878543734550476, "step": 5790 }, { "epoch": 1.5195179460309143, "grad_norm": 1.5874128341674805, "learning_rate": 4.110296735232596e-06, "log_odds_chosen": 0.32506659626960754, "log_odds_ratio": -0.677202582359314, "logits/chosen": -2.4510602951049805, "logits/rejected": -2.415235996246338, "logps/chosen": -0.7791822552680969, "logps/rejected": -0.988020122051239, "loss": 1.1996, "nll_loss": 1.0092436075210571, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23375466465950012, "rewards/margins": 0.06265141069889069, "rewards/rejected": -0.296406090259552, "step": 5800 }, { "epoch": 1.5221378045585539, "grad_norm": 0.8349314332008362, "learning_rate": 4.0989866914122435e-06, "log_odds_chosen": 0.7311462759971619, "log_odds_ratio": -0.5172233581542969, "logits/chosen": -2.3554909229278564, "logits/rejected": -2.273383378982544, "logps/chosen": -0.7170080542564392, "logps/rejected": -1.166775107383728, "loss": 1.1746, "nll_loss": 0.9888837933540344, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2151024341583252, "rewards/margins": 0.1349301040172577, "rewards/rejected": -0.3500325381755829, "step": 5810 }, { "epoch": 1.5247576630861932, "grad_norm": 0.7036364078521729, "learning_rate": 4.087675855669321e-06, "log_odds_chosen": 0.17683088779449463, "log_odds_ratio": -0.673678994178772, "logits/chosen": -2.4675686359405518, "logits/rejected": -2.467219591140747, "logps/chosen": -0.7808110117912292, "logps/rejected": -0.8778573870658875, "loss": 1.2773, "nll_loss": 1.0607216358184814, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23424330353736877, "rewards/margins": 0.029113929718732834, "rewards/rejected": -0.2633572220802307, "step": 5820 }, { "epoch": 1.527377521613833, "grad_norm": 1.044055461883545, "learning_rate": 4.076364318493833e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4566075801849365, "logits/rejected": -2.371917247772217, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1992, "nll_loss": 0.955036461353302, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5830 }, { "epoch": 1.5299973801414724, "grad_norm": 0.6240733861923218, "learning_rate": 4.0650521703813946e-06, "log_odds_chosen": 0.39215242862701416, "log_odds_ratio": -0.6038270592689514, "logits/chosen": -2.402052164077759, "logits/rejected": -2.426231861114502, "logps/chosen": -0.6982978582382202, "logps/rejected": -0.9223588705062866, "loss": 1.1652, "nll_loss": 1.0054436922073364, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2094893455505371, "rewards/margins": 0.06721832603216171, "rewards/rejected": -0.2767077088356018, "step": 5840 }, { "epoch": 1.5326172386691117, "grad_norm": 1.6955559253692627, "learning_rate": 4.05373950183251e-06, "log_odds_chosen": 0.27639341354370117, "log_odds_ratio": -0.6730767488479614, "logits/chosen": -2.415717601776123, "logits/rejected": -2.3453452587127686, "logps/chosen": -0.7197058200836182, "logps/rejected": -0.8359025120735168, "loss": 1.1797, "nll_loss": 1.0020372867584229, "rewards/accuracies": 0.625, "rewards/chosen": -0.21591177582740784, "rewards/margins": 0.034858979284763336, "rewards/rejected": -0.2507707476615906, "step": 5850 }, { "epoch": 1.5352370971967515, "grad_norm": 0.7922465205192566, "learning_rate": 4.042426403351845e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.45566725730896, "logits/rejected": -2.43271803855896, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2756, "nll_loss": 1.0195497274398804, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5860 }, { "epoch": 1.5378569557243909, "grad_norm": 0.9240489602088928, "learning_rate": 4.0311129654475076e-06, "log_odds_chosen": 0.45750293135643005, "log_odds_ratio": -0.5985121726989746, "logits/chosen": -2.3499133586883545, "logits/rejected": -2.352663516998291, "logps/chosen": -0.7686606645584106, "logps/rejected": -1.0618098974227905, "loss": 1.221, "nll_loss": 1.036934733390808, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23059818148612976, "rewards/margins": 0.08794478327035904, "rewards/rejected": -0.3185429871082306, "step": 5870 }, { "epoch": 1.5404768142520304, "grad_norm": 0.5164545774459839, "learning_rate": 4.01979927863032e-06, "log_odds_chosen": 0.28389543294906616, "log_odds_ratio": -0.6285902261734009, "logits/chosen": -2.473706007003784, "logits/rejected": -2.4658374786376953, "logps/chosen": -0.7244846820831299, "logps/rejected": -0.8700612783432007, "loss": 1.1838, "nll_loss": 1.0215680599212646, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2173454314470291, "rewards/margins": 0.04367298632860184, "rewards/rejected": -0.26101839542388916, "step": 5880 }, { "epoch": 1.54309667277967, "grad_norm": 1.3488552570343018, "learning_rate": 4.008485433413094e-06, "log_odds_chosen": 0.35579928755760193, "log_odds_ratio": -0.6589492559432983, "logits/chosen": -2.4476184844970703, "logits/rejected": -2.4278388023376465, "logps/chosen": -0.8010204434394836, "logps/rejected": -1.0141433477401733, "loss": 1.1996, "nll_loss": 0.9611210823059082, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.24030616879463196, "rewards/margins": 0.06393691152334213, "rewards/rejected": -0.3042430281639099, "step": 5890 }, { "epoch": 1.5457165313073093, "grad_norm": NaN, "learning_rate": 3.997171520309912e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4348056316375732, "logits/rejected": -2.3237717151641846, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2478, "nll_loss": 1.0702065229415894, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5900 }, { "epoch": 1.548336389834949, "grad_norm": 1.2898962497711182, "learning_rate": 3.985857629835398e-06, "log_odds_chosen": 0.66201251745224, "log_odds_ratio": -0.5317898988723755, "logits/chosen": -2.2403149604797363, "logits/rejected": -2.2536842823028564, "logps/chosen": -0.748397707939148, "logps/rejected": -1.1546473503112793, "loss": 1.1397, "nll_loss": 1.017835259437561, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2245192974805832, "rewards/margins": 0.12187491357326508, "rewards/rejected": -0.34639424085617065, "step": 5910 }, { "epoch": 1.5509562483625885, "grad_norm": 1.269016981124878, "learning_rate": 3.974543852503992e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.384793281555176, "logits/rejected": -2.29872727394104, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2182, "nll_loss": 1.0491408109664917, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5920 }, { "epoch": 1.5535761068902278, "grad_norm": 0.8282928466796875, "learning_rate": 3.9632302788292355e-06, "log_odds_chosen": 0.5040421485900879, "log_odds_ratio": -0.6023513674736023, "logits/chosen": -2.4477410316467285, "logits/rejected": -2.4162304401397705, "logps/chosen": -0.7097965478897095, "logps/rejected": -0.9652647972106934, "loss": 1.2206, "nll_loss": 1.0607261657714844, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21293897926807404, "rewards/margins": 0.07664050161838531, "rewards/rejected": -0.28957948088645935, "step": 5930 }, { "epoch": 1.5561959654178674, "grad_norm": 2.14642333984375, "learning_rate": 3.951916999323034e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4726364612579346, "logits/rejected": -2.380606174468994, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2456, "nll_loss": 0.9194933772087097, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5940 }, { "epoch": 1.558815823945507, "grad_norm": 0.8636308908462524, "learning_rate": 3.940604104494945e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.393198251724243, "logits/rejected": -2.320706367492676, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.168, "nll_loss": 1.0208678245544434, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5950 }, { "epoch": 1.5614356824731463, "grad_norm": 0.5564221739768982, "learning_rate": 3.929291684851443e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.389420986175537, "logits/rejected": -2.423875093460083, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2009, "nll_loss": 1.0351238250732422, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 5960 }, { "epoch": 1.564055541000786, "grad_norm": 0.8435341119766235, "learning_rate": 3.917979830895205e-06, "log_odds_chosen": 0.49435049295425415, "log_odds_ratio": -0.5817097425460815, "logits/chosen": -2.4690699577331543, "logits/rejected": -2.4116077423095703, "logps/chosen": -0.6850340366363525, "logps/rejected": -0.9777368307113647, "loss": 1.1783, "nll_loss": 1.0011721849441528, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2055102288722992, "rewards/margins": 0.08781085163354874, "rewards/rejected": -0.29332104325294495, "step": 5970 }, { "epoch": 1.5666753995284255, "grad_norm": 1.0007085800170898, "learning_rate": 3.906668633124381e-06, "log_odds_chosen": 0.6309979557991028, "log_odds_ratio": -0.5775954723358154, "logits/chosen": -2.4478096961975098, "logits/rejected": -2.4020302295684814, "logps/chosen": -0.8316677212715149, "logps/rejected": -1.2230148315429688, "loss": 1.2804, "nll_loss": 1.1336445808410645, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2495003044605255, "rewards/margins": 0.11740412563085556, "rewards/rejected": -0.36690443754196167, "step": 5980 }, { "epoch": 1.5692952580560648, "grad_norm": 0.7393476366996765, "learning_rate": 3.895358182031873e-06, "log_odds_chosen": 0.5744069814682007, "log_odds_ratio": -0.5631635785102844, "logits/chosen": -2.536099910736084, "logits/rejected": -2.4198031425476074, "logps/chosen": -0.7470074892044067, "logps/rejected": -1.1177363395690918, "loss": 1.1575, "nll_loss": 0.9562535285949707, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22410225868225098, "rewards/margins": 0.11121867597103119, "rewards/rejected": -0.33532091975212097, "step": 5990 }, { "epoch": 1.5719151165837046, "grad_norm": 0.6107606291770935, "learning_rate": 3.884048568104604e-06, "log_odds_chosen": 0.3918944001197815, "log_odds_ratio": -0.6408665180206299, "logits/chosen": -2.505794048309326, "logits/rejected": -2.4066402912139893, "logps/chosen": -0.7670705914497375, "logps/rejected": -1.0517598390579224, "loss": 1.1511, "nll_loss": 0.9587279558181763, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2301211655139923, "rewards/margins": 0.08540678024291992, "rewards/rejected": -0.3155279755592346, "step": 6000 }, { "epoch": 1.574534975111344, "grad_norm": 1.1238287687301636, "learning_rate": 3.8727398818228065e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.475957155227661, "logits/rejected": -2.4124057292938232, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2284, "nll_loss": 0.9872648119926453, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6010 }, { "epoch": 1.5771548336389833, "grad_norm": 0.9552204012870789, "learning_rate": 3.861432213659286e-06, "log_odds_chosen": 0.3674893379211426, "log_odds_ratio": -0.6453391313552856, "logits/chosen": -2.397127628326416, "logits/rejected": -2.3540725708007812, "logps/chosen": -0.8223768472671509, "logps/rejected": -1.0367498397827148, "loss": 1.1576, "nll_loss": 0.9568025469779968, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2467130720615387, "rewards/margins": 0.0643119290471077, "rewards/rejected": -0.3110249638557434, "step": 6020 }, { "epoch": 1.5797746921666231, "grad_norm": 0.9253653883934021, "learning_rate": 3.850125654078708e-06, "log_odds_chosen": 0.4108589291572571, "log_odds_ratio": -0.6486179232597351, "logits/chosen": -2.4277961254119873, "logits/rejected": -2.428398847579956, "logps/chosen": -0.8508657217025757, "logps/rejected": -1.0913779735565186, "loss": 1.2317, "nll_loss": 1.0663704872131348, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.25525975227355957, "rewards/margins": 0.07215368747711182, "rewards/rejected": -0.327413409948349, "step": 6030 }, { "epoch": 1.5823945506942625, "grad_norm": 0.6138676404953003, "learning_rate": 3.8388202935368616e-06, "log_odds_chosen": 0.3522074222564697, "log_odds_ratio": -0.6284127235412598, "logits/chosen": -2.464743137359619, "logits/rejected": -2.3975300788879395, "logps/chosen": -0.7408307790756226, "logps/rejected": -0.9335058927536011, "loss": 1.2121, "nll_loss": 1.0174435377120972, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22224923968315125, "rewards/margins": 0.05780254676938057, "rewards/rejected": -0.2800517976284027, "step": 6040 }, { "epoch": 1.585014409221902, "grad_norm": 0.8565660119056702, "learning_rate": 3.827516222479952e-06, "log_odds_chosen": 0.3548716902732849, "log_odds_ratio": -0.6456078886985779, "logits/chosen": -2.375278949737549, "logits/rejected": -2.306056499481201, "logps/chosen": -0.765622615814209, "logps/rejected": -0.9663844108581543, "loss": 1.155, "nll_loss": 0.9368178248405457, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22968681156635284, "rewards/margins": 0.060228537768125534, "rewards/rejected": -0.2899153530597687, "step": 6050 }, { "epoch": 1.5876342677495416, "grad_norm": 1.118617296218872, "learning_rate": 3.81621353134386e-06, "log_odds_chosen": 0.12105727195739746, "log_odds_ratio": -0.7198285460472107, "logits/chosen": -2.385141134262085, "logits/rejected": -2.3370063304901123, "logps/chosen": -0.8246275782585144, "logps/rejected": -0.887412428855896, "loss": 1.2602, "nll_loss": 1.0599610805511475, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2473883181810379, "rewards/margins": 0.01883544586598873, "rewards/rejected": -0.2662237286567688, "step": 6060 }, { "epoch": 1.590254126277181, "grad_norm": 0.8472238183021545, "learning_rate": 3.804912310553434e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.427402973175049, "logits/rejected": -2.351914882659912, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2097, "nll_loss": 0.97773277759552, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6070 }, { "epoch": 1.5928739848048206, "grad_norm": 1.477189302444458, "learning_rate": 3.7936126505217527e-06, "log_odds_chosen": 0.5675028562545776, "log_odds_ratio": -0.5505374073982239, "logits/chosen": -2.4650697708129883, "logits/rejected": -2.4334378242492676, "logps/chosen": -0.7141626477241516, "logps/rejected": -1.0379524230957031, "loss": 1.1714, "nll_loss": 1.0117725133895874, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21424880623817444, "rewards/margins": 0.09713692218065262, "rewards/rejected": -0.31138572096824646, "step": 6080 }, { "epoch": 1.5954938433324601, "grad_norm": 1.3021999597549438, "learning_rate": 3.7823146416494136e-06, "log_odds_chosen": 0.43630900979042053, "log_odds_ratio": -0.6063941717147827, "logits/chosen": -2.4580063819885254, "logits/rejected": -2.384783983230591, "logps/chosen": -0.7304325103759766, "logps/rejected": -0.9883109927177429, "loss": 1.0998, "nll_loss": 0.9053698778152466, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2191297560930252, "rewards/margins": 0.07736356556415558, "rewards/rejected": -0.2964933216571808, "step": 6090 }, { "epoch": 1.5981137018600995, "grad_norm": 1.1132816076278687, "learning_rate": 3.7710183743238003e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3338537216186523, "logits/rejected": -2.329974412918091, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2415, "nll_loss": 1.0543345212936401, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6100 }, { "epoch": 1.600733560387739, "grad_norm": 1.0331331491470337, "learning_rate": 3.759723938918366e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.378486156463623, "logits/rejected": -2.402557373046875, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2239, "nll_loss": 1.01723313331604, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6110 }, { "epoch": 1.6033534189153786, "grad_norm": 1.2722296714782715, "learning_rate": 3.7484314257919067e-06, "log_odds_chosen": 0.4136427342891693, "log_odds_ratio": -0.5976285338401794, "logits/chosen": -2.4959394931793213, "logits/rejected": -2.413407325744629, "logps/chosen": -0.7959750890731812, "logps/rejected": -1.0478800535202026, "loss": 1.2286, "nll_loss": 1.0350730419158936, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2387925684452057, "rewards/margins": 0.07557149231433868, "rewards/rejected": -0.3143640160560608, "step": 6120 }, { "epoch": 1.605973277443018, "grad_norm": 0.6361857056617737, "learning_rate": 3.737140925287841e-06, "log_odds_chosen": 0.40517789125442505, "log_odds_ratio": -0.6569835543632507, "logits/chosen": -2.322185516357422, "logits/rejected": -2.2197515964508057, "logps/chosen": -0.8344414830207825, "logps/rejected": -1.110835313796997, "loss": 1.1959, "nll_loss": 1.0114325284957886, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2503325045108795, "rewards/margins": 0.08291812986135483, "rewards/rejected": -0.33325058221817017, "step": 6130 }, { "epoch": 1.6085931359706576, "grad_norm": 1.1310380697250366, "learning_rate": 3.725852527733483e-06, "log_odds_chosen": 0.7813963294029236, "log_odds_ratio": -0.516627848148346, "logits/chosen": -2.3583731651306152, "logits/rejected": -2.3365085124969482, "logps/chosen": -0.6674385666847229, "logps/rejected": -1.1292940378189087, "loss": 1.167, "nll_loss": 0.9783490300178528, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20023158192634583, "rewards/margins": 0.13855662941932678, "rewards/rejected": -0.3387882113456726, "step": 6140 }, { "epoch": 1.6112129944982971, "grad_norm": 1.0610793828964233, "learning_rate": 3.714566323439328e-06, "log_odds_chosen": 0.7315729856491089, "log_odds_ratio": -0.5701491832733154, "logits/chosen": -2.4803481101989746, "logits/rejected": -2.4405646324157715, "logps/chosen": -0.7608025670051575, "logps/rejected": -1.2227985858917236, "loss": 1.1843, "nll_loss": 0.9879032373428345, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22824080288410187, "rewards/margins": 0.13859878480434418, "rewards/rejected": -0.36683958768844604, "step": 6150 }, { "epoch": 1.6138328530259365, "grad_norm": NaN, "learning_rate": 3.7032824026983178e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.383418560028076, "logits/rejected": -2.386319398880005, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1462, "nll_loss": 0.9506675004959106, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6160 }, { "epoch": 1.616452711553576, "grad_norm": 0.9748279452323914, "learning_rate": 3.6920008557851316e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3217554092407227, "logits/rejected": -2.1462481021881104, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1804, "nll_loss": 0.9891242980957031, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6170 }, { "epoch": 1.6190725700812156, "grad_norm": 4.10261344909668, "learning_rate": 3.680721772955451e-06, "log_odds_chosen": 0.4911497235298157, "log_odds_ratio": -0.6619998216629028, "logits/chosen": -2.344756603240967, "logits/rejected": -2.284764528274536, "logps/chosen": -0.7634533643722534, "logps/rejected": -1.0724388360977173, "loss": 1.2325, "nll_loss": 1.0362259149551392, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22903604805469513, "rewards/margins": 0.09269563108682632, "rewards/rejected": -0.32173171639442444, "step": 6180 }, { "epoch": 1.621692428608855, "grad_norm": 0.7789308428764343, "learning_rate": 3.6694452444452515e-06, "log_odds_chosen": 0.48502570390701294, "log_odds_ratio": -0.6664021611213684, "logits/chosen": -2.2957470417022705, "logits/rejected": -2.3299431800842285, "logps/chosen": -0.7477073073387146, "logps/rejected": -1.0275934934616089, "loss": 1.171, "nll_loss": 0.9934831857681274, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2243122160434723, "rewards/margins": 0.08396584540605545, "rewards/rejected": -0.30827802419662476, "step": 6190 }, { "epoch": 1.6243122871364948, "grad_norm": 0.7142664194107056, "learning_rate": 3.658171360470065e-06, "log_odds_chosen": 0.4822072386741638, "log_odds_ratio": -0.6337543725967407, "logits/chosen": -2.4455630779266357, "logits/rejected": -2.3188540935516357, "logps/chosen": -0.7671526074409485, "logps/rejected": -1.0784368515014648, "loss": 1.2161, "nll_loss": 1.0031368732452393, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.23014584183692932, "rewards/margins": 0.0933852419257164, "rewards/rejected": -0.32353106141090393, "step": 6200 }, { "epoch": 1.6269321456641341, "grad_norm": 0.7610660791397095, "learning_rate": 3.646900211224274e-06, "log_odds_chosen": 0.5880210995674133, "log_odds_ratio": -0.6082598567008972, "logits/chosen": -2.3791415691375732, "logits/rejected": -2.2794244289398193, "logps/chosen": -0.7974375486373901, "logps/rejected": -1.1406627893447876, "loss": 1.2283, "nll_loss": 1.0850203037261963, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23923127353191376, "rewards/margins": 0.1029675230383873, "rewards/rejected": -0.34219878911972046, "step": 6210 }, { "epoch": 1.6295520041917735, "grad_norm": 0.9204304814338684, "learning_rate": 3.635631886880377e-06, "log_odds_chosen": 0.3050144910812378, "log_odds_ratio": -0.6533457636833191, "logits/chosen": -2.39023756980896, "logits/rejected": -2.3429079055786133, "logps/chosen": -0.7827911972999573, "logps/rejected": -0.9678739309310913, "loss": 1.2511, "nll_loss": 1.07235848903656, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2348373681306839, "rewards/margins": 0.05552482604980469, "rewards/rejected": -0.2903622090816498, "step": 6220 }, { "epoch": 1.6321718627194133, "grad_norm": 2.5517125129699707, "learning_rate": 3.6243664775882748e-06, "log_odds_chosen": 0.4103999137878418, "log_odds_ratio": -0.6181532740592957, "logits/chosen": -2.3555502891540527, "logits/rejected": -2.3086800575256348, "logps/chosen": -0.767102837562561, "logps/rejected": -1.0355796813964844, "loss": 1.1691, "nll_loss": 1.0424261093139648, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2301308661699295, "rewards/margins": 0.08054305613040924, "rewards/rejected": -0.31067386269569397, "step": 6230 }, { "epoch": 1.6347917212470526, "grad_norm": 0.8504154086112976, "learning_rate": 3.6131040734745464e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4001963138580322, "logits/rejected": -2.3225629329681396, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1213, "nll_loss": 0.9501116871833801, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6240 }, { "epoch": 1.6374115797746922, "grad_norm": 0.7272464632987976, "learning_rate": 3.6018447646417284e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.296221971511841, "logits/rejected": -2.1961820125579834, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.174, "nll_loss": 0.9293657541275024, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6250 }, { "epoch": 1.6400314383023318, "grad_norm": 0.8937414884567261, "learning_rate": 3.5905886411675962e-06, "log_odds_chosen": 0.7105098962783813, "log_odds_ratio": -0.548377275466919, "logits/chosen": -2.2796742916107178, "logits/rejected": -2.2234253883361816, "logps/chosen": -0.703691840171814, "logps/rejected": -1.1499989032745361, "loss": 1.1223, "nll_loss": 1.0366276502609253, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21110758185386658, "rewards/margins": 0.13389214873313904, "rewards/rejected": -0.3449997305870056, "step": 6260 }, { "epoch": 1.6426512968299711, "grad_norm": 0.6537105441093445, "learning_rate": 3.5793357931044365e-06, "log_odds_chosen": 0.2889330983161926, "log_odds_ratio": -0.6874335408210754, "logits/chosen": -2.260418653488159, "logits/rejected": -2.22468638420105, "logps/chosen": -0.7978725433349609, "logps/rejected": -0.9951356053352356, "loss": 1.2135, "nll_loss": 1.0822842121124268, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23936176300048828, "rewards/margins": 0.05917893722653389, "rewards/rejected": -0.2985406816005707, "step": 6270 }, { "epoch": 1.6452711553576107, "grad_norm": 1.2999050617218018, "learning_rate": 3.568086310478339e-06, "log_odds_chosen": 0.5609639883041382, "log_odds_ratio": -0.6819075345993042, "logits/chosen": -2.2616524696350098, "logits/rejected": -2.213780164718628, "logps/chosen": -0.8205879330635071, "logps/rejected": -1.1896730661392212, "loss": 1.2318, "nll_loss": 1.1423786878585815, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.24617640674114227, "rewards/margins": 0.11072554439306259, "rewards/rejected": -0.3569019138813019, "step": 6280 }, { "epoch": 1.6478910138852503, "grad_norm": 0.7002812623977661, "learning_rate": 3.55684028328846e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.295775890350342, "logits/rejected": -2.279738664627075, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1921, "nll_loss": 0.9820319414138794, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6290 }, { "epoch": 1.6505108724128896, "grad_norm": 1.135003924369812, "learning_rate": 3.545597801506321e-06, "log_odds_chosen": 0.3278753459453583, "log_odds_ratio": -0.684370756149292, "logits/chosen": -2.329282283782959, "logits/rejected": -2.3369967937469482, "logps/chosen": -0.8004878759384155, "logps/rejected": -1.0081490278244019, "loss": 1.2479, "nll_loss": 1.0501327514648438, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24014635384082794, "rewards/margins": 0.062298376113176346, "rewards/rejected": -0.3024447560310364, "step": 6300 }, { "epoch": 1.6531307309405292, "grad_norm": 0.8920988440513611, "learning_rate": 3.5343589550750703e-06, "log_odds_chosen": 0.5853239893913269, "log_odds_ratio": -0.5934478044509888, "logits/chosen": -2.354022741317749, "logits/rejected": -2.320906639099121, "logps/chosen": -0.7394348382949829, "logps/rejected": -1.1113550662994385, "loss": 1.2324, "nll_loss": 1.047239899635315, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22183048725128174, "rewards/margins": 0.11157606542110443, "rewards/rejected": -0.333406537771225, "step": 6310 }, { "epoch": 1.6557505894681688, "grad_norm": 0.7120447754859924, "learning_rate": 3.5231238339087797e-06, "log_odds_chosen": 0.3921879231929779, "log_odds_ratio": -0.6286450028419495, "logits/chosen": -2.371170997619629, "logits/rejected": -2.216235876083374, "logps/chosen": -0.7550754547119141, "logps/rejected": -1.0009901523590088, "loss": 1.1158, "nll_loss": 0.9442843198776245, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.22652265429496765, "rewards/margins": 0.07377444207668304, "rewards/rejected": -0.3002970814704895, "step": 6320 }, { "epoch": 1.6583704479958081, "grad_norm": 0.6975628733634949, "learning_rate": 3.5118925278917117e-06, "log_odds_chosen": 0.5727685689926147, "log_odds_ratio": -0.5595538020133972, "logits/chosen": -2.2726268768310547, "logits/rejected": -2.2030229568481445, "logps/chosen": -0.7047120332717896, "logps/rejected": -1.050318717956543, "loss": 1.1651, "nll_loss": 0.9577640295028687, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21141362190246582, "rewards/margins": 0.1036820039153099, "rewards/rejected": -0.31509560346603394, "step": 6330 }, { "epoch": 1.6609903065234477, "grad_norm": 0.7310910224914551, "learning_rate": 3.50066512687761e-06, "log_odds_chosen": 0.3725420832633972, "log_odds_ratio": -0.607479453086853, "logits/chosen": -2.269491672515869, "logits/rejected": -2.1959457397460938, "logps/chosen": -0.6918579936027527, "logps/rejected": -0.9175349473953247, "loss": 1.0645, "nll_loss": 0.8868710398674011, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20755741000175476, "rewards/margins": 0.06770307570695877, "rewards/rejected": -0.2752605080604553, "step": 6340 }, { "epoch": 1.6636101650510873, "grad_norm": 0.7783880233764648, "learning_rate": 3.489441720688977e-06, "log_odds_chosen": 0.49550923705101013, "log_odds_ratio": -0.5881508588790894, "logits/chosen": -2.3945648670196533, "logits/rejected": -2.3629584312438965, "logps/chosen": -0.7244151830673218, "logps/rejected": -1.0098795890808105, "loss": 1.1884, "nll_loss": 0.9577997326850891, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21732454001903534, "rewards/margins": 0.08563931286334991, "rewards/rejected": -0.30296391248703003, "step": 6350 }, { "epoch": 1.6662300235787266, "grad_norm": 0.8528424501419067, "learning_rate": 3.4782223991163557e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3745059967041016, "logits/rejected": -2.228710412979126, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1211, "nll_loss": 1.0015404224395752, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6360 }, { "epoch": 1.6688498821063664, "grad_norm": 0.6544032692909241, "learning_rate": 3.4670072519176058e-06, "log_odds_chosen": 0.5126922130584717, "log_odds_ratio": -0.596053957939148, "logits/chosen": -2.289463520050049, "logits/rejected": -2.220689535140991, "logps/chosen": -0.8071084022521973, "logps/rejected": -1.0977959632873535, "loss": 1.1461, "nll_loss": 0.9869711995124817, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2421325147151947, "rewards/margins": 0.08720628917217255, "rewards/rejected": -0.32933884859085083, "step": 6370 }, { "epoch": 1.6714697406340058, "grad_norm": 0.6766595840454102, "learning_rate": 3.4557963688172e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.257631778717041, "logits/rejected": -2.1662447452545166, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1275, "nll_loss": 0.9073723554611206, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6380 }, { "epoch": 1.6740895991616451, "grad_norm": NaN, "learning_rate": 3.4445898395054878e-06, "log_odds_chosen": 0.60215824842453, "log_odds_ratio": -0.5270574688911438, "logits/chosen": -2.311774730682373, "logits/rejected": -2.349231719970703, "logps/chosen": -0.7534734010696411, "logps/rejected": -1.1204513311386108, "loss": 1.2104, "nll_loss": 1.0359408855438232, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22604206204414368, "rewards/margins": 0.1100933700799942, "rewards/rejected": -0.3361354470252991, "step": 6390 }, { "epoch": 1.676709457689285, "grad_norm": 0.7829771637916565, "learning_rate": 3.433387753637995e-06, "log_odds_chosen": 0.22522762417793274, "log_odds_ratio": -0.739223301410675, "logits/chosen": -2.362504720687866, "logits/rejected": -2.2293152809143066, "logps/chosen": -0.7580915689468384, "logps/rejected": -0.952072262763977, "loss": 1.1835, "nll_loss": 0.987299919128418, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.22742751240730286, "rewards/margins": 0.05819417163729668, "rewards/rejected": -0.285621702671051, "step": 6400 }, { "epoch": 1.6793293162169243, "grad_norm": 1.1405155658721924, "learning_rate": 3.42219020083469e-06, "log_odds_chosen": 0.5307233929634094, "log_odds_ratio": -0.6090705394744873, "logits/chosen": -2.2317373752593994, "logits/rejected": -2.1718804836273193, "logps/chosen": -0.7267665266990662, "logps/rejected": -1.0924227237701416, "loss": 1.2162, "nll_loss": 1.0200742483139038, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21802997589111328, "rewards/margins": 0.1096968874335289, "rewards/rejected": -0.3277268409729004, "step": 6410 }, { "epoch": 1.6819491747445638, "grad_norm": 0.8956800699234009, "learning_rate": 3.4109972706792854e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.172436237335205, "logits/rejected": -2.1902947425842285, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2536, "nll_loss": 1.0303465127944946, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6420 }, { "epoch": 1.6845690332722034, "grad_norm": 0.7158610224723816, "learning_rate": 3.399809052718502e-06, "log_odds_chosen": 0.5530306100845337, "log_odds_ratio": -0.5529865026473999, "logits/chosen": -2.2133121490478516, "logits/rejected": -2.254706859588623, "logps/chosen": -0.750544011592865, "logps/rejected": -1.0760347843170166, "loss": 1.2068, "nll_loss": 1.0315442085266113, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22516325116157532, "rewards/margins": 0.09764722734689713, "rewards/rejected": -0.32281047105789185, "step": 6430 }, { "epoch": 1.6871888917998428, "grad_norm": 0.9278326630592346, "learning_rate": 3.388625636461368e-06, "log_odds_chosen": 0.38563862442970276, "log_odds_ratio": -0.6210590600967407, "logits/chosen": -2.3743011951446533, "logits/rejected": -2.2487475872039795, "logps/chosen": -0.7700831294059753, "logps/rejected": -1.0305745601654053, "loss": 1.2423, "nll_loss": 1.0432674884796143, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23102495074272156, "rewards/margins": 0.07814739644527435, "rewards/rejected": -0.3091723322868347, "step": 6440 }, { "epoch": 1.6898087503274823, "grad_norm": 0.9581519961357117, "learning_rate": 3.3774471113784915e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2551276683807373, "logits/rejected": -2.230548620223999, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2099, "nll_loss": 0.97411048412323, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6450 }, { "epoch": 1.692428608855122, "grad_norm": 1.195448637008667, "learning_rate": 3.3662735669013565e-06, "log_odds_chosen": 0.375348836183548, "log_odds_ratio": -0.6269182562828064, "logits/chosen": -2.3063530921936035, "logits/rejected": -2.270944833755493, "logps/chosen": -0.7412567138671875, "logps/rejected": -0.9901641607284546, "loss": 1.1834, "nll_loss": 1.0193015336990356, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22237703204154968, "rewards/margins": 0.07467224448919296, "rewards/rejected": -0.29704928398132324, "step": 6460 }, { "epoch": 1.6950484673827613, "grad_norm": 0.7351571917533875, "learning_rate": 3.355105092421593e-06, "log_odds_chosen": 0.4668746888637543, "log_odds_ratio": -0.6065074801445007, "logits/chosen": -2.362496852874756, "logits/rejected": -2.2971670627593994, "logps/chosen": -0.7529791593551636, "logps/rejected": -1.045400619506836, "loss": 1.2002, "nll_loss": 1.048012137413025, "rewards/accuracies": 0.625, "rewards/chosen": -0.2258937656879425, "rewards/margins": 0.0877264216542244, "rewards/rejected": -0.3136201798915863, "step": 6470 }, { "epoch": 1.6976683259104008, "grad_norm": 1.1187652349472046, "learning_rate": 3.343941777290275e-06, "log_odds_chosen": 0.42323631048202515, "log_odds_ratio": -0.6275444626808167, "logits/chosen": -2.331284999847412, "logits/rejected": -2.269507646560669, "logps/chosen": -0.6962444186210632, "logps/rejected": -0.9085853695869446, "loss": 1.2211, "nll_loss": 1.0354540348052979, "rewards/accuracies": 0.5625, "rewards/chosen": -0.20887334644794464, "rewards/margins": 0.06370227038860321, "rewards/rejected": -0.27257561683654785, "step": 6480 }, { "epoch": 1.7002881844380404, "grad_norm": 0.8490583300590515, "learning_rate": 3.332783710817197e-06, "log_odds_chosen": 0.5958862900733948, "log_odds_ratio": -0.5679258704185486, "logits/chosen": -2.25551176071167, "logits/rejected": -2.149019718170166, "logps/chosen": -0.708450973033905, "logps/rejected": -1.1002769470214844, "loss": 1.1557, "nll_loss": 1.0008022785186768, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2125353068113327, "rewards/margins": 0.11754777282476425, "rewards/rejected": -0.33008313179016113, "step": 6490 }, { "epoch": 1.7029080429656798, "grad_norm": NaN, "learning_rate": 3.3216309822701667e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1508946418762207, "logits/rejected": -2.0659499168395996, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3028, "nll_loss": 0.9965121150016785, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6500 }, { "epoch": 1.7055279014933193, "grad_norm": 1.2870737314224243, "learning_rate": 3.3104836808742803e-06, "log_odds_chosen": 0.4263274073600769, "log_odds_ratio": -0.6014240980148315, "logits/chosen": -2.2421059608459473, "logits/rejected": -2.2357077598571777, "logps/chosen": -0.6788707971572876, "logps/rejected": -0.9084952473640442, "loss": 1.1748, "nll_loss": 0.9584276080131531, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2036612331867218, "rewards/margins": 0.06888733804225922, "rewards/rejected": -0.2725485563278198, "step": 6510 }, { "epoch": 1.708147760020959, "grad_norm": 1.1239455938339233, "learning_rate": 3.2993418958112233e-06, "log_odds_chosen": 0.5465539693832397, "log_odds_ratio": -0.5881921648979187, "logits/chosen": -2.173633098602295, "logits/rejected": -2.1550698280334473, "logps/chosen": -0.7187901735305786, "logps/rejected": -1.0582475662231445, "loss": 1.1464, "nll_loss": 0.9422969818115234, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.21563704311847687, "rewards/margins": 0.10183729231357574, "rewards/rejected": -0.3174743056297302, "step": 6520 }, { "epoch": 1.7107676185485983, "grad_norm": 1.1630749702453613, "learning_rate": 3.288205716218541e-06, "log_odds_chosen": 0.37476491928100586, "log_odds_ratio": -0.6473188996315002, "logits/chosen": -2.2372660636901855, "logits/rejected": -2.177471160888672, "logps/chosen": -0.7576078176498413, "logps/rejected": -1.0050045251846313, "loss": 1.1866, "nll_loss": 0.9996652603149414, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22728237509727478, "rewards/margins": 0.07421897351741791, "rewards/rejected": -0.3015013337135315, "step": 6530 }, { "epoch": 1.7133874770762378, "grad_norm": 1.1559101343154907, "learning_rate": 3.277075231188941e-06, "log_odds_chosen": 0.4482978880405426, "log_odds_ratio": -0.5876015424728394, "logits/chosen": -2.28401780128479, "logits/rejected": -2.2063045501708984, "logps/chosen": -0.7236659526824951, "logps/rejected": -1.0301755666732788, "loss": 1.1776, "nll_loss": 0.9600070714950562, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.21709981560707092, "rewards/margins": 0.09195290505886078, "rewards/rejected": -0.3090526759624481, "step": 6540 }, { "epoch": 1.7160073356038774, "grad_norm": 0.6277070045471191, "learning_rate": 3.2659505297695635e-06, "log_odds_chosen": 0.7539229393005371, "log_odds_ratio": -0.5260113477706909, "logits/chosen": -2.301644802093506, "logits/rejected": -2.2274527549743652, "logps/chosen": -0.6992179751396179, "logps/rejected": -1.1178562641143799, "loss": 1.162, "nll_loss": 0.9440162777900696, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20976538956165314, "rewards/margins": 0.12559150159358978, "rewards/rejected": -0.3353569209575653, "step": 6550 }, { "epoch": 1.7186271941315168, "grad_norm": 0.7298431396484375, "learning_rate": 3.254831700961289e-06, "log_odds_chosen": 0.5205257534980774, "log_odds_ratio": -0.616594672203064, "logits/chosen": -2.2746243476867676, "logits/rejected": -2.226409912109375, "logps/chosen": -0.7212196588516235, "logps/rejected": -1.101148247718811, "loss": 1.1605, "nll_loss": 0.9709869623184204, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.21636590361595154, "rewards/margins": 0.11397857964038849, "rewards/rejected": -0.3303444981575012, "step": 6560 }, { "epoch": 1.7212470526591566, "grad_norm": 1.1308634281158447, "learning_rate": 3.243718833718005e-06, "log_odds_chosen": 0.6605545878410339, "log_odds_ratio": -0.5390324592590332, "logits/chosen": -2.4154953956604004, "logits/rejected": -2.203551769256592, "logps/chosen": -0.7727235555648804, "logps/rejected": -1.2032467126846313, "loss": 1.1423, "nll_loss": 0.9885053634643555, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2318170815706253, "rewards/margins": 0.1291569173336029, "rewards/rejected": -0.3609740138053894, "step": 6570 }, { "epoch": 1.723866911186796, "grad_norm": 0.6704199314117432, "learning_rate": 3.2326120169459136e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2411022186279297, "logits/rejected": -2.1284408569335938, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2082, "nll_loss": 0.9967138171195984, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6580 }, { "epoch": 1.7264867697144353, "grad_norm": 0.7941656112670898, "learning_rate": 3.221511339502803e-06, "log_odds_chosen": 0.6527765393257141, "log_odds_ratio": -0.6470965147018433, "logits/chosen": -2.225978374481201, "logits/rejected": -2.1912052631378174, "logps/chosen": -0.7140629887580872, "logps/rejected": -1.1879582405090332, "loss": 1.2213, "nll_loss": 0.9985234141349792, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21421892940998077, "rewards/margins": 0.1421685814857483, "rewards/rejected": -0.35638752579689026, "step": 6590 }, { "epoch": 1.729106628242075, "grad_norm": 1.3023148775100708, "learning_rate": 3.2104168901973535e-06, "log_odds_chosen": 0.3226056694984436, "log_odds_ratio": -0.6682389974594116, "logits/chosen": -2.1701531410217285, "logits/rejected": -2.167208194732666, "logps/chosen": -0.8140000104904175, "logps/rejected": -1.026731014251709, "loss": 1.252, "nll_loss": 1.031699299812317, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.24420003592967987, "rewards/margins": 0.06381932646036148, "rewards/rejected": -0.30801934003829956, "step": 6600 }, { "epoch": 1.7317264867697144, "grad_norm": NaN, "learning_rate": 3.1993287577884115e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.236755609512329, "logits/rejected": -2.1901564598083496, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2231, "nll_loss": 1.0181955099105835, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6610 }, { "epoch": 1.734346345297354, "grad_norm": 0.6890396475791931, "learning_rate": 3.188247030984289e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2579498291015625, "logits/rejected": -2.2464613914489746, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1675, "nll_loss": 0.9939492344856262, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6620 }, { "epoch": 1.7369662038249936, "grad_norm": 0.9525524973869324, "learning_rate": 3.1771717984420528e-06, "log_odds_chosen": 0.24646663665771484, "log_odds_ratio": -0.6878327131271362, "logits/chosen": -2.3479771614074707, "logits/rejected": -2.2917308807373047, "logps/chosen": -0.8952921032905579, "logps/rejected": -1.0496872663497925, "loss": 1.1924, "nll_loss": 1.0006110668182373, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2685876488685608, "rewards/margins": 0.046318553388118744, "rewards/rejected": -0.31490620970726013, "step": 6630 }, { "epoch": 1.739586062352633, "grad_norm": 0.8323249816894531, "learning_rate": 3.1661031487668118e-06, "log_odds_chosen": 0.3241046965122223, "log_odds_ratio": -0.6486853957176208, "logits/chosen": -2.3477137088775635, "logits/rejected": -2.303598403930664, "logps/chosen": -0.802817702293396, "logps/rejected": -0.993054986000061, "loss": 1.1803, "nll_loss": 1.0021356344223022, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24084532260894775, "rewards/margins": 0.05707116797566414, "rewards/rejected": -0.2979165017604828, "step": 6640 }, { "epoch": 1.7422059208802725, "grad_norm": 0.688955545425415, "learning_rate": 3.1550411705110085e-06, "log_odds_chosen": 0.46539703011512756, "log_odds_ratio": -0.6490334272384644, "logits/chosen": -2.3209311962127686, "logits/rejected": -2.2130379676818848, "logps/chosen": -0.776705265045166, "logps/rejected": -1.0776326656341553, "loss": 1.1354, "nll_loss": 0.9193539619445801, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23301160335540771, "rewards/margins": 0.0902782455086708, "rewards/rejected": -0.32328981161117554, "step": 6650 }, { "epoch": 1.744825779407912, "grad_norm": 0.7260697484016418, "learning_rate": 3.1439859521737173e-06, "log_odds_chosen": 0.38034358620643616, "log_odds_ratio": -0.5873896479606628, "logits/chosen": -2.2985472679138184, "logits/rejected": -2.2744991779327393, "logps/chosen": -0.6880839467048645, "logps/rejected": -0.9148743748664856, "loss": 1.2217, "nll_loss": 0.9199172854423523, "rewards/accuracies": 0.625, "rewards/chosen": -0.20642521977424622, "rewards/margins": 0.06803710758686066, "rewards/rejected": -0.27446234226226807, "step": 6660 }, { "epoch": 1.7474456379355514, "grad_norm": 0.7359466552734375, "learning_rate": 3.1329375821999246e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.339416027069092, "logits/rejected": -2.2469916343688965, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1771, "nll_loss": 1.0115878582000732, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6670 }, { "epoch": 1.750065496463191, "grad_norm": 1.6156954765319824, "learning_rate": 3.121896148979833e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.187173366546631, "logits/rejected": -2.14278244972229, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2433, "nll_loss": 0.9784001111984253, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6680 }, { "epoch": 1.7526853549908306, "grad_norm": 1.5922880172729492, "learning_rate": 3.1108617408481496e-06, "log_odds_chosen": 0.3938189446926117, "log_odds_ratio": -0.6226946711540222, "logits/chosen": -2.287928342819214, "logits/rejected": -2.2009117603302, "logps/chosen": -0.7571663856506348, "logps/rejected": -0.9800394773483276, "loss": 1.2197, "nll_loss": 1.050803780555725, "rewards/accuracies": 0.5625, "rewards/chosen": -0.22714991867542267, "rewards/margins": 0.06686193495988846, "rewards/rejected": -0.2940118610858917, "step": 6690 }, { "epoch": 1.75530521351847, "grad_norm": 0.9683268070220947, "learning_rate": 3.0998344460833724e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2917752265930176, "logits/rejected": -2.15508770942688, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1603, "nll_loss": 0.9635206460952759, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6700 }, { "epoch": 1.7579250720461095, "grad_norm": 0.5453466773033142, "learning_rate": 3.0888143529070977e-06, "log_odds_chosen": 0.6834238767623901, "log_odds_ratio": -0.5879372358322144, "logits/chosen": -2.2778310775756836, "logits/rejected": -2.214627265930176, "logps/chosen": -0.6823738813400269, "logps/rejected": -1.0677812099456787, "loss": 1.243, "nll_loss": 1.0066688060760498, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2047121822834015, "rewards/margins": 0.11562222242355347, "rewards/rejected": -0.32033440470695496, "step": 6710 }, { "epoch": 1.760544930573749, "grad_norm": 1.5349467992782593, "learning_rate": 3.0778015494833018e-06, "log_odds_chosen": 0.4861537516117096, "log_odds_ratio": -0.6320363283157349, "logits/chosen": -2.4229989051818848, "logits/rejected": -2.34236478805542, "logps/chosen": -0.785082995891571, "logps/rejected": -1.1106232404708862, "loss": 1.2017, "nll_loss": 0.9796552658081055, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.23552489280700684, "rewards/margins": 0.097662054002285, "rewards/rejected": -0.33318695425987244, "step": 6720 }, { "epoch": 1.7631647891013884, "grad_norm": 0.9129669666290283, "learning_rate": 3.0667961239176433e-06, "log_odds_chosen": 0.5889455676078796, "log_odds_ratio": -0.5561032891273499, "logits/chosen": -2.329016923904419, "logits/rejected": -2.191868305206299, "logps/chosen": -0.7296756505966187, "logps/rejected": -1.0747191905975342, "loss": 1.2446, "nll_loss": 1.0545942783355713, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21890270709991455, "rewards/margins": 0.10351304709911346, "rewards/rejected": -0.3224157392978668, "step": 6730 }, { "epoch": 1.765784647629028, "grad_norm": 1.4453428983688354, "learning_rate": 3.0557981642567553e-06, "log_odds_chosen": 0.3799251914024353, "log_odds_ratio": -0.6356996893882751, "logits/chosen": -2.4228408336639404, "logits/rejected": -2.2255430221557617, "logps/chosen": -0.7685620784759521, "logps/rejected": -0.9782430529594421, "loss": 1.1974, "nll_loss": 0.9375076293945312, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.23056864738464355, "rewards/margins": 0.06290433555841446, "rewards/rejected": -0.29347294569015503, "step": 6740 }, { "epoch": 1.7684045061566676, "grad_norm": 0.6232451796531677, "learning_rate": 3.0448077584875425e-06, "log_odds_chosen": 0.24817411601543427, "log_odds_ratio": -0.7054853439331055, "logits/chosen": -2.2701821327209473, "logits/rejected": -2.2229182720184326, "logps/chosen": -0.8630892634391785, "logps/rejected": -0.9973052144050598, "loss": 1.1909, "nll_loss": 0.990777850151062, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2589268088340759, "rewards/margins": 0.040264781564474106, "rewards/rejected": -0.2991916239261627, "step": 6750 }, { "epoch": 1.771024364684307, "grad_norm": 1.3491566181182861, "learning_rate": 3.033824994536472e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2183072566986084, "logits/rejected": -2.0954301357269287, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1579, "nll_loss": 0.9655704498291016, "rewards/accuracies": 0.737500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6760 }, { "epoch": 1.7736442232119467, "grad_norm": 0.9653310179710388, "learning_rate": 3.022849960268881e-06, "log_odds_chosen": 0.367656946182251, "log_odds_ratio": -0.6720989942550659, "logits/chosen": -2.322944164276123, "logits/rejected": -2.1874661445617676, "logps/chosen": -0.8483086824417114, "logps/rejected": -1.109639286994934, "loss": 1.1787, "nll_loss": 1.0447022914886475, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2544926404953003, "rewards/margins": 0.07839914411306381, "rewards/rejected": -0.3328917324542999, "step": 6770 }, { "epoch": 1.776264081739586, "grad_norm": NaN, "learning_rate": 3.0118827434882604e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3625288009643555, "logits/rejected": -2.257436752319336, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1455, "nll_loss": 0.9488118886947632, "rewards/accuracies": 0.75, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6780 }, { "epoch": 1.7788839402672254, "grad_norm": 1.5954241752624512, "learning_rate": 3.0009234319355643e-06, "log_odds_chosen": 0.576341450214386, "log_odds_ratio": -0.5990161299705505, "logits/chosen": -2.379418134689331, "logits/rejected": -2.333085060119629, "logps/chosen": -0.8212820887565613, "logps/rejected": -1.2161452770233154, "loss": 1.1687, "nll_loss": 0.9847425222396851, "rewards/accuracies": 0.625, "rewards/chosen": -0.24638459086418152, "rewards/margins": 0.11845903098583221, "rewards/rejected": -0.3648436665534973, "step": 6790 }, { "epoch": 1.7815037987948652, "grad_norm": NaN, "learning_rate": 2.9899721132884974e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2814114093780518, "logits/rejected": -2.146989345550537, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1346, "nll_loss": 0.9581565856933594, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6800 }, { "epoch": 1.7841236573225046, "grad_norm": 0.7540439367294312, "learning_rate": 2.979028875160824e-06, "log_odds_chosen": 0.4175484776496887, "log_odds_ratio": -0.6445009112358093, "logits/chosen": -2.304530620574951, "logits/rejected": -2.2686991691589355, "logps/chosen": -0.7360900640487671, "logps/rejected": -0.98237144947052, "loss": 1.1353, "nll_loss": 0.9526556730270386, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22082705795764923, "rewards/margins": 0.07388441264629364, "rewards/rejected": -0.2947114408016205, "step": 6810 }, { "epoch": 1.7867435158501441, "grad_norm": 2.5776143074035645, "learning_rate": 2.968093805101657e-06, "log_odds_chosen": 0.7189489603042603, "log_odds_ratio": -0.5196324586868286, "logits/chosen": -2.137977361679077, "logits/rejected": -2.1734824180603027, "logps/chosen": -0.6681398153305054, "logps/rejected": -1.1064468622207642, "loss": 1.0873, "nll_loss": 0.8721854090690613, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.20044198632240295, "rewards/margins": 0.13149209320545197, "rewards/rejected": -0.3319340646266937, "step": 6820 }, { "epoch": 1.7893633743777837, "grad_norm": 0.9298581480979919, "learning_rate": 2.9571669905947676e-06, "log_odds_chosen": 0.2924830913543701, "log_odds_ratio": -0.6632274389266968, "logits/chosen": -2.3892862796783447, "logits/rejected": -2.288114070892334, "logps/chosen": -0.7936322093009949, "logps/rejected": -0.9823340177536011, "loss": 1.1911, "nll_loss": 1.068759560585022, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.23808971047401428, "rewards/margins": 0.056610554456710815, "rewards/rejected": -0.2947002351284027, "step": 6830 }, { "epoch": 1.791983232905423, "grad_norm": 1.3156569004058838, "learning_rate": 2.946248519057874e-06, "log_odds_chosen": 0.6429121494293213, "log_odds_ratio": -0.5551846027374268, "logits/chosen": -2.2253212928771973, "logits/rejected": -2.147435426712036, "logps/chosen": -0.7101195454597473, "logps/rejected": -1.1207340955734253, "loss": 1.1751, "nll_loss": 0.952113926410675, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21303585171699524, "rewards/margins": 0.12318436801433563, "rewards/rejected": -0.3362202048301697, "step": 6840 }, { "epoch": 1.7946030914330626, "grad_norm": 0.8928410410881042, "learning_rate": 2.9353384778419548e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.224086046218872, "logits/rejected": -2.2090001106262207, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2263, "nll_loss": 1.0280404090881348, "rewards/accuracies": 0.5625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6850 }, { "epoch": 1.7972229499607022, "grad_norm": 1.4003548622131348, "learning_rate": 2.924436954230538e-06, "log_odds_chosen": 0.5719331502914429, "log_odds_ratio": -0.591873824596405, "logits/chosen": -2.2771191596984863, "logits/rejected": -2.1807217597961426, "logps/chosen": -0.7863732576370239, "logps/rejected": -1.165257453918457, "loss": 1.2516, "nll_loss": 0.9793763160705566, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2359120398759842, "rewards/margins": 0.11366524547338486, "rewards/rejected": -0.34957727789878845, "step": 6860 }, { "epoch": 1.7998428084883416, "grad_norm": 0.7599306106567383, "learning_rate": 2.9135440354390116e-06, "log_odds_chosen": 0.5950825810432434, "log_odds_ratio": -0.5592339038848877, "logits/chosen": -2.248295307159424, "logits/rejected": -2.1958999633789062, "logps/chosen": -0.674095630645752, "logps/rejected": -1.0216007232666016, "loss": 1.144, "nll_loss": 0.8894790410995483, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.20222869515419006, "rewards/margins": 0.10425152629613876, "rewards/rejected": -0.3064802289009094, "step": 6870 }, { "epoch": 1.8024626670159811, "grad_norm": 1.2155282497406006, "learning_rate": 2.902659808613921e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.268092632293701, "logits/rejected": -2.192298412322998, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.217, "nll_loss": 0.9481989741325378, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6880 }, { "epoch": 1.8050825255436207, "grad_norm": 1.8137444257736206, "learning_rate": 2.8917843608322747e-06, "log_odds_chosen": 0.5596975088119507, "log_odds_ratio": -0.59568852186203, "logits/chosen": -2.2108044624328613, "logits/rejected": -2.157378673553467, "logps/chosen": -0.8066128492355347, "logps/rejected": -1.1774985790252686, "loss": 1.1753, "nll_loss": 1.0125099420547485, "rewards/accuracies": 0.625, "rewards/chosen": -0.24198384582996368, "rewards/margins": 0.11126569658517838, "rewards/rejected": -0.35324957966804504, "step": 6890 }, { "epoch": 1.80770238407126, "grad_norm": 0.7944884896278381, "learning_rate": 2.8809177791008427e-06, "log_odds_chosen": 0.4686200022697449, "log_odds_ratio": -0.6185899972915649, "logits/chosen": -2.2908568382263184, "logits/rejected": -2.191671133041382, "logps/chosen": -0.7328974008560181, "logps/rejected": -1.0347293615341187, "loss": 1.2088, "nll_loss": 1.0481358766555786, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2198692262172699, "rewards/margins": 0.09054963290691376, "rewards/rejected": -0.31041884422302246, "step": 6900 }, { "epoch": 1.8103222425988996, "grad_norm": 1.071787714958191, "learning_rate": 2.8700601503554696e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.163125514984131, "logits/rejected": -2.0256645679473877, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2197, "nll_loss": 0.9308420419692993, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6910 }, { "epoch": 1.8129421011265392, "grad_norm": 0.7302517890930176, "learning_rate": 2.859211561460368e-06, "log_odds_chosen": 0.4308759272098541, "log_odds_ratio": -0.6102745532989502, "logits/chosen": -2.3107173442840576, "logits/rejected": -2.2043020725250244, "logps/chosen": -0.7389687299728394, "logps/rejected": -1.0006009340286255, "loss": 1.1627, "nll_loss": 0.9221687316894531, "rewards/accuracies": 0.625, "rewards/chosen": -0.2216906100511551, "rewards/margins": 0.07848963886499405, "rewards/rejected": -0.30018025636672974, "step": 6920 }, { "epoch": 1.8155619596541785, "grad_norm": 1.1777688264846802, "learning_rate": 2.8483720992074344e-06, "log_odds_chosen": 0.6145451664924622, "log_odds_ratio": -0.6073742508888245, "logits/chosen": -2.2098164558410645, "logits/rejected": -2.184037208557129, "logps/chosen": -0.8031198382377625, "logps/rejected": -1.2395751476287842, "loss": 1.1615, "nll_loss": 0.9996545910835266, "rewards/accuracies": 0.625, "rewards/chosen": -0.24093595147132874, "rewards/margins": 0.1309366226196289, "rewards/rejected": -0.37187251448631287, "step": 6930 }, { "epoch": 1.8181818181818183, "grad_norm": 1.1147243976593018, "learning_rate": 2.837541850315544e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.225109815597534, "logits/rejected": -2.1955490112304688, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1393, "nll_loss": 0.9692218899726868, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 6940 }, { "epoch": 1.8208016767094577, "grad_norm": 0.7361018061637878, "learning_rate": 2.826720901429868e-06, "log_odds_chosen": 0.3654032051563263, "log_odds_ratio": -0.6539285182952881, "logits/chosen": -2.469818353652954, "logits/rejected": -2.3494763374328613, "logps/chosen": -0.7924243211746216, "logps/rejected": -1.0371134281158447, "loss": 1.202, "nll_loss": 0.9583717584609985, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.2377273142337799, "rewards/margins": 0.07340669631958008, "rewards/rejected": -0.31113401055336, "step": 6950 }, { "epoch": 1.823421535237097, "grad_norm": 1.332423448562622, "learning_rate": 2.8159093391211678e-06, "log_odds_chosen": 0.7002588510513306, "log_odds_ratio": -0.5509282350540161, "logits/chosen": -2.314791202545166, "logits/rejected": -2.292970657348633, "logps/chosen": -0.7444082498550415, "logps/rejected": -1.2057534456253052, "loss": 1.1833, "nll_loss": 1.0037739276885986, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22332246601581573, "rewards/margins": 0.1384035348892212, "rewards/rejected": -0.36172598600387573, "step": 6960 }, { "epoch": 1.8260413937647368, "grad_norm": 1.708801507949829, "learning_rate": 2.805107249885118e-06, "log_odds_chosen": 0.3296947181224823, "log_odds_ratio": -0.6740930676460266, "logits/chosen": -2.2503254413604736, "logits/rejected": -2.1926331520080566, "logps/chosen": -0.7673376202583313, "logps/rejected": -0.9907523393630981, "loss": 1.1923, "nll_loss": 1.0275996923446655, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23020127415657043, "rewards/margins": 0.06702445447444916, "rewards/rejected": -0.2972257137298584, "step": 6970 }, { "epoch": 1.8286612522923762, "grad_norm": 0.8989710807800293, "learning_rate": 2.7943147201415978e-06, "log_odds_chosen": 0.6221337914466858, "log_odds_ratio": -0.5200523138046265, "logits/chosen": -2.420691967010498, "logits/rejected": -2.325584888458252, "logps/chosen": -0.670040488243103, "logps/rejected": -0.9970777630805969, "loss": 1.2007, "nll_loss": 0.9946231842041016, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.20101216435432434, "rewards/margins": 0.09811116009950638, "rewards/rejected": -0.2991233468055725, "step": 6980 }, { "epoch": 1.8312811108200158, "grad_norm": 0.8722968697547913, "learning_rate": 2.783531836234014e-06, "log_odds_chosen": 0.5219287276268005, "log_odds_ratio": -0.6103740930557251, "logits/chosen": -2.339700698852539, "logits/rejected": -2.2255289554595947, "logps/chosen": -0.8026955723762512, "logps/rejected": -1.133228063583374, "loss": 1.1463, "nll_loss": 1.0273789167404175, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24080868065357208, "rewards/margins": 0.09915982186794281, "rewards/rejected": -0.3399685025215149, "step": 6990 }, { "epoch": 1.8339009693476553, "grad_norm": 1.5135352611541748, "learning_rate": 2.7727586844285987e-06, "log_odds_chosen": 0.3882468640804291, "log_odds_ratio": -0.6719781160354614, "logits/chosen": -2.379749059677124, "logits/rejected": -2.2883834838867188, "logps/chosen": -0.7540624737739563, "logps/rejected": -0.9779283404350281, "loss": 1.1943, "nll_loss": 0.9878481030464172, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22621873021125793, "rewards/margins": 0.06715979427099228, "rewards/rejected": -0.2933785319328308, "step": 7000 }, { "epoch": 1.8365208278752947, "grad_norm": 1.3614134788513184, "learning_rate": 2.7619953509137277e-06, "log_odds_chosen": 0.6449731588363647, "log_odds_ratio": -0.5671099424362183, "logits/chosen": -2.356935739517212, "logits/rejected": -2.2761781215667725, "logps/chosen": -0.7355815172195435, "logps/rejected": -1.1195685863494873, "loss": 1.1411, "nll_loss": 0.9814909100532532, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22067447006702423, "rewards/margins": 0.1151961237192154, "rewards/rejected": -0.3358705937862396, "step": 7010 }, { "epoch": 1.8391406864029343, "grad_norm": 0.7990119457244873, "learning_rate": 2.7512419217992268e-06, "log_odds_chosen": 0.7305532097816467, "log_odds_ratio": -0.5357871055603027, "logits/chosen": -2.343663454055786, "logits/rejected": -2.25128436088562, "logps/chosen": -0.7450858950614929, "logps/rejected": -1.1980149745941162, "loss": 1.1912, "nll_loss": 0.9636834859848022, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2235257923603058, "rewards/margins": 0.1358787715435028, "rewards/rejected": -0.3594045341014862, "step": 7020 }, { "epoch": 1.8417605449305738, "grad_norm": 1.0968410968780518, "learning_rate": 2.740498483115684e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.226714849472046, "logits/rejected": -2.1865830421447754, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1683, "nll_loss": 0.9385259747505188, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7030 }, { "epoch": 1.8443804034582132, "grad_norm": 0.6525400876998901, "learning_rate": 2.7297651208137583e-06, "log_odds_chosen": 0.4090195596218109, "log_odds_ratio": -0.6438524723052979, "logits/chosen": -2.3154828548431396, "logits/rejected": -2.2293734550476074, "logps/chosen": -0.7979323267936707, "logps/rejected": -1.014940619468689, "loss": 1.1604, "nll_loss": 1.001305103302002, "rewards/accuracies": 0.625, "rewards/chosen": -0.23937971889972687, "rewards/margins": 0.06510251760482788, "rewards/rejected": -0.30448219180107117, "step": 7040 }, { "epoch": 1.8470002619858528, "grad_norm": 35.39341735839844, "learning_rate": 2.7190419207635e-06, "log_odds_chosen": 0.7071629762649536, "log_odds_ratio": -0.5610674619674683, "logits/chosen": -2.348436117172241, "logits/rejected": -2.3025569915771484, "logps/chosen": -0.7812968492507935, "logps/rejected": -1.2323338985443115, "loss": 1.1259, "nll_loss": 1.0490795373916626, "rewards/accuracies": 0.6875, "rewards/chosen": -0.234389066696167, "rewards/margins": 0.13531114161014557, "rewards/rejected": -0.369700163602829, "step": 7050 }, { "epoch": 1.8496201205134923, "grad_norm": 1.4473878145217896, "learning_rate": 2.7083289687536522e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3886818885803223, "logits/rejected": -2.307551145553589, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2064, "nll_loss": 0.9941096305847168, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7060 }, { "epoch": 1.8522399790411317, "grad_norm": 1.6291314363479614, "learning_rate": 2.697626350490976e-06, "log_odds_chosen": 0.3417912423610687, "log_odds_ratio": -0.6092373132705688, "logits/chosen": -2.4120070934295654, "logits/rejected": -2.352879047393799, "logps/chosen": -0.7626071572303772, "logps/rejected": -0.9607568979263306, "loss": 1.2031, "nll_loss": 0.9923830032348633, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22878217697143555, "rewards/margins": 0.05944494530558586, "rewards/rejected": -0.2882271409034729, "step": 7070 }, { "epoch": 1.8548598375687713, "grad_norm": 0.7410784959793091, "learning_rate": 2.686934151599555e-06, "log_odds_chosen": 0.5696754455566406, "log_odds_ratio": -0.5632771849632263, "logits/chosen": -2.3442981243133545, "logits/rejected": -2.235983371734619, "logps/chosen": -0.713737964630127, "logps/rejected": -1.0414085388183594, "loss": 1.1325, "nll_loss": 0.9439838528633118, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21412137150764465, "rewards/margins": 0.09830118715763092, "rewards/rejected": -0.31242257356643677, "step": 7080 }, { "epoch": 1.8574796960964108, "grad_norm": 1.3336268663406372, "learning_rate": 2.67625245762012e-06, "log_odds_chosen": 0.5773431658744812, "log_odds_ratio": -0.6082563996315002, "logits/chosen": -2.2860207557678223, "logits/rejected": -2.229630708694458, "logps/chosen": -0.7456690073013306, "logps/rejected": -1.1083402633666992, "loss": 1.2497, "nll_loss": 0.9713162183761597, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22370071709156036, "rewards/margins": 0.10880140215158463, "rewards/rejected": -0.3325020968914032, "step": 7090 }, { "epoch": 1.8600995546240502, "grad_norm": 1.813109278678894, "learning_rate": 2.6655813540093524e-06, "log_odds_chosen": 0.5411877036094666, "log_odds_ratio": -0.5924819111824036, "logits/chosen": -2.383405923843384, "logits/rejected": -2.3080523014068604, "logps/chosen": -0.7528702020645142, "logps/rejected": -1.0318208932876587, "loss": 1.1631, "nll_loss": 0.9742571115493774, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2258610725402832, "rewards/margins": 0.08368515223264694, "rewards/rejected": -0.30954626202583313, "step": 7100 }, { "epoch": 1.8627194131516898, "grad_norm": 1.7921181917190552, "learning_rate": 2.6549209261392155e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2033591270446777, "logits/rejected": -2.171348810195923, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2259, "nll_loss": 1.0687577724456787, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7110 }, { "epoch": 1.8653392716793293, "grad_norm": 0.9146808385848999, "learning_rate": 2.644271259296256e-06, "log_odds_chosen": 0.48158279061317444, "log_odds_ratio": -0.6674372553825378, "logits/chosen": -2.2634315490722656, "logits/rejected": -2.20759916305542, "logps/chosen": -0.8164430856704712, "logps/rejected": -1.1461536884307861, "loss": 1.1827, "nll_loss": 0.9769312143325806, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24493291974067688, "rewards/margins": 0.09891323745250702, "rewards/rejected": -0.3438461720943451, "step": 7120 }, { "epoch": 1.8679591302069687, "grad_norm": 1.0102413892745972, "learning_rate": 2.633632438680936e-06, "log_odds_chosen": 0.4971712529659271, "log_odds_ratio": -0.6158735156059265, "logits/chosen": -2.2912437915802, "logits/rejected": -2.2795071601867676, "logps/chosen": -0.7317792177200317, "logps/rejected": -1.0571151971817017, "loss": 1.1261, "nll_loss": 0.9475619196891785, "rewards/accuracies": 0.625, "rewards/chosen": -0.219533771276474, "rewards/margins": 0.09760081022977829, "rewards/rejected": -0.3171345591545105, "step": 7130 }, { "epoch": 1.8705789887346085, "grad_norm": 1.3619753122329712, "learning_rate": 2.62300454940694e-06, "log_odds_chosen": 0.5962792634963989, "log_odds_ratio": -0.568900465965271, "logits/chosen": -2.160715341567993, "logits/rejected": -2.120772123336792, "logps/chosen": -0.7430431246757507, "logps/rejected": -1.1179596185684204, "loss": 1.0866, "nll_loss": 0.9763456583023071, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22291293740272522, "rewards/margins": 0.1124749630689621, "rewards/rejected": -0.3353879153728485, "step": 7140 }, { "epoch": 1.8731988472622478, "grad_norm": 0.7661622762680054, "learning_rate": 2.612387676500499e-06, "log_odds_chosen": 0.46346092224121094, "log_odds_ratio": -0.6402563452720642, "logits/chosen": -2.2370216846466064, "logits/rejected": -2.1382508277893066, "logps/chosen": -0.8224751353263855, "logps/rejected": -1.093515157699585, "loss": 1.226, "nll_loss": 0.9588418006896973, "rewards/accuracies": 0.625, "rewards/chosen": -0.24674256145954132, "rewards/margins": 0.08131198585033417, "rewards/rejected": -0.3280545473098755, "step": 7150 }, { "epoch": 1.8758187057898872, "grad_norm": 0.6997863054275513, "learning_rate": 2.601781904899713e-06, "log_odds_chosen": 0.4618530869483948, "log_odds_ratio": -0.6102968454360962, "logits/chosen": -2.328076124191284, "logits/rejected": -2.1687369346618652, "logps/chosen": -0.7478479146957397, "logps/rejected": -1.0229175090789795, "loss": 1.1598, "nll_loss": 1.0463063716888428, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2243543565273285, "rewards/margins": 0.08252091705799103, "rewards/rejected": -0.3068752884864807, "step": 7160 }, { "epoch": 1.878438564317527, "grad_norm": NaN, "learning_rate": 2.591187319453863e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.323748826980591, "logits/rejected": -2.190333843231201, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3169, "nll_loss": 0.9868178367614746, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7170 }, { "epoch": 1.8810584228451663, "grad_norm": 1.995194673538208, "learning_rate": 2.5806040049227446e-06, "log_odds_chosen": 0.6937137842178345, "log_odds_ratio": -0.5623892545700073, "logits/chosen": -2.2810490131378174, "logits/rejected": -2.229980945587158, "logps/chosen": -0.7199284434318542, "logps/rejected": -1.17444908618927, "loss": 1.1901, "nll_loss": 1.0108692646026611, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21597853302955627, "rewards/margins": 0.1363561749458313, "rewards/rejected": -0.35233473777770996, "step": 7180 }, { "epoch": 1.883678281372806, "grad_norm": 0.7864789366722107, "learning_rate": 2.5700320459759743e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.326265811920166, "logits/rejected": -2.213115930557251, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2041, "nll_loss": 0.9647018313407898, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7190 }, { "epoch": 1.8862981399004455, "grad_norm": 1.0507898330688477, "learning_rate": 2.559471527192329e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.252711534500122, "logits/rejected": -2.1825642585754395, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2034, "nll_loss": 0.9669488668441772, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7200 }, { "epoch": 1.8889179984280848, "grad_norm": 1.1230922937393188, "learning_rate": 2.548922533059053e-06, "log_odds_chosen": 0.6566305756568909, "log_odds_ratio": -0.581844687461853, "logits/chosen": -2.2933008670806885, "logits/rejected": -2.2125682830810547, "logps/chosen": -0.761601984500885, "logps/rejected": -1.1673160791397095, "loss": 1.1624, "nll_loss": 1.0092836618423462, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22848057746887207, "rewards/margins": 0.12171424925327301, "rewards/rejected": -0.3501948416233063, "step": 7210 }, { "epoch": 1.8915378569557244, "grad_norm": 0.7266789674758911, "learning_rate": 2.538385147971199e-06, "log_odds_chosen": 0.5706096291542053, "log_odds_ratio": -0.6025015115737915, "logits/chosen": -2.2631795406341553, "logits/rejected": -2.264530658721924, "logps/chosen": -0.7515076994895935, "logps/rejected": -1.082597017288208, "loss": 1.1907, "nll_loss": 1.0154660940170288, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22545230388641357, "rewards/margins": 0.09932676702737808, "rewards/rejected": -0.32477909326553345, "step": 7220 }, { "epoch": 1.894157715483364, "grad_norm": 0.9363006949424744, "learning_rate": 2.5278594562309332e-06, "log_odds_chosen": 0.5203119516372681, "log_odds_ratio": -0.5501137375831604, "logits/chosen": -2.4314701557159424, "logits/rejected": -2.29498553276062, "logps/chosen": -0.7129740118980408, "logps/rejected": -1.0247596502304077, "loss": 1.1543, "nll_loss": 0.9576756358146667, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21389222145080566, "rewards/margins": 0.09353569149971008, "rewards/rejected": -0.30742794275283813, "step": 7230 }, { "epoch": 1.8967775740110033, "grad_norm": 0.8012925386428833, "learning_rate": 2.5173455420468827e-06, "log_odds_chosen": 0.6349529027938843, "log_odds_ratio": -0.5626159906387329, "logits/chosen": -2.317776679992676, "logits/rejected": -2.245028257369995, "logps/chosen": -0.6989836692810059, "logps/rejected": -1.0682556629180908, "loss": 1.2233, "nll_loss": 1.0510202646255493, "rewards/accuracies": 0.625, "rewards/chosen": -0.20969510078430176, "rewards/margins": 0.11078157275915146, "rewards/rejected": -0.3204767107963562, "step": 7240 }, { "epoch": 1.899397432538643, "grad_norm": 1.266675353050232, "learning_rate": 2.5068434895334414e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.28068208694458, "logits/rejected": -2.193248987197876, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2584, "nll_loss": 1.0074201822280884, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7250 }, { "epoch": 1.9020172910662825, "grad_norm": 0.6654836535453796, "learning_rate": 2.496353382710112e-06, "log_odds_chosen": 0.6151525974273682, "log_odds_ratio": -0.5692977905273438, "logits/chosen": -2.2454304695129395, "logits/rejected": -2.120586395263672, "logps/chosen": -0.7338978052139282, "logps/rejected": -1.1037317514419556, "loss": 1.1983, "nll_loss": 0.986906886100769, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22016935050487518, "rewards/margins": 0.11095015704631805, "rewards/rejected": -0.33111950755119324, "step": 7260 }, { "epoch": 1.9046371495939218, "grad_norm": 0.9921262860298157, "learning_rate": 2.4858753055008263e-06, "log_odds_chosen": 0.5599578619003296, "log_odds_ratio": -0.5946468114852905, "logits/chosen": -2.267651319503784, "logits/rejected": -2.258702039718628, "logps/chosen": -0.7607963681221008, "logps/rejected": -1.120488166809082, "loss": 1.153, "nll_loss": 1.0052974224090576, "rewards/accuracies": 0.625, "rewards/chosen": -0.22823889553546906, "rewards/margins": 0.10790754854679108, "rewards/rejected": -0.3361464738845825, "step": 7270 }, { "epoch": 1.9072570081215614, "grad_norm": 1.546822428703308, "learning_rate": 2.4754093417332764e-06, "log_odds_chosen": 0.6318588256835938, "log_odds_ratio": -0.6101927757263184, "logits/chosen": -2.295870065689087, "logits/rejected": -2.1990246772766113, "logps/chosen": -0.7669285535812378, "logps/rejected": -1.1514922380447388, "loss": 1.2067, "nll_loss": 1.0626983642578125, "rewards/accuracies": 0.625, "rewards/chosen": -0.23007860779762268, "rewards/margins": 0.11536906659603119, "rewards/rejected": -0.3454476594924927, "step": 7280 }, { "epoch": 1.909876866649201, "grad_norm": 1.3784595727920532, "learning_rate": 2.46495557513824e-06, "log_odds_chosen": 0.6157860159873962, "log_odds_ratio": -0.5560578107833862, "logits/chosen": -2.2793774604797363, "logits/rejected": -2.220374822616577, "logps/chosen": -0.7291957139968872, "logps/rejected": -1.1117278337478638, "loss": 1.1661, "nll_loss": 0.9626666307449341, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2187587320804596, "rewards/margins": 0.1147596463561058, "rewards/rejected": -0.333518385887146, "step": 7290 }, { "epoch": 1.9124967251768403, "grad_norm": 0.9075945019721985, "learning_rate": 2.4545140893489197e-06, "log_odds_chosen": 0.5100824236869812, "log_odds_ratio": -0.5770628452301025, "logits/chosen": -2.355062246322632, "logits/rejected": -2.202401638031006, "logps/chosen": -0.8151373863220215, "logps/rejected": -1.1501867771148682, "loss": 1.2206, "nll_loss": 1.0511032342910767, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2445412129163742, "rewards/margins": 0.10051479190587997, "rewards/rejected": -0.3450559973716736, "step": 7300 }, { "epoch": 1.9151165837044801, "grad_norm": 1.1048208475112915, "learning_rate": 2.444084967900263e-06, "log_odds_chosen": 0.36640235781669617, "log_odds_ratio": -0.6181304454803467, "logits/chosen": -2.2428784370422363, "logits/rejected": -2.264085531234741, "logps/chosen": -0.7331316471099854, "logps/rejected": -0.9381580352783203, "loss": 1.1807, "nll_loss": 0.9931954145431519, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21993950009346008, "rewards/margins": 0.0615079328417778, "rewards/rejected": -0.2814474403858185, "step": 7310 }, { "epoch": 1.9177364422321195, "grad_norm": 1.0345975160598755, "learning_rate": 2.433668294228302e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.347674608230591, "logits/rejected": -2.34781813621521, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3571, "nll_loss": 1.0880860090255737, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7320 }, { "epoch": 1.9203563007597588, "grad_norm": 0.9241424798965454, "learning_rate": 2.423264151669481e-06, "log_odds_chosen": 0.568239688873291, "log_odds_ratio": -0.5936022996902466, "logits/chosen": -2.2317540645599365, "logits/rejected": -2.217822313308716, "logps/chosen": -0.8242543935775757, "logps/rejected": -1.1996852159500122, "loss": 1.2085, "nll_loss": 0.9867044687271118, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.24727633595466614, "rewards/margins": 0.11262927949428558, "rewards/rejected": -0.35990557074546814, "step": 7330 }, { "epoch": 1.9229761592873986, "grad_norm": 0.8969234824180603, "learning_rate": 2.412872623459994e-06, "log_odds_chosen": 0.6093281507492065, "log_odds_ratio": -0.5739611387252808, "logits/chosen": -2.306847095489502, "logits/rejected": -2.3019471168518066, "logps/chosen": -0.7466956377029419, "logps/rejected": -1.1422849893569946, "loss": 1.2188, "nll_loss": 1.0437730550765991, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.224008709192276, "rewards/margins": 0.1186768040060997, "rewards/rejected": -0.3426854908466339, "step": 7340 }, { "epoch": 1.925596017815038, "grad_norm": 1.3840523958206177, "learning_rate": 2.402493792735112e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.238914728164673, "logits/rejected": -2.1660828590393066, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2521, "nll_loss": 0.9485225677490234, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7350 }, { "epoch": 1.9282158763426775, "grad_norm": 1.0512713193893433, "learning_rate": 2.3921277425285303e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.287971258163452, "logits/rejected": -2.2131266593933105, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2246, "nll_loss": 0.9929054379463196, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7360 }, { "epoch": 1.9308357348703171, "grad_norm": 0.9588366150856018, "learning_rate": 2.3817745557716868e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3661816120147705, "logits/rejected": -2.29964280128479, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1394, "nll_loss": 0.9502054452896118, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7370 }, { "epoch": 1.9334555933979565, "grad_norm": 1.68594491481781, "learning_rate": 2.3714343152931165e-06, "log_odds_chosen": 0.5299914479255676, "log_odds_ratio": -0.6712908148765564, "logits/chosen": -2.3157665729522705, "logits/rejected": -2.274721145629883, "logps/chosen": -0.8720364570617676, "logps/rejected": -1.2435215711593628, "loss": 1.2269, "nll_loss": 1.0485539436340332, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2616109251976013, "rewards/margins": 0.11144556850194931, "rewards/rejected": -0.373056560754776, "step": 7380 }, { "epoch": 1.936075451925596, "grad_norm": 1.2626219987869263, "learning_rate": 2.3611071038177745e-06, "log_odds_chosen": 0.5913956165313721, "log_odds_ratio": -0.571188747882843, "logits/chosen": -2.267409563064575, "logits/rejected": -2.214531660079956, "logps/chosen": -0.7608032822608948, "logps/rejected": -1.139075517654419, "loss": 1.12, "nll_loss": 1.0046684741973877, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2282409965991974, "rewards/margins": 0.11348165571689606, "rewards/rejected": -0.34172266721725464, "step": 7390 }, { "epoch": 1.9386953104532356, "grad_norm": 0.9646998047828674, "learning_rate": 2.350793003966382e-06, "log_odds_chosen": 0.3830323815345764, "log_odds_ratio": -0.6631676554679871, "logits/chosen": -2.3006176948547363, "logits/rejected": -2.199284076690674, "logps/chosen": -0.7172687649726868, "logps/rejected": -1.003336787223816, "loss": 1.1736, "nll_loss": 0.8924170732498169, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2151806354522705, "rewards/margins": 0.08582038432359695, "rewards/rejected": -0.30100104212760925, "step": 7400 }, { "epoch": 1.941315168980875, "grad_norm": 1.33575439453125, "learning_rate": 2.3404920982547636e-06, "log_odds_chosen": 0.2726490795612335, "log_odds_ratio": -0.6793085336685181, "logits/chosen": -2.4090230464935303, "logits/rejected": -2.2951130867004395, "logps/chosen": -0.8716771006584167, "logps/rejected": -1.0857603549957275, "loss": 1.2142, "nll_loss": 1.0264158248901367, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2615031599998474, "rewards/margins": 0.0642249658703804, "rewards/rejected": -0.3257281184196472, "step": 7410 }, { "epoch": 1.9439350275085145, "grad_norm": 1.3892840147018433, "learning_rate": 2.330204469093188e-06, "log_odds_chosen": 0.5772608518600464, "log_odds_ratio": -0.5746273398399353, "logits/chosen": -2.30539608001709, "logits/rejected": -2.197165012359619, "logps/chosen": -0.8435670733451843, "logps/rejected": -1.240609884262085, "loss": 1.1739, "nll_loss": 1.0167052745819092, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2530701458454132, "rewards/margins": 0.11911284923553467, "rewards/rejected": -0.3721829950809479, "step": 7420 }, { "epoch": 1.9465548860361541, "grad_norm": 1.429061770439148, "learning_rate": 2.3199301987857053e-06, "log_odds_chosen": 0.5333697199821472, "log_odds_ratio": -0.5824709534645081, "logits/chosen": -2.37585186958313, "logits/rejected": -2.319981098175049, "logps/chosen": -0.7444313764572144, "logps/rejected": -1.0762418508529663, "loss": 1.1998, "nll_loss": 1.0054235458374023, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22332945466041565, "rewards/margins": 0.09954317659139633, "rewards/rejected": -0.3228726089000702, "step": 7430 }, { "epoch": 1.9491747445637935, "grad_norm": 0.7353323698043823, "learning_rate": 2.3096693695294937e-06, "log_odds_chosen": 0.40545210242271423, "log_odds_ratio": -0.6104671955108643, "logits/chosen": -2.2640256881713867, "logits/rejected": -2.198930025100708, "logps/chosen": -0.8673563003540039, "logps/rejected": -1.1016764640808105, "loss": 1.2765, "nll_loss": 1.1698143482208252, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2602068781852722, "rewards/margins": 0.07029604911804199, "rewards/rejected": -0.3305029273033142, "step": 7440 }, { "epoch": 1.951794603091433, "grad_norm": 0.6376176476478577, "learning_rate": 2.2994220634141958e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2141005992889404, "logits/rejected": -2.255988836288452, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1842, "nll_loss": 0.9651936292648315, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7450 }, { "epoch": 1.9544144616190726, "grad_norm": NaN, "learning_rate": 2.2891883624212654e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.24912691116333, "logits/rejected": -2.1886165142059326, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2151, "nll_loss": 1.102168083190918, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7460 }, { "epoch": 1.957034320146712, "grad_norm": 1.0139979124069214, "learning_rate": 2.278968348423316e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.241690158843994, "logits/rejected": -2.1296300888061523, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1757, "nll_loss": 0.9078288078308105, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7470 }, { "epoch": 1.9596541786743515, "grad_norm": 1.0344386100769043, "learning_rate": 2.268762103183456e-06, "log_odds_chosen": 0.3571264445781708, "log_odds_ratio": -0.6528542637825012, "logits/chosen": -2.3741068840026855, "logits/rejected": -2.2728919982910156, "logps/chosen": -0.8248841166496277, "logps/rejected": -1.0589587688446045, "loss": 1.2424, "nll_loss": 1.0940830707550049, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.24746525287628174, "rewards/margins": 0.07022242248058319, "rewards/rejected": -0.3176876902580261, "step": 7480 }, { "epoch": 1.9622740372019911, "grad_norm": 0.8927868008613586, "learning_rate": 2.2585697083546392e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.26342511177063, "logits/rejected": -2.258974552154541, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2571, "nll_loss": 0.9749491810798645, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7490 }, { "epoch": 1.9648938957296305, "grad_norm": 0.7645418047904968, "learning_rate": 2.2483912454790177e-06, "log_odds_chosen": 0.5039868354797363, "log_odds_ratio": -0.613422155380249, "logits/chosen": -2.277289628982544, "logits/rejected": -2.167250156402588, "logps/chosen": -0.7949237823486328, "logps/rejected": -1.1476106643676758, "loss": 1.1885, "nll_loss": 0.9843498468399048, "rewards/accuracies": 0.6875, "rewards/chosen": -0.23847715556621552, "rewards/margins": 0.10580608993768692, "rewards/rejected": -0.34428325295448303, "step": 7500 }, { "epoch": 1.9675137542572703, "grad_norm": 0.8474311232566833, "learning_rate": 2.238226795987278e-06, "log_odds_chosen": 0.4893370270729065, "log_odds_ratio": -0.6255459189414978, "logits/chosen": -2.3115012645721436, "logits/rejected": -2.2205917835235596, "logps/chosen": -0.7513346076011658, "logps/rejected": -1.0360788106918335, "loss": 1.1807, "nll_loss": 0.9621448516845703, "rewards/accuracies": 0.625, "rewards/chosen": -0.2254004031419754, "rewards/margins": 0.0854233056306839, "rewards/rejected": -0.3108237087726593, "step": 7510 }, { "epoch": 1.9701336127849096, "grad_norm": 1.2420637607574463, "learning_rate": 2.2280764411979993e-06, "log_odds_chosen": 0.6481809616088867, "log_odds_ratio": -0.5428443551063538, "logits/chosen": -2.318506956100464, "logits/rejected": -2.216545581817627, "logps/chosen": -0.7406237721443176, "logps/rejected": -1.181581735610962, "loss": 1.1012, "nll_loss": 0.9000147581100464, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2221871316432953, "rewards/margins": 0.1322874277830124, "rewards/rejected": -0.35447457432746887, "step": 7520 }, { "epoch": 1.972753471312549, "grad_norm": 0.9612450003623962, "learning_rate": 2.217940262316996e-06, "log_odds_chosen": 0.6129547953605652, "log_odds_ratio": -0.5329427123069763, "logits/chosen": -2.2236170768737793, "logits/rejected": -2.2068817615509033, "logps/chosen": -0.7069040536880493, "logps/rejected": -1.08379328250885, "loss": 1.1697, "nll_loss": 0.9789407849311829, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2120712250471115, "rewards/margins": 0.11306673288345337, "rewards/rejected": -0.3251379430294037, "step": 7530 }, { "epoch": 1.9753733298401888, "grad_norm": 1.1521621942520142, "learning_rate": 2.207818340436676e-06, "log_odds_chosen": 0.5201526284217834, "log_odds_ratio": -0.6324434280395508, "logits/chosen": -2.269543409347534, "logits/rejected": -2.153547763824463, "logps/chosen": -0.8261318206787109, "logps/rejected": -1.2078969478607178, "loss": 1.2283, "nll_loss": 0.9270240664482117, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2478395402431488, "rewards/margins": 0.11452951282262802, "rewards/rejected": -0.3623690903186798, "step": 7540 }, { "epoch": 1.9779931883678281, "grad_norm": 1.2619409561157227, "learning_rate": 2.197710756535379e-06, "log_odds_chosen": 0.5527589321136475, "log_odds_ratio": -0.5775222778320312, "logits/chosen": -2.4370133876800537, "logits/rejected": -2.3317418098449707, "logps/chosen": -0.7751535177230835, "logps/rejected": -1.1080200672149658, "loss": 1.2136, "nll_loss": 1.0565601587295532, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.23254606127738953, "rewards/margins": 0.09985993057489395, "rewards/rejected": -0.33240601420402527, "step": 7550 }, { "epoch": 1.9806130468954677, "grad_norm": 0.8828101754188538, "learning_rate": 2.187617591476746e-06, "log_odds_chosen": 0.49080562591552734, "log_odds_ratio": -0.6004105806350708, "logits/chosen": -2.3029942512512207, "logits/rejected": -2.204378128051758, "logps/chosen": -0.7661387324333191, "logps/rejected": -1.0765304565429688, "loss": 1.1733, "nll_loss": 0.9469097852706909, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22984163463115692, "rewards/margins": 0.09311752021312714, "rewards/rejected": -0.32295915484428406, "step": 7560 }, { "epoch": 1.9832329054231073, "grad_norm": 2.1355535984039307, "learning_rate": 2.1775389260090542e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3346505165100098, "logits/rejected": -2.292855978012085, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2456, "nll_loss": 1.0403718948364258, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7570 }, { "epoch": 1.9858527639507466, "grad_norm": 0.7578134536743164, "learning_rate": 2.1674748407645864e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.34680438041687, "logits/rejected": -2.261329174041748, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1416, "nll_loss": 1.0163248777389526, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7580 }, { "epoch": 1.9884726224783862, "grad_norm": 4.9875946044921875, "learning_rate": 2.1574254162589775e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4339120388031006, "logits/rejected": -2.2861342430114746, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1646, "nll_loss": 0.9417012929916382, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7590 }, { "epoch": 1.9910924810060258, "grad_norm": 1.188344955444336, "learning_rate": 2.147390732890569e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.271679639816284, "logits/rejected": -2.25117826461792, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2257, "nll_loss": 0.9887748956680298, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7600 }, { "epoch": 1.9937123395336651, "grad_norm": 1.4424996376037598, "learning_rate": 2.137370870939775e-06, "log_odds_chosen": 0.6577612161636353, "log_odds_ratio": -0.5685340166091919, "logits/chosen": -2.4008913040161133, "logits/rejected": -2.2484939098358154, "logps/chosen": -0.750547468662262, "logps/rejected": -1.2088409662246704, "loss": 1.2019, "nll_loss": 1.012878656387329, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2251642644405365, "rewards/margins": 0.13748808205127716, "rewards/rejected": -0.36265233159065247, "step": 7610 }, { "epoch": 1.9963321980613047, "grad_norm": 1.0299482345581055, "learning_rate": 2.127365910568427e-06, "log_odds_chosen": 0.5114971995353699, "log_odds_ratio": -0.580726146697998, "logits/chosen": -2.3246021270751953, "logits/rejected": -2.259129762649536, "logps/chosen": -0.7633844614028931, "logps/rejected": -1.0645921230316162, "loss": 1.1898, "nll_loss": 0.9361041784286499, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2290153205394745, "rewards/margins": 0.09036235511302948, "rewards/rejected": -0.31937772035598755, "step": 7620 }, { "epoch": 1.9989520565889443, "grad_norm": 1.9288170337677002, "learning_rate": 2.1173759318191464e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3446907997131348, "logits/rejected": -2.2342357635498047, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.227, "nll_loss": 0.9751620292663574, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7630 }, { "epoch": 2.0015719151165836, "grad_norm": 0.9602162837982178, "learning_rate": 2.10740101461469e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3217968940734863, "logits/rejected": -2.2963099479675293, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1988, "nll_loss": 0.9848507046699524, "rewards/accuracies": 0.737500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7640 }, { "epoch": 2.0041917736442234, "grad_norm": 0.7912653684616089, "learning_rate": 2.0974412387573257e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.449824094772339, "logits/rejected": -2.320420026779175, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.189, "nll_loss": 0.9906517267227173, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7650 }, { "epoch": 2.0068116321718628, "grad_norm": 0.9288321137428284, "learning_rate": 2.0874966839281786e-06, "log_odds_chosen": 0.6703764796257019, "log_odds_ratio": -0.5454535484313965, "logits/chosen": -2.3728911876678467, "logits/rejected": -2.2839369773864746, "logps/chosen": -0.7790075540542603, "logps/rejected": -1.18270742893219, "loss": 1.162, "nll_loss": 1.0155384540557861, "rewards/accuracies": 0.75, "rewards/chosen": -0.23370233178138733, "rewards/margins": 0.12110990285873413, "rewards/rejected": -0.3548122048377991, "step": 7660 }, { "epoch": 2.009431490699502, "grad_norm": 1.0480377674102783, "learning_rate": 2.077567429686607e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.352382183074951, "logits/rejected": -2.277923345565796, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1788, "nll_loss": 1.0016335248947144, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7670 }, { "epoch": 2.012051349227142, "grad_norm": 0.8156402707099915, "learning_rate": 2.0676535554695564e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3638763427734375, "logits/rejected": -2.2528433799743652, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1336, "nll_loss": 0.9809637069702148, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7680 }, { "epoch": 2.0146712077547813, "grad_norm": 0.9404595494270325, "learning_rate": 2.0577551405909308e-06, "log_odds_chosen": 0.8077133893966675, "log_odds_ratio": -0.5794797539710999, "logits/chosen": -2.3179290294647217, "logits/rejected": -2.2013301849365234, "logps/chosen": -0.7966238260269165, "logps/rejected": -1.2886152267456055, "loss": 1.155, "nll_loss": 0.9969694018363953, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23898717761039734, "rewards/margins": 0.14759741723537445, "rewards/rejected": -0.3865845799446106, "step": 7690 }, { "epoch": 2.0172910662824206, "grad_norm": 1.4715046882629395, "learning_rate": 2.047872264240951e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.316260814666748, "logits/rejected": -2.2958984375, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1612, "nll_loss": 0.9541808366775513, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7700 }, { "epoch": 2.0199109248100604, "grad_norm": 2.3115944862365723, "learning_rate": 2.0380050054855257e-06, "log_odds_chosen": 0.8525660634040833, "log_odds_ratio": -0.5238757133483887, "logits/chosen": -2.3436241149902344, "logits/rejected": -2.244536876678467, "logps/chosen": -0.8283430933952332, "logps/rejected": -1.3652596473693848, "loss": 1.1109, "nll_loss": 0.9802485704421997, "rewards/accuracies": 0.75, "rewards/chosen": -0.2485029250383377, "rewards/margins": 0.16107496619224548, "rewards/rejected": -0.4095779359340668, "step": 7710 }, { "epoch": 2.0225307833376998, "grad_norm": 0.8344228267669678, "learning_rate": 2.0281534432656237e-06, "log_odds_chosen": 0.434170663356781, "log_odds_ratio": -0.6366975903511047, "logits/chosen": -2.3675615787506104, "logits/rejected": -2.3589859008789062, "logps/chosen": -0.7570791840553284, "logps/rejected": -1.030453085899353, "loss": 1.1609, "nll_loss": 1.029824137687683, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22712378203868866, "rewards/margins": 0.08201216906309128, "rewards/rejected": -0.30913597345352173, "step": 7720 }, { "epoch": 2.025150641865339, "grad_norm": 2.282590866088867, "learning_rate": 2.01831765639663e-06, "log_odds_chosen": 0.5318518877029419, "log_odds_ratio": -0.5762071013450623, "logits/chosen": -2.4021527767181396, "logits/rejected": -2.2459230422973633, "logps/chosen": -0.7858981490135193, "logps/rejected": -1.0952107906341553, "loss": 1.1519, "nll_loss": 0.983130931854248, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.23576946556568146, "rewards/margins": 0.09279382228851318, "rewards/rejected": -0.32856327295303345, "step": 7730 }, { "epoch": 2.027770500392979, "grad_norm": 0.8600467443466187, "learning_rate": 2.008497723567724e-06, "log_odds_chosen": 0.7452343702316284, "log_odds_ratio": -0.5595897436141968, "logits/chosen": -2.4458136558532715, "logits/rejected": -2.225018262863159, "logps/chosen": -0.7062630653381348, "logps/rejected": -1.2145910263061523, "loss": 1.1475, "nll_loss": 0.9519978761672974, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2118789255619049, "rewards/margins": 0.15249839425086975, "rewards/rejected": -0.36437731981277466, "step": 7740 }, { "epoch": 2.0303903589206183, "grad_norm": 0.8811141848564148, "learning_rate": 1.9986937233412516e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3409245014190674, "logits/rejected": -2.277916431427002, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1939, "nll_loss": 0.9948007464408875, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7750 }, { "epoch": 2.0330102174482576, "grad_norm": 0.5677057504653931, "learning_rate": 1.988905734152088e-06, "log_odds_chosen": 0.5808444619178772, "log_odds_ratio": -0.5751546025276184, "logits/chosen": -2.3137598037719727, "logits/rejected": -2.2185585498809814, "logps/chosen": -0.7969663739204407, "logps/rejected": -1.1980606317520142, "loss": 1.1481, "nll_loss": 0.9438555836677551, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.23908989131450653, "rewards/margins": 0.12032829225063324, "rewards/rejected": -0.35941821336746216, "step": 7760 }, { "epoch": 2.0356300759758974, "grad_norm": 2.1646547317504883, "learning_rate": 1.9791338343070223e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2863032817840576, "logits/rejected": -2.236140727996826, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.192, "nll_loss": 1.0374308824539185, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7770 }, { "epoch": 2.0382499345035368, "grad_norm": 0.6659148931503296, "learning_rate": 1.969378101984115e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2522006034851074, "logits/rejected": -2.199974298477173, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2125, "nll_loss": 1.0265816450119019, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7780 }, { "epoch": 2.040869793031176, "grad_norm": 0.6289401650428772, "learning_rate": 1.9596386152320903e-06, "log_odds_chosen": 0.7426986694335938, "log_odds_ratio": -0.5269575119018555, "logits/chosen": -2.3425707817077637, "logits/rejected": -2.2757599353790283, "logps/chosen": -0.7139682173728943, "logps/rejected": -1.1992371082305908, "loss": 1.1533, "nll_loss": 0.960262656211853, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2141905128955841, "rewards/margins": 0.14558067917823792, "rewards/rejected": -0.35977116227149963, "step": 7790 }, { "epoch": 2.043489651558816, "grad_norm": 1.7167391777038574, "learning_rate": 1.949915451969696e-06, "log_odds_chosen": 0.7489277124404907, "log_odds_ratio": -0.5391007661819458, "logits/chosen": -2.311753511428833, "logits/rejected": -2.2604103088378906, "logps/chosen": -0.672787070274353, "logps/rejected": -1.160203456878662, "loss": 1.085, "nll_loss": 0.9138399958610535, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.20183610916137695, "rewards/margins": 0.1462249457836151, "rewards/rejected": -0.34806105494499207, "step": 7800 }, { "epoch": 2.0461095100864553, "grad_norm": 1.4033178091049194, "learning_rate": 1.9402086899850944e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2919821739196777, "logits/rejected": -2.243617057800293, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1102, "nll_loss": 0.903470516204834, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7810 }, { "epoch": 2.048729368614095, "grad_norm": 1.3817028999328613, "learning_rate": 1.9305184069352256e-06, "log_odds_chosen": 0.8575795888900757, "log_odds_ratio": -0.49834156036376953, "logits/chosen": -2.3728556632995605, "logits/rejected": -2.296515941619873, "logps/chosen": -0.7005519270896912, "logps/rejected": -1.2312911748886108, "loss": 1.1558, "nll_loss": 0.9809940457344055, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2101655900478363, "rewards/margins": 0.1592218279838562, "rewards/rejected": -0.36938735842704773, "step": 7820 }, { "epoch": 2.0513492271417344, "grad_norm": 0.8247695565223694, "learning_rate": 1.9208446803451996e-06, "log_odds_chosen": 0.7352222204208374, "log_odds_ratio": -0.5744787454605103, "logits/chosen": -2.3520543575286865, "logits/rejected": -2.235760450363159, "logps/chosen": -0.7395035028457642, "logps/rejected": -1.1680099964141846, "loss": 1.2063, "nll_loss": 1.0620120763778687, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22185106575489044, "rewards/margins": 0.12855198979377747, "rewards/rejected": -0.3504030108451843, "step": 7830 }, { "epoch": 2.0539690856693738, "grad_norm": 0.5955808758735657, "learning_rate": 1.9111875876076635e-06, "log_odds_chosen": 0.6643736958503723, "log_odds_ratio": -0.5428844690322876, "logits/chosen": -2.3253960609436035, "logits/rejected": -2.3104941844940186, "logps/chosen": -0.6836180686950684, "logps/rejected": -1.105628252029419, "loss": 1.1514, "nll_loss": 0.9766947031021118, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.20508544147014618, "rewards/margins": 0.12660303711891174, "rewards/rejected": -0.3316884934902191, "step": 7840 }, { "epoch": 2.0565889441970135, "grad_norm": 1.2371294498443604, "learning_rate": 1.9015472059821965e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.38055419921875, "logits/rejected": -2.285874843597412, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1352, "nll_loss": 0.9034536480903625, "rewards/accuracies": 0.737500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7850 }, { "epoch": 2.059208802724653, "grad_norm": 1.4026870727539062, "learning_rate": 1.891923612594675e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.323050022125244, "logits/rejected": -2.2908430099487305, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1728, "nll_loss": 0.9737453460693359, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7860 }, { "epoch": 2.0618286612522922, "grad_norm": 0.6913669109344482, "learning_rate": 1.8823168844366739e-06, "log_odds_chosen": 0.6061134338378906, "log_odds_ratio": -0.6045204401016235, "logits/chosen": -2.241185426712036, "logits/rejected": -2.1959152221679688, "logps/chosen": -0.8297128677368164, "logps/rejected": -1.2681459188461304, "loss": 1.0948, "nll_loss": 0.9307743310928345, "rewards/accuracies": 0.625, "rewards/chosen": -0.24891385436058044, "rewards/margins": 0.13152992725372314, "rewards/rejected": -0.380443811416626, "step": 7870 }, { "epoch": 2.064448519779932, "grad_norm": 0.9293632507324219, "learning_rate": 1.8727270983648325e-06, "log_odds_chosen": 0.9844502210617065, "log_odds_ratio": -0.5011180639266968, "logits/chosen": -2.315037250518799, "logits/rejected": -2.2568016052246094, "logps/chosen": -0.7039943337440491, "logps/rejected": -1.2657630443572998, "loss": 1.1229, "nll_loss": 0.9071322679519653, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21119828522205353, "rewards/margins": 0.1685306429862976, "rewards/rejected": -0.37972891330718994, "step": 7880 }, { "epoch": 2.0670683783075714, "grad_norm": 1.7293453216552734, "learning_rate": 1.8631543311002557e-06, "log_odds_chosen": 0.8172377347946167, "log_odds_ratio": -0.5609051585197449, "logits/chosen": -2.257143497467041, "logits/rejected": -2.1696114540100098, "logps/chosen": -0.7536892890930176, "logps/rejected": -1.2719123363494873, "loss": 1.1599, "nll_loss": 0.9940185546875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22610679268836975, "rewards/margins": 0.1554669290781021, "rewards/rejected": -0.38157373666763306, "step": 7890 }, { "epoch": 2.0696882368352107, "grad_norm": 1.2330516576766968, "learning_rate": 1.8535986592278873e-06, "log_odds_chosen": 0.7314665913581848, "log_odds_ratio": -0.5250632166862488, "logits/chosen": -2.2805240154266357, "logits/rejected": -2.2153568267822266, "logps/chosen": -0.6578829884529114, "logps/rejected": -1.0530554056167603, "loss": 1.1035, "nll_loss": 0.9666228294372559, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1973649114370346, "rewards/margins": 0.11855168640613556, "rewards/rejected": -0.31591659784317017, "step": 7900 }, { "epoch": 2.0723080953628505, "grad_norm": 0.884573757648468, "learning_rate": 1.8440601591959086e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3763506412506104, "logits/rejected": -2.2347350120544434, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.0799, "nll_loss": 0.9876044392585754, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7910 }, { "epoch": 2.07492795389049, "grad_norm": 2.4213263988494873, "learning_rate": 1.8345389073151142e-06, "log_odds_chosen": 0.5543875694274902, "log_odds_ratio": -0.5870702266693115, "logits/chosen": -2.3952996730804443, "logits/rejected": -2.329784870147705, "logps/chosen": -0.7473592758178711, "logps/rejected": -1.0882837772369385, "loss": 1.1053, "nll_loss": 0.9380757212638855, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.224207803606987, "rewards/margins": 0.10227732360363007, "rewards/rejected": -0.3264850974082947, "step": 7920 }, { "epoch": 2.0775478124181292, "grad_norm": 1.0212457180023193, "learning_rate": 1.8250349797583162e-06, "log_odds_chosen": 0.7158786654472351, "log_odds_ratio": -0.5852283835411072, "logits/chosen": -2.368868589401245, "logits/rejected": -2.2817113399505615, "logps/chosen": -0.8467890024185181, "logps/rejected": -1.3384140729904175, "loss": 1.1884, "nll_loss": 1.0344393253326416, "rewards/accuracies": 0.5625, "rewards/chosen": -0.25403666496276855, "rewards/margins": 0.14748753607273102, "rewards/rejected": -0.40152424573898315, "step": 7930 }, { "epoch": 2.080167670945769, "grad_norm": 0.9459959268569946, "learning_rate": 1.8155484525597196e-06, "log_odds_chosen": 0.6080295443534851, "log_odds_ratio": -0.5915641784667969, "logits/chosen": -2.1655497550964355, "logits/rejected": -2.1392033100128174, "logps/chosen": -0.6987907290458679, "logps/rejected": -1.0427968502044678, "loss": 1.1201, "nll_loss": 1.0135481357574463, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20963719487190247, "rewards/margins": 0.10320182889699936, "rewards/rejected": -0.3128390610218048, "step": 7940 }, { "epoch": 2.0827875294734084, "grad_norm": 0.8269048929214478, "learning_rate": 1.8060794016143279e-06, "log_odds_chosen": 0.7803002595901489, "log_odds_ratio": -0.5174747705459595, "logits/chosen": -2.3265135288238525, "logits/rejected": -2.2618730068206787, "logps/chosen": -0.6731448173522949, "logps/rejected": -1.1815083026885986, "loss": 1.0815, "nll_loss": 0.9153990745544434, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20194347202777863, "rewards/margins": 0.15250901877880096, "rewards/rejected": -0.354452520608902, "step": 7950 }, { "epoch": 2.0854073880010477, "grad_norm": 1.1877038478851318, "learning_rate": 1.7966279026773238e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.268946886062622, "logits/rejected": -2.248805046081543, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2026, "nll_loss": 0.9730064272880554, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7960 }, { "epoch": 2.0880272465286875, "grad_norm": 0.6655046939849854, "learning_rate": 1.7871940313634744e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4024667739868164, "logits/rejected": -2.31274676322937, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2114, "nll_loss": 0.983184814453125, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7970 }, { "epoch": 2.090647105056327, "grad_norm": 0.9438948035240173, "learning_rate": 1.777777863146516e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.351984739303589, "logits/rejected": -2.256319522857666, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1149, "nll_loss": 0.9925878643989563, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 7980 }, { "epoch": 2.0932669635839662, "grad_norm": 0.7106908559799194, "learning_rate": 1.7683794733585552e-06, "log_odds_chosen": 0.5970109701156616, "log_odds_ratio": -0.5734195113182068, "logits/chosen": -2.412177562713623, "logits/rejected": -2.298950672149658, "logps/chosen": -0.705242931842804, "logps/rejected": -1.0666759014129639, "loss": 1.1749, "nll_loss": 0.9656350016593933, "rewards/accuracies": 0.75, "rewards/chosen": -0.21157288551330566, "rewards/margins": 0.1084299236536026, "rewards/rejected": -0.32000279426574707, "step": 7990 }, { "epoch": 2.095886822111606, "grad_norm": 1.3047922849655151, "learning_rate": 1.7589989371894705e-06, "log_odds_chosen": 0.6427454352378845, "log_odds_ratio": -0.5513819456100464, "logits/chosen": -2.3327856063842773, "logits/rejected": -2.2674529552459717, "logps/chosen": -0.7269993424415588, "logps/rejected": -1.1144554615020752, "loss": 1.1113, "nll_loss": 0.9388291239738464, "rewards/accuracies": 0.75, "rewards/chosen": -0.21809980273246765, "rewards/margins": 0.1162368506193161, "rewards/rejected": -0.33433666825294495, "step": 8000 }, { "epoch": 2.0985066806392454, "grad_norm": 0.7169250249862671, "learning_rate": 1.7496363296863016e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.286776542663574, "logits/rejected": -2.3217639923095703, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1768, "nll_loss": 0.9349471926689148, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8010 }, { "epoch": 2.101126539166885, "grad_norm": 1.247929334640503, "learning_rate": 1.740291725752653e-06, "log_odds_chosen": 0.7687138319015503, "log_odds_ratio": -0.565596342086792, "logits/chosen": -2.341578722000122, "logits/rejected": -2.2685227394104004, "logps/chosen": -0.7499393224716187, "logps/rejected": -1.2359591722488403, "loss": 1.1492, "nll_loss": 0.997593879699707, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22498175501823425, "rewards/margins": 0.14580599963665009, "rewards/rejected": -0.37078773975372314, "step": 8020 }, { "epoch": 2.1037463976945245, "grad_norm": 1.023982286453247, "learning_rate": 1.7309652001480997e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2767581939697266, "logits/rejected": -2.15973162651062, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2082, "nll_loss": 0.9991844892501831, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8030 }, { "epoch": 2.106366256222164, "grad_norm": 1.0089677572250366, "learning_rate": 1.721656827487584e-06, "log_odds_chosen": 0.469683974981308, "log_odds_ratio": -0.5843351483345032, "logits/chosen": -2.3716225624084473, "logits/rejected": -2.335596799850464, "logps/chosen": -0.7634120583534241, "logps/rejected": -1.059469223022461, "loss": 1.1588, "nll_loss": 0.9623720049858093, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22902362048625946, "rewards/margins": 0.08881714195013046, "rewards/rejected": -0.3178407847881317, "step": 8040 }, { "epoch": 2.1089861147498037, "grad_norm": 1.6868935823440552, "learning_rate": 1.7123666822408148e-06, "log_odds_chosen": 0.8139742612838745, "log_odds_ratio": -0.5377219319343567, "logits/chosen": -2.262265205383301, "logits/rejected": -2.193080186843872, "logps/chosen": -0.7089479565620422, "logps/rejected": -1.2358653545379639, "loss": 1.0919, "nll_loss": 0.8989003896713257, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.21268439292907715, "rewards/margins": 0.1580752432346344, "rewards/rejected": -0.37075963616371155, "step": 8050 }, { "epoch": 2.111605973277443, "grad_norm": 1.0124181509017944, "learning_rate": 1.7030948387316818e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.283889055252075, "logits/rejected": -2.22818922996521, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1583, "nll_loss": 1.0144877433776855, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8060 }, { "epoch": 2.1142258318050824, "grad_norm": 5.148787021636963, "learning_rate": 1.6938413711376511e-06, "log_odds_chosen": 0.6552556157112122, "log_odds_ratio": -0.5662426352500916, "logits/chosen": -2.266077756881714, "logits/rejected": -2.211160182952881, "logps/chosen": -0.7898212671279907, "logps/rejected": -1.1915773153305054, "loss": 1.1417, "nll_loss": 0.9442359209060669, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23694634437561035, "rewards/margins": 0.12052682787179947, "rewards/rejected": -0.3574731945991516, "step": 8070 }, { "epoch": 2.116845690332722, "grad_norm": 0.9233986139297485, "learning_rate": 1.6846063534891802e-06, "log_odds_chosen": 0.8238309025764465, "log_odds_ratio": -0.5024297833442688, "logits/chosen": -2.325260639190674, "logits/rejected": -2.295228958129883, "logps/chosen": -0.7024565935134888, "logps/rejected": -1.2032439708709717, "loss": 1.1475, "nll_loss": 0.9639929533004761, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.210736945271492, "rewards/margins": 0.15023623406887054, "rewards/rejected": -0.36097317934036255, "step": 8080 }, { "epoch": 2.1194655488603615, "grad_norm": 0.707560122013092, "learning_rate": 1.6753898596691163e-06, "log_odds_chosen": 0.4516966938972473, "log_odds_ratio": -0.6329379081726074, "logits/chosen": -2.3413074016571045, "logits/rejected": -2.284184217453003, "logps/chosen": -0.7636586427688599, "logps/rejected": -1.0585229396820068, "loss": 1.1312, "nll_loss": 0.9462622404098511, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22909760475158691, "rewards/margins": 0.08845923840999603, "rewards/rejected": -0.31755685806274414, "step": 8090 }, { "epoch": 2.122085407388001, "grad_norm": 0.8858146667480469, "learning_rate": 1.6661919634121175e-06, "log_odds_chosen": 0.7090789079666138, "log_odds_ratio": -0.5392205119132996, "logits/chosen": -2.2699055671691895, "logits/rejected": -2.2176644802093506, "logps/chosen": -0.6315934658050537, "logps/rejected": -0.9919190406799316, "loss": 1.1213, "nll_loss": 0.9590030908584595, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.1894780397415161, "rewards/margins": 0.10809768736362457, "rewards/rejected": -0.2975757122039795, "step": 8100 }, { "epoch": 2.1247052659156407, "grad_norm": 0.9733865857124329, "learning_rate": 1.6570127383040488e-06, "log_odds_chosen": 0.4151204526424408, "log_odds_ratio": -0.635924756526947, "logits/chosen": -2.406508207321167, "logits/rejected": -2.315406322479248, "logps/chosen": -0.783687949180603, "logps/rejected": -1.048293948173523, "loss": 1.1213, "nll_loss": 0.9334977269172668, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23510637879371643, "rewards/margins": 0.07938181608915329, "rewards/rejected": -0.3144882321357727, "step": 8110 }, { "epoch": 2.12732512444328, "grad_norm": 0.9540517330169678, "learning_rate": 1.6478522577814066e-06, "log_odds_chosen": 0.6353100538253784, "log_odds_ratio": -0.566614031791687, "logits/chosen": -2.311232805252075, "logits/rejected": -2.2750048637390137, "logps/chosen": -0.6917222738265991, "logps/rejected": -1.098175048828125, "loss": 1.1197, "nll_loss": 0.9371941685676575, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.20751670002937317, "rewards/margins": 0.12193586677312851, "rewards/rejected": -0.3294525742530823, "step": 8120 }, { "epoch": 2.1299449829709194, "grad_norm": 0.7097244262695312, "learning_rate": 1.6387105951307195e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.209522247314453, "logits/rejected": -2.2008469104766846, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1505, "nll_loss": 0.9197705388069153, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8130 }, { "epoch": 2.132564841498559, "grad_norm": 1.1502565145492554, "learning_rate": 1.6295878234879732e-06, "log_odds_chosen": 0.6306387782096863, "log_odds_ratio": -0.5366018414497375, "logits/chosen": -2.2957167625427246, "logits/rejected": -2.2116801738739014, "logps/chosen": -0.7290579080581665, "logps/rejected": -1.0887279510498047, "loss": 1.1998, "nll_loss": 1.0030767917633057, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.21871736645698547, "rewards/margins": 0.10790102183818817, "rewards/rejected": -0.32661840319633484, "step": 8140 }, { "epoch": 2.1351847000261985, "grad_norm": 0.8026818037033081, "learning_rate": 1.6204840158380147e-06, "log_odds_chosen": 0.9157935976982117, "log_odds_ratio": -0.47766026854515076, "logits/chosen": -2.3548781871795654, "logits/rejected": -2.244502305984497, "logps/chosen": -0.6885102987289429, "logps/rejected": -1.25448477268219, "loss": 1.1016, "nll_loss": 0.8961178064346313, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.20655310153961182, "rewards/margins": 0.1697923243045807, "rewards/rejected": -0.3763454556465149, "step": 8150 }, { "epoch": 2.137804558553838, "grad_norm": 1.4274654388427734, "learning_rate": 1.6113992450139784e-06, "log_odds_chosen": 0.851911723613739, "log_odds_ratio": -0.5648254156112671, "logits/chosen": -2.289860248565674, "logits/rejected": -2.2871177196502686, "logps/chosen": -0.6814625859260559, "logps/rejected": -1.2425367832183838, "loss": 1.1615, "nll_loss": 0.969342827796936, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20443880558013916, "rewards/margins": 0.16832228004932404, "rewards/rejected": -0.372761070728302, "step": 8160 }, { "epoch": 2.1404244170814777, "grad_norm": 1.1137869358062744, "learning_rate": 1.6023335836966933e-06, "log_odds_chosen": 0.8018379211425781, "log_odds_ratio": -0.5364068150520325, "logits/chosen": -2.3494226932525635, "logits/rejected": -2.2561962604522705, "logps/chosen": -0.7662001252174377, "logps/rejected": -1.2701977491378784, "loss": 1.1298, "nll_loss": 0.9278900027275085, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.22986003756523132, "rewards/margins": 0.1511993557214737, "rewards/rejected": -0.3810593783855438, "step": 8170 }, { "epoch": 2.143044275609117, "grad_norm": 1.8191009759902954, "learning_rate": 1.5932871044141122e-06, "log_odds_chosen": 0.8843866586685181, "log_odds_ratio": -0.5532401204109192, "logits/chosen": -2.3065075874328613, "logits/rejected": -2.215930461883545, "logps/chosen": -0.746457040309906, "logps/rejected": -1.3091540336608887, "loss": 1.127, "nll_loss": 0.95921790599823, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22393712401390076, "rewards/margins": 0.16880914568901062, "rewards/rejected": -0.3927462697029114, "step": 8180 }, { "epoch": 2.145664134136757, "grad_norm": 0.772455096244812, "learning_rate": 1.58425987954072e-06, "log_odds_chosen": 0.9703394174575806, "log_odds_ratio": -0.4702835977077484, "logits/chosen": -2.308228015899658, "logits/rejected": -2.254560947418213, "logps/chosen": -0.7453449964523315, "logps/rejected": -1.3874322175979614, "loss": 1.1677, "nll_loss": 1.01626455783844, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.2236034870147705, "rewards/margins": 0.19262614846229553, "rewards/rejected": -0.4162296652793884, "step": 8190 }, { "epoch": 2.148283992664396, "grad_norm": 0.978524386882782, "learning_rate": 1.5752519812969674e-06, "log_odds_chosen": 1.1068809032440186, "log_odds_ratio": -0.46975260972976685, "logits/chosen": -2.2659993171691895, "logits/rejected": -2.1292202472686768, "logps/chosen": -0.694267749786377, "logps/rejected": -1.430761694908142, "loss": 1.1152, "nll_loss": 0.9265459775924683, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.20828032493591309, "rewards/margins": 0.2209482192993164, "rewards/rejected": -0.4292284846305847, "step": 8200 }, { "epoch": 2.1509038511920355, "grad_norm": 3.5094265937805176, "learning_rate": 1.5662634817486807e-06, "log_odds_chosen": 0.796878457069397, "log_odds_ratio": -0.5688443779945374, "logits/chosen": -2.2141013145446777, "logits/rejected": -2.156899929046631, "logps/chosen": -0.8117578625679016, "logps/rejected": -1.365912914276123, "loss": 1.1286, "nll_loss": 0.9503684043884277, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2435273677110672, "rewards/margins": 0.16624650359153748, "rewards/rejected": -0.40977391600608826, "step": 8210 }, { "epoch": 2.1535237097196753, "grad_norm": 0.6164889931678772, "learning_rate": 1.5572944528064924e-06, "log_odds_chosen": 0.8399462699890137, "log_odds_ratio": -0.5310218334197998, "logits/chosen": -2.3283259868621826, "logits/rejected": -2.2858433723449707, "logps/chosen": -0.7888689041137695, "logps/rejected": -1.3581318855285645, "loss": 1.1608, "nll_loss": 1.0222628116607666, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2366606742143631, "rewards/margins": 0.17077893018722534, "rewards/rejected": -0.40743955969810486, "step": 8220 }, { "epoch": 2.1561435682473147, "grad_norm": 0.56831955909729, "learning_rate": 1.5483449662252675e-06, "log_odds_chosen": 0.8700420260429382, "log_odds_ratio": -0.5313364267349243, "logits/chosen": -2.3548262119293213, "logits/rejected": -2.2441577911376953, "logps/chosen": -0.7659019231796265, "logps/rejected": -1.387765645980835, "loss": 1.2331, "nll_loss": 1.1575219631195068, "rewards/accuracies": 0.625, "rewards/chosen": -0.22977058589458466, "rewards/margins": 0.18655914068222046, "rewards/rejected": -0.4163297712802887, "step": 8230 }, { "epoch": 2.158763426774954, "grad_norm": 2.4858086109161377, "learning_rate": 1.5394150936035227e-06, "log_odds_chosen": 0.45087093114852905, "log_odds_ratio": -0.6702399849891663, "logits/chosen": -2.370619297027588, "logits/rejected": -2.2804319858551025, "logps/chosen": -0.8342496752738953, "logps/rejected": -1.1709798574447632, "loss": 1.2156, "nll_loss": 1.053002119064331, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2502748966217041, "rewards/margins": 0.10101909935474396, "rewards/rejected": -0.3512939512729645, "step": 8240 }, { "epoch": 2.161383285302594, "grad_norm": 1.327843189239502, "learning_rate": 1.5305049063828576e-06, "log_odds_chosen": 0.8689268827438354, "log_odds_ratio": -0.4629904627799988, "logits/chosen": -2.417534351348877, "logits/rejected": -2.273923397064209, "logps/chosen": -0.7162599563598633, "logps/rejected": -1.2633002996444702, "loss": 1.1458, "nll_loss": 0.9327513575553894, "rewards/accuracies": 0.8125, "rewards/chosen": -0.21487799286842346, "rewards/margins": 0.16411210596561432, "rewards/rejected": -0.378990113735199, "step": 8250 }, { "epoch": 2.164003143830233, "grad_norm": 0.9285928010940552, "learning_rate": 1.5216144758473864e-06, "log_odds_chosen": 0.6072670221328735, "log_odds_ratio": -0.5657426118850708, "logits/chosen": -2.280355930328369, "logits/rejected": -2.2409987449645996, "logps/chosen": -0.7077945470809937, "logps/rejected": -1.062334418296814, "loss": 1.1336, "nll_loss": 0.9364790916442871, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2123383730649948, "rewards/margins": 0.10636196285486221, "rewards/rejected": -0.31870037317276, "step": 8260 }, { "epoch": 2.1666230023578725, "grad_norm": 1.713695764541626, "learning_rate": 1.5127438731231585e-06, "log_odds_chosen": 0.4784313142299652, "log_odds_ratio": -0.6128493547439575, "logits/chosen": -2.282982110977173, "logits/rejected": -2.2446393966674805, "logps/chosen": -0.707381010055542, "logps/rejected": -0.9963580965995789, "loss": 1.1868, "nll_loss": 0.9916016459465027, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21221432089805603, "rewards/margins": 0.08669310808181763, "rewards/rejected": -0.29890742897987366, "step": 8270 }, { "epoch": 2.1692428608855123, "grad_norm": 1.0349271297454834, "learning_rate": 1.5038931691776008e-06, "log_odds_chosen": 0.7779685854911804, "log_odds_ratio": -0.5485564470291138, "logits/chosen": -2.3171167373657227, "logits/rejected": -2.2523233890533447, "logps/chosen": -0.7525123953819275, "logps/rejected": -1.2594842910766602, "loss": 1.1289, "nll_loss": 0.9566656351089478, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22575370967388153, "rewards/margins": 0.15209157764911652, "rewards/rejected": -0.37784528732299805, "step": 8280 }, { "epoch": 2.1718627194131517, "grad_norm": 0.7644699215888977, "learning_rate": 1.4950624348189385e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3402669429779053, "logits/rejected": -2.2790560722351074, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1499, "nll_loss": 0.978752613067627, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8290 }, { "epoch": 2.174482577940791, "grad_norm": 0.5995610952377319, "learning_rate": 1.4862517406956401e-06, "log_odds_chosen": 0.6101630926132202, "log_odds_ratio": -0.5798295736312866, "logits/chosen": -2.2667810916900635, "logits/rejected": -2.2326595783233643, "logps/chosen": -0.7548749446868896, "logps/rejected": -1.1257014274597168, "loss": 1.0991, "nll_loss": 0.9228875041007996, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2264624834060669, "rewards/margins": 0.1112479418516159, "rewards/rejected": -0.3377104103565216, "step": 8300 }, { "epoch": 2.177102436468431, "grad_norm": 3.115504264831543, "learning_rate": 1.4774611572958403e-06, "log_odds_chosen": 0.5531939268112183, "log_odds_ratio": -0.5671836137771606, "logits/chosen": -2.3889362812042236, "logits/rejected": -2.2741141319274902, "logps/chosen": -0.7969766855239868, "logps/rejected": -1.1776422262191772, "loss": 1.1541, "nll_loss": 0.9193738102912903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23909299075603485, "rewards/margins": 0.11419965326786041, "rewards/rejected": -0.35329264402389526, "step": 8310 }, { "epoch": 2.17972229499607, "grad_norm": 0.9155730605125427, "learning_rate": 1.4686907549467873e-06, "log_odds_chosen": 0.8230752944946289, "log_odds_ratio": -0.5453607439994812, "logits/chosen": -2.3971354961395264, "logits/rejected": -2.3101887702941895, "logps/chosen": -0.7338647842407227, "logps/rejected": -1.260888934135437, "loss": 1.0891, "nll_loss": 0.9765573740005493, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22015945613384247, "rewards/margins": 0.15810725092887878, "rewards/rejected": -0.37826666235923767, "step": 8320 }, { "epoch": 2.1823421535237095, "grad_norm": 0.6449805498123169, "learning_rate": 1.459940603814271e-06, "log_odds_chosen": 0.6671076416969299, "log_odds_ratio": -0.5928485989570618, "logits/chosen": -2.3091838359832764, "logits/rejected": -2.337674856185913, "logps/chosen": -0.8095825910568237, "logps/rejected": -1.2497963905334473, "loss": 1.0954, "nll_loss": 0.9393445253372192, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24287477135658264, "rewards/margins": 0.13206419348716736, "rewards/rejected": -0.37493896484375, "step": 8330 }, { "epoch": 2.1849620120513493, "grad_norm": 1.080777645111084, "learning_rate": 1.4512107739020695e-06, "log_odds_chosen": 0.64756840467453, "log_odds_ratio": -0.5886824727058411, "logits/chosen": -2.2798759937286377, "logits/rejected": -2.224525213241577, "logps/chosen": -0.7196683287620544, "logps/rejected": -1.1370394229888916, "loss": 1.1547, "nll_loss": 0.9530329704284668, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2159004956483841, "rewards/margins": 0.12521134316921234, "rewards/rejected": -0.34111183881759644, "step": 8340 }, { "epoch": 2.1875818705789887, "grad_norm": 0.5750529170036316, "learning_rate": 1.4425013350513804e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.28871488571167, "logits/rejected": -2.286705493927002, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1448, "nll_loss": 0.9213430285453796, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8350 }, { "epoch": 2.1902017291066285, "grad_norm": 0.8781396746635437, "learning_rate": 1.433812356940272e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3075361251831055, "logits/rejected": -2.266986131668091, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1873, "nll_loss": 1.0113605260849, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8360 }, { "epoch": 2.192821587634268, "grad_norm": 1.8018348217010498, "learning_rate": 1.425143909083116e-06, "log_odds_chosen": 0.6193541288375854, "log_odds_ratio": -0.6082859039306641, "logits/chosen": -2.2629141807556152, "logits/rejected": -2.189175844192505, "logps/chosen": -0.8054765462875366, "logps/rejected": -1.2357633113861084, "loss": 1.2368, "nll_loss": 1.0390828847885132, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2416429966688156, "rewards/margins": 0.12908601760864258, "rewards/rejected": -0.3707290291786194, "step": 8370 }, { "epoch": 2.195441446161907, "grad_norm": 1.0677623748779297, "learning_rate": 1.4164960608300404e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3507161140441895, "logits/rejected": -2.3473572731018066, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2104, "nll_loss": 1.070184350013733, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8380 }, { "epoch": 2.198061304689547, "grad_norm": 1.3537495136260986, "learning_rate": 1.4078688813663657e-06, "log_odds_chosen": 0.6175998449325562, "log_odds_ratio": -0.6177070736885071, "logits/chosen": -2.350473642349243, "logits/rejected": -2.299494981765747, "logps/chosen": -0.7294925451278687, "logps/rejected": -1.0991421937942505, "loss": 1.1824, "nll_loss": 1.0402027368545532, "rewards/accuracies": 0.625, "rewards/chosen": -0.21884775161743164, "rewards/margins": 0.11089493334293365, "rewards/rejected": -0.3297426700592041, "step": 8390 }, { "epoch": 2.2006811632171863, "grad_norm": 0.8092440962791443, "learning_rate": 1.3992624397120611e-06, "log_odds_chosen": 0.4864440858364105, "log_odds_ratio": -0.6015315055847168, "logits/chosen": -2.2861626148223877, "logits/rejected": -2.260592222213745, "logps/chosen": -0.7875272631645203, "logps/rejected": -1.091841459274292, "loss": 1.1952, "nll_loss": 1.0200917720794678, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23625822365283966, "rewards/margins": 0.09129424393177032, "rewards/rejected": -0.3275524377822876, "step": 8400 }, { "epoch": 2.2033010217448257, "grad_norm": 0.9931483268737793, "learning_rate": 1.390676804721182e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1939094066619873, "logits/rejected": -2.2365541458129883, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2058, "nll_loss": 0.96241694688797, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8410 }, { "epoch": 2.2059208802724655, "grad_norm": 1.0932252407073975, "learning_rate": 1.3821120450813296e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3187055587768555, "logits/rejected": -2.1979761123657227, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1725, "nll_loss": 0.9216848611831665, "rewards/accuracies": 0.762499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8420 }, { "epoch": 2.208540738800105, "grad_norm": 1.6031500101089478, "learning_rate": 1.3735682293130904e-06, "log_odds_chosen": 0.717898964881897, "log_odds_ratio": -0.524624228477478, "logits/chosen": -2.267751455307007, "logits/rejected": -2.2495803833007812, "logps/chosen": -0.7513440251350403, "logps/rejected": -1.1824089288711548, "loss": 1.1555, "nll_loss": 1.0327035188674927, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22540323436260223, "rewards/margins": 0.1293194591999054, "rewards/rejected": -0.3547227084636688, "step": 8430 }, { "epoch": 2.211160597327744, "grad_norm": 0.8955727815628052, "learning_rate": 1.3650454257695004e-06, "log_odds_chosen": 0.6082738041877747, "log_odds_ratio": -0.6197978854179382, "logits/chosen": -2.2929813861846924, "logits/rejected": -2.2896344661712646, "logps/chosen": -0.7416673302650452, "logps/rejected": -1.0775187015533447, "loss": 1.1696, "nll_loss": 0.9177933931350708, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22250020503997803, "rewards/margins": 0.10075543820858002, "rewards/rejected": -0.32325565814971924, "step": 8440 }, { "epoch": 2.213780455855384, "grad_norm": 2.1467807292938232, "learning_rate": 1.3565437026354848e-06, "log_odds_chosen": 0.8498493432998657, "log_odds_ratio": -0.5043315291404724, "logits/chosen": -2.345242977142334, "logits/rejected": -2.2636256217956543, "logps/chosen": -0.7220414280891418, "logps/rejected": -1.2821003198623657, "loss": 1.1267, "nll_loss": 0.9218358993530273, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.21661242842674255, "rewards/margins": 0.1680176556110382, "rewards/rejected": -0.38463008403778076, "step": 8450 }, { "epoch": 2.2164003143830233, "grad_norm": 2.089454174041748, "learning_rate": 1.3480631279273258e-06, "log_odds_chosen": 0.7336888909339905, "log_odds_ratio": -0.5629469156265259, "logits/chosen": -2.4151599407196045, "logits/rejected": -2.327817440032959, "logps/chosen": -0.7095783352851868, "logps/rejected": -1.1782857179641724, "loss": 1.1763, "nll_loss": 0.969098687171936, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.21287353336811066, "rewards/margins": 0.14061219990253448, "rewards/rejected": -0.35348567366600037, "step": 8460 }, { "epoch": 2.2190201729106627, "grad_norm": 0.7892192006111145, "learning_rate": 1.3396037694921063e-06, "log_odds_chosen": 0.9951766133308411, "log_odds_ratio": -0.46867403388023376, "logits/chosen": -2.3707871437072754, "logits/rejected": -2.3155226707458496, "logps/chosen": -0.6781240701675415, "logps/rejected": -1.2925235033035278, "loss": 1.0785, "nll_loss": 0.9551092982292175, "rewards/accuracies": 0.8125, "rewards/chosen": -0.2034371793270111, "rewards/margins": 0.18431983888149261, "rewards/rejected": -0.3877570629119873, "step": 8470 }, { "epoch": 2.2216400314383025, "grad_norm": 3.118610143661499, "learning_rate": 1.331165695007178e-06, "log_odds_chosen": 0.7139686346054077, "log_odds_ratio": -0.5537358522415161, "logits/chosen": -2.3440024852752686, "logits/rejected": -2.3089890480041504, "logps/chosen": -0.727476954460144, "logps/rejected": -1.181262493133545, "loss": 1.1523, "nll_loss": 1.008698582649231, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21824312210083008, "rewards/margins": 0.13613565266132355, "rewards/rejected": -0.35437875986099243, "step": 8480 }, { "epoch": 2.224259889965942, "grad_norm": 1.5117188692092896, "learning_rate": 1.3227489719796103e-06, "log_odds_chosen": 0.8428221940994263, "log_odds_ratio": -0.5391819477081299, "logits/chosen": -2.236184597015381, "logits/rejected": -2.1514389514923096, "logps/chosen": -0.7910536527633667, "logps/rejected": -1.3860055208206177, "loss": 1.1063, "nll_loss": 0.9411818385124207, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2373161017894745, "rewards/margins": 0.17848555743694305, "rewards/rejected": -0.41580167412757874, "step": 8490 }, { "epoch": 2.226879748493581, "grad_norm": 0.848373532295227, "learning_rate": 1.3143536677456556e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2260618209838867, "logits/rejected": -2.1491122245788574, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2194, "nll_loss": 1.029792308807373, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8500 }, { "epoch": 2.229499607021221, "grad_norm": 1.3540852069854736, "learning_rate": 1.305979849470214e-06, "log_odds_chosen": 0.4573415219783783, "log_odds_ratio": -0.6199575662612915, "logits/chosen": -2.3242225646972656, "logits/rejected": -2.301670551300049, "logps/chosen": -0.7677930593490601, "logps/rejected": -1.0322539806365967, "loss": 1.1556, "nll_loss": 0.9912976026535034, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2303379327058792, "rewards/margins": 0.07933827489614487, "rewards/rejected": -0.30967622995376587, "step": 8510 }, { "epoch": 2.2321194655488603, "grad_norm": 0.8905701637268066, "learning_rate": 1.297627584146284e-06, "log_odds_chosen": 0.9992515444755554, "log_odds_ratio": -0.5397320985794067, "logits/chosen": -2.345698595046997, "logits/rejected": -2.2667131423950195, "logps/chosen": -0.7295430302619934, "logps/rejected": -1.4442812204360962, "loss": 1.0997, "nll_loss": 0.9267648458480835, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21886292099952698, "rewards/margins": 0.21442146599292755, "rewards/rejected": -0.43328437209129333, "step": 8520 }, { "epoch": 2.2347393240764997, "grad_norm": 1.2402878999710083, "learning_rate": 1.2892969385944424e-06, "log_odds_chosen": 0.9395334124565125, "log_odds_ratio": -0.5334161520004272, "logits/chosen": -2.3000433444976807, "logits/rejected": -2.2278964519500732, "logps/chosen": -0.7398746013641357, "logps/rejected": -1.307929277420044, "loss": 1.1554, "nll_loss": 0.9303131103515625, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22196240723133087, "rewards/margins": 0.17041637003421783, "rewards/rejected": -0.3923787474632263, "step": 8530 }, { "epoch": 2.2373591826041395, "grad_norm": 0.7556144595146179, "learning_rate": 1.280987979462294e-06, "log_odds_chosen": 0.6403089761734009, "log_odds_ratio": -0.5536657571792603, "logits/chosen": -2.394951343536377, "logits/rejected": -2.27315354347229, "logps/chosen": -0.7784160375595093, "logps/rejected": -1.158451795578003, "loss": 1.1793, "nll_loss": 0.961911678314209, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23352476954460144, "rewards/margins": 0.114010751247406, "rewards/rejected": -0.34753555059432983, "step": 8540 }, { "epoch": 2.239979041131779, "grad_norm": 0.6780878305435181, "learning_rate": 1.2727007732239524e-06, "log_odds_chosen": 0.645702600479126, "log_odds_ratio": -0.5607272982597351, "logits/chosen": -2.4490718841552734, "logits/rejected": -2.3187615871429443, "logps/chosen": -0.709814727306366, "logps/rejected": -1.1567081212997437, "loss": 1.1018, "nll_loss": 0.9027941823005676, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21294443309307098, "rewards/margins": 0.13406796753406525, "rewards/rejected": -0.347012460231781, "step": 8550 }, { "epoch": 2.242598899659418, "grad_norm": 1.0143823623657227, "learning_rate": 1.2644353861794955e-06, "log_odds_chosen": 0.6321672201156616, "log_odds_ratio": -0.585572361946106, "logits/chosen": -2.348769426345825, "logits/rejected": -2.230147123336792, "logps/chosen": -0.7538818717002869, "logps/rejected": -1.1902244091033936, "loss": 1.1405, "nll_loss": 0.9629570841789246, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2261645346879959, "rewards/margins": 0.13090278208255768, "rewards/rejected": -0.3570673167705536, "step": 8560 }, { "epoch": 2.245218758187058, "grad_norm": 1.1202448606491089, "learning_rate": 1.2561918844544481e-06, "log_odds_chosen": 0.5690768957138062, "log_odds_ratio": -0.5792809724807739, "logits/chosen": -2.355116605758667, "logits/rejected": -2.2998900413513184, "logps/chosen": -0.7332422137260437, "logps/rejected": -1.1446585655212402, "loss": 1.1419, "nll_loss": 0.9517391324043274, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21997268497943878, "rewards/margins": 0.12342490255832672, "rewards/rejected": -0.3433975577354431, "step": 8570 }, { "epoch": 2.2478386167146973, "grad_norm": 0.7738605737686157, "learning_rate": 1.2479703339992384e-06, "log_odds_chosen": 0.7932100892066956, "log_odds_ratio": -0.5683737993240356, "logits/chosen": -2.284139633178711, "logits/rejected": -2.288024425506592, "logps/chosen": -0.7952319979667664, "logps/rejected": -1.328848123550415, "loss": 1.1393, "nll_loss": 0.987812876701355, "rewards/accuracies": 0.6875, "rewards/chosen": -0.23856958746910095, "rewards/margins": 0.16008484363555908, "rewards/rejected": -0.3986544609069824, "step": 8580 }, { "epoch": 2.250458475242337, "grad_norm": 0.8358622193336487, "learning_rate": 1.239770800588685e-06, "log_odds_chosen": 0.742386519908905, "log_odds_ratio": -0.5501828193664551, "logits/chosen": -2.338921546936035, "logits/rejected": -2.245392084121704, "logps/chosen": -0.7393203973770142, "logps/rejected": -1.2486248016357422, "loss": 1.0775, "nll_loss": 0.9657154083251953, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22179611027240753, "rewards/margins": 0.1527913510799408, "rewards/rejected": -0.3745874762535095, "step": 8590 }, { "epoch": 2.2530783337699765, "grad_norm": 1.5943080186843872, "learning_rate": 1.2315933498214582e-06, "log_odds_chosen": 0.549643337726593, "log_odds_ratio": -0.6037155985832214, "logits/chosen": -2.3938796520233154, "logits/rejected": -2.338270664215088, "logps/chosen": -0.6918709874153137, "logps/rejected": -1.0082517862319946, "loss": 1.1231, "nll_loss": 0.9420291185379028, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20756134390830994, "rewards/margins": 0.09491421282291412, "rewards/rejected": -0.3024755120277405, "step": 8600 }, { "epoch": 2.255698192297616, "grad_norm": 1.3010436296463013, "learning_rate": 1.2234380471195653e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3951871395111084, "logits/rejected": -2.262284755706787, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.125, "nll_loss": 1.0048863887786865, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8610 }, { "epoch": 2.2583180508252556, "grad_norm": 0.738894522190094, "learning_rate": 1.215304957727818e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.357652187347412, "logits/rejected": -2.2313804626464844, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1017, "nll_loss": 0.9810165166854858, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8620 }, { "epoch": 2.260937909352895, "grad_norm": 4.366784572601318, "learning_rate": 1.2071941467133195e-06, "log_odds_chosen": 0.8487613797187805, "log_odds_ratio": -0.5361254811286926, "logits/chosen": -2.334378242492676, "logits/rejected": -2.308259963989258, "logps/chosen": -0.7141419649124146, "logps/rejected": -1.3033958673477173, "loss": 1.142, "nll_loss": 0.9466320276260376, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2142426073551178, "rewards/margins": 0.1767762005329132, "rewards/rejected": -0.391018807888031, "step": 8630 }, { "epoch": 2.2635577678805343, "grad_norm": 1.2237880229949951, "learning_rate": 1.1991056789649352e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.360246419906616, "logits/rejected": -2.334704637527466, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1857, "nll_loss": 0.9747809171676636, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8640 }, { "epoch": 2.266177626408174, "grad_norm": 0.6723268032073975, "learning_rate": 1.1910396191927823e-06, "log_odds_chosen": 0.4639887809753418, "log_odds_ratio": -0.5976921319961548, "logits/chosen": -2.3446085453033447, "logits/rejected": -2.310011148452759, "logps/chosen": -0.7315848469734192, "logps/rejected": -0.9905832409858704, "loss": 1.1548, "nll_loss": 0.9442617297172546, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21947546303272247, "rewards/margins": 0.07769951969385147, "rewards/rejected": -0.29717499017715454, "step": 8650 }, { "epoch": 2.2687974849358135, "grad_norm": 0.624419629573822, "learning_rate": 1.1829960319277023e-06, "log_odds_chosen": 0.8823187947273254, "log_odds_ratio": -0.4785476624965668, "logits/chosen": -2.337517023086548, "logits/rejected": -2.299792766571045, "logps/chosen": -0.7347037196159363, "logps/rejected": -1.270385980606079, "loss": 1.1206, "nll_loss": 0.9621292352676392, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22041110694408417, "rewards/margins": 0.16070470213890076, "rewards/rejected": -0.38111579418182373, "step": 8660 }, { "epoch": 2.271417343463453, "grad_norm": 1.0945886373519897, "learning_rate": 1.1749749815207564e-06, "log_odds_chosen": 0.6094951033592224, "log_odds_ratio": -0.5400856137275696, "logits/chosen": -2.3540005683898926, "logits/rejected": -2.2506682872772217, "logps/chosen": -0.6675869822502136, "logps/rejected": -0.9984952807426453, "loss": 1.1344, "nll_loss": 0.9185441136360168, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.20027610659599304, "rewards/margins": 0.0992724746465683, "rewards/rejected": -0.29954856634140015, "step": 8670 }, { "epoch": 2.2740372019910926, "grad_norm": 0.8828402757644653, "learning_rate": 1.1669765321426975e-06, "log_odds_chosen": 0.9565989375114441, "log_odds_ratio": -0.49542707204818726, "logits/chosen": -2.2234721183776855, "logits/rejected": -2.151531934738159, "logps/chosen": -0.658672571182251, "logps/rejected": -1.2780702114105225, "loss": 1.1192, "nll_loss": 0.9278495907783508, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19760176539421082, "rewards/margins": 0.18581931293010712, "rewards/rejected": -0.3834210932254791, "step": 8680 }, { "epoch": 2.276657060518732, "grad_norm": 1.3013527393341064, "learning_rate": 1.1590007477834696e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.32578706741333, "logits/rejected": -2.285616159439087, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1862, "nll_loss": 1.0512043237686157, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8690 }, { "epoch": 2.2792769190463713, "grad_norm": 0.9912120699882507, "learning_rate": 1.151047692251685e-06, "log_odds_chosen": 0.8474682569503784, "log_odds_ratio": -0.5545841455459595, "logits/chosen": -2.253981351852417, "logits/rejected": -2.2106118202209473, "logps/chosen": -0.7208150029182434, "logps/rejected": -1.2561895847320557, "loss": 1.1273, "nll_loss": 0.9376838803291321, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21624450385570526, "rewards/margins": 0.1606123447418213, "rewards/rejected": -0.37685686349868774, "step": 8700 }, { "epoch": 2.281896777574011, "grad_norm": NaN, "learning_rate": 1.1431174291741238e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.276804208755493, "logits/rejected": -2.2268340587615967, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1144, "nll_loss": 0.9234612584114075, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8710 }, { "epoch": 2.2845166361016505, "grad_norm": 0.621164083480835, "learning_rate": 1.1352100219952144e-06, "log_odds_chosen": 0.8589345812797546, "log_odds_ratio": -0.5304359197616577, "logits/chosen": -2.30129337310791, "logits/rejected": -2.22340989112854, "logps/chosen": -0.74507737159729, "logps/rejected": -1.308280110359192, "loss": 1.1854, "nll_loss": 1.0814646482467651, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22352321445941925, "rewards/margins": 0.1689608097076416, "rewards/rejected": -0.39248400926589966, "step": 8720 }, { "epoch": 2.28713649462929, "grad_norm": 0.8740217089653015, "learning_rate": 1.1273255339765362e-06, "log_odds_chosen": 0.7973732948303223, "log_odds_ratio": -0.5346581935882568, "logits/chosen": -2.3237717151641846, "logits/rejected": -2.27093505859375, "logps/chosen": -0.6801123023033142, "logps/rejected": -1.1222355365753174, "loss": 1.1234, "nll_loss": 0.9545769691467285, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2040337324142456, "rewards/margins": 0.13263697922229767, "rewards/rejected": -0.3366706967353821, "step": 8730 }, { "epoch": 2.2897563531569296, "grad_norm": 0.8346465229988098, "learning_rate": 1.119464028196305e-06, "log_odds_chosen": 0.4411230981349945, "log_odds_ratio": -0.6858288049697876, "logits/chosen": -2.3519287109375, "logits/rejected": -2.362748622894287, "logps/chosen": -0.8538503646850586, "logps/rejected": -1.1701089143753052, "loss": 1.2423, "nll_loss": 1.0388590097427368, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2561551332473755, "rewards/margins": 0.09487754106521606, "rewards/rejected": -0.35103267431259155, "step": 8740 }, { "epoch": 2.292376211684569, "grad_norm": 0.9023311138153076, "learning_rate": 1.1116255675488724e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2826225757598877, "logits/rejected": -2.349752426147461, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2649, "nll_loss": 1.0099340677261353, "rewards/accuracies": 0.800000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8750 }, { "epoch": 2.2949960702122087, "grad_norm": 0.8043201565742493, "learning_rate": 1.1038102147442253e-06, "log_odds_chosen": 0.966736912727356, "log_odds_ratio": -0.49693384766578674, "logits/chosen": -2.3245232105255127, "logits/rejected": -2.2794699668884277, "logps/chosen": -0.6746972799301147, "logps/rejected": -1.3361904621124268, "loss": 1.1429, "nll_loss": 0.9874985814094543, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20240919291973114, "rewards/margins": 0.19844797253608704, "rewards/rejected": -0.400857150554657, "step": 8760 }, { "epoch": 2.297615928739848, "grad_norm": 0.9029083251953125, "learning_rate": 1.0960180323074776e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4458742141723633, "logits/rejected": -2.308607339859009, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2232, "nll_loss": 1.0424312353134155, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8770 }, { "epoch": 2.3002357872674875, "grad_norm": 1.332794189453125, "learning_rate": 1.0882490825783738e-06, "log_odds_chosen": 0.6195895671844482, "log_odds_ratio": -0.5800909996032715, "logits/chosen": -2.350688934326172, "logits/rejected": -2.2491958141326904, "logps/chosen": -0.7048109769821167, "logps/rejected": -1.145982265472412, "loss": 1.1615, "nll_loss": 0.975730299949646, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21144327521324158, "rewards/margins": 0.13235141336917877, "rewards/rejected": -0.34379473328590393, "step": 8780 }, { "epoch": 2.3028556457951272, "grad_norm": 1.1330759525299072, "learning_rate": 1.0805034277107923e-06, "log_odds_chosen": 0.8517688512802124, "log_odds_ratio": -0.5438254475593567, "logits/chosen": -2.3573813438415527, "logits/rejected": -2.2693843841552734, "logps/chosen": -0.7226864099502563, "logps/rejected": -1.2901893854141235, "loss": 1.1698, "nll_loss": 0.9296733736991882, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21680593490600586, "rewards/margins": 0.17025086283683777, "rewards/rejected": -0.387056827545166, "step": 8790 }, { "epoch": 2.3054755043227666, "grad_norm": 1.605631947517395, "learning_rate": 1.072781129672243e-06, "log_odds_chosen": 0.7980759143829346, "log_odds_ratio": -0.5296114087104797, "logits/chosen": -2.331624746322632, "logits/rejected": -2.214062452316284, "logps/chosen": -0.7453430891036987, "logps/rejected": -1.3075072765350342, "loss": 1.141, "nll_loss": 0.9471443295478821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22360293567180634, "rewards/margins": 0.16864928603172302, "rewards/rejected": -0.39225220680236816, "step": 8800 }, { "epoch": 2.308095362850406, "grad_norm": 3.602459192276001, "learning_rate": 1.0650822502433773e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2976601123809814, "logits/rejected": -2.268963575363159, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2315, "nll_loss": 0.9973615407943726, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8810 }, { "epoch": 2.3107152213780457, "grad_norm": 1.633509874343872, "learning_rate": 1.057406851017487e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2278940677642822, "logits/rejected": -2.2952754497528076, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2161, "nll_loss": 1.0378488302230835, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8820 }, { "epoch": 2.313335079905685, "grad_norm": 0.8576143383979797, "learning_rate": 1.04975499340002e-06, "log_odds_chosen": 0.49237555265426636, "log_odds_ratio": -0.6519709825515747, "logits/chosen": -2.2885022163391113, "logits/rejected": -2.2347323894500732, "logps/chosen": -0.8077861070632935, "logps/rejected": -1.08531653881073, "loss": 1.1241, "nll_loss": 0.966915488243103, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.24233584105968475, "rewards/margins": 0.08325913548469543, "rewards/rejected": -0.325594961643219, "step": 8830 }, { "epoch": 2.3159549384333245, "grad_norm": 1.1486722230911255, "learning_rate": 1.0421267386080792e-06, "log_odds_chosen": 0.5318558812141418, "log_odds_ratio": -0.6292375326156616, "logits/chosen": -2.2533774375915527, "logits/rejected": -2.200471878051758, "logps/chosen": -0.7965937852859497, "logps/rejected": -1.0767972469329834, "loss": 1.1738, "nll_loss": 0.9978004693984985, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23897811770439148, "rewards/margins": 0.08406105637550354, "rewards/rejected": -0.3230392336845398, "step": 8840 }, { "epoch": 2.3185747969609642, "grad_norm": 0.7274095416069031, "learning_rate": 1.0345221476699438e-06, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2941043376922607, "logits/rejected": -2.2437822818756104, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2013, "nll_loss": 0.9984451532363892, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 8850 }, { "epoch": 2.3211946554886036, "grad_norm": 2.0319666862487793, "learning_rate": 1.026941281424569e-06, "log_odds_chosen": 1.0079764127731323, "log_odds_ratio": -0.4842264652252197, "logits/chosen": -2.2680511474609375, "logits/rejected": -2.212315320968628, "logps/chosen": -0.754029393196106, "logps/rejected": -1.3792338371276855, "loss": 1.1747, "nll_loss": 0.9862092137336731, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22620880603790283, "rewards/margins": 0.18756136298179626, "rewards/rejected": -0.4137701392173767, "step": 8860 }, { "epoch": 2.323814514016243, "grad_norm": 0.5659903883934021, "learning_rate": 1.0193842005211112e-06, "log_odds_chosen": 0.706586480140686, "log_odds_ratio": -0.610140860080719, "logits/chosen": -2.2957186698913574, "logits/rejected": -2.242215633392334, "logps/chosen": -0.8312687873840332, "logps/rejected": -1.3248344659805298, "loss": 1.1946, "nll_loss": 1.0227365493774414, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.24938063323497772, "rewards/margins": 0.14806973934173584, "rewards/rejected": -0.39745038747787476, "step": 8870 }, { "epoch": 2.3264343725438827, "grad_norm": 1.2524254322052002, "learning_rate": 1.0118509654184327e-06, "log_odds_chosen": 0.6080425977706909, "log_odds_ratio": -0.5842702984809875, "logits/chosen": -2.2649052143096924, "logits/rejected": -2.211177349090576, "logps/chosen": -0.739174485206604, "logps/rejected": -1.0775439739227295, "loss": 1.1015, "nll_loss": 0.9344123005867004, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2217523604631424, "rewards/margins": 0.1015109047293663, "rewards/rejected": -0.3232632577419281, "step": 8880 }, { "epoch": 2.329054231071522, "grad_norm": 0.7821542620658875, "learning_rate": 1.0043416363846263e-06, "log_odds_chosen": 0.9459649920463562, "log_odds_ratio": -0.5242363214492798, "logits/chosen": -2.268319606781006, "logits/rejected": -2.187417507171631, "logps/chosen": -0.7150009870529175, "logps/rejected": -1.3489716053009033, "loss": 1.2059, "nll_loss": 1.0141136646270752, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2145003080368042, "rewards/margins": 0.19019125401973724, "rewards/rejected": -0.40469154715538025, "step": 8890 }, { "epoch": 2.3316740895991614, "grad_norm": 0.9345788359642029, "learning_rate": 9.96856273496525e-07, "log_odds_chosen": 0.7112823724746704, "log_odds_ratio": -0.530217170715332, "logits/chosen": -2.321181058883667, "logits/rejected": -2.200430393218994, "logps/chosen": -0.6991164684295654, "logps/rejected": -1.161613941192627, "loss": 1.1232, "nll_loss": 0.9155840873718262, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2097349613904953, "rewards/margins": 0.13874925673007965, "rewards/rejected": -0.34848421812057495, "step": 8900 }, { "epoch": 2.3342939481268012, "grad_norm": 0.595198392868042, "learning_rate": 9.893949366392287e-07, "log_odds_chosen": 0.9271847605705261, "log_odds_ratio": -0.46211013197898865, "logits/chosen": -2.4060912132263184, "logits/rejected": -2.231027603149414, "logps/chosen": -0.6440202593803406, "logps/rejected": -1.2252671718597412, "loss": 1.0833, "nll_loss": 0.8985065221786499, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.19320610165596008, "rewards/margins": 0.174374058842659, "rewards/rejected": -0.3675801455974579, "step": 8910 }, { "epoch": 2.3369138066544406, "grad_norm": 0.8756603598594666, "learning_rate": 9.81957685505622e-07, "log_odds_chosen": 0.5744355916976929, "log_odds_ratio": -0.5650742650032043, "logits/chosen": -2.351215362548828, "logits/rejected": -2.285717487335205, "logps/chosen": -0.7366133332252502, "logps/rejected": -1.0894924402236938, "loss": 1.1516, "nll_loss": 0.9282768368721008, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22098398208618164, "rewards/margins": 0.10586372762918472, "rewards/rejected": -0.32684770226478577, "step": 8920 }, { "epoch": 2.3395336651820804, "grad_norm": 1.0272713899612427, "learning_rate": 9.745445795958942e-07, "log_odds_chosen": 0.5833867192268372, "log_odds_ratio": -0.6206507086753845, "logits/chosen": -2.1681251525878906, "logits/rejected": -2.1124045848846436, "logps/chosen": -0.6944400072097778, "logps/rejected": -1.0532819032669067, "loss": 1.1482, "nll_loss": 0.9370464086532593, "rewards/accuracies": 0.625, "rewards/chosen": -0.20833201706409454, "rewards/margins": 0.10765258222818375, "rewards/rejected": -0.3159845769405365, "step": 8930 }, { "epoch": 2.3421535237097197, "grad_norm": 0.7880951166152954, "learning_rate": 9.671556782170687e-07, "log_odds_chosen": 0.9043323397636414, "log_odds_ratio": -0.48898524045944214, "logits/chosen": -2.333193302154541, "logits/rejected": -2.2146565914154053, "logps/chosen": -0.698146402835846, "logps/rejected": -1.2523577213287354, "loss": 1.0583, "nll_loss": 0.8641611337661743, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.20944392681121826, "rewards/margins": 0.1662633866071701, "rewards/rejected": -0.3757072985172272, "step": 8940 }, { "epoch": 2.344773382237359, "grad_norm": 1.2719755172729492, "learning_rate": 9.597910404825227e-07, "log_odds_chosen": 0.5812257528305054, "log_odds_ratio": -0.5674748420715332, "logits/chosen": -2.410956382751465, "logits/rejected": -2.32401967048645, "logps/chosen": -0.7462170124053955, "logps/rejected": -1.096392035484314, "loss": 1.1521, "nll_loss": 0.9707596898078918, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2238651067018509, "rewards/margins": 0.10505251586437225, "rewards/rejected": -0.32891762256622314, "step": 8950 }, { "epoch": 2.3473932407649984, "grad_norm": 1.5816174745559692, "learning_rate": 9.524507253115199e-07, "log_odds_chosen": 0.6508310437202454, "log_odds_ratio": -0.6367233395576477, "logits/chosen": -2.3170766830444336, "logits/rejected": -2.3507332801818848, "logps/chosen": -0.7911494970321655, "logps/rejected": -1.2351810932159424, "loss": 1.1488, "nll_loss": 1.0624173879623413, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2373448610305786, "rewards/margins": 0.1332094967365265, "rewards/rejected": -0.3705543577671051, "step": 8960 }, { "epoch": 2.3500130992926382, "grad_norm": 0.8824944496154785, "learning_rate": 9.451347914287335e-07, "log_odds_chosen": 0.7711871266365051, "log_odds_ratio": -0.49173393845558167, "logits/chosen": -2.384990930557251, "logits/rejected": -2.302166223526001, "logps/chosen": -0.683822512626648, "logps/rejected": -1.1607885360717773, "loss": 1.1086, "nll_loss": 0.8897007703781128, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.20514675974845886, "rewards/margins": 0.14308980107307434, "rewards/rejected": -0.3482365608215332, "step": 8970 }, { "epoch": 2.3526329578202776, "grad_norm": 1.4584401845932007, "learning_rate": 9.378432973637828e-07, "log_odds_chosen": 0.5684810876846313, "log_odds_ratio": -0.599331259727478, "logits/chosen": -2.3299384117126465, "logits/rejected": -2.2472689151763916, "logps/chosen": -0.7790080308914185, "logps/rejected": -1.1568584442138672, "loss": 1.1478, "nll_loss": 0.9532071352005005, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2337023913860321, "rewards/margins": 0.11335517466068268, "rewards/rejected": -0.3470575511455536, "step": 8980 }, { "epoch": 2.3552528163479174, "grad_norm": 1.4911590814590454, "learning_rate": 9.305763014507584e-07, "log_odds_chosen": 0.6558458209037781, "log_odds_ratio": -0.5697001218795776, "logits/chosen": -2.2592883110046387, "logits/rejected": -2.1937448978424072, "logps/chosen": -0.7895419001579285, "logps/rejected": -1.153686285018921, "loss": 1.1911, "nll_loss": 0.9433363080024719, "rewards/accuracies": 0.6875, "rewards/chosen": -0.23686261475086212, "rewards/margins": 0.10924333333969116, "rewards/rejected": -0.3461059331893921, "step": 8990 }, { "epoch": 2.3578726748755567, "grad_norm": 1.0957554578781128, "learning_rate": 9.233338618277575e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3451802730560303, "logits/rejected": -2.2402498722076416, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2244, "nll_loss": 1.0301969051361084, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9000 }, { "epoch": 2.360492533403196, "grad_norm": 0.965144157409668, "learning_rate": 9.161160364364247e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3328311443328857, "logits/rejected": -2.2587530612945557, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.126, "nll_loss": 0.9607440829277039, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9010 }, { "epoch": 2.363112391930836, "grad_norm": 1.7490285634994507, "learning_rate": 9.08922883021479e-07, "log_odds_chosen": 0.5445868372917175, "log_odds_ratio": -0.5848249197006226, "logits/chosen": -2.250678300857544, "logits/rejected": -2.2927937507629395, "logps/chosen": -0.679873526096344, "logps/rejected": -0.9872776865959167, "loss": 1.1588, "nll_loss": 0.9031311273574829, "rewards/accuracies": 0.625, "rewards/chosen": -0.2039620578289032, "rewards/margins": 0.09222125262022018, "rewards/rejected": -0.2961832880973816, "step": 9020 }, { "epoch": 2.3657322504584752, "grad_norm": 1.5355708599090576, "learning_rate": 9.017544591302568e-07, "log_odds_chosen": 0.8090284466743469, "log_odds_ratio": -0.5080108642578125, "logits/chosen": -2.3402602672576904, "logits/rejected": -2.294734239578247, "logps/chosen": -0.6735700368881226, "logps/rejected": -1.127269983291626, "loss": 1.1373, "nll_loss": 0.9313151240348816, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.20207099616527557, "rewards/margins": 0.13610999286174774, "rewards/rejected": -0.3381809890270233, "step": 9030 }, { "epoch": 2.3683521089861146, "grad_norm": 1.382020115852356, "learning_rate": 8.946108221122548e-07, "log_odds_chosen": 0.47954583168029785, "log_odds_ratio": -0.6153063774108887, "logits/chosen": -2.3095602989196777, "logits/rejected": -2.3240573406219482, "logps/chosen": -0.7759014368057251, "logps/rejected": -1.0760177373886108, "loss": 1.2194, "nll_loss": 1.0020735263824463, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.23277044296264648, "rewards/margins": 0.09003488719463348, "rewards/rejected": -0.32280534505844116, "step": 9040 }, { "epoch": 2.3709719675137544, "grad_norm": 0.8270111680030823, "learning_rate": 8.874920291186625e-07, "log_odds_chosen": 0.8946967124938965, "log_odds_ratio": -0.4834720194339752, "logits/chosen": -2.3125648498535156, "logits/rejected": -2.200518846511841, "logps/chosen": -0.7274741530418396, "logps/rejected": -1.3106865882873535, "loss": 1.1241, "nll_loss": 1.0035622119903564, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21824225783348083, "rewards/margins": 0.17496375739574432, "rewards/rejected": -0.39320603013038635, "step": 9050 }, { "epoch": 2.3735918260413937, "grad_norm": 0.6589493751525879, "learning_rate": 8.803981371019138e-07, "log_odds_chosen": 0.7238329648971558, "log_odds_ratio": -0.517544150352478, "logits/chosen": -2.341801166534424, "logits/rejected": -2.2671008110046387, "logps/chosen": -0.7342585921287537, "logps/rejected": -1.2118873596191406, "loss": 1.1766, "nll_loss": 0.9740756750106812, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2202775925397873, "rewards/margins": 0.14328865706920624, "rewards/rejected": -0.36356621980667114, "step": 9060 }, { "epoch": 2.376211684569033, "grad_norm": 0.9537487030029297, "learning_rate": 8.733292028152241e-07, "log_odds_chosen": 0.663092315196991, "log_odds_ratio": -0.5655156970024109, "logits/chosen": -2.2187962532043457, "logits/rejected": -2.1496784687042236, "logps/chosen": -0.7370296716690063, "logps/rejected": -1.1615793704986572, "loss": 1.1047, "nll_loss": 0.9441116452217102, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22110891342163086, "rewards/margins": 0.12736491858959198, "rewards/rejected": -0.34847384691238403, "step": 9070 }, { "epoch": 2.378831543096673, "grad_norm": 1.5620964765548706, "learning_rate": 8.662852828121434e-07, "log_odds_chosen": 0.4089832305908203, "log_odds_ratio": -0.6394460201263428, "logits/chosen": -2.300558090209961, "logits/rejected": -2.284041166305542, "logps/chosen": -0.7592347264289856, "logps/rejected": -0.9744060635566711, "loss": 1.1931, "nll_loss": 1.0073720216751099, "rewards/accuracies": 0.625, "rewards/chosen": -0.22777041792869568, "rewards/margins": 0.06455138325691223, "rewards/rejected": -0.2923218309879303, "step": 9080 }, { "epoch": 2.3814514016243122, "grad_norm": 2.563565492630005, "learning_rate": 8.592664334460966e-07, "log_odds_chosen": 0.6685113310813904, "log_odds_ratio": -0.5449177026748657, "logits/chosen": -2.2612249851226807, "logits/rejected": -2.270404815673828, "logps/chosen": -0.7373486757278442, "logps/rejected": -1.1395182609558105, "loss": 1.152, "nll_loss": 0.9964426159858704, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22120463848114014, "rewards/margins": 0.12065087258815765, "rewards/rejected": -0.3418554663658142, "step": 9090 }, { "epoch": 2.384071260151952, "grad_norm": 0.929440975189209, "learning_rate": 8.52272710869939e-07, "log_odds_chosen": 0.473116397857666, "log_odds_ratio": -0.6314846873283386, "logits/chosen": -2.312241315841675, "logits/rejected": -2.2830560207366943, "logps/chosen": -0.7893573045730591, "logps/rejected": -1.0714190006256104, "loss": 1.1228, "nll_loss": 0.8852205276489258, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2368072271347046, "rewards/margins": 0.0846184641122818, "rewards/rejected": -0.3214256763458252, "step": 9100 }, { "epoch": 2.3866911186795914, "grad_norm": 1.0059694051742554, "learning_rate": 8.453041710355009e-07, "log_odds_chosen": 0.5390512347221375, "log_odds_ratio": -0.5975898504257202, "logits/chosen": -2.366567850112915, "logits/rejected": -2.2565507888793945, "logps/chosen": -0.8013361096382141, "logps/rejected": -1.1562631130218506, "loss": 1.1491, "nll_loss": 1.013501524925232, "rewards/accuracies": 0.6875, "rewards/chosen": -0.24040083587169647, "rewards/margins": 0.10647810995578766, "rewards/rejected": -0.34687894582748413, "step": 9110 }, { "epoch": 2.3893109772072307, "grad_norm": 2.3721923828125, "learning_rate": 8.383608696931474e-07, "log_odds_chosen": 0.5544378757476807, "log_odds_ratio": -0.6176367998123169, "logits/chosen": -2.2127938270568848, "logits/rejected": -2.2186968326568604, "logps/chosen": -0.7961745262145996, "logps/rejected": -1.2220814228057861, "loss": 1.1164, "nll_loss": 0.8999541997909546, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2388523519039154, "rewards/margins": 0.12777207791805267, "rewards/rejected": -0.3666244447231293, "step": 9120 }, { "epoch": 2.39193083573487, "grad_norm": 1.6503269672393799, "learning_rate": 8.314428623913245e-07, "log_odds_chosen": 0.6969851851463318, "log_odds_ratio": -0.5607636570930481, "logits/chosen": -2.3320770263671875, "logits/rejected": -2.2778725624084473, "logps/chosen": -0.7249714136123657, "logps/rejected": -1.1370363235473633, "loss": 1.1872, "nll_loss": 0.9675170183181763, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21749143302440643, "rewards/margins": 0.1236194595694542, "rewards/rejected": -0.34111088514328003, "step": 9130 }, { "epoch": 2.39455069426251, "grad_norm": 2.0087311267852783, "learning_rate": 8.245502044761217e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.390730857849121, "logits/rejected": -2.310168504714966, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2147, "nll_loss": 1.0169036388397217, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9140 }, { "epoch": 2.3971705527901492, "grad_norm": 1.615094542503357, "learning_rate": 8.176829510908221e-07, "log_odds_chosen": 0.4138880670070648, "log_odds_ratio": -0.6064644455909729, "logits/chosen": -2.3763301372528076, "logits/rejected": -2.3001182079315186, "logps/chosen": -0.7119525671005249, "logps/rejected": -0.9663875699043274, "loss": 1.1293, "nll_loss": 0.9114956855773926, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.213585764169693, "rewards/margins": 0.07633047550916672, "rewards/rejected": -0.2899162471294403, "step": 9150 }, { "epoch": 2.399790411317789, "grad_norm": 1.0358418226242065, "learning_rate": 8.108411571754694e-07, "log_odds_chosen": 0.6964499354362488, "log_odds_ratio": -0.5341280698776245, "logits/chosen": -2.314441680908203, "logits/rejected": -2.2328920364379883, "logps/chosen": -0.6694358587265015, "logps/rejected": -1.067570447921753, "loss": 1.171, "nll_loss": 0.9487268328666687, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20083074271678925, "rewards/margins": 0.11944043636322021, "rewards/rejected": -0.32027119398117065, "step": 9160 }, { "epoch": 2.4024102698454284, "grad_norm": 1.8128446340560913, "learning_rate": 8.040248774664205e-07, "log_odds_chosen": 0.737026572227478, "log_odds_ratio": -0.5604154467582703, "logits/chosen": -2.389708995819092, "logits/rejected": -2.368908405303955, "logps/chosen": -0.730854868888855, "logps/rejected": -1.1616265773773193, "loss": 1.1504, "nll_loss": 1.0144253969192505, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.21925649046897888, "rewards/margins": 0.1292315572500229, "rewards/rejected": -0.3484880328178406, "step": 9170 }, { "epoch": 2.4050301283730677, "grad_norm": 0.6910176277160645, "learning_rate": 7.972341664959143e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.430346965789795, "logits/rejected": -2.3240222930908203, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1291, "nll_loss": 0.9750243425369263, "rewards/accuracies": 0.75, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9180 }, { "epoch": 2.4076499869007075, "grad_norm": 0.6771955490112305, "learning_rate": 7.904690785916286e-07, "log_odds_chosen": 0.7622849345207214, "log_odds_ratio": -0.5101119875907898, "logits/chosen": -2.325289249420166, "logits/rejected": -2.245316743850708, "logps/chosen": -0.6623649001121521, "logps/rejected": -1.1464985609054565, "loss": 1.1561, "nll_loss": 0.9941354990005493, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.19870950281620026, "rewards/margins": 0.14524008333683014, "rewards/rejected": -0.3439495861530304, "step": 9190 }, { "epoch": 2.410269845428347, "grad_norm": 1.1505978107452393, "learning_rate": 7.837296678762535e-07, "log_odds_chosen": 0.8838232755661011, "log_odds_ratio": -0.4784703254699707, "logits/chosen": -2.4326930046081543, "logits/rejected": -2.280773878097534, "logps/chosen": -0.7148193120956421, "logps/rejected": -1.2801191806793213, "loss": 1.1332, "nll_loss": 0.9531828761100769, "rewards/accuracies": 0.8125, "rewards/chosen": -0.2144458293914795, "rewards/margins": 0.16958993673324585, "rewards/rejected": -0.38403576612472534, "step": 9200 }, { "epoch": 2.4128897039559862, "grad_norm": 1.2215460538864136, "learning_rate": 7.770159882670495e-07, "log_odds_chosen": 0.6239876747131348, "log_odds_ratio": -0.5764419436454773, "logits/chosen": -2.273998498916626, "logits/rejected": -2.135228395462036, "logps/chosen": -0.7360761761665344, "logps/rejected": -1.1147985458374023, "loss": 1.1824, "nll_loss": 0.9772021174430847, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22082288563251495, "rewards/margins": 0.1136167049407959, "rewards/rejected": -0.33443960547447205, "step": 9210 }, { "epoch": 2.415509562483626, "grad_norm": 1.5921707153320312, "learning_rate": 7.703280934754253e-07, "log_odds_chosen": 0.6451455950737, "log_odds_ratio": -0.5364052653312683, "logits/chosen": -2.2917981147766113, "logits/rejected": -2.202629327774048, "logps/chosen": -0.7511600852012634, "logps/rejected": -1.1426410675048828, "loss": 1.2135, "nll_loss": 1.0232977867126465, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22534802556037903, "rewards/margins": 0.11744427680969238, "rewards/rejected": -0.3427923321723938, "step": 9220 }, { "epoch": 2.4181294210112654, "grad_norm": 1.3379807472229004, "learning_rate": 7.636660370064989e-07, "log_odds_chosen": 0.591212809085846, "log_odds_ratio": -0.5676021575927734, "logits/chosen": -2.3646669387817383, "logits/rejected": -2.330785036087036, "logps/chosen": -0.7641576528549194, "logps/rejected": -1.1408413648605347, "loss": 1.145, "nll_loss": 0.9385474920272827, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2292473018169403, "rewards/margins": 0.11300511658191681, "rewards/rejected": -0.3422524333000183, "step": 9230 }, { "epoch": 2.4207492795389047, "grad_norm": 0.9375367760658264, "learning_rate": 7.570298721586783e-07, "log_odds_chosen": 0.7105704545974731, "log_odds_ratio": -0.5353621244430542, "logits/chosen": -2.3589606285095215, "logits/rejected": -2.3418896198272705, "logps/chosen": -0.679908275604248, "logps/rejected": -1.1279957294464111, "loss": 1.1416, "nll_loss": 0.9195289611816406, "rewards/accuracies": 0.75, "rewards/chosen": -0.20397250354290009, "rewards/margins": 0.13442622125148773, "rewards/rejected": -0.3383987545967102, "step": 9240 }, { "epoch": 2.4233691380665445, "grad_norm": 1.5151035785675049, "learning_rate": 7.504196520232287e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.276154041290283, "logits/rejected": -2.2338197231292725, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1768, "nll_loss": 0.9730847477912903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9250 }, { "epoch": 2.425988996594184, "grad_norm": 1.1628143787384033, "learning_rate": 7.438354294838483e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.315247058868408, "logits/rejected": -2.2409558296203613, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.142, "nll_loss": 0.9325721859931946, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9260 }, { "epoch": 2.4286088551218232, "grad_norm": 0.6855966448783875, "learning_rate": 7.37277257216252e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3910531997680664, "logits/rejected": -2.257803440093994, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1154, "nll_loss": 0.9654915928840637, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9270 }, { "epoch": 2.431228713649463, "grad_norm": 0.6044749021530151, "learning_rate": 7.307451876877398e-07, "log_odds_chosen": 0.7980896234512329, "log_odds_ratio": -0.5260680913925171, "logits/chosen": -2.3155455589294434, "logits/rejected": -2.2700533866882324, "logps/chosen": -0.701077938079834, "logps/rejected": -1.19578218460083, "loss": 1.1524, "nll_loss": 0.9522598385810852, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21032337844371796, "rewards/margins": 0.1484113186597824, "rewards/rejected": -0.358734667301178, "step": 9280 }, { "epoch": 2.4338485721771024, "grad_norm": 1.7128592729568481, "learning_rate": 7.242392731567841e-07, "log_odds_chosen": 0.7442942261695862, "log_odds_ratio": -0.5780912637710571, "logits/chosen": -2.29571270942688, "logits/rejected": -2.208618640899658, "logps/chosen": -0.7556976079940796, "logps/rejected": -1.2446203231811523, "loss": 1.1037, "nll_loss": 0.9555568695068359, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2267092764377594, "rewards/margins": 0.14667683839797974, "rewards/rejected": -0.37338611483573914, "step": 9290 }, { "epoch": 2.4364684307047417, "grad_norm": 0.8476665616035461, "learning_rate": 7.17759565672611e-07, "log_odds_chosen": 0.9404208064079285, "log_odds_ratio": -0.48902469873428345, "logits/chosen": -2.3960392475128174, "logits/rejected": -2.3327724933624268, "logps/chosen": -0.6876486539840698, "logps/rejected": -1.3132613897323608, "loss": 1.2013, "nll_loss": 1.0218064785003662, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20629461109638214, "rewards/margins": 0.18768379092216492, "rewards/rejected": -0.39397841691970825, "step": 9300 }, { "epoch": 2.4390882892323815, "grad_norm": 0.9920228123664856, "learning_rate": 7.1130611707478e-07, "log_odds_chosen": 0.7620173692703247, "log_odds_ratio": -0.5178192853927612, "logits/chosen": -2.338993549346924, "logits/rejected": -2.251821279525757, "logps/chosen": -0.729996919631958, "logps/rejected": -1.1761316061019897, "loss": 1.1557, "nll_loss": 0.9489896893501282, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.21899910271167755, "rewards/margins": 0.133840411901474, "rewards/rejected": -0.35283949971199036, "step": 9310 }, { "epoch": 2.441708147760021, "grad_norm": 1.3576570749282837, "learning_rate": 7.048789789927743e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.363481044769287, "logits/rejected": -2.247933864593506, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2278, "nll_loss": 1.077132225036621, "rewards/accuracies": 0.762499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9320 }, { "epoch": 2.4443280062876607, "grad_norm": 0.9965018630027771, "learning_rate": 6.984782028455823e-07, "log_odds_chosen": 0.5627372860908508, "log_odds_ratio": -0.5437310338020325, "logits/chosen": -2.428241729736328, "logits/rejected": -2.2924516201019287, "logps/chosen": -0.7450696229934692, "logps/rejected": -1.0861434936523438, "loss": 1.161, "nll_loss": 0.9332772493362427, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2235209047794342, "rewards/margins": 0.10232217609882355, "rewards/rejected": -0.32584303617477417, "step": 9330 }, { "epoch": 2.4469478648153, "grad_norm": 4.641717910766602, "learning_rate": 6.921038398412929e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4416372776031494, "logits/rejected": -2.3583922386169434, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1984, "nll_loss": 1.043822169303894, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9340 }, { "epoch": 2.4495677233429394, "grad_norm": 2.492459297180176, "learning_rate": 6.857559409766778e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.456437349319458, "logits/rejected": -2.2857556343078613, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.236, "nll_loss": 1.0503156185150146, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9350 }, { "epoch": 2.452187581870579, "grad_norm": 1.4727625846862793, "learning_rate": 6.794345570367914e-07, "log_odds_chosen": 0.750757098197937, "log_odds_ratio": -0.5450291633605957, "logits/chosen": -2.4588472843170166, "logits/rejected": -2.328873872756958, "logps/chosen": -0.7672897577285767, "logps/rejected": -1.301822304725647, "loss": 1.1906, "nll_loss": 1.0114530324935913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23018696904182434, "rewards/margins": 0.160359725356102, "rewards/rejected": -0.39054664969444275, "step": 9360 }, { "epoch": 2.4548074403982185, "grad_norm": 1.0900980234146118, "learning_rate": 6.73139738594561e-07, "log_odds_chosen": 0.8386037945747375, "log_odds_ratio": -0.5098464488983154, "logits/chosen": -2.377699851989746, "logits/rejected": -2.2698769569396973, "logps/chosen": -0.7421108484268188, "logps/rejected": -1.2674834728240967, "loss": 1.1487, "nll_loss": 0.9985564351081848, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2226332426071167, "rewards/margins": 0.15761184692382812, "rewards/rejected": -0.38024505972862244, "step": 9370 }, { "epoch": 2.457427298925858, "grad_norm": 0.9275605082511902, "learning_rate": 6.668715360103783e-07, "log_odds_chosen": 0.5713030099868774, "log_odds_ratio": -0.636144757270813, "logits/chosen": -2.3610827922821045, "logits/rejected": -2.2531991004943848, "logps/chosen": -0.8923199772834778, "logps/rejected": -1.3324123620986938, "loss": 1.1543, "nll_loss": 0.99763423204422, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.26769599318504333, "rewards/margins": 0.13202770054340363, "rewards/rejected": -0.39972370862960815, "step": 9380 }, { "epoch": 2.4600471574534977, "grad_norm": NaN, "learning_rate": 6.60629999431705e-07, "log_odds_chosen": 0.6998494863510132, "log_odds_ratio": -0.5567597150802612, "logits/chosen": -2.361398696899414, "logits/rejected": -2.317922353744507, "logps/chosen": -0.7368091344833374, "logps/rejected": -1.1543363332748413, "loss": 1.1704, "nll_loss": 0.9373108148574829, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22104278206825256, "rewards/margins": 0.1252581775188446, "rewards/rejected": -0.3463009297847748, "step": 9390 }, { "epoch": 2.462667015981137, "grad_norm": 0.9322507381439209, "learning_rate": 6.544151787926622e-07, "log_odds_chosen": 0.6954869031906128, "log_odds_ratio": -0.5671581029891968, "logits/chosen": -2.3633201122283936, "logits/rejected": -2.341416120529175, "logps/chosen": -0.6465151906013489, "logps/rejected": -1.1284759044647217, "loss": 1.165, "nll_loss": 1.011000394821167, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.19395455718040466, "rewards/margins": 0.14458820223808289, "rewards/rejected": -0.33854275941848755, "step": 9400 }, { "epoch": 2.4652868745087764, "grad_norm": 0.7925376892089844, "learning_rate": 6.48227123813641e-07, "log_odds_chosen": 1.0177465677261353, "log_odds_ratio": -0.4948200583457947, "logits/chosen": -2.485670566558838, "logits/rejected": -2.387874126434326, "logps/chosen": -0.731361985206604, "logps/rejected": -1.3637794256210327, "loss": 1.1363, "nll_loss": 0.9966681599617004, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21940863132476807, "rewards/margins": 0.18972525000572205, "rewards/rejected": -0.4091339111328125, "step": 9410 }, { "epoch": 2.467906733036416, "grad_norm": 5.027873992919922, "learning_rate": 6.420658840008939e-07, "log_odds_chosen": 1.0126241445541382, "log_odds_ratio": -0.5173187255859375, "logits/chosen": -2.41336727142334, "logits/rejected": -2.2453229427337646, "logps/chosen": -0.7374023795127869, "logps/rejected": -1.4581704139709473, "loss": 1.1657, "nll_loss": 0.9608467817306519, "rewards/accuracies": 0.75, "rewards/chosen": -0.22122076153755188, "rewards/margins": 0.2162303924560547, "rewards/rejected": -0.43745118379592896, "step": 9420 }, { "epoch": 2.4705265915640555, "grad_norm": 1.043600082397461, "learning_rate": 6.35931508646149e-07, "log_odds_chosen": 0.8108348846435547, "log_odds_ratio": -0.5048445463180542, "logits/chosen": -2.299959659576416, "logits/rejected": -2.195606231689453, "logps/chosen": -0.7133424878120422, "logps/rejected": -1.2336629629135132, "loss": 1.0579, "nll_loss": 0.8505862355232239, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.21400272846221924, "rewards/margins": 0.15609613060951233, "rewards/rejected": -0.37009885907173157, "step": 9430 }, { "epoch": 2.473146450091695, "grad_norm": 1.235447645187378, "learning_rate": 6.29824046826207e-07, "log_odds_chosen": 0.7211583256721497, "log_odds_ratio": -0.5399688482284546, "logits/chosen": -2.460517168045044, "logits/rejected": -2.351698637008667, "logps/chosen": -0.7261886596679688, "logps/rejected": -1.1932836771011353, "loss": 1.133, "nll_loss": 1.0024311542510986, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21785660088062286, "rewards/margins": 0.1401285082101822, "rewards/rejected": -0.35798510909080505, "step": 9440 }, { "epoch": 2.4757663086193347, "grad_norm": 0.6715003252029419, "learning_rate": 6.237435474025559e-07, "log_odds_chosen": 0.5213996767997742, "log_odds_ratio": -0.6092666387557983, "logits/chosen": -2.403353214263916, "logits/rejected": -2.3316941261291504, "logps/chosen": -0.7367696166038513, "logps/rejected": -1.0818666219711304, "loss": 1.155, "nll_loss": 0.9449120759963989, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22103090584278107, "rewards/margins": 0.10352907329797745, "rewards/rejected": -0.3245599865913391, "step": 9450 }, { "epoch": 2.478386167146974, "grad_norm": 1.0861988067626953, "learning_rate": 6.17690059020973e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.348480224609375, "logits/rejected": -2.328958034515381, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1734, "nll_loss": 0.9700533151626587, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9460 }, { "epoch": 2.4810060256746134, "grad_norm": 0.8311039805412292, "learning_rate": 6.116636301111429e-07, "log_odds_chosen": 0.9785975217819214, "log_odds_ratio": -0.48017168045043945, "logits/chosen": -2.269501209259033, "logits/rejected": -2.244539976119995, "logps/chosen": -0.6743909120559692, "logps/rejected": -1.2474693059921265, "loss": 1.0948, "nll_loss": 0.8807214498519897, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2023172825574875, "rewards/margins": 0.17192354798316956, "rewards/rejected": -0.37424081563949585, "step": 9470 }, { "epoch": 2.483625884202253, "grad_norm": 1.3558731079101562, "learning_rate": 6.056643088862627e-07, "log_odds_chosen": 0.6416984796524048, "log_odds_ratio": -0.5698156356811523, "logits/chosen": -2.4173808097839355, "logits/rejected": -2.2414774894714355, "logps/chosen": -0.7805334329605103, "logps/rejected": -1.161002278327942, "loss": 1.1395, "nll_loss": 1.009762167930603, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23416003584861755, "rewards/margins": 0.11414060741662979, "rewards/rejected": -0.34830066561698914, "step": 9480 }, { "epoch": 2.4862457427298925, "grad_norm": 0.8290237188339233, "learning_rate": 5.996921433426645e-07, "log_odds_chosen": 0.7706652879714966, "log_odds_ratio": -0.5054231882095337, "logits/chosen": -2.3739352226257324, "logits/rejected": -2.3174655437469482, "logps/chosen": -0.6906797289848328, "logps/rejected": -1.13401460647583, "loss": 1.1511, "nll_loss": 0.9796941876411438, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2072039395570755, "rewards/margins": 0.1330004632472992, "rewards/rejected": -0.3402043879032135, "step": 9490 }, { "epoch": 2.4888656012575323, "grad_norm": 2.2451627254486084, "learning_rate": 5.937471812594235e-07, "log_odds_chosen": 0.9266888499259949, "log_odds_ratio": -0.5234881639480591, "logits/chosen": -2.20286226272583, "logits/rejected": -2.17008638381958, "logps/chosen": -0.7935805916786194, "logps/rejected": -1.4147309064865112, "loss": 1.1655, "nll_loss": 1.008556604385376, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2380741536617279, "rewards/margins": 0.18634510040283203, "rewards/rejected": -0.4244193136692047, "step": 9500 }, { "epoch": 2.4914854597851717, "grad_norm": 0.7897793650627136, "learning_rate": 5.878294701979793e-07, "log_odds_chosen": 1.012347936630249, "log_odds_ratio": -0.5029776096343994, "logits/chosen": -2.254631996154785, "logits/rejected": -2.2736144065856934, "logps/chosen": -0.7418469786643982, "logps/rejected": -1.4189364910125732, "loss": 1.0815, "nll_loss": 0.8792352676391602, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22255408763885498, "rewards/margins": 0.20312681794166565, "rewards/rejected": -0.425680935382843, "step": 9510 }, { "epoch": 2.494105318312811, "grad_norm": 0.7238850593566895, "learning_rate": 5.819390575017582e-07, "log_odds_chosen": 0.7864845991134644, "log_odds_ratio": -0.5776903033256531, "logits/chosen": -2.3183844089508057, "logits/rejected": -2.241542339324951, "logps/chosen": -0.871917724609375, "logps/rejected": -1.4022810459136963, "loss": 1.1918, "nll_loss": 1.0646456480026245, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2615753412246704, "rewards/margins": 0.15910901129245758, "rewards/rejected": -0.4206843376159668, "step": 9520 }, { "epoch": 2.4967251768404504, "grad_norm": 1.3511147499084473, "learning_rate": 5.760759902957892e-07, "log_odds_chosen": 0.4843518137931824, "log_odds_ratio": -0.6223189234733582, "logits/chosen": -2.378861665725708, "logits/rejected": -2.319117307662964, "logps/chosen": -0.7727516293525696, "logps/rejected": -1.045016884803772, "loss": 1.1587, "nll_loss": 0.947311520576477, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23182550072669983, "rewards/margins": 0.081679567694664, "rewards/rejected": -0.31350505352020264, "step": 9530 }, { "epoch": 2.49934503536809, "grad_norm": 1.1318645477294922, "learning_rate": 5.702403154863287e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2689716815948486, "logits/rejected": -2.2635371685028076, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1834, "nll_loss": 1.0758607387542725, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9540 }, { "epoch": 2.5019648938957295, "grad_norm": 1.6540809869766235, "learning_rate": 5.644320797604893e-07, "log_odds_chosen": 0.7427883744239807, "log_odds_ratio": -0.5254132151603699, "logits/chosen": -2.3359038829803467, "logits/rejected": -2.259584903717041, "logps/chosen": -0.715695321559906, "logps/rejected": -1.148630142211914, "loss": 1.1551, "nll_loss": 0.9111733436584473, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.214708611369133, "rewards/margins": 0.12988042831420898, "rewards/rejected": -0.3445890545845032, "step": 9550 }, { "epoch": 2.5045847524233693, "grad_norm": 1.0016443729400635, "learning_rate": 5.586513295858584e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.384629726409912, "logits/rejected": -2.3260703086853027, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1279, "nll_loss": 1.0235499143600464, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9560 }, { "epoch": 2.5072046109510087, "grad_norm": 0.7153031229972839, "learning_rate": 5.528981112101352e-07, "log_odds_chosen": 0.8152477145195007, "log_odds_ratio": -0.5442308187484741, "logits/chosen": -2.329617500305176, "logits/rejected": -2.2720532417297363, "logps/chosen": -0.7405346035957336, "logps/rejected": -1.2582966089248657, "loss": 1.1774, "nll_loss": 1.0430828332901, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22216038405895233, "rewards/margins": 0.1553286463022232, "rewards/rejected": -0.37748900055885315, "step": 9570 }, { "epoch": 2.509824469478648, "grad_norm": 0.6918602585792542, "learning_rate": 5.471724706607528e-07, "log_odds_chosen": 0.5589702129364014, "log_odds_ratio": -0.6279712915420532, "logits/chosen": -2.23506760597229, "logits/rejected": -2.261875629425049, "logps/chosen": -0.8075177073478699, "logps/rejected": -1.1722062826156616, "loss": 1.0734, "nll_loss": 0.9686850309371948, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.242255300283432, "rewards/margins": 0.10940657556056976, "rewards/rejected": -0.3516618609428406, "step": 9580 }, { "epoch": 2.512444328006288, "grad_norm": 0.7547513842582703, "learning_rate": 5.414744537445167e-07, "log_odds_chosen": 0.6493240594863892, "log_odds_ratio": -0.5867668390274048, "logits/chosen": -2.45292592048645, "logits/rejected": -2.360464096069336, "logps/chosen": -0.7879999279975891, "logps/rejected": -1.210455298423767, "loss": 1.1435, "nll_loss": 0.9793103933334351, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23639997839927673, "rewards/margins": 0.1267366260290146, "rewards/rejected": -0.3631366193294525, "step": 9590 }, { "epoch": 2.515064186533927, "grad_norm": 1.2764583826065063, "learning_rate": 5.358041060472324e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.328200340270996, "logits/rejected": -2.3007493019104004, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1233, "nll_loss": 1.048449158668518, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9600 }, { "epoch": 2.5176840450615665, "grad_norm": 0.956585168838501, "learning_rate": 5.301614729333468e-07, "log_odds_chosen": 0.7119942903518677, "log_odds_ratio": -0.5205411911010742, "logits/chosen": -2.375429630279541, "logits/rejected": -2.296013355255127, "logps/chosen": -0.7459525465965271, "logps/rejected": -1.2281458377838135, "loss": 1.1211, "nll_loss": 0.9410945773124695, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22378578782081604, "rewards/margins": 0.14465799927711487, "rewards/rejected": -0.3684437870979309, "step": 9610 }, { "epoch": 2.5203039035892063, "grad_norm": 2.0714962482452393, "learning_rate": 5.245465995455776e-07, "log_odds_chosen": 0.6160212755203247, "log_odds_ratio": -0.5730375051498413, "logits/chosen": -2.441861629486084, "logits/rejected": -2.3153023719787598, "logps/chosen": -0.8125659227371216, "logps/rejected": -1.163535237312317, "loss": 1.1208, "nll_loss": 0.9535290002822876, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2437698394060135, "rewards/margins": 0.10529077053070068, "rewards/rejected": -0.3490605652332306, "step": 9620 }, { "epoch": 2.5229237621168457, "grad_norm": 1.9005030393600464, "learning_rate": 5.189595308045617e-07, "log_odds_chosen": 0.6108651161193848, "log_odds_ratio": -0.5773124694824219, "logits/chosen": -2.4241418838500977, "logits/rejected": -2.378830909729004, "logps/chosen": -0.7843604683876038, "logps/rejected": -1.1120840311050415, "loss": 1.1563, "nll_loss": 1.0193842649459839, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.23530817031860352, "rewards/margins": 0.09831706434488297, "rewards/rejected": -0.3336252272129059, "step": 9630 }, { "epoch": 2.525543620644485, "grad_norm": 1.4055416584014893, "learning_rate": 5.134003114084854e-07, "log_odds_chosen": 0.6324718594551086, "log_odds_ratio": -0.5796335339546204, "logits/chosen": -2.4250504970550537, "logits/rejected": -2.3498237133026123, "logps/chosen": -0.8159322738647461, "logps/rejected": -1.190702199935913, "loss": 1.1763, "nll_loss": 1.0226068496704102, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.24477970600128174, "rewards/margins": 0.11243095248937607, "rewards/rejected": -0.3572106957435608, "step": 9640 }, { "epoch": 2.528163479172125, "grad_norm": 0.805396556854248, "learning_rate": 5.078689858327374e-07, "log_odds_chosen": 0.6897085309028625, "log_odds_ratio": -0.581073522567749, "logits/chosen": -2.5067665576934814, "logits/rejected": -2.3402276039123535, "logps/chosen": -0.6902645826339722, "logps/rejected": -1.1405141353607178, "loss": 1.1548, "nll_loss": 0.9634538888931274, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.20707938075065613, "rewards/margins": 0.13507486879825592, "rewards/rejected": -0.34215423464775085, "step": 9650 }, { "epoch": 2.530783337699764, "grad_norm": 1.3159271478652954, "learning_rate": 5.023655983295425e-07, "log_odds_chosen": 0.7620981335639954, "log_odds_ratio": -0.504449725151062, "logits/chosen": -2.321803331375122, "logits/rejected": -2.3287596702575684, "logps/chosen": -0.6916342973709106, "logps/rejected": -1.1319247484207153, "loss": 1.1092, "nll_loss": 0.92485111951828, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.20749032497406006, "rewards/margins": 0.1320871114730835, "rewards/rejected": -0.33957743644714355, "step": 9660 }, { "epoch": 2.533403196227404, "grad_norm": 0.9020645022392273, "learning_rate": 4.968901929276175e-07, "log_odds_chosen": 0.7223199605941772, "log_odds_ratio": -0.5651143789291382, "logits/chosen": -2.4111390113830566, "logits/rejected": -2.3181276321411133, "logps/chosen": -0.7043615579605103, "logps/rejected": -1.1277828216552734, "loss": 1.1013, "nll_loss": 0.9140084981918335, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21130844950675964, "rewards/margins": 0.12702640891075134, "rewards/rejected": -0.338334858417511, "step": 9670 }, { "epoch": 2.5360230547550433, "grad_norm": 0.8407982587814331, "learning_rate": 4.914428134318092e-07, "log_odds_chosen": 1.0647474527359009, "log_odds_ratio": -0.4958344101905823, "logits/chosen": -2.2591638565063477, "logits/rejected": -2.232186794281006, "logps/chosen": -0.6699371933937073, "logps/rejected": -1.4025089740753174, "loss": 1.1597, "nll_loss": 1.045196294784546, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20098114013671875, "rewards/margins": 0.21977153420448303, "rewards/rejected": -0.42075270414352417, "step": 9680 }, { "epoch": 2.5386429132826827, "grad_norm": 0.5315903425216675, "learning_rate": 4.860235034227545e-07, "log_odds_chosen": 0.46667614579200745, "log_odds_ratio": -0.6188749074935913, "logits/chosen": -2.375154972076416, "logits/rejected": -2.303943157196045, "logps/chosen": -0.8229526281356812, "logps/rejected": -1.1520020961761475, "loss": 1.1393, "nll_loss": 1.0228458642959595, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24688580632209778, "rewards/margins": 0.09871487319469452, "rewards/rejected": -0.3456006646156311, "step": 9690 }, { "epoch": 2.541262771810322, "grad_norm": 0.7877675890922546, "learning_rate": 4.806323062565205e-07, "log_odds_chosen": 1.0039631128311157, "log_odds_ratio": -0.4946627616882324, "logits/chosen": -2.3107972145080566, "logits/rejected": -2.290309429168701, "logps/chosen": -0.668211817741394, "logps/rejected": -1.30512273311615, "loss": 1.0518, "nll_loss": 0.9355505704879761, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20046353340148926, "rewards/margins": 0.19107329845428467, "rewards/rejected": -0.3915368616580963, "step": 9700 }, { "epoch": 2.543882630337962, "grad_norm": NaN, "learning_rate": 4.7526926506426865e-07, "log_odds_chosen": 0.7227948904037476, "log_odds_ratio": -0.5676661729812622, "logits/chosen": -2.406127452850342, "logits/rejected": -2.229170322418213, "logps/chosen": -0.7436398863792419, "logps/rejected": -1.2667601108551025, "loss": 1.1247, "nll_loss": 0.8908705711364746, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2230919897556305, "rewards/margins": 0.15693603456020355, "rewards/rejected": -0.38002803921699524, "step": 9710 }, { "epoch": 2.546502488865601, "grad_norm": 0.7818073034286499, "learning_rate": 4.699344227518991e-07, "log_odds_chosen": 0.8282346725463867, "log_odds_ratio": -0.5653745532035828, "logits/chosen": -2.3674793243408203, "logits/rejected": -2.271454334259033, "logps/chosen": -0.7130936980247498, "logps/rejected": -1.318558931350708, "loss": 1.1358, "nll_loss": 0.9378981590270996, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21392810344696045, "rewards/margins": 0.18163956701755524, "rewards/rejected": -0.3955676555633545, "step": 9720 }, { "epoch": 2.549122347393241, "grad_norm": 0.5566282272338867, "learning_rate": 4.646278219997182e-07, "log_odds_chosen": 0.6988282203674316, "log_odds_ratio": -0.5393753051757812, "logits/chosen": -2.40324068069458, "logits/rejected": -2.3415870666503906, "logps/chosen": -0.7572978138923645, "logps/rejected": -1.1877591609954834, "loss": 1.0925, "nll_loss": 0.9433972239494324, "rewards/accuracies": 0.75, "rewards/chosen": -0.2271893471479416, "rewards/margins": 0.12913846969604492, "rewards/rejected": -0.3563278317451477, "step": 9730 }, { "epoch": 2.5517422059208803, "grad_norm": 0.7307307124137878, "learning_rate": 4.593495052620864e-07, "log_odds_chosen": 0.9668407440185547, "log_odds_ratio": -0.4852634370326996, "logits/chosen": -2.440396308898926, "logits/rejected": -2.333209991455078, "logps/chosen": -0.671924889087677, "logps/rejected": -1.3186924457550049, "loss": 1.055, "nll_loss": 0.9540376663208008, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.20157749950885773, "rewards/margins": 0.1940302848815918, "rewards/rejected": -0.3956077992916107, "step": 9740 }, { "epoch": 2.5543620644485197, "grad_norm": 0.8000277876853943, "learning_rate": 4.540995147670874e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.282747983932495, "logits/rejected": -2.29353666305542, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1798, "nll_loss": 0.9952613711357117, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9750 }, { "epoch": 2.5569819229761594, "grad_norm": 1.028389573097229, "learning_rate": 4.48877892516184e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3182716369628906, "logits/rejected": -2.2645046710968018, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1076, "nll_loss": 0.9596049189567566, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9760 }, { "epoch": 2.559601781503799, "grad_norm": 0.9493250250816345, "learning_rate": 4.4368468028388806e-07, "log_odds_chosen": 0.686352014541626, "log_odds_ratio": -0.5421107411384583, "logits/chosen": -2.3856723308563232, "logits/rejected": -2.3578383922576904, "logps/chosen": -0.7326850295066833, "logps/rejected": -1.1657085418701172, "loss": 1.1259, "nll_loss": 0.9647611379623413, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2198055237531662, "rewards/margins": 0.1299070119857788, "rewards/rejected": -0.3497125208377838, "step": 9770 }, { "epoch": 2.562221640031438, "grad_norm": 0.7193650603294373, "learning_rate": 4.3851991961741895e-07, "log_odds_chosen": 0.6933731436729431, "log_odds_ratio": -0.5278171300888062, "logits/chosen": -2.4111220836639404, "logits/rejected": -2.3390114307403564, "logps/chosen": -0.7253640294075012, "logps/rejected": -1.1532789468765259, "loss": 1.202, "nll_loss": 1.0576145648956299, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2176092118024826, "rewards/margins": 0.12837448716163635, "rewards/rejected": -0.34598368406295776, "step": 9780 }, { "epoch": 2.564841498559078, "grad_norm": 0.7366334795951843, "learning_rate": 4.3338365183637737e-07, "log_odds_chosen": 0.8135086894035339, "log_odds_ratio": -0.5046383738517761, "logits/chosen": -2.358776569366455, "logits/rejected": -2.3037238121032715, "logps/chosen": -0.6948508620262146, "logps/rejected": -1.2013102769851685, "loss": 1.136, "nll_loss": 0.9625489115715027, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.20845524966716766, "rewards/margins": 0.1519378423690796, "rewards/rejected": -0.36039310693740845, "step": 9790 }, { "epoch": 2.5674613570867173, "grad_norm": 0.9162473678588867, "learning_rate": 4.282759180324138e-07, "log_odds_chosen": 0.7356789708137512, "log_odds_ratio": -0.5583966970443726, "logits/chosen": -2.3929221630096436, "logits/rejected": -2.4074289798736572, "logps/chosen": -0.7299818992614746, "logps/rejected": -1.2179768085479736, "loss": 1.1679, "nll_loss": 0.9845606684684753, "rewards/accuracies": 0.75, "rewards/chosen": -0.21899457275867462, "rewards/margins": 0.14639848470687866, "rewards/rejected": -0.3653930723667145, "step": 9800 }, { "epoch": 2.5700812156143567, "grad_norm": 1.0515886545181274, "learning_rate": 4.231967590688947e-07, "log_odds_chosen": 0.7672973275184631, "log_odds_ratio": -0.5580050349235535, "logits/chosen": -2.2761521339416504, "logits/rejected": -2.3277323246002197, "logps/chosen": -0.7685660719871521, "logps/rejected": -1.2114847898483276, "loss": 1.1552, "nll_loss": 0.9451150894165039, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.23056983947753906, "rewards/margins": 0.13287565112113953, "rewards/rejected": -0.3634454905986786, "step": 9810 }, { "epoch": 2.5727010741419964, "grad_norm": 1.8192346096038818, "learning_rate": 4.181462155805842e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3863656520843506, "logits/rejected": -2.30202317237854, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1162, "nll_loss": 1.0572060346603394, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9820 }, { "epoch": 2.575320932669636, "grad_norm": 0.8500099778175354, "learning_rate": 4.131243279733101e-07, "log_odds_chosen": 0.7008156180381775, "log_odds_ratio": -0.5274959206581116, "logits/chosen": -2.4207351207733154, "logits/rejected": -2.2745890617370605, "logps/chosen": -0.7328315377235413, "logps/rejected": -1.158913016319275, "loss": 1.1058, "nll_loss": 0.8834317922592163, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21984946727752686, "rewards/margins": 0.12782445549964905, "rewards/rejected": -0.3476739227771759, "step": 9830 }, { "epoch": 2.5779407911972756, "grad_norm": 0.7564804553985596, "learning_rate": 4.0813113642364796e-07, "log_odds_chosen": 0.7722280621528625, "log_odds_ratio": -0.4995396137237549, "logits/chosen": -2.4410531520843506, "logits/rejected": -2.3541769981384277, "logps/chosen": -0.7844181060791016, "logps/rejected": -1.2974592447280884, "loss": 1.101, "nll_loss": 0.891581654548645, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.23532545566558838, "rewards/margins": 0.15391232073307037, "rewards/rejected": -0.38923776149749756, "step": 9840 }, { "epoch": 2.580560649724915, "grad_norm": 0.8153582215309143, "learning_rate": 4.031666808785927e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2798430919647217, "logits/rejected": -2.261728286743164, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2003, "nll_loss": 0.9644447565078735, "rewards/accuracies": 0.75, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9850 }, { "epoch": 2.5831805082525543, "grad_norm": 4.753087043762207, "learning_rate": 3.9823100105524785e-07, "log_odds_chosen": 0.9417045712471008, "log_odds_ratio": -0.4958771765232086, "logits/chosen": -2.370734930038452, "logits/rejected": -2.324840784072876, "logps/chosen": -0.6897854804992676, "logps/rejected": -1.3206872940063477, "loss": 1.1433, "nll_loss": 0.9996825456619263, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20693564414978027, "rewards/margins": 0.1892705261707306, "rewards/rejected": -0.39620620012283325, "step": 9860 }, { "epoch": 2.5858003667801936, "grad_norm": 1.0378552675247192, "learning_rate": 3.933241364404973e-07, "log_odds_chosen": 1.2095052003860474, "log_odds_ratio": -0.46555715799331665, "logits/chosen": -2.276398181915283, "logits/rejected": -2.1979660987854004, "logps/chosen": -0.6971297264099121, "logps/rejected": -1.5149221420288086, "loss": 1.1158, "nll_loss": 0.9797788858413696, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2091389149427414, "rewards/margins": 0.2453378140926361, "rewards/rejected": -0.4544767737388611, "step": 9870 }, { "epoch": 2.5884202253078334, "grad_norm": 0.945682942867279, "learning_rate": 3.8844612629069974e-07, "log_odds_chosen": 0.729568600654602, "log_odds_ratio": -0.5800584554672241, "logits/chosen": -2.280125856399536, "logits/rejected": -2.259681463241577, "logps/chosen": -0.812541127204895, "logps/rejected": -1.3271983861923218, "loss": 1.1846, "nll_loss": 1.065170407295227, "rewards/accuracies": 0.625, "rewards/chosen": -0.24376234412193298, "rewards/margins": 0.1543971598148346, "rewards/rejected": -0.39815953373908997, "step": 9880 }, { "epoch": 2.591040083835473, "grad_norm": 1.4457460641860962, "learning_rate": 3.83597009631365e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.298670530319214, "logits/rejected": -2.2830517292022705, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1576, "nll_loss": 0.9340412020683289, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9890 }, { "epoch": 2.5936599423631126, "grad_norm": 0.8205615282058716, "learning_rate": 3.787768252568515e-07, "log_odds_chosen": 0.9069029688835144, "log_odds_ratio": -0.5017563104629517, "logits/chosen": -2.2407138347625732, "logits/rejected": -2.187716484069824, "logps/chosen": -0.6912156343460083, "logps/rejected": -1.2503759860992432, "loss": 1.1573, "nll_loss": 0.9351553916931152, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20736470818519592, "rewards/margins": 0.16774815320968628, "rewards/rejected": -0.3751128315925598, "step": 9900 }, { "epoch": 2.596279800890752, "grad_norm": 5.878938674926758, "learning_rate": 3.7398561173004593e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2511987686157227, "logits/rejected": -2.204184055328369, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1557, "nll_loss": 1.0607749223709106, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9910 }, { "epoch": 2.5988996594183913, "grad_norm": 0.7921401858329773, "learning_rate": 3.6922340738206347e-07, "log_odds_chosen": 0.7674717903137207, "log_odds_ratio": -0.5356482267379761, "logits/chosen": -2.320565700531006, "logits/rejected": -2.227555274963379, "logps/chosen": -0.7677240967750549, "logps/rejected": -1.2626534700393677, "loss": 1.0856, "nll_loss": 0.9122964143753052, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23031726479530334, "rewards/margins": 0.14847880601882935, "rewards/rejected": -0.3787961006164551, "step": 9920 }, { "epoch": 2.6015195179460306, "grad_norm": 0.7397720813751221, "learning_rate": 3.6449025031193514e-07, "log_odds_chosen": 0.9842907786369324, "log_odds_ratio": -0.510271430015564, "logits/chosen": -2.268761157989502, "logits/rejected": -2.210519313812256, "logps/chosen": -0.6933885216712952, "logps/rejected": -1.3189904689788818, "loss": 1.1194, "nll_loss": 0.9744271039962769, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2080165594816208, "rewards/margins": 0.18768061697483063, "rewards/rejected": -0.3956971764564514, "step": 9930 }, { "epoch": 2.6041393764736704, "grad_norm": 2.3469293117523193, "learning_rate": 3.5978617838630677e-07, "log_odds_chosen": 1.1132045984268188, "log_odds_ratio": -0.520469069480896, "logits/chosen": -2.402930736541748, "logits/rejected": -2.301086902618408, "logps/chosen": -0.7153148651123047, "logps/rejected": -1.4832861423492432, "loss": 1.1708, "nll_loss": 0.9959756135940552, "rewards/accuracies": 0.75, "rewards/chosen": -0.21459448337554932, "rewards/margins": 0.23039141297340393, "rewards/rejected": -0.44498586654663086, "step": 9940 }, { "epoch": 2.60675923500131, "grad_norm": 1.2291711568832397, "learning_rate": 3.551112292391325e-07, "log_odds_chosen": 0.7255959510803223, "log_odds_ratio": -0.5536289811134338, "logits/chosen": -2.329251766204834, "logits/rejected": -2.3140499591827393, "logps/chosen": -0.7349006533622742, "logps/rejected": -1.1454975605010986, "loss": 1.1419, "nll_loss": 0.9732163548469543, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22047019004821777, "rewards/margins": 0.12317910045385361, "rewards/rejected": -0.34364932775497437, "step": 9950 }, { "epoch": 2.6093790935289496, "grad_norm": NaN, "learning_rate": 3.504654402713787e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.339892864227295, "logits/rejected": -2.262610673904419, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1994, "nll_loss": 1.0445204973220825, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 9960 }, { "epoch": 2.611998952056589, "grad_norm": 1.1974103450775146, "learning_rate": 3.458488486507183e-07, "log_odds_chosen": 0.44145217537879944, "log_odds_ratio": -0.6541040539741516, "logits/chosen": -2.3028881549835205, "logits/rejected": -2.2831456661224365, "logps/chosen": -0.8470257520675659, "logps/rejected": -1.1466723680496216, "loss": 1.1596, "nll_loss": 1.0074025392532349, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2541077435016632, "rewards/margins": 0.08989398181438446, "rewards/rejected": -0.3440017104148865, "step": 9970 }, { "epoch": 2.6146188105842283, "grad_norm": 1.045196294784546, "learning_rate": 3.412614913112399e-07, "log_odds_chosen": 0.9949218034744263, "log_odds_ratio": -0.45927438139915466, "logits/chosen": -2.424241781234741, "logits/rejected": -2.298633098602295, "logps/chosen": -0.7037177681922913, "logps/rejected": -1.3882901668548584, "loss": 1.1538, "nll_loss": 0.9385913610458374, "rewards/accuracies": 0.75, "rewards/chosen": -0.21111531555652618, "rewards/margins": 0.20537173748016357, "rewards/rejected": -0.41648703813552856, "step": 9980 }, { "epoch": 2.617238669111868, "grad_norm": 1.9980030059814453, "learning_rate": 3.3670340495314696e-07, "log_odds_chosen": 0.6124451756477356, "log_odds_ratio": -0.6006813049316406, "logits/chosen": -2.2961690425872803, "logits/rejected": -2.282686233520508, "logps/chosen": -0.7737469673156738, "logps/rejected": -1.1280186176300049, "loss": 1.1509, "nll_loss": 0.9299696087837219, "rewards/accuracies": 0.625, "rewards/chosen": -0.23212411999702454, "rewards/margins": 0.10628153383731842, "rewards/rejected": -0.338405579328537, "step": 9990 }, { "epoch": 2.6198585276395074, "grad_norm": 0.998110830783844, "learning_rate": 3.3217462604246826e-07, "log_odds_chosen": 0.8079570531845093, "log_odds_ratio": -0.5693781971931458, "logits/chosen": -2.290193796157837, "logits/rejected": -2.2498762607574463, "logps/chosen": -0.8722237348556519, "logps/rejected": -1.5083279609680176, "loss": 1.2233, "nll_loss": 1.0476272106170654, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2616671323776245, "rewards/margins": 0.1908312290906906, "rewards/rejected": -0.45249834656715393, "step": 10000 }, { "epoch": 2.6224783861671472, "grad_norm": 1.4080458879470825, "learning_rate": 3.2767519081076247e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.420208215713501, "logits/rejected": -2.246579647064209, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1228, "nll_loss": 0.9031478762626648, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10010 }, { "epoch": 2.6250982446947866, "grad_norm": 0.7923830151557922, "learning_rate": 3.232051352548333e-07, "log_odds_chosen": 0.9187763333320618, "log_odds_ratio": -0.46098223328590393, "logits/chosen": -2.3312766551971436, "logits/rejected": -2.2327990531921387, "logps/chosen": -0.6532350182533264, "logps/rejected": -1.1950854063034058, "loss": 1.0938, "nll_loss": 0.9360191226005554, "rewards/accuracies": 0.8125, "rewards/chosen": -0.19597050547599792, "rewards/margins": 0.16255512833595276, "rewards/rejected": -0.3585256338119507, "step": 10020 }, { "epoch": 2.627718103222426, "grad_norm": 2.157318592071533, "learning_rate": 3.1876449513643523e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.33823823928833, "logits/rejected": -2.2579469680786133, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1716, "nll_loss": 0.9784253239631653, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10030 }, { "epoch": 2.6303379617500653, "grad_norm": 1.222089409828186, "learning_rate": 3.1435330598199095e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4058218002319336, "logits/rejected": -2.268009901046753, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.0226, "nll_loss": 0.8623671531677246, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10040 }, { "epoch": 2.632957820277705, "grad_norm": 1.280110478401184, "learning_rate": 3.0997160308230985e-07, "log_odds_chosen": 0.5150691866874695, "log_odds_ratio": -0.5693272352218628, "logits/chosen": -2.3639068603515625, "logits/rejected": -2.3268611431121826, "logps/chosen": -0.7649921178817749, "logps/rejected": -1.0453704595565796, "loss": 1.0875, "nll_loss": 0.9112681150436401, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22949767112731934, "rewards/margins": 0.08411349356174469, "rewards/rejected": -0.31361114978790283, "step": 10050 }, { "epoch": 2.6355776788053444, "grad_norm": 1.0825072526931763, "learning_rate": 3.056194214922998e-07, "log_odds_chosen": 0.6653600931167603, "log_odds_ratio": -0.5887200236320496, "logits/chosen": -2.326374053955078, "logits/rejected": -2.3341169357299805, "logps/chosen": -0.7667369842529297, "logps/rejected": -1.2025988101959229, "loss": 1.1769, "nll_loss": 1.031564474105835, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23002111911773682, "rewards/margins": 0.13075849413871765, "rewards/rejected": -0.36077961325645447, "step": 10060 }, { "epoch": 2.6381975373329842, "grad_norm": NaN, "learning_rate": 3.012967960306891e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4032397270202637, "logits/rejected": -2.3771142959594727, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1295, "nll_loss": 0.9869459867477417, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10070 }, { "epoch": 2.6408173958606236, "grad_norm": 0.7263054847717285, "learning_rate": 2.970037612797509e-07, "log_odds_chosen": 0.8589914441108704, "log_odds_ratio": -0.5667373538017273, "logits/chosen": -2.2766222953796387, "logits/rejected": -2.1890833377838135, "logps/chosen": -0.7590381503105164, "logps/rejected": -1.3689934015274048, "loss": 1.1125, "nll_loss": 0.9304605722427368, "rewards/accuracies": 0.625, "rewards/chosen": -0.22771143913269043, "rewards/margins": 0.18298658728599548, "rewards/rejected": -0.4106980264186859, "step": 10080 }, { "epoch": 2.643437254388263, "grad_norm": 1.0810316801071167, "learning_rate": 2.9274035158502174e-07, "log_odds_chosen": 0.7783977389335632, "log_odds_ratio": -0.5645152926445007, "logits/chosen": -2.35717511177063, "logits/rejected": -2.2181050777435303, "logps/chosen": -0.7383741140365601, "logps/rejected": -1.1828334331512451, "loss": 1.1034, "nll_loss": 0.925057053565979, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22151222825050354, "rewards/margins": 0.1333378106355667, "rewards/rejected": -0.35485005378723145, "step": 10090 }, { "epoch": 2.6460571129159023, "grad_norm": 2.1583266258239746, "learning_rate": 2.88506601055031e-07, "log_odds_chosen": 0.5272485017776489, "log_odds_ratio": -0.5599733591079712, "logits/chosen": -2.3485047817230225, "logits/rejected": -2.2938199043273926, "logps/chosen": -0.7484295964241028, "logps/rejected": -1.0543246269226074, "loss": 1.0666, "nll_loss": 0.9690427780151367, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22452886402606964, "rewards/margins": 0.0917685255408287, "rewards/rejected": -0.31629738211631775, "step": 10100 }, { "epoch": 2.648676971443542, "grad_norm": 1.0484261512756348, "learning_rate": 2.843025435610231e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.31541109085083, "logits/rejected": -2.1800684928894043, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.09, "nll_loss": 0.9070317149162292, "rewards/accuracies": 0.824999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10110 }, { "epoch": 2.6512968299711814, "grad_norm": 0.7819397449493408, "learning_rate": 2.8012821273669307e-07, "log_odds_chosen": 0.7603693008422852, "log_odds_ratio": -0.5517936944961548, "logits/chosen": -2.3781747817993164, "logits/rejected": -2.300516128540039, "logps/chosen": -0.6974642872810364, "logps/rejected": -1.178773283958435, "loss": 1.096, "nll_loss": 0.9076254963874817, "rewards/accuracies": 0.6875, "rewards/chosen": -0.20923928916454315, "rewards/margins": 0.144392728805542, "rewards/rejected": -0.35363203287124634, "step": 10120 }, { "epoch": 2.6539166884988212, "grad_norm": 0.772362470626831, "learning_rate": 2.759836419779109e-07, "log_odds_chosen": 1.168003797531128, "log_odds_ratio": -0.497683584690094, "logits/chosen": -2.3078763484954834, "logits/rejected": -2.154547691345215, "logps/chosen": -0.723802924156189, "logps/rejected": -1.5347645282745361, "loss": 1.1378, "nll_loss": 1.0466837882995605, "rewards/accuracies": 0.8125, "rewards/chosen": -0.21714086830615997, "rewards/margins": 0.24328844249248505, "rewards/rejected": -0.4604293704032898, "step": 10130 }, { "epoch": 2.6565365470264606, "grad_norm": 0.8276832699775696, "learning_rate": 2.718688644424598e-07, "log_odds_chosen": 0.8564811944961548, "log_odds_ratio": -0.5790836215019226, "logits/chosen": -2.276923656463623, "logits/rejected": -2.230149507522583, "logps/chosen": -0.7254506349563599, "logps/rejected": -1.2569117546081543, "loss": 1.1182, "nll_loss": 0.9403184652328491, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21763519942760468, "rewards/margins": 0.1594383418560028, "rewards/rejected": -0.3770735263824463, "step": 10140 }, { "epoch": 2.6591564055541, "grad_norm": 1.7377941608428955, "learning_rate": 2.6778391304976524e-07, "log_odds_chosen": 0.8235523104667664, "log_odds_ratio": -0.4803791046142578, "logits/chosen": -2.387723684310913, "logits/rejected": -2.3164308071136475, "logps/chosen": -0.6616402268409729, "logps/rejected": -1.121366262435913, "loss": 1.0898, "nll_loss": 0.8629072904586792, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19849207997322083, "rewards/margins": 0.13791780173778534, "rewards/rejected": -0.33640986680984497, "step": 10150 }, { "epoch": 2.6617762640817397, "grad_norm": 2.362804651260376, "learning_rate": 2.6372882048063895e-07, "log_odds_chosen": 0.8660916090011597, "log_odds_ratio": -0.5038883686065674, "logits/chosen": -2.315138339996338, "logits/rejected": -2.2766647338867188, "logps/chosen": -0.6488054990768433, "logps/rejected": -1.1476974487304688, "loss": 1.1982, "nll_loss": 0.9477452039718628, "rewards/accuracies": 0.75, "rewards/chosen": -0.19464163482189178, "rewards/margins": 0.1496676206588745, "rewards/rejected": -0.3443092405796051, "step": 10160 }, { "epoch": 2.664396122609379, "grad_norm": 1.4099706411361694, "learning_rate": 2.597036191770088e-07, "log_odds_chosen": 0.8449774980545044, "log_odds_ratio": -0.5453621745109558, "logits/chosen": -2.400585174560547, "logits/rejected": -2.333113431930542, "logps/chosen": -0.7111660242080688, "logps/rejected": -1.2430622577667236, "loss": 1.1578, "nll_loss": 0.9381807446479797, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2133498191833496, "rewards/margins": 0.15956886112689972, "rewards/rejected": -0.3729187250137329, "step": 10170 }, { "epoch": 2.667015981137019, "grad_norm": NaN, "learning_rate": 2.5570834134166765e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3170082569122314, "logits/rejected": -2.311556339263916, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1543, "nll_loss": 0.978473961353302, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10180 }, { "epoch": 2.6696358396646582, "grad_norm": 0.9706037044525146, "learning_rate": 2.517430189380096e-07, "log_odds_chosen": 0.8274259567260742, "log_odds_ratio": -0.5272882580757141, "logits/chosen": -2.3717803955078125, "logits/rejected": -2.3310370445251465, "logps/chosen": -0.6900848150253296, "logps/rejected": -1.221040964126587, "loss": 1.0705, "nll_loss": 0.953170895576477, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2070254385471344, "rewards/margins": 0.15928688645362854, "rewards/rejected": -0.36631232500076294, "step": 10190 }, { "epoch": 2.6722556981922976, "grad_norm": 0.7311919331550598, "learning_rate": 2.478076836897776e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3494410514831543, "logits/rejected": -2.263465404510498, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2642, "nll_loss": 1.116474986076355, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10200 }, { "epoch": 2.674875556719937, "grad_norm": 1.4055017232894897, "learning_rate": 2.439023670808074e-07, "log_odds_chosen": 0.40736451745033264, "log_odds_ratio": -0.6398638486862183, "logits/chosen": -2.2245891094207764, "logits/rejected": -2.233670234680176, "logps/chosen": -0.760299801826477, "logps/rejected": -1.030622124671936, "loss": 1.1331, "nll_loss": 0.9539440274238586, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22808997333049774, "rewards/margins": 0.08109667897224426, "rewards/rejected": -0.3091866374015808, "step": 10210 }, { "epoch": 2.6774954152475767, "grad_norm": 1.0832417011260986, "learning_rate": 2.400271003547778e-07, "log_odds_chosen": 1.0436149835586548, "log_odds_ratio": -0.5007830262184143, "logits/chosen": -2.27843976020813, "logits/rejected": -2.1714470386505127, "logps/chosen": -0.6973791122436523, "logps/rejected": -1.4211821556091309, "loss": 1.1511, "nll_loss": 0.9739158749580383, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.2092137336730957, "rewards/margins": 0.21714094281196594, "rewards/rejected": -0.42635464668273926, "step": 10220 }, { "epoch": 2.680115273775216, "grad_norm": 1.1878796815872192, "learning_rate": 2.361819145149595e-07, "log_odds_chosen": 0.7068713307380676, "log_odds_ratio": -0.5356544256210327, "logits/chosen": -2.339158296585083, "logits/rejected": -2.2526872158050537, "logps/chosen": -0.7042909264564514, "logps/rejected": -1.1662135124206543, "loss": 1.1256, "nll_loss": 0.9679923057556152, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21128728985786438, "rewards/margins": 0.13857676088809967, "rewards/rejected": -0.34986403584480286, "step": 10230 }, { "epoch": 2.682735132302856, "grad_norm": 0.784809947013855, "learning_rate": 2.3236684032396758e-07, "log_odds_chosen": 0.46896782517433167, "log_odds_ratio": -0.6752322912216187, "logits/chosen": -2.339073419570923, "logits/rejected": -2.3877346515655518, "logps/chosen": -0.8208404779434204, "logps/rejected": -1.103986144065857, "loss": 1.1335, "nll_loss": 0.9561912417411804, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2462521493434906, "rewards/margins": 0.08494368940591812, "rewards/rejected": -0.3311958611011505, "step": 10240 }, { "epoch": 2.6853549908304952, "grad_norm": 0.6889975666999817, "learning_rate": 2.285819083035143e-07, "log_odds_chosen": 0.7750779390335083, "log_odds_ratio": -0.4902733266353607, "logits/chosen": -2.2936148643493652, "logits/rejected": -2.274186611175537, "logps/chosen": -0.6285282373428345, "logps/rejected": -1.0446773767471313, "loss": 1.0951, "nll_loss": 0.9345085024833679, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18855850398540497, "rewards/margins": 0.12484471499919891, "rewards/rejected": -0.3134032189846039, "step": 10250 }, { "epoch": 2.6879748493581346, "grad_norm": 0.9469316005706787, "learning_rate": 2.2482714873416574e-07, "log_odds_chosen": 0.7870739102363586, "log_odds_ratio": -0.595521867275238, "logits/chosen": -2.3648273944854736, "logits/rejected": -2.327631711959839, "logps/chosen": -0.7410359978675842, "logps/rejected": -1.28219735622406, "loss": 1.1571, "nll_loss": 1.000786542892456, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22231082618236542, "rewards/margins": 0.1623484045267105, "rewards/rejected": -0.3846592307090759, "step": 10260 }, { "epoch": 2.690594707885774, "grad_norm": 2.192561388015747, "learning_rate": 2.2110259165510147e-07, "log_odds_chosen": 0.9566601514816284, "log_odds_ratio": -0.5816847681999207, "logits/chosen": -2.299370288848877, "logits/rejected": -2.2218282222747803, "logps/chosen": -0.7832424640655518, "logps/rejected": -1.445091962814331, "loss": 1.192, "nll_loss": 1.0911991596221924, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2349727600812912, "rewards/margins": 0.1985548436641693, "rewards/rejected": -0.4335276186466217, "step": 10270 }, { "epoch": 2.6932145664134137, "grad_norm": 1.5554938316345215, "learning_rate": 2.1740826686386947e-07, "log_odds_chosen": 0.7407522797584534, "log_odds_ratio": -0.5290435552597046, "logits/chosen": -2.297454833984375, "logits/rejected": -2.2524068355560303, "logps/chosen": -0.7115712761878967, "logps/rejected": -1.175513505935669, "loss": 1.0992, "nll_loss": 0.9298652410507202, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21347138285636902, "rewards/margins": 0.13918264210224152, "rewards/rejected": -0.35265403985977173, "step": 10280 }, { "epoch": 2.695834424941053, "grad_norm": 0.6747899055480957, "learning_rate": 2.137442039161512e-07, "log_odds_chosen": 1.0680806636810303, "log_odds_ratio": -0.46357280015945435, "logits/chosen": -2.3834452629089355, "logits/rejected": -2.253643751144409, "logps/chosen": -0.6561059951782227, "logps/rejected": -1.2900981903076172, "loss": 1.0562, "nll_loss": 0.8463471531867981, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19683177769184113, "rewards/margins": 0.19019769132137299, "rewards/rejected": -0.3870294690132141, "step": 10290 }, { "epoch": 2.698454283468693, "grad_norm": 0.7741389274597168, "learning_rate": 2.1011043212552716e-07, "log_odds_chosen": 0.7812213897705078, "log_odds_ratio": -0.5526937246322632, "logits/chosen": -2.3746020793914795, "logits/rejected": -2.322638511657715, "logps/chosen": -0.8363631963729858, "logps/rejected": -1.360198974609375, "loss": 1.1095, "nll_loss": 0.9909842610359192, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2509090006351471, "rewards/margins": 0.15715071558952332, "rewards/rejected": -0.408059686422348, "step": 10300 }, { "epoch": 2.7010741419963322, "grad_norm": 1.2756887674331665, "learning_rate": 2.0650698056323646e-07, "log_odds_chosen": 0.7049878835678101, "log_odds_ratio": -0.578202486038208, "logits/chosen": -2.1894240379333496, "logits/rejected": -2.145888566970825, "logps/chosen": -0.7453755140304565, "logps/rejected": -1.1970598697662354, "loss": 1.1689, "nll_loss": 0.9821357727050781, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22361266613006592, "rewards/margins": 0.1355052888393402, "rewards/rejected": -0.35911795496940613, "step": 10310 }, { "epoch": 2.7036940005239716, "grad_norm": 0.6165465712547302, "learning_rate": 2.0293387805794792e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.398221492767334, "logits/rejected": -2.3065829277038574, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2037, "nll_loss": 0.9726324081420898, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10320 }, { "epoch": 2.7063138590516114, "grad_norm": 2.1697871685028076, "learning_rate": 1.9939115319553124e-07, "log_odds_chosen": 0.661516010761261, "log_odds_ratio": -0.5681261420249939, "logits/chosen": -2.4107825756073, "logits/rejected": -2.3739876747131348, "logps/chosen": -0.7710559368133545, "logps/rejected": -1.1784710884094238, "loss": 1.0723, "nll_loss": 0.951627254486084, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23131677508354187, "rewards/margins": 0.1222245842218399, "rewards/rejected": -0.3535413146018982, "step": 10330 }, { "epoch": 2.7089337175792507, "grad_norm": 1.0117154121398926, "learning_rate": 1.958788343188238e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3041129112243652, "logits/rejected": -2.2285120487213135, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1596, "nll_loss": 0.9581339955329895, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10340 }, { "epoch": 2.71155357610689, "grad_norm": 1.0766692161560059, "learning_rate": 1.9239694952740737e-07, "log_odds_chosen": 0.5340598225593567, "log_odds_ratio": -0.6321945786476135, "logits/chosen": -2.3354625701904297, "logits/rejected": -2.321169376373291, "logps/chosen": -0.7430625557899475, "logps/rejected": -1.0838154554367065, "loss": 1.2121, "nll_loss": 0.9690820574760437, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2229187786579132, "rewards/margins": 0.10222584009170532, "rewards/rejected": -0.3251446485519409, "step": 10350 }, { "epoch": 2.71417343463453, "grad_norm": 1.105901837348938, "learning_rate": 1.8894552667738117e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3667242527008057, "logits/rejected": -2.181406259536743, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1266, "nll_loss": 0.8552874326705933, "rewards/accuracies": 0.800000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10360 }, { "epoch": 2.716793293162169, "grad_norm": 1.1256743669509888, "learning_rate": 1.8552459338114113e-07, "log_odds_chosen": 0.8485711812973022, "log_odds_ratio": -0.5197492241859436, "logits/chosen": -2.2878870964050293, "logits/rejected": -2.1985230445861816, "logps/chosen": -0.7757714986801147, "logps/rejected": -1.3882782459259033, "loss": 1.1405, "nll_loss": 0.9767845869064331, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.23273146152496338, "rewards/margins": 0.18375198543071747, "rewards/rejected": -0.41648346185684204, "step": 10370 }, { "epoch": 2.7194131516898086, "grad_norm": NaN, "learning_rate": 1.821341770071565e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.204272508621216, "logits/rejected": -2.2301838397979736, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1704, "nll_loss": 0.9307327270507812, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10380 }, { "epoch": 2.7220330102174484, "grad_norm": 1.2373074293136597, "learning_rate": 1.7877430467975363e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.342777729034424, "logits/rejected": -2.255237579345703, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1135, "nll_loss": 0.9064844250679016, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10390 }, { "epoch": 2.7246528687450877, "grad_norm": 1.2625055313110352, "learning_rate": 1.754450032788961e-07, "log_odds_chosen": 0.9650007486343384, "log_odds_ratio": -0.5125551223754883, "logits/chosen": -2.2944588661193848, "logits/rejected": -2.2072033882141113, "logps/chosen": -0.7375284433364868, "logps/rejected": -1.4316635131835938, "loss": 1.1089, "nll_loss": 0.8967486619949341, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22125855088233948, "rewards/margins": 0.20824047923088074, "rewards/rejected": -0.4294990599155426, "step": 10400 }, { "epoch": 2.7272727272727275, "grad_norm": 1.01276433467865, "learning_rate": 1.721462994399725e-07, "log_odds_chosen": 0.8086993098258972, "log_odds_ratio": -0.6307396292686462, "logits/chosen": -2.366819381713867, "logits/rejected": -2.2896690368652344, "logps/chosen": -0.8322774767875671, "logps/rejected": -1.3141645193099976, "loss": 1.1622, "nll_loss": 0.9718138575553894, "rewards/accuracies": 0.6875, "rewards/chosen": -0.24968329071998596, "rewards/margins": 0.144566148519516, "rewards/rejected": -0.39424940943717957, "step": 10410 }, { "epoch": 2.729892585800367, "grad_norm": 0.8177508115768433, "learning_rate": 1.688782195535805e-07, "log_odds_chosen": 0.835645318031311, "log_odds_ratio": -0.5123569965362549, "logits/chosen": -2.372413158416748, "logits/rejected": -2.2460012435913086, "logps/chosen": -0.7270916700363159, "logps/rejected": -1.2518794536590576, "loss": 1.0733, "nll_loss": 0.9286050796508789, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21812748908996582, "rewards/margins": 0.157436341047287, "rewards/rejected": -0.3755638301372528, "step": 10420 }, { "epoch": 2.732512444328006, "grad_norm": 0.9062377214431763, "learning_rate": 1.6564078976531825e-07, "log_odds_chosen": 0.7128939032554626, "log_odds_ratio": -0.5566214323043823, "logits/chosen": -2.312203884124756, "logits/rejected": -2.2931346893310547, "logps/chosen": -0.714718222618103, "logps/rejected": -1.1137678623199463, "loss": 1.1565, "nll_loss": 0.9967786073684692, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21441550552845, "rewards/margins": 0.11971487104892731, "rewards/rejected": -0.33413034677505493, "step": 10430 }, { "epoch": 2.7351323028556456, "grad_norm": 0.8374199271202087, "learning_rate": 1.624340359755738e-07, "log_odds_chosen": 0.801935076713562, "log_odds_ratio": -0.5110307335853577, "logits/chosen": -2.3175127506256104, "logits/rejected": -2.2742626667022705, "logps/chosen": -0.7020703554153442, "logps/rejected": -1.1916496753692627, "loss": 1.1323, "nll_loss": 0.9355728030204773, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21062108874320984, "rewards/margins": 0.14687378704547882, "rewards/rejected": -0.35749489068984985, "step": 10440 }, { "epoch": 2.7377521613832854, "grad_norm": 0.7173424959182739, "learning_rate": 1.5925798383931865e-07, "log_odds_chosen": 0.5640815496444702, "log_odds_ratio": -0.6207575798034668, "logits/chosen": -2.372138261795044, "logits/rejected": -2.3120014667510986, "logps/chosen": -0.8005073666572571, "logps/rejected": -1.1462863683700562, "loss": 1.1671, "nll_loss": 1.009124755859375, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24015220999717712, "rewards/margins": 0.10373370349407196, "rewards/rejected": -0.3438859283924103, "step": 10450 }, { "epoch": 2.7403720199109247, "grad_norm": 1.4581652879714966, "learning_rate": 1.5611265876590029e-07, "log_odds_chosen": 0.9357874989509583, "log_odds_ratio": -0.5705877542495728, "logits/chosen": -2.316117525100708, "logits/rejected": -2.224693536758423, "logps/chosen": -0.7466971278190613, "logps/rejected": -1.4625808000564575, "loss": 1.1094, "nll_loss": 1.0083115100860596, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22400915622711182, "rewards/margins": 0.2147650420665741, "rewards/rejected": -0.4387742578983307, "step": 10460 }, { "epoch": 2.7429918784385645, "grad_norm": 0.7115671634674072, "learning_rate": 1.5299808591884334e-07, "log_odds_chosen": 0.8401080369949341, "log_odds_ratio": -0.5695337057113647, "logits/chosen": -2.3652522563934326, "logits/rejected": -2.2365949153900146, "logps/chosen": -0.8055969476699829, "logps/rejected": -1.3854467868804932, "loss": 1.2026, "nll_loss": 0.9819404482841492, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.24167907238006592, "rewards/margins": 0.17395499348640442, "rewards/rejected": -0.41563406586647034, "step": 10470 }, { "epoch": 2.745611736966204, "grad_norm": 0.7406944036483765, "learning_rate": 1.4991429021564339e-07, "log_odds_chosen": 1.0204907655715942, "log_odds_ratio": -0.5022918581962585, "logits/chosen": -2.2786316871643066, "logits/rejected": -2.2448203563690186, "logps/chosen": -0.7266801595687866, "logps/rejected": -1.3930270671844482, "loss": 1.1083, "nll_loss": 0.9542183876037598, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.21800406277179718, "rewards/margins": 0.19990402460098267, "rewards/rejected": -0.41790810227394104, "step": 10480 }, { "epoch": 2.748231595493843, "grad_norm": 1.6029797792434692, "learning_rate": 1.4686129632757172e-07, "log_odds_chosen": 0.7367085218429565, "log_odds_ratio": -0.5358102917671204, "logits/chosen": -2.42295503616333, "logits/rejected": -2.369525194168091, "logps/chosen": -0.6758593320846558, "logps/rejected": -1.1166200637817383, "loss": 1.085, "nll_loss": 0.8985443115234375, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2027577906847, "rewards/margins": 0.13222819566726685, "rewards/rejected": -0.33498597145080566, "step": 10490 }, { "epoch": 2.7508514540214826, "grad_norm": 1.0395845174789429, "learning_rate": 1.43839128679474e-07, "log_odds_chosen": 0.6745015382766724, "log_odds_ratio": -0.5734447240829468, "logits/chosen": -2.356210947036743, "logits/rejected": -2.272125720977783, "logps/chosen": -0.7154368162155151, "logps/rejected": -1.1770102977752686, "loss": 1.1058, "nll_loss": 0.9317604899406433, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21463103592395782, "rewards/margins": 0.13847209513187408, "rewards/rejected": -0.3531031012535095, "step": 10500 }, { "epoch": 2.7534713125491224, "grad_norm": 1.2730900049209595, "learning_rate": 1.4084781144957859e-07, "log_odds_chosen": 0.8383085131645203, "log_odds_ratio": -0.4979275166988373, "logits/chosen": -2.350409746170044, "logits/rejected": -2.268828868865967, "logps/chosen": -0.6946353912353516, "logps/rejected": -1.237947702407837, "loss": 1.1582, "nll_loss": 1.0397984981536865, "rewards/accuracies": 0.75, "rewards/chosen": -0.20839062333106995, "rewards/margins": 0.1629936844110489, "rewards/rejected": -0.37138432264328003, "step": 10510 }, { "epoch": 2.7560911710767617, "grad_norm": 0.761788010597229, "learning_rate": 1.378873685693005e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.388698101043701, "logits/rejected": -2.314945697784424, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1821, "nll_loss": 1.0440263748168945, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10520 }, { "epoch": 2.7587110296044015, "grad_norm": 1.5852481126785278, "learning_rate": 1.3495782372305155e-07, "log_odds_chosen": 1.0596219301223755, "log_odds_ratio": -0.49956098198890686, "logits/chosen": -2.3901686668395996, "logits/rejected": -2.2348904609680176, "logps/chosen": -0.6690577864646912, "logps/rejected": -1.4011346101760864, "loss": 1.0813, "nll_loss": 0.9225160479545593, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.20071737468242645, "rewards/margins": 0.21962299942970276, "rewards/rejected": -0.420340359210968, "step": 10530 }, { "epoch": 2.761330888132041, "grad_norm": 0.9650202393531799, "learning_rate": 1.3205920034805006e-07, "log_odds_chosen": 0.5967921614646912, "log_odds_ratio": -0.5343206524848938, "logits/chosen": -2.340925693511963, "logits/rejected": -2.3295085430145264, "logps/chosen": -0.7589212656021118, "logps/rejected": -1.1275676488876343, "loss": 1.1561, "nll_loss": 1.029836654663086, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2276763916015625, "rewards/margins": 0.11059390008449554, "rewards/rejected": -0.33827027678489685, "step": 10540 }, { "epoch": 2.76395074665968, "grad_norm": 1.0355913639068604, "learning_rate": 1.2919152163413237e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.320359706878662, "logits/rejected": -2.3118464946746826, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2093, "nll_loss": 0.9535323977470398, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10550 }, { "epoch": 2.76657060518732, "grad_norm": 0.8969010710716248, "learning_rate": 1.2635481052357054e-07, "log_odds_chosen": 0.6453114151954651, "log_odds_ratio": -0.6210991740226746, "logits/chosen": -2.3233447074890137, "logits/rejected": -2.3052430152893066, "logps/chosen": -0.7403451204299927, "logps/rejected": -1.1780436038970947, "loss": 1.1544, "nll_loss": 1.0222822427749634, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2221035212278366, "rewards/margins": 0.13130953907966614, "rewards/rejected": -0.35341310501098633, "step": 10560 }, { "epoch": 2.7691904637149594, "grad_norm": 2.1590168476104736, "learning_rate": 1.2354908971088553e-07, "log_odds_chosen": 0.44459065794944763, "log_odds_ratio": -0.5753412246704102, "logits/chosen": -2.416696548461914, "logits/rejected": -2.334017038345337, "logps/chosen": -0.7256730794906616, "logps/rejected": -0.9479466676712036, "loss": 1.1109, "nll_loss": 0.9349441528320312, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21770191192626953, "rewards/margins": 0.06668208539485931, "rewards/rejected": -0.28438401222229004, "step": 10570 }, { "epoch": 2.771810322242599, "grad_norm": 1.2668976783752441, "learning_rate": 1.2077438164266584e-07, "log_odds_chosen": 0.7808834910392761, "log_odds_ratio": -0.6035429835319519, "logits/chosen": -2.35711932182312, "logits/rejected": -2.3747668266296387, "logps/chosen": -0.7766934633255005, "logps/rejected": -1.3257118463516235, "loss": 1.1192, "nll_loss": 0.9890602231025696, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.23300807178020477, "rewards/margins": 0.16470548510551453, "rewards/rejected": -0.3977135419845581, "step": 10580 }, { "epoch": 2.7744301807702385, "grad_norm": 0.8204221129417419, "learning_rate": 1.180307085173915e-07, "log_odds_chosen": 0.8615990877151489, "log_odds_ratio": -0.5479172468185425, "logits/chosen": -2.239164113998413, "logits/rejected": -2.242769718170166, "logps/chosen": -0.7325757741928101, "logps/rejected": -1.3457813262939453, "loss": 1.1274, "nll_loss": 0.9694886207580566, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21977272629737854, "rewards/margins": 0.18396173417568207, "rewards/rejected": -0.4037344455718994, "step": 10590 }, { "epoch": 2.777050039297878, "grad_norm": 1.4454708099365234, "learning_rate": 1.1531809228525124e-07, "log_odds_chosen": 0.7473530769348145, "log_odds_ratio": -0.5666166543960571, "logits/chosen": -2.219308376312256, "logits/rejected": -2.139829158782959, "logps/chosen": -0.8725532293319702, "logps/rejected": -1.351177453994751, "loss": 1.2277, "nll_loss": 1.090494155883789, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2617659866809845, "rewards/margins": 0.14358729124069214, "rewards/rejected": -0.40535324811935425, "step": 10600 }, { "epoch": 2.779669897825517, "grad_norm": 0.7020652890205383, "learning_rate": 1.1263655464797129e-07, "log_odds_chosen": 0.778084397315979, "log_odds_ratio": -0.5180605053901672, "logits/chosen": -2.3839430809020996, "logits/rejected": -2.2749581336975098, "logps/chosen": -0.7134662866592407, "logps/rejected": -1.2147685289382935, "loss": 1.1366, "nll_loss": 0.936917781829834, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2140398919582367, "rewards/margins": 0.1503906548023224, "rewards/rejected": -0.36443060636520386, "step": 10610 }, { "epoch": 2.782289756353157, "grad_norm": NaN, "learning_rate": 1.0998611705863892e-07, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2588133811950684, "logits/rejected": -2.2540206909179688, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1925, "nll_loss": 0.9943526983261108, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10620 }, { "epoch": 2.7849096148807964, "grad_norm": 4.422313213348389, "learning_rate": 1.0736680072153337e-07, "log_odds_chosen": 0.5621723532676697, "log_odds_ratio": -0.5856891870498657, "logits/chosen": -2.393695116043091, "logits/rejected": -2.294121265411377, "logps/chosen": -0.7405222058296204, "logps/rejected": -1.1064960956573486, "loss": 1.0967, "nll_loss": 0.9625503420829773, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22215667366981506, "rewards/margins": 0.10979215800762177, "rewards/rejected": -0.33194881677627563, "step": 10630 }, { "epoch": 2.787529473408436, "grad_norm": 0.8217430710792542, "learning_rate": 1.0477862659195213e-07, "log_odds_chosen": 0.9204076528549194, "log_odds_ratio": -0.47777777910232544, "logits/chosen": -2.2841382026672363, "logits/rejected": -2.228870153427124, "logps/chosen": -0.6636013984680176, "logps/rejected": -1.1878998279571533, "loss": 1.1268, "nll_loss": 0.9647776484489441, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.1990804225206375, "rewards/margins": 0.1572895050048828, "rewards/rejected": -0.35636991262435913, "step": 10640 }, { "epoch": 2.7901493319360755, "grad_norm": 0.7655932903289795, "learning_rate": 1.022216153760489e-07, "log_odds_chosen": 0.5916593670845032, "log_odds_ratio": -0.5984503030776978, "logits/chosen": -2.4377143383026123, "logits/rejected": -2.3955185413360596, "logps/chosen": -0.7376391291618347, "logps/rejected": -1.154348611831665, "loss": 1.1723, "nll_loss": 1.029822587966919, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22129173576831818, "rewards/margins": 0.1250128298997879, "rewards/rejected": -0.3463045656681061, "step": 10650 }, { "epoch": 2.792769190463715, "grad_norm": 1.0913432836532593, "learning_rate": 9.9695787530663e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3333096504211426, "logits/rejected": -2.2263705730438232, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1357, "nll_loss": 0.8790294528007507, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10660 }, { "epoch": 2.795389048991354, "grad_norm": 1.6255464553833008, "learning_rate": 9.720116326315819e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.306401252746582, "logits/rejected": -2.2540736198425293, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.0662, "nll_loss": 0.9786487817764282, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10670 }, { "epoch": 2.798008907518994, "grad_norm": 0.7858624458312988, "learning_rate": 9.473776253125976e-08, "log_odds_chosen": 0.9833593368530273, "log_odds_ratio": -0.5160533785820007, "logits/chosen": -2.3799021244049072, "logits/rejected": -2.3150715827941895, "logps/chosen": -0.7354437708854675, "logps/rejected": -1.4092861413955688, "loss": 1.1526, "nll_loss": 0.9560674428939819, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2206331193447113, "rewards/margins": 0.20215268433094025, "rewards/rejected": -0.42278581857681274, "step": 10680 }, { "epoch": 2.8006287660466334, "grad_norm": 0.9025372862815857, "learning_rate": 9.230560504289764e-08, "log_odds_chosen": 0.605222761631012, "log_odds_ratio": -0.5665966272354126, "logits/chosen": -2.2704150676727295, "logits/rejected": -2.3205437660217285, "logps/chosen": -0.6301859021186829, "logps/rejected": -0.9266950488090515, "loss": 1.21, "nll_loss": 1.0390046834945679, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18905577063560486, "rewards/margins": 0.08895277231931686, "rewards/rejected": -0.2780085504055023, "step": 10690 }, { "epoch": 2.803248624574273, "grad_norm": 0.9238100647926331, "learning_rate": 8.990471025604396e-08, "log_odds_chosen": 0.5076669454574585, "log_odds_ratio": -0.5894565582275391, "logits/chosen": -2.3590614795684814, "logits/rejected": -2.3448076248168945, "logps/chosen": -0.7470133900642395, "logps/rejected": -1.0777021646499634, "loss": 1.1338, "nll_loss": 0.9597268104553223, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22410401701927185, "rewards/margins": 0.09920664131641388, "rewards/rejected": -0.3233106732368469, "step": 10700 }, { "epoch": 2.8058684831019125, "grad_norm": 2.1617748737335205, "learning_rate": 8.753509737856202e-08, "log_odds_chosen": 1.0198719501495361, "log_odds_ratio": -0.45143604278564453, "logits/chosen": -2.3422234058380127, "logits/rejected": -2.241276741027832, "logps/chosen": -0.6897640824317932, "logps/rejected": -1.332493543624878, "loss": 1.1291, "nll_loss": 0.9786874651908875, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.20692920684814453, "rewards/margins": 0.19281882047653198, "rewards/rejected": -0.3997480273246765, "step": 10710 }, { "epoch": 2.808488341629552, "grad_norm": 2.315678119659424, "learning_rate": 8.519678536804997e-08, "log_odds_chosen": 0.76271653175354, "log_odds_ratio": -0.5158853530883789, "logits/chosen": -2.4512505531311035, "logits/rejected": -2.349761486053467, "logps/chosen": -0.7106872797012329, "logps/rejected": -1.1915202140808105, "loss": 1.1316, "nll_loss": 0.93431556224823, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2132062017917633, "rewards/margins": 0.14424988627433777, "rewards/rejected": -0.3574560880661011, "step": 10720 }, { "epoch": 2.8111082001571916, "grad_norm": 3.2907867431640625, "learning_rate": 8.28897929316894e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4198520183563232, "logits/rejected": -2.3474273681640625, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1653, "nll_loss": 0.9499737024307251, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10730 }, { "epoch": 2.813728058684831, "grad_norm": 1.0007901191711426, "learning_rate": 8.061413852609744e-08, "log_odds_chosen": 0.52562415599823, "log_odds_ratio": -0.6331382989883423, "logits/chosen": -2.3178675174713135, "logits/rejected": -2.2603108882904053, "logps/chosen": -0.8087295293807983, "logps/rejected": -1.1439543962478638, "loss": 1.1661, "nll_loss": 0.9895527958869934, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2426188886165619, "rewards/margins": 0.10056746006011963, "rewards/rejected": -0.34318631887435913, "step": 10740 }, { "epoch": 2.816347917212471, "grad_norm": 1.285024642944336, "learning_rate": 7.836984035717575e-08, "log_odds_chosen": 0.7676898241043091, "log_odds_ratio": -0.5186865925788879, "logits/chosen": -2.35322904586792, "logits/rejected": -2.2952373027801514, "logps/chosen": -0.7759171724319458, "logps/rejected": -1.2329920530319214, "loss": 1.2298, "nll_loss": 1.014460802078247, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23277516663074493, "rewards/margins": 0.1371225118637085, "rewards/rejected": -0.36989766359329224, "step": 10750 }, { "epoch": 2.81896777574011, "grad_norm": 0.6969426274299622, "learning_rate": 7.61569163799689e-08, "log_odds_chosen": 0.6826087236404419, "log_odds_ratio": -0.5924266576766968, "logits/chosen": -2.3830654621124268, "logits/rejected": -2.2713801860809326, "logps/chosen": -0.7358323335647583, "logps/rejected": -1.1943117380142212, "loss": 1.2286, "nll_loss": 0.9942234754562378, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22074970602989197, "rewards/margins": 0.13754378259181976, "rewards/rejected": -0.35829347372055054, "step": 10760 }, { "epoch": 2.8215876342677495, "grad_norm": 1.1911509037017822, "learning_rate": 7.397538429851735e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.33743953704834, "logits/rejected": -2.286205768585205, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2478, "nll_loss": 1.0514171123504639, "rewards/accuracies": 0.75, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10770 }, { "epoch": 2.824207492795389, "grad_norm": 1.5745198726654053, "learning_rate": 7.182526156571845e-08, "log_odds_chosen": 0.6577655673027039, "log_odds_ratio": -0.5309853553771973, "logits/chosen": -2.4132297039031982, "logits/rejected": -2.351916790008545, "logps/chosen": -0.8110003471374512, "logps/rejected": -1.248877763748169, "loss": 1.1032, "nll_loss": 0.9825669527053833, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.24330011010169983, "rewards/margins": 0.13136324286460876, "rewards/rejected": -0.3746633231639862, "step": 10780 }, { "epoch": 2.8268273513230286, "grad_norm": 1.1253849267959595, "learning_rate": 6.970656538318386e-08, "log_odds_chosen": 0.9155112504959106, "log_odds_ratio": -0.4956735670566559, "logits/chosen": -2.286374092102051, "logits/rejected": -2.238081932067871, "logps/chosen": -0.7113829851150513, "logps/rejected": -1.289254903793335, "loss": 1.1353, "nll_loss": 0.9603685140609741, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.21341490745544434, "rewards/margins": 0.17336159944534302, "rewards/rejected": -0.38677650690078735, "step": 10790 }, { "epoch": 2.829447209850668, "grad_norm": NaN, "learning_rate": 6.76193127011051e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3345208168029785, "logits/rejected": -2.260756015777588, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1134, "nll_loss": 1.011930227279663, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10800 }, { "epoch": 2.832067068378308, "grad_norm": 1.1054714918136597, "learning_rate": 6.556352021811617e-08, "log_odds_chosen": 0.9216645956039429, "log_odds_ratio": -0.5059899091720581, "logits/chosen": -2.2956061363220215, "logits/rejected": -2.2703959941864014, "logps/chosen": -0.7577810287475586, "logps/rejected": -1.3365097045898438, "loss": 1.0787, "nll_loss": 0.8997782468795776, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22733430564403534, "rewards/margins": 0.1736185997724533, "rewards/rejected": -0.40095290541648865, "step": 10810 }, { "epoch": 2.834686926905947, "grad_norm": 0.9766221642494202, "learning_rate": 6.353920438116e-08, "log_odds_chosen": 0.9184772372245789, "log_odds_ratio": -0.5358738899230957, "logits/chosen": -2.4099721908569336, "logits/rejected": -2.421539783477783, "logps/chosen": -0.7902649641036987, "logps/rejected": -1.449336290359497, "loss": 1.1043, "nll_loss": 1.0049077272415161, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2370794713497162, "rewards/margins": 0.19772139191627502, "rewards/rejected": -0.4348008632659912, "step": 10820 }, { "epoch": 2.8373067854335865, "grad_norm": 0.8454166650772095, "learning_rate": 6.154638138535651e-08, "log_odds_chosen": 1.006287932395935, "log_odds_ratio": -0.5113011598587036, "logits/chosen": -2.320559024810791, "logits/rejected": -2.2276337146759033, "logps/chosen": -0.7486079931259155, "logps/rejected": -1.407362699508667, "loss": 1.1454, "nll_loss": 0.9286549687385559, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22458240389823914, "rewards/margins": 0.19762639701366425, "rewards/rejected": -0.42220884561538696, "step": 10830 }, { "epoch": 2.839926643961226, "grad_norm": 0.8046466708183289, "learning_rate": 5.958506717387468e-08, "log_odds_chosen": 0.9894167184829712, "log_odds_ratio": -0.5391957759857178, "logits/chosen": -2.3388831615448, "logits/rejected": -2.2652759552001953, "logps/chosen": -0.7382687330245972, "logps/rejected": -1.3926557302474976, "loss": 1.1174, "nll_loss": 0.9266220331192017, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22148065268993378, "rewards/margins": 0.19631609320640564, "rewards/rejected": -0.4177967607975006, "step": 10840 }, { "epoch": 2.8425465024888656, "grad_norm": 1.7221218347549438, "learning_rate": 5.7655277437803406e-08, "log_odds_chosen": 0.6623095273971558, "log_odds_ratio": -0.550980806350708, "logits/chosen": -2.3476874828338623, "logits/rejected": -2.3273839950561523, "logps/chosen": -0.7469508647918701, "logps/rejected": -1.1702405214309692, "loss": 1.1338, "nll_loss": 0.9897996187210083, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22408528625965118, "rewards/margins": 0.12698690593242645, "rewards/rejected": -0.35107213258743286, "step": 10850 }, { "epoch": 2.845166361016505, "grad_norm": 1.3504419326782227, "learning_rate": 5.575702761602708e-08, "log_odds_chosen": 0.7548843622207642, "log_odds_ratio": -0.6013470888137817, "logits/chosen": -2.351412296295166, "logits/rejected": -2.294504404067993, "logps/chosen": -0.7643899917602539, "logps/rejected": -1.2511241436004639, "loss": 1.1524, "nll_loss": 0.9632598757743835, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22931702435016632, "rewards/margins": 0.1460202932357788, "rewards/rejected": -0.37533727288246155, "step": 10860 }, { "epoch": 2.847786219544145, "grad_norm": 1.2814388275146484, "learning_rate": 5.389033289510037e-08, "log_odds_chosen": 1.0667012929916382, "log_odds_ratio": -0.4730833172798157, "logits/chosen": -2.3621487617492676, "logits/rejected": -2.2842798233032227, "logps/chosen": -0.6667525172233582, "logps/rejected": -1.2986093759536743, "loss": 1.1853, "nll_loss": 1.015015959739685, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2000257521867752, "rewards/margins": 0.18955707550048828, "rewards/rejected": -0.3895828127861023, "step": 10870 }, { "epoch": 2.850406078071784, "grad_norm": 1.684585690498352, "learning_rate": 5.2055208209129674e-08, "log_odds_chosen": 0.6943972110748291, "log_odds_ratio": -0.5711382031440735, "logits/chosen": -2.327439785003662, "logits/rejected": -2.2573940753936768, "logps/chosen": -0.7850852012634277, "logps/rejected": -1.2471048831939697, "loss": 1.1801, "nll_loss": 1.0676658153533936, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.23552556335926056, "rewards/margins": 0.13860590755939484, "rewards/rejected": -0.3741315007209778, "step": 10880 }, { "epoch": 2.8530259365994235, "grad_norm": 0.8072106242179871, "learning_rate": 5.0251668239650104e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3302764892578125, "logits/rejected": -2.2525546550750732, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.0452, "nll_loss": 0.9189529418945312, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10890 }, { "epoch": 2.8556457951270633, "grad_norm": 1.5546976327896118, "learning_rate": 4.847972741551087e-08, "log_odds_chosen": 1.0830904245376587, "log_odds_ratio": -0.45147761702537537, "logits/chosen": -2.299193859100342, "logits/rejected": -2.3197805881500244, "logps/chosen": -0.7246221303939819, "logps/rejected": -1.390949010848999, "loss": 1.1765, "nll_loss": 1.049501657485962, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2173866331577301, "rewards/margins": 0.19989804923534393, "rewards/rejected": -0.41728466749191284, "step": 10900 }, { "epoch": 2.8582656536547026, "grad_norm": 1.5167957544326782, "learning_rate": 4.67393999127581e-08, "log_odds_chosen": 0.9169076681137085, "log_odds_ratio": -0.47328194975852966, "logits/chosen": -2.3032619953155518, "logits/rejected": -2.2396347522735596, "logps/chosen": -0.6919193267822266, "logps/rejected": -1.2649426460266113, "loss": 1.043, "nll_loss": 0.9167684316635132, "rewards/accuracies": 0.75, "rewards/chosen": -0.20757579803466797, "rewards/margins": 0.171906977891922, "rewards/rejected": -0.37948277592658997, "step": 10910 }, { "epoch": 2.860885512182342, "grad_norm": 1.2411357164382935, "learning_rate": 4.503069965452289e-08, "log_odds_chosen": 0.6107183694839478, "log_odds_ratio": -0.5778893232345581, "logits/chosen": -2.314042806625366, "logits/rejected": -2.255718231201172, "logps/chosen": -0.6799755096435547, "logps/rejected": -1.0160014629364014, "loss": 1.1707, "nll_loss": 0.9609449505805969, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.20399267971515656, "rewards/margins": 0.1008078083395958, "rewards/rejected": -0.30480045080184937, "step": 10920 }, { "epoch": 2.863505370709982, "grad_norm": 0.6967466473579407, "learning_rate": 4.33536403109076e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.385023832321167, "logits/rejected": -2.244363307952881, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2031, "nll_loss": 1.0537805557250977, "rewards/accuracies": 0.675000011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10930 }, { "epoch": 2.866125229237621, "grad_norm": 0.9365120530128479, "learning_rate": 4.170823529887979e-08, "log_odds_chosen": 0.6585452556610107, "log_odds_ratio": -0.6429806351661682, "logits/chosen": -2.229886054992676, "logits/rejected": -2.2017111778259277, "logps/chosen": -0.8120514154434204, "logps/rejected": -1.2626217603683472, "loss": 1.1475, "nll_loss": 0.9573556184768677, "rewards/accuracies": 0.625, "rewards/chosen": -0.24361543357372284, "rewards/margins": 0.1351710855960846, "rewards/rejected": -0.37878650426864624, "step": 10940 }, { "epoch": 2.8687450877652605, "grad_norm": 1.0952894687652588, "learning_rate": 4.0094497782161564e-08, "log_odds_chosen": 0.7637665271759033, "log_odds_ratio": -0.5762001276016235, "logits/chosen": -2.2679696083068848, "logits/rejected": -2.2402987480163574, "logps/chosen": -0.7352820634841919, "logps/rejected": -1.2175629138946533, "loss": 1.1702, "nll_loss": 0.9625948071479797, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22058460116386414, "rewards/margins": 0.14468425512313843, "rewards/rejected": -0.36526888608932495, "step": 10950 }, { "epoch": 2.8713649462929003, "grad_norm": 0.8912537097930908, "learning_rate": 3.851244067112747e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.327322483062744, "logits/rejected": -2.3189785480499268, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1699, "nll_loss": 1.031491994857788, "rewards/accuracies": 0.625, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 10960 }, { "epoch": 2.8739848048205396, "grad_norm": 1.037197232246399, "learning_rate": 3.6962076622697014e-08, "log_odds_chosen": 0.8208988308906555, "log_odds_ratio": -0.5157317519187927, "logits/chosen": -2.386425256729126, "logits/rejected": -2.2412095069885254, "logps/chosen": -0.6582361459732056, "logps/rejected": -1.1654677391052246, "loss": 1.0934, "nll_loss": 0.896253228187561, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.19747082889080048, "rewards/margins": 0.15216949582099915, "rewards/rejected": -0.3496403694152832, "step": 10970 }, { "epoch": 2.8766046633481794, "grad_norm": NaN, "learning_rate": 3.54434180402392e-08, "log_odds_chosen": 0.7344937324523926, "log_odds_ratio": -0.5708209276199341, "logits/chosen": -2.3084282875061035, "logits/rejected": -2.2078561782836914, "logps/chosen": -0.750468909740448, "logps/rejected": -1.2113325595855713, "loss": 1.138, "nll_loss": 0.9036375284194946, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22514069080352783, "rewards/margins": 0.13825903832912445, "rewards/rejected": -0.3633997142314911, "step": 10980 }, { "epoch": 2.879224521875819, "grad_norm": 2.1626648902893066, "learning_rate": 3.3956477073466827e-08, "log_odds_chosen": 0.9011629223823547, "log_odds_ratio": -0.550035834312439, "logits/chosen": -2.3126068115234375, "logits/rejected": -2.2935118675231934, "logps/chosen": -0.7334474921226501, "logps/rejected": -1.3636322021484375, "loss": 1.105, "nll_loss": 0.9506476521492004, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22003427147865295, "rewards/margins": 0.189055398106575, "rewards/rejected": -0.4090896546840668, "step": 10990 }, { "epoch": 2.881844380403458, "grad_norm": 3.8684606552124023, "learning_rate": 3.25012656183441e-08, "log_odds_chosen": 0.8847728967666626, "log_odds_ratio": -0.5924534797668457, "logits/chosen": -2.3377444744110107, "logits/rejected": -2.237511396408081, "logps/chosen": -0.7822648286819458, "logps/rejected": -1.4330642223358154, "loss": 1.1646, "nll_loss": 0.9729121327400208, "rewards/accuracies": 0.625, "rewards/chosen": -0.2346794605255127, "rewards/margins": 0.1952398121356964, "rewards/rejected": -0.4299192428588867, "step": 11000 }, { "epoch": 2.8844642389310975, "grad_norm": 0.8238792419433594, "learning_rate": 3.107779531698984e-08, "log_odds_chosen": 0.7384845614433289, "log_odds_ratio": -0.5415464639663696, "logits/chosen": -2.2954981327056885, "logits/rejected": -2.20975923538208, "logps/chosen": -0.7254376411437988, "logps/rejected": -1.1892726421356201, "loss": 1.1248, "nll_loss": 0.9260106086730957, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2176312953233719, "rewards/margins": 0.1391504853963852, "rewards/rejected": -0.35678181052207947, "step": 11010 }, { "epoch": 2.8870840974587373, "grad_norm": 1.5002729892730713, "learning_rate": 2.9686077557584233e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3194546699523926, "logits/rejected": -2.269031286239624, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1306, "nll_loss": 0.9729447364807129, "rewards/accuracies": 0.737500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11020 }, { "epoch": 2.8897039559863766, "grad_norm": NaN, "learning_rate": 2.8326123474276876e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.321178436279297, "logits/rejected": -2.2570724487304688, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2025, "nll_loss": 1.0005773305892944, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11030 }, { "epoch": 2.8923238145140164, "grad_norm": NaN, "learning_rate": 2.69979439471002e-08, "log_odds_chosen": 0.7153624296188354, "log_odds_ratio": -0.590162992477417, "logits/chosen": -2.2357325553894043, "logits/rejected": -2.1519486904144287, "logps/chosen": -0.6952498555183411, "logps/rejected": -1.1393662691116333, "loss": 1.1547, "nll_loss": 0.872164249420166, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.20857493579387665, "rewards/margins": 0.1332349330186844, "rewards/rejected": -0.34180986881256104, "step": 11040 }, { "epoch": 2.894943673041656, "grad_norm": NaN, "learning_rate": 2.5701549601879314e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2953908443450928, "logits/rejected": -2.209501266479492, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1709, "nll_loss": 0.9403071403503418, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11050 }, { "epoch": 2.897563531569295, "grad_norm": 0.7746265530586243, "learning_rate": 2.4436950810149847e-08, "log_odds_chosen": 1.0284137725830078, "log_odds_ratio": -0.487679660320282, "logits/chosen": -2.327599048614502, "logits/rejected": -2.2837491035461426, "logps/chosen": -0.7078768610954285, "logps/rejected": -1.4306113719940186, "loss": 1.1784, "nll_loss": 1.0244431495666504, "rewards/accuracies": 0.75, "rewards/chosen": -0.2123630940914154, "rewards/margins": 0.21682032942771912, "rewards/rejected": -0.4291834235191345, "step": 11060 }, { "epoch": 2.9001833900969345, "grad_norm": 0.9709392189979553, "learning_rate": 2.3204157689073136e-08, "log_odds_chosen": 0.7098906636238098, "log_odds_ratio": -0.5482165217399597, "logits/chosen": -2.218433380126953, "logits/rejected": -2.1708014011383057, "logps/chosen": -0.7732901573181152, "logps/rejected": -1.2515465021133423, "loss": 1.1355, "nll_loss": 0.9904979467391968, "rewards/accuracies": 0.6875, "rewards/chosen": -0.23198704421520233, "rewards/margins": 0.14347688853740692, "rewards/rejected": -0.37546399235725403, "step": 11070 }, { "epoch": 2.9028032486245743, "grad_norm": 1.123426079750061, "learning_rate": 2.200318010135538e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2737412452697754, "logits/rejected": -2.282600164413452, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2529, "nll_loss": 1.0842092037200928, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11080 }, { "epoch": 2.9054231071522136, "grad_norm": 0.9011853933334351, "learning_rate": 2.083402765516995e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.396749258041382, "logits/rejected": -2.354302406311035, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2287, "nll_loss": 1.0083900690078735, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11090 }, { "epoch": 2.9080429656798534, "grad_norm": 0.878602921962738, "learning_rate": 1.9696709704079217e-08, "log_odds_chosen": 0.6855798363685608, "log_odds_ratio": -0.5572179555892944, "logits/chosen": -2.358626365661621, "logits/rejected": -2.3881070613861084, "logps/chosen": -0.6506325602531433, "logps/rejected": -1.0767731666564941, "loss": 1.1369, "nll_loss": 0.8477801084518433, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19518975913524628, "rewards/margins": 0.12784218788146973, "rewards/rejected": -0.3230319321155548, "step": 11100 }, { "epoch": 2.910662824207493, "grad_norm": 0.5835180282592773, "learning_rate": 1.8591235346959945e-08, "log_odds_chosen": 0.8432886004447937, "log_odds_ratio": -0.5384243726730347, "logits/chosen": -2.4013240337371826, "logits/rejected": -2.2917466163635254, "logps/chosen": -0.755750298500061, "logps/rejected": -1.3062589168548584, "loss": 1.1395, "nll_loss": 0.9641734957695007, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.22672513127326965, "rewards/margins": 0.1651526391506195, "rewards/rejected": -0.3918777406215668, "step": 11110 }, { "epoch": 2.913282682735132, "grad_norm": 0.6315678954124451, "learning_rate": 1.7517613427932233e-08, "log_odds_chosen": 0.8360864520072937, "log_odds_ratio": -0.5352117419242859, "logits/chosen": -2.4512171745300293, "logits/rejected": -2.3759982585906982, "logps/chosen": -0.7273198366165161, "logps/rejected": -1.276885986328125, "loss": 1.1655, "nll_loss": 1.0635243654251099, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21819595992565155, "rewards/margins": 0.16486984491348267, "rewards/rejected": -0.3830658197402954, "step": 11120 }, { "epoch": 2.915902541262772, "grad_norm": 0.6582686305046082, "learning_rate": 1.6475852536285805e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.425966739654541, "logits/rejected": -2.3452038764953613, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1023, "nll_loss": 0.9783496856689453, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11130 }, { "epoch": 2.9185223997904113, "grad_norm": 0.9514375925064087, "learning_rate": 1.546596100641384e-08, "log_odds_chosen": 0.7965463399887085, "log_odds_ratio": -0.5078469514846802, "logits/chosen": -2.3194358348846436, "logits/rejected": -2.3255810737609863, "logps/chosen": -0.7326101660728455, "logps/rejected": -1.2426929473876953, "loss": 1.1322, "nll_loss": 0.9632688760757446, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21978306770324707, "rewards/margins": 0.1530248373746872, "rewards/rejected": -0.37280794978141785, "step": 11140 }, { "epoch": 2.921142258318051, "grad_norm": 1.180174469947815, "learning_rate": 1.4487946917744576e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2904489040374756, "logits/rejected": -2.270267963409424, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.144, "nll_loss": 0.9386903643608093, "rewards/accuracies": 0.699999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11150 }, { "epoch": 2.9237621168456904, "grad_norm": 3.558601140975952, "learning_rate": 1.3541818094676915e-08, "log_odds_chosen": 0.7389436364173889, "log_odds_ratio": -0.5250047445297241, "logits/chosen": -2.3108696937561035, "logits/rejected": -2.261458396911621, "logps/chosen": -0.682996928691864, "logps/rejected": -1.1152498722076416, "loss": 1.1324, "nll_loss": 0.9541209936141968, "rewards/accuracies": 0.75, "rewards/chosen": -0.20489910244941711, "rewards/margins": 0.12967590987682343, "rewards/rejected": -0.33457499742507935, "step": 11160 }, { "epoch": 2.92638197537333, "grad_norm": 0.9500933885574341, "learning_rate": 1.2627582106520484e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3808937072753906, "logits/rejected": -2.3173675537109375, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1371, "nll_loss": 0.8529782295227051, "rewards/accuracies": 0.75, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11170 }, { "epoch": 2.929001833900969, "grad_norm": 1.5640650987625122, "learning_rate": 1.1745246267429898e-08, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3421480655670166, "logits/rejected": -2.280428409576416, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1554, "nll_loss": 1.0068862438201904, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11180 }, { "epoch": 2.931621692428609, "grad_norm": 2.353492259979248, "learning_rate": 1.0894817636351917e-08, "log_odds_chosen": 0.837321937084198, "log_odds_ratio": -0.5408541560173035, "logits/chosen": -2.2589707374572754, "logits/rejected": -2.1754190921783447, "logps/chosen": -0.7674687504768372, "logps/rejected": -1.3122602701187134, "loss": 1.1192, "nll_loss": 0.9183322787284851, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2302406132221222, "rewards/margins": 0.16343745589256287, "rewards/rejected": -0.39367806911468506, "step": 11190 }, { "epoch": 2.9342415509562483, "grad_norm": 0.8251251578330994, "learning_rate": 1.007630301696416e-08, "log_odds_chosen": 0.766154408454895, "log_odds_ratio": -0.5710746049880981, "logits/chosen": -2.305239200592041, "logits/rejected": -2.290860652923584, "logps/chosen": -0.7397705912590027, "logps/rejected": -1.2289223670959473, "loss": 1.1912, "nll_loss": 1.093362808227539, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22193117439746857, "rewards/margins": 0.146745502948761, "rewards/rejected": -0.36867666244506836, "step": 11200 }, { "epoch": 2.936861409483888, "grad_norm": 1.3655190467834473, "learning_rate": 9.289708957624488e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3390002250671387, "logits/rejected": -2.234774589538574, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1975, "nll_loss": 1.0170985460281372, "rewards/accuracies": 0.824999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11210 }, { "epoch": 2.9394812680115274, "grad_norm": 0.790612518787384, "learning_rate": 8.535041751315474e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4223971366882324, "logits/rejected": -2.349656343460083, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1407, "nll_loss": 0.9644659757614136, "rewards/accuracies": 0.6875, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11220 }, { "epoch": 2.9421011265391668, "grad_norm": 0.9991037249565125, "learning_rate": 7.812307435596466e-09, "log_odds_chosen": 0.8778820037841797, "log_odds_ratio": -0.5339678525924683, "logits/chosen": -2.3414125442504883, "logits/rejected": -2.2406504154205322, "logps/chosen": -0.7151540517807007, "logps/rejected": -1.333064079284668, "loss": 1.1517, "nll_loss": 0.875017523765564, "rewards/accuracies": 0.75, "rewards/chosen": -0.21454620361328125, "rewards/margins": 0.18537306785583496, "rewards/rejected": -0.3999192714691162, "step": 11230 }, { "epoch": 2.944720985066806, "grad_norm": 1.1948329210281372, "learning_rate": 7.121511792553825e-09, "log_odds_chosen": 0.4593222141265869, "log_odds_ratio": -0.6348902583122253, "logits/chosen": -2.311891794204712, "logits/rejected": -2.326958417892456, "logps/chosen": -0.7736891508102417, "logps/rejected": -1.034523844718933, "loss": 1.1316, "nll_loss": 0.9957908391952515, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2321067601442337, "rewards/margins": 0.07825037837028503, "rewards/rejected": -0.31035715341567993, "step": 11240 }, { "epoch": 2.947340843594446, "grad_norm": 2.369385242462158, "learning_rate": 6.462660348755644e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3908839225769043, "logits/rejected": -2.302323579788208, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1992, "nll_loss": 0.9360450506210327, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11250 }, { "epoch": 2.9499607021220853, "grad_norm": 4.4744133949279785, "learning_rate": 5.835758375206446e-09, "log_odds_chosen": 0.40064460039138794, "log_odds_ratio": -0.676773726940155, "logits/chosen": -2.4397072792053223, "logits/rejected": -2.3911824226379395, "logps/chosen": -0.7568601369857788, "logps/rejected": -1.0026400089263916, "loss": 1.1125, "nll_loss": 0.9224817156791687, "rewards/accuracies": 0.5, "rewards/chosen": -0.2270580530166626, "rewards/margins": 0.07373391091823578, "rewards/rejected": -0.30079200863838196, "step": 11260 }, { "epoch": 2.952580560649725, "grad_norm": 1.8634049892425537, "learning_rate": 5.240810887306324e-09, "log_odds_chosen": 0.845255970954895, "log_odds_ratio": -0.5195915699005127, "logits/chosen": -2.415769577026367, "logits/rejected": -2.3593575954437256, "logps/chosen": -0.7450459003448486, "logps/rejected": -1.2791351079940796, "loss": 1.1326, "nll_loss": 0.9331023097038269, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22351379692554474, "rewards/margins": 0.1602267473936081, "rewards/rejected": -0.3837405741214752, "step": 11270 }, { "epoch": 2.9552004191773644, "grad_norm": 0.9109734296798706, "learning_rate": 4.677822644809204e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.299999713897705, "logits/rejected": -2.1877541542053223, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1291, "nll_loss": 1.056086778640747, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11280 }, { "epoch": 2.9578202777050038, "grad_norm": 0.5797399878501892, "learning_rate": 4.146798151786424e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4548377990722656, "logits/rejected": -2.2929720878601074, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.1441, "nll_loss": 0.9811544418334961, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11290 }, { "epoch": 2.9604401362326436, "grad_norm": 0.9243193864822388, "learning_rate": 3.6477416565898756e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.1874287128448486, "logits/rejected": -2.1511006355285645, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.244, "nll_loss": 1.0842883586883545, "rewards/accuracies": 0.612500011920929, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11300 }, { "epoch": 2.963059994760283, "grad_norm": 1.2255563735961914, "learning_rate": 3.1806571518173675e-09, "log_odds_chosen": 0.5397371649742126, "log_odds_ratio": -0.6091160178184509, "logits/chosen": -2.384014844894409, "logits/rejected": -2.2841219902038574, "logps/chosen": -0.7912045121192932, "logps/rejected": -1.1744745969772339, "loss": 1.1432, "nll_loss": 0.9531087875366211, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.237361341714859, "rewards/margins": 0.1149810403585434, "rewards/rejected": -0.3523423969745636, "step": 11310 }, { "epoch": 2.9656798532879227, "grad_norm": 1.5738017559051514, "learning_rate": 2.745548374282869e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.3192741870880127, "logits/rejected": -2.2415130138397217, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.0552, "nll_loss": 0.911457359790802, "rewards/accuracies": 0.8125, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11320 }, { "epoch": 2.968299711815562, "grad_norm": 1.3944849967956543, "learning_rate": 2.342418804983204e-09, "log_odds_chosen": 0.9458763003349304, "log_odds_ratio": -0.5201556086540222, "logits/chosen": -2.352350950241089, "logits/rejected": -2.2456161975860596, "logps/chosen": -0.7383637428283691, "logps/rejected": -1.4352223873138428, "loss": 1.1077, "nll_loss": 0.9274641871452332, "rewards/accuracies": 0.75, "rewards/chosen": -0.22150912880897522, "rewards/margins": 0.20905756950378418, "rewards/rejected": -0.4305667281150818, "step": 11330 }, { "epoch": 2.9709195703432014, "grad_norm": 0.7486133575439453, "learning_rate": 1.9712716690740703e-09, "log_odds_chosen": 0.8406063914299011, "log_odds_ratio": -0.5690051913261414, "logits/chosen": -2.3495419025421143, "logits/rejected": -2.199279546737671, "logps/chosen": -0.7310729026794434, "logps/rejected": -1.2816814184188843, "loss": 1.1188, "nll_loss": 0.9790310859680176, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21932187676429749, "rewards/margins": 0.16518256068229675, "rewards/rejected": -0.38450440764427185, "step": 11340 }, { "epoch": 2.9735394288708408, "grad_norm": 1.500160574913025, "learning_rate": 1.632109935841175e-09, "log_odds_chosen": 0.578166127204895, "log_odds_ratio": -0.595360517501831, "logits/chosen": -2.3102645874023438, "logits/rejected": -2.229559898376465, "logps/chosen": -0.8010009527206421, "logps/rejected": -1.203283667564392, "loss": 1.1197, "nll_loss": 0.9201364517211914, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24030029773712158, "rewards/margins": 0.12068480253219604, "rewards/rejected": -0.3609851002693176, "step": 11350 }, { "epoch": 2.9761592873984806, "grad_norm": 1.302300214767456, "learning_rate": 1.324936318677583e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.277737617492676, "logits/rejected": -2.273268222808838, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2157, "nll_loss": 1.0011972188949585, "rewards/accuracies": 0.574999988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11360 }, { "epoch": 2.97877914592612, "grad_norm": 1.1280386447906494, "learning_rate": 1.0497532750624038e-09, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.2766623497009277, "logits/rejected": -2.242147922515869, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.2225, "nll_loss": 0.9498116374015808, "rewards/accuracies": 0.637499988079071, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11370 }, { "epoch": 2.9813990044537597, "grad_norm": 0.7431193590164185, "learning_rate": 8.065630065408058e-10, "log_odds_chosen": 0.883421778678894, "log_odds_ratio": -0.5182092785835266, "logits/chosen": -2.2482006549835205, "logits/rejected": -2.2482218742370605, "logps/chosen": -0.6786171197891235, "logps/rejected": -1.2313085794448853, "loss": 1.1373, "nll_loss": 0.9228252172470093, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.20358511805534363, "rewards/margins": 0.16580747067928314, "rewards/rejected": -0.36939260363578796, "step": 11380 }, { "epoch": 2.984018862981399, "grad_norm": 0.9816609621047974, "learning_rate": 5.953674587066971e-10, "log_odds_chosen": 0.6654943227767944, "log_odds_ratio": -0.5905393362045288, "logits/chosen": -2.3116064071655273, "logits/rejected": -2.272850513458252, "logps/chosen": -0.7540726661682129, "logps/rejected": -1.180996060371399, "loss": 1.1447, "nll_loss": 0.9284960627555847, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22622179985046387, "rewards/margins": 0.12807705998420715, "rewards/rejected": -0.354298859834671, "step": 11390 }, { "epoch": 2.9866387215090384, "grad_norm": 1.6065720319747925, "learning_rate": 4.161683211854061e-10, "log_odds_chosen": 0.453188419342041, "log_odds_ratio": -0.6091657280921936, "logits/chosen": -2.405611038208008, "logits/rejected": -2.2705788612365723, "logps/chosen": -0.8156019449234009, "logps/rejected": -1.10752534866333, "loss": 1.165, "nll_loss": 0.9370447397232056, "rewards/accuracies": 0.6875, "rewards/chosen": -0.24468059837818146, "rewards/margins": 0.08757705986499786, "rewards/rejected": -0.33225762844085693, "step": 11400 }, { "epoch": 2.9892585800366778, "grad_norm": 2.9840946197509766, "learning_rate": 2.68967027623912e-10, "log_odds_chosen": 0.7103438973426819, "log_odds_ratio": -0.5470684766769409, "logits/chosen": -2.2951602935791016, "logits/rejected": -2.3029372692108154, "logps/chosen": -0.7189238667488098, "logps/rejected": -1.1843281984329224, "loss": 1.1839, "nll_loss": 1.0248053073883057, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2156771868467331, "rewards/margins": 0.1396212875843048, "rewards/rejected": -0.3552984595298767, "step": 11410 }, { "epoch": 2.9918784385643176, "grad_norm": 1.0490409135818481, "learning_rate": 1.537647556744126e-10, "log_odds_chosen": 0.9936825037002563, "log_odds_ratio": -0.5020376443862915, "logits/chosen": -2.2909770011901855, "logits/rejected": -2.250410318374634, "logps/chosen": -0.7468298077583313, "logps/rejected": -1.3409092426300049, "loss": 1.1642, "nll_loss": 0.9282538294792175, "rewards/accuracies": 0.75, "rewards/chosen": -0.22404897212982178, "rewards/margins": 0.1782238483428955, "rewards/rejected": -0.4022728502750397, "step": 11420 }, { "epoch": 2.994498297091957, "grad_norm": 4.906797885894775, "learning_rate": 7.056242698988412e-11, "log_odds_chosen": NaN, "log_odds_ratio": NaN, "logits/chosen": -2.4009392261505127, "logits/rejected": -2.291551351547241, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.198, "nll_loss": 1.0413074493408203, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 11430 }, { "epoch": 2.9971181556195967, "grad_norm": 1.7115346193313599, "learning_rate": 1.9360707212978667e-11, "log_odds_chosen": 1.1079422235488892, "log_odds_ratio": -0.5229429006576538, "logits/chosen": -2.3862807750701904, "logits/rejected": -2.323098659515381, "logps/chosen": -0.7345533967018127, "logps/rejected": -1.440443754196167, "loss": 1.1818, "nll_loss": 1.0553444623947144, "rewards/accuracies": 0.75, "rewards/chosen": -0.22036603093147278, "rewards/margins": 0.21176710724830627, "rewards/rejected": -0.43213310837745667, "step": 11440 }, { "epoch": 2.999738014147236, "grad_norm": 1.1348971128463745, "learning_rate": 1.6000597291565555e-13, "log_odds_chosen": 0.7146393060684204, "log_odds_ratio": -0.5738018751144409, "logits/chosen": -2.3377110958099365, "logits/rejected": -2.305211305618286, "logps/chosen": -0.7385216951370239, "logps/rejected": -1.2187727689743042, "loss": 1.1728, "nll_loss": 1.0074824094772339, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22155654430389404, "rewards/margins": 0.14407534897327423, "rewards/rejected": -0.3656318783760071, "step": 11450 }, { "epoch": 3.0, "step": 11451, "total_flos": 0.0, "train_loss": 1.0584710824750894, "train_runtime": 37613.9724, "train_samples_per_second": 4.87, "train_steps_per_second": 0.304 } ], "logging_steps": 10, "max_steps": 11451, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }