|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.975609756097561, |
|
"eval_steps": 500, |
|
"global_step": 246, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016260162601626018, |
|
"grad_norm": 18.177886962890625, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": -0.3472236394882202, |
|
"logits/rejected": -0.13716036081314087, |
|
"logps/chosen": -780.8181762695312, |
|
"logps/rejected": -909.20263671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.032520325203252036, |
|
"grad_norm": 23.274246215820312, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -0.2127760350704193, |
|
"logits/rejected": -0.08323362469673157, |
|
"logps/chosen": -583.0169067382812, |
|
"logps/rejected": -715.5615234375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.04878048780487805, |
|
"grad_norm": 20.149507522583008, |
|
"learning_rate": 6e-05, |
|
"logits/chosen": -0.18167662620544434, |
|
"logits/rejected": -0.04478086531162262, |
|
"logps/chosen": -941.0387573242188, |
|
"logps/rejected": -825.662841796875, |
|
"loss": 0.6976, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.025517277419567108, |
|
"rewards/margins": 0.022285467013716698, |
|
"rewards/rejected": 0.0032318076118826866, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.06504065040650407, |
|
"grad_norm": 16.67251205444336, |
|
"learning_rate": 8e-05, |
|
"logits/chosen": 0.6866837739944458, |
|
"logits/rejected": 0.971089243888855, |
|
"logps/chosen": -999.306640625, |
|
"logps/rejected": -386.5375671386719, |
|
"loss": 0.563, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2688583433628082, |
|
"rewards/margins": 0.3312031030654907, |
|
"rewards/rejected": -0.062344741076231, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.08130081300813008, |
|
"grad_norm": 15.646084785461426, |
|
"learning_rate": 0.0001, |
|
"logits/chosen": 0.5107800364494324, |
|
"logits/rejected": 0.5942208766937256, |
|
"logps/chosen": -1051.1270751953125, |
|
"logps/rejected": -745.8003540039062, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.3622299134731293, |
|
"rewards/margins": 0.34313660860061646, |
|
"rewards/rejected": 0.01909332349896431, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0975609756097561, |
|
"grad_norm": 38.70280456542969, |
|
"learning_rate": 0.00012, |
|
"logits/chosen": -0.31406939029693604, |
|
"logits/rejected": -0.24293695390224457, |
|
"logps/chosen": -845.9321899414062, |
|
"logps/rejected": -932.499755859375, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5435073971748352, |
|
"rewards/margins": 0.47774890065193176, |
|
"rewards/rejected": 0.06575851887464523, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.11382113821138211, |
|
"grad_norm": 23.665071487426758, |
|
"learning_rate": 0.00014, |
|
"logits/chosen": -0.2646118402481079, |
|
"logits/rejected": -0.11520399153232574, |
|
"logps/chosen": -866.503173828125, |
|
"logps/rejected": -975.55126953125, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.6112838387489319, |
|
"rewards/margins": 0.4790405333042145, |
|
"rewards/rejected": 0.1322433352470398, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.13008130081300814, |
|
"grad_norm": 15.794047355651855, |
|
"learning_rate": 0.00016, |
|
"logits/chosen": -0.8256000876426697, |
|
"logits/rejected": -0.8912097811698914, |
|
"logps/chosen": -523.3858032226562, |
|
"logps/rejected": -1084.9468994140625, |
|
"loss": 0.4442, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.5804435610771179, |
|
"rewards/margins": 0.24081651866436005, |
|
"rewards/rejected": 0.33962705731391907, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.14634146341463414, |
|
"grad_norm": 13.538564682006836, |
|
"learning_rate": 0.00018, |
|
"logits/chosen": -0.11683523654937744, |
|
"logits/rejected": -0.0632472038269043, |
|
"logps/chosen": -652.114501953125, |
|
"logps/rejected": -551.6069946289062, |
|
"loss": 0.1564, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6716469526290894, |
|
"rewards/margins": 2.151698350906372, |
|
"rewards/rejected": -0.4800514578819275, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.16260162601626016, |
|
"grad_norm": 3.9652626514434814, |
|
"learning_rate": 0.0002, |
|
"logits/chosen": 0.4062778949737549, |
|
"logits/rejected": 0.5438919067382812, |
|
"logps/chosen": -771.1934814453125, |
|
"logps/rejected": -616.55908203125, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.8721909523010254, |
|
"rewards/margins": 5.208758354187012, |
|
"rewards/rejected": -1.3365669250488281, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.17886178861788618, |
|
"grad_norm": 0.18261243402957916, |
|
"learning_rate": 0.0001999911398855782, |
|
"logits/chosen": -0.7774271965026855, |
|
"logits/rejected": -0.8629493117332458, |
|
"logps/chosen": -601.1015014648438, |
|
"logps/rejected": -1039.275146484375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0800025463104248, |
|
"rewards/margins": 6.853862762451172, |
|
"rewards/rejected": -5.773860454559326, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.1951219512195122, |
|
"grad_norm": 0.1421748697757721, |
|
"learning_rate": 0.00019996456111234527, |
|
"logits/chosen": 0.7899215817451477, |
|
"logits/rejected": 1.119359016418457, |
|
"logps/chosen": -1416.412353515625, |
|
"logps/rejected": -827.2066650390625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.7505874633789062, |
|
"rewards/margins": 15.09115982055664, |
|
"rewards/rejected": -11.340574264526367, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.21138211382113822, |
|
"grad_norm": 3.4406840801239014, |
|
"learning_rate": 0.00019992026839012067, |
|
"logits/chosen": -0.8033453226089478, |
|
"logits/rejected": -0.877557098865509, |
|
"logps/chosen": -514.6026611328125, |
|
"logps/rejected": -1206.25537109375, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7983558177947998, |
|
"rewards/margins": 23.49526596069336, |
|
"rewards/rejected": -21.696908950805664, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.22764227642276422, |
|
"grad_norm": 0.19398577511310577, |
|
"learning_rate": 0.0001998582695676762, |
|
"logits/chosen": 0.9254277944564819, |
|
"logits/rejected": 1.1634798049926758, |
|
"logps/chosen": -1028.993408203125, |
|
"logps/rejected": -955.4432983398438, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5009795427322388, |
|
"rewards/margins": 17.867931365966797, |
|
"rewards/rejected": -18.368911743164062, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 0.00010074722376884893, |
|
"learning_rate": 0.000199778575631345, |
|
"logits/chosen": 0.3904605507850647, |
|
"logits/rejected": 0.3719422519207001, |
|
"logps/chosen": -884.9620361328125, |
|
"logps/rejected": -1075.615966796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.482113838195801, |
|
"rewards/margins": 21.95424461364746, |
|
"rewards/rejected": -24.436357498168945, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2601626016260163, |
|
"grad_norm": 3.7136353057576343e-05, |
|
"learning_rate": 0.000199681200703075, |
|
"logits/chosen": 0.2578551769256592, |
|
"logits/rejected": 0.5335351824760437, |
|
"logps/chosen": -1073.548828125, |
|
"logps/rejected": -992.4033813476562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.9434356689453125, |
|
"rewards/margins": 20.854663848876953, |
|
"rewards/rejected": -23.798099517822266, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.2764227642276423, |
|
"grad_norm": 8.596338147981442e-07, |
|
"learning_rate": 0.00019956616203792635, |
|
"logits/chosen": 0.5267460346221924, |
|
"logits/rejected": 0.4893237352371216, |
|
"logps/chosen": -987.3567504882812, |
|
"logps/rejected": -1127.171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0684036016464233, |
|
"rewards/margins": 32.558319091796875, |
|
"rewards/rejected": -33.62671661376953, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.2926829268292683, |
|
"grad_norm": 0.004051027819514275, |
|
"learning_rate": 0.00019943348002101371, |
|
"logits/chosen": 1.0484071969985962, |
|
"logits/rejected": 1.1081664562225342, |
|
"logps/chosen": -1105.1634521484375, |
|
"logps/rejected": -898.9759521484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1622314453125, |
|
"rewards/margins": 23.434669494628906, |
|
"rewards/rejected": -26.596900939941406, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.3089430894308943, |
|
"grad_norm": 0.003306547412648797, |
|
"learning_rate": 0.00019928317816389417, |
|
"logits/chosen": 0.5566614866256714, |
|
"logits/rejected": 0.6963181495666504, |
|
"logps/chosen": -932.650390625, |
|
"logps/rejected": -1061.4989013671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.36033821105957, |
|
"rewards/margins": 30.25779914855957, |
|
"rewards/rejected": -34.61813735961914, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.3252032520325203, |
|
"grad_norm": 1.3893560968369911e-08, |
|
"learning_rate": 0.00019911528310040074, |
|
"logits/chosen": 1.239579200744629, |
|
"logits/rejected": 1.046311855316162, |
|
"logps/chosen": -1079.0159912109375, |
|
"logps/rejected": -1033.2017822265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.044548749923706, |
|
"rewards/margins": 41.88936233520508, |
|
"rewards/rejected": -40.844810485839844, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.34146341463414637, |
|
"grad_norm": 4.666223851756968e-09, |
|
"learning_rate": 0.00019892982458192288, |
|
"logits/chosen": 0.2726232409477234, |
|
"logits/rejected": 0.14665402472019196, |
|
"logps/chosen": -978.7222900390625, |
|
"logps/rejected": -1133.2047119140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.054238319396973, |
|
"rewards/margins": 54.86410140991211, |
|
"rewards/rejected": -43.80986404418945, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.35772357723577236, |
|
"grad_norm": 4.876813477494579e-07, |
|
"learning_rate": 0.00019872683547213446, |
|
"logits/chosen": -0.16925190389156342, |
|
"logits/rejected": -0.19759103655815125, |
|
"logps/chosen": -965.187255859375, |
|
"logps/rejected": -1239.143798828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.977485656738281, |
|
"rewards/margins": 29.40732765197754, |
|
"rewards/rejected": -44.38481140136719, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.37398373983739835, |
|
"grad_norm": 37.638973236083984, |
|
"learning_rate": 0.00019850635174117033, |
|
"logits/chosen": 0.437714159488678, |
|
"logits/rejected": 0.4761970639228821, |
|
"logps/chosen": -1137.6966552734375, |
|
"logps/rejected": -1166.5640869140625, |
|
"loss": 0.4393, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.159793853759766, |
|
"rewards/margins": 32.14189529418945, |
|
"rewards/rejected": -43.301692962646484, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.3902439024390244, |
|
"grad_norm": 1.8173747229344173e-11, |
|
"learning_rate": 0.00019826841245925212, |
|
"logits/chosen": -0.7153763175010681, |
|
"logits/rejected": -0.6940470933914185, |
|
"logps/chosen": -938.263916015625, |
|
"logps/rejected": -1608.4205322265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -24.817350387573242, |
|
"rewards/margins": 34.095001220703125, |
|
"rewards/rejected": -58.912349700927734, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"grad_norm": 83.79772186279297, |
|
"learning_rate": 0.0001980130597897651, |
|
"logits/chosen": 1.1592888832092285, |
|
"logits/rejected": 1.1738824844360352, |
|
"logps/chosen": -948.4622802734375, |
|
"logps/rejected": -865.396728515625, |
|
"loss": 0.3825, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.343675374984741, |
|
"rewards/margins": 26.49417495727539, |
|
"rewards/rejected": -29.837852478027344, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.42276422764227645, |
|
"grad_norm": 2.6143006834900007e-06, |
|
"learning_rate": 0.00019774033898178667, |
|
"logits/chosen": 0.5444796085357666, |
|
"logits/rejected": 0.47586876153945923, |
|
"logps/chosen": -932.6605834960938, |
|
"logps/rejected": -1091.639892578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.2753777503967285, |
|
"rewards/margins": 34.133514404296875, |
|
"rewards/rejected": -38.40888977050781, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.43902439024390244, |
|
"grad_norm": 0.0003061926399823278, |
|
"learning_rate": 0.00019745029836206813, |
|
"logits/chosen": -0.6794779896736145, |
|
"logits/rejected": -0.8602011203765869, |
|
"logps/chosen": -894.3270263671875, |
|
"logps/rejected": -1067.5921630859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.433198928833008, |
|
"rewards/margins": 17.333955764770508, |
|
"rewards/rejected": -30.767154693603516, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.45528455284552843, |
|
"grad_norm": 3.805017101399244e-08, |
|
"learning_rate": 0.00019714298932647098, |
|
"logits/chosen": 0.4980026185512543, |
|
"logits/rejected": 0.6999194025993347, |
|
"logps/chosen": -911.8473510742188, |
|
"logps/rejected": -1126.07421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5412168502807617, |
|
"rewards/margins": 29.520708084106445, |
|
"rewards/rejected": -30.06192398071289, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.4715447154471545, |
|
"grad_norm": 5.17633900187775e-08, |
|
"learning_rate": 0.00019681846633085967, |
|
"logits/chosen": -0.5973828434944153, |
|
"logits/rejected": -0.8376109600067139, |
|
"logps/chosen": -711.66259765625, |
|
"logps/rejected": -1186.1884765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.467390537261963, |
|
"rewards/margins": 25.050704956054688, |
|
"rewards/rejected": -27.518096923828125, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 0.00011633769463514909, |
|
"learning_rate": 0.0001964767868814516, |
|
"logits/chosen": 1.3797093629837036, |
|
"logits/rejected": 1.5397391319274902, |
|
"logps/chosen": -877.42333984375, |
|
"logps/rejected": -1003.4732666015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.624107360839844, |
|
"rewards/margins": 29.784557342529297, |
|
"rewards/rejected": -25.160449981689453, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5040650406504065, |
|
"grad_norm": 6.257723228486611e-09, |
|
"learning_rate": 0.00019611801152462715, |
|
"logits/chosen": 1.2731826305389404, |
|
"logits/rejected": 1.6379995346069336, |
|
"logps/chosen": -1053.573486328125, |
|
"logps/rejected": -1010.915283203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.018058776855469, |
|
"rewards/margins": 32.15219497680664, |
|
"rewards/rejected": -21.13413429260254, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.5203252032520326, |
|
"grad_norm": 0.00035472630406729877, |
|
"learning_rate": 0.00019574220383620055, |
|
"logits/chosen": 0.6649560928344727, |
|
"logits/rejected": 0.983564019203186, |
|
"logps/chosen": -872.1873168945312, |
|
"logps/rejected": -965.9480590820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.504961967468262, |
|
"rewards/margins": 23.669071197509766, |
|
"rewards/rejected": -18.164108276367188, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.5365853658536586, |
|
"grad_norm": 3.0934195820009336e-05, |
|
"learning_rate": 0.00019534943041015423, |
|
"logits/chosen": 0.49574941396713257, |
|
"logits/rejected": 0.5190873742103577, |
|
"logps/chosen": -708.9269409179688, |
|
"logps/rejected": -842.974365234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.209194660186768, |
|
"rewards/margins": 20.690357208251953, |
|
"rewards/rejected": -13.48116397857666, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.5528455284552846, |
|
"grad_norm": 0.0006856573163531721, |
|
"learning_rate": 0.00019493976084683813, |
|
"logits/chosen": 0.992796778678894, |
|
"logits/rejected": 1.1291236877441406, |
|
"logps/chosen": -673.6188354492188, |
|
"logps/rejected": -723.4482421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.3715057373046875, |
|
"rewards/margins": 19.963485717773438, |
|
"rewards/rejected": -14.591980934143066, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.5691056910569106, |
|
"grad_norm": 5.983891969663091e-05, |
|
"learning_rate": 0.00019451326774063636, |
|
"logits/chosen": 0.7630600929260254, |
|
"logits/rejected": 0.910960853099823, |
|
"logps/chosen": -993.23828125, |
|
"logps/rejected": -1011.3184204101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.109509468078613, |
|
"rewards/margins": 24.603878021240234, |
|
"rewards/rejected": -17.494367599487305, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5853658536585366, |
|
"grad_norm": 1.9749455532291904e-05, |
|
"learning_rate": 0.00019407002666710336, |
|
"logits/chosen": 1.8401339054107666, |
|
"logits/rejected": 1.9955703020095825, |
|
"logps/chosen": -1152.950927734375, |
|
"logps/rejected": -827.0269775390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.768245697021484, |
|
"rewards/margins": 38.1776123046875, |
|
"rewards/rejected": -22.40936851501465, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.6016260162601627, |
|
"grad_norm": 0.0017285533249378204, |
|
"learning_rate": 0.00019361011616957164, |
|
"logits/chosen": 2.153351306915283, |
|
"logits/rejected": 2.235447883605957, |
|
"logps/chosen": -1090.1943359375, |
|
"logps/rejected": -682.7992553710938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.726329803466797, |
|
"rewards/margins": 24.018630981445312, |
|
"rewards/rejected": -12.292303085327148, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.6178861788617886, |
|
"grad_norm": 0.00919501855969429, |
|
"learning_rate": 0.00019313361774523385, |
|
"logits/chosen": 0.47314736247062683, |
|
"logits/rejected": 0.557833731174469, |
|
"logps/chosen": -691.4217529296875, |
|
"logps/rejected": -673.1847534179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.087795257568359, |
|
"rewards/margins": 12.628225326538086, |
|
"rewards/rejected": -6.540430068969727, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.6341463414634146, |
|
"grad_norm": 0.002680833451449871, |
|
"learning_rate": 0.00019264061583070127, |
|
"logits/chosen": 0.20066705346107483, |
|
"logits/rejected": 0.2085224837064743, |
|
"logps/chosen": -693.7376098632812, |
|
"logps/rejected": -982.19091796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.779763221740723, |
|
"rewards/margins": 22.904094696044922, |
|
"rewards/rejected": -15.124334335327148, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.6504065040650406, |
|
"grad_norm": 8.798202907200903e-05, |
|
"learning_rate": 0.00019213119778704128, |
|
"logits/chosen": 1.3898746967315674, |
|
"logits/rejected": 1.5520107746124268, |
|
"logps/chosen": -1247.770263671875, |
|
"logps/rejected": -916.4830322265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.276836395263672, |
|
"rewards/margins": 34.69191360473633, |
|
"rewards/rejected": -19.415077209472656, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.0009758697124198079, |
|
"learning_rate": 0.00019160545388429708, |
|
"logits/chosen": 2.345059633255005, |
|
"logits/rejected": 2.5746054649353027, |
|
"logps/chosen": -1102.5548095703125, |
|
"logps/rejected": -722.4332885742188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.800348281860352, |
|
"rewards/margins": 32.747169494628906, |
|
"rewards/rejected": -18.946823120117188, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.6829268292682927, |
|
"grad_norm": 0.0016077810432761908, |
|
"learning_rate": 0.00019106347728549135, |
|
"logits/chosen": 0.9104095697402954, |
|
"logits/rejected": 0.9921329021453857, |
|
"logps/chosen": -753.8040771484375, |
|
"logps/rejected": -886.5813598632812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.367500305175781, |
|
"rewards/margins": 27.856563568115234, |
|
"rewards/rejected": -16.489063262939453, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.6991869918699187, |
|
"grad_norm": 0.0004074655589647591, |
|
"learning_rate": 0.0001905053640301176, |
|
"logits/chosen": 0.5256392955780029, |
|
"logits/rejected": 0.4733426570892334, |
|
"logps/chosen": -715.4669189453125, |
|
"logps/rejected": -565.0441284179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.25009822845459, |
|
"rewards/margins": 21.391075134277344, |
|
"rewards/rejected": -15.14097785949707, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.7154471544715447, |
|
"grad_norm": 0.013145952485501766, |
|
"learning_rate": 0.00018993121301712193, |
|
"logits/chosen": 0.9358551502227783, |
|
"logits/rejected": 0.8306156992912292, |
|
"logps/chosen": -867.1063232421875, |
|
"logps/rejected": -973.7214965820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.3925018310546875, |
|
"rewards/margins": 21.35105323791504, |
|
"rewards/rejected": -13.958552360534668, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 8.829876605886966e-05, |
|
"learning_rate": 0.00018934112598737777, |
|
"logits/chosen": 2.2844998836517334, |
|
"logits/rejected": 2.831254482269287, |
|
"logps/chosen": -1142.8726806640625, |
|
"logps/rejected": -776.1110229492188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.17538833618164, |
|
"rewards/margins": 33.72625732421875, |
|
"rewards/rejected": -16.550867080688477, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.7479674796747967, |
|
"grad_norm": 0.02624354511499405, |
|
"learning_rate": 0.00018873520750565718, |
|
"logits/chosen": 0.1806122362613678, |
|
"logits/rejected": 0.31054702401161194, |
|
"logps/chosen": -692.7060546875, |
|
"logps/rejected": -1032.708740234375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.434965133666992, |
|
"rewards/margins": 16.74932098388672, |
|
"rewards/rejected": -10.314356803894043, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.7642276422764228, |
|
"grad_norm": 4.268178963684477e-05, |
|
"learning_rate": 0.00018811356494210165, |
|
"logits/chosen": 1.1679103374481201, |
|
"logits/rejected": 1.0418663024902344, |
|
"logps/chosen": -720.220703125, |
|
"logps/rejected": -911.58837890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.991888523101807, |
|
"rewards/margins": 21.064565658569336, |
|
"rewards/rejected": -13.072675704956055, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.7804878048780488, |
|
"grad_norm": 0.0009461237932555377, |
|
"learning_rate": 0.00018747630845319612, |
|
"logits/chosen": 0.13339552283287048, |
|
"logits/rejected": 0.3655449151992798, |
|
"logps/chosen": -420.11431884765625, |
|
"logps/rejected": -786.4783325195312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.16606330871582, |
|
"rewards/margins": 30.41803741455078, |
|
"rewards/rejected": -19.251976013183594, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.7967479674796748, |
|
"grad_norm": 0.0033115639816969633, |
|
"learning_rate": 0.00018682355096224872, |
|
"logits/chosen": 0.4472777247428894, |
|
"logits/rejected": 0.3390260934829712, |
|
"logps/chosen": -536.7960205078125, |
|
"logps/rejected": -901.3749389648438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.887458801269531, |
|
"rewards/margins": 27.701595306396484, |
|
"rewards/rejected": -16.814136505126953, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"grad_norm": 0.01153454091399908, |
|
"learning_rate": 0.0001861554081393806, |
|
"logits/chosen": 0.6489148139953613, |
|
"logits/rejected": 0.689254105091095, |
|
"logps/chosen": -738.5593872070312, |
|
"logps/rejected": -755.362060546875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.205413818359375, |
|
"rewards/margins": 16.344358444213867, |
|
"rewards/rejected": -6.138944625854492, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8292682926829268, |
|
"grad_norm": 0.001985176932066679, |
|
"learning_rate": 0.00018547199838102904, |
|
"logits/chosen": 0.144524484872818, |
|
"logits/rejected": 0.26266002655029297, |
|
"logps/chosen": -893.19482421875, |
|
"logps/rejected": -1031.27294921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.087849617004395, |
|
"rewards/margins": 23.393884658813477, |
|
"rewards/rejected": -14.306035041809082, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.8455284552845529, |
|
"grad_norm": 0.00042794409091584384, |
|
"learning_rate": 0.0001847734427889671, |
|
"logits/chosen": 0.5121033191680908, |
|
"logits/rejected": 1.0676312446594238, |
|
"logps/chosen": -987.8340454101562, |
|
"logps/rejected": -830.7366943359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.409669876098633, |
|
"rewards/margins": 19.569660186767578, |
|
"rewards/rejected": -8.159988403320312, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.8617886178861789, |
|
"grad_norm": 0.0011688657104969025, |
|
"learning_rate": 0.00018405986514884434, |
|
"logits/chosen": 1.793473243713379, |
|
"logits/rejected": 1.9872632026672363, |
|
"logps/chosen": -926.424560546875, |
|
"logps/rejected": -618.4228515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.011417388916016, |
|
"rewards/margins": 22.01776123046875, |
|
"rewards/rejected": -11.006343841552734, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.8780487804878049, |
|
"grad_norm": 0.005157554987818003, |
|
"learning_rate": 0.0001833313919082515, |
|
"logits/chosen": -0.02910199761390686, |
|
"logits/rejected": 0.14243453741073608, |
|
"logps/chosen": -725.36376953125, |
|
"logps/rejected": -997.5311279296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.557222366333008, |
|
"rewards/margins": 15.359309196472168, |
|
"rewards/rejected": -9.802087783813477, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.8943089430894309, |
|
"grad_norm": 0.005044507794082165, |
|
"learning_rate": 0.00018258815215431396, |
|
"logits/chosen": 0.17898443341255188, |
|
"logits/rejected": 0.09989897906780243, |
|
"logps/chosen": -803.9798583984375, |
|
"logps/rejected": -925.3179321289062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.798739433288574, |
|
"rewards/margins": 17.492319107055664, |
|
"rewards/rejected": -10.69357967376709, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.9105691056910569, |
|
"grad_norm": 0.0031374047975987196, |
|
"learning_rate": 0.0001818302775908169, |
|
"logits/chosen": 1.017639398574829, |
|
"logits/rejected": 1.2823631763458252, |
|
"logps/chosen": -824.6445922851562, |
|
"logps/rejected": -860.8942260742188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.019498825073242, |
|
"rewards/margins": 16.16924285888672, |
|
"rewards/rejected": -10.149742126464844, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.926829268292683, |
|
"grad_norm": 0.00014241511235013604, |
|
"learning_rate": 0.0001810579025148674, |
|
"logits/chosen": 1.0959478616714478, |
|
"logits/rejected": 0.9008815288543701, |
|
"logps/chosen": -782.0526123046875, |
|
"logps/rejected": -916.8338623046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.443077087402344, |
|
"rewards/margins": 24.263744354248047, |
|
"rewards/rejected": -15.820667266845703, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.943089430894309, |
|
"grad_norm": 5.913816494285129e-05, |
|
"learning_rate": 0.00018027116379309638, |
|
"logits/chosen": 0.2709883153438568, |
|
"logits/rejected": 0.29769933223724365, |
|
"logps/chosen": -735.5257568359375, |
|
"logps/rejected": -1044.0601806640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.65300178527832, |
|
"rewards/margins": 18.755083084106445, |
|
"rewards/rejected": -10.102080345153809, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.959349593495935, |
|
"grad_norm": 0.01578771322965622, |
|
"learning_rate": 0.00017947020083740575, |
|
"logits/chosen": 1.5522100925445557, |
|
"logits/rejected": 1.7518442869186401, |
|
"logps/chosen": -1019.1099853515625, |
|
"logps/rejected": -624.6131591796875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.32003402709961, |
|
"rewards/margins": 23.75770378112793, |
|
"rewards/rejected": -13.43766975402832, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 0.0010152229806408286, |
|
"learning_rate": 0.00017865515558026428, |
|
"logits/chosen": 0.8601479530334473, |
|
"logits/rejected": 0.819040060043335, |
|
"logps/chosen": -763.342041015625, |
|
"logps/rejected": -817.870849609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.2501859664917, |
|
"rewards/margins": 16.491539001464844, |
|
"rewards/rejected": -8.241353034973145, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.991869918699187, |
|
"grad_norm": 0.008696873672306538, |
|
"learning_rate": 0.0001778261724495566, |
|
"logits/chosen": 0.7409014701843262, |
|
"logits/rejected": 0.9245580434799194, |
|
"logps/chosen": -888.8350830078125, |
|
"logps/rejected": -796.002685546875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.07230281829834, |
|
"rewards/margins": 22.53582000732422, |
|
"rewards/rejected": -11.463518142700195, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.3132517526391894e-05, |
|
"learning_rate": 0.00017698339834299061, |
|
"logits/chosen": 0.962340772151947, |
|
"logits/rejected": 1.369040608406067, |
|
"logps/chosen": -843.8861083984375, |
|
"logps/rejected": -833.0137329101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.60971736907959, |
|
"rewards/margins": 22.649456024169922, |
|
"rewards/rejected": -15.039739608764648, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.016260162601626, |
|
"grad_norm": 3.0814584306426696e-07, |
|
"learning_rate": 0.00017612698260206666, |
|
"logits/chosen": 1.7351003885269165, |
|
"logits/rejected": 2.39410400390625, |
|
"logps/chosen": -1081.0841064453125, |
|
"logps/rejected": -664.132080078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.010480880737305, |
|
"rewards/margins": 23.851722717285156, |
|
"rewards/rejected": -11.841242790222168, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.032520325203252, |
|
"grad_norm": 0.0014821357326582074, |
|
"learning_rate": 0.00017525707698561385, |
|
"logits/chosen": 0.8669869899749756, |
|
"logits/rejected": 1.2894644737243652, |
|
"logps/chosen": -794.047607421875, |
|
"logps/rejected": -812.5697631835938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.141783714294434, |
|
"rewards/margins": 23.891061782836914, |
|
"rewards/rejected": -12.749277114868164, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.048780487804878, |
|
"grad_norm": 0.002492019208148122, |
|
"learning_rate": 0.00017437383564289816, |
|
"logits/chosen": 1.1617192029953003, |
|
"logits/rejected": 1.0443211793899536, |
|
"logps/chosen": -706.7365112304688, |
|
"logps/rejected": -834.9153442382812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.32893180847168, |
|
"rewards/margins": 23.380508422851562, |
|
"rewards/rejected": -13.0515775680542, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.065040650406504, |
|
"grad_norm": 0.10320430248975754, |
|
"learning_rate": 0.00017347741508630672, |
|
"logits/chosen": 1.5734750032424927, |
|
"logits/rejected": 2.108652114868164, |
|
"logps/chosen": -919.78125, |
|
"logps/rejected": -843.049560546875, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.794572830200195, |
|
"rewards/margins": 27.74661636352539, |
|
"rewards/rejected": -12.952045440673828, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.08130081300813, |
|
"grad_norm": 0.00033748566056601703, |
|
"learning_rate": 0.00017256797416361362, |
|
"logits/chosen": 0.10465478897094727, |
|
"logits/rejected": 0.11954197287559509, |
|
"logps/chosen": -770.0354614257812, |
|
"logps/rejected": -705.5811767578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.188321113586426, |
|
"rewards/margins": 18.007652282714844, |
|
"rewards/rejected": -9.819330215454102, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.0975609756097562, |
|
"grad_norm": 0.4934139549732208, |
|
"learning_rate": 0.00017164567402983152, |
|
"logits/chosen": 0.7908147573471069, |
|
"logits/rejected": 1.0772439241409302, |
|
"logps/chosen": -869.843017578125, |
|
"logps/rejected": -729.0626831054688, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.537101745605469, |
|
"rewards/margins": 12.491724014282227, |
|
"rewards/rejected": -3.9546217918395996, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.113821138211382, |
|
"grad_norm": 2.1183014098369313e-07, |
|
"learning_rate": 0.00017071067811865476, |
|
"logits/chosen": 0.6217237710952759, |
|
"logits/rejected": 0.5386490225791931, |
|
"logps/chosen": -799.1664428710938, |
|
"logps/rejected": -820.0735473632812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.295455932617188, |
|
"rewards/margins": 30.9702091217041, |
|
"rewards/rejected": -18.674753189086914, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.1300813008130082, |
|
"grad_norm": 7.591093162773177e-05, |
|
"learning_rate": 0.0001697631521134985, |
|
"logits/chosen": 1.664866328239441, |
|
"logits/rejected": 1.980355978012085, |
|
"logps/chosen": -1113.451416015625, |
|
"logps/rejected": -825.9473876953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.451591491699219, |
|
"rewards/margins": 29.68605613708496, |
|
"rewards/rejected": -18.23446273803711, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.146341463414634, |
|
"grad_norm": 4.4439241264626617e-07, |
|
"learning_rate": 0.00016880326391813916, |
|
"logits/chosen": -0.02196294069290161, |
|
"logits/rejected": 0.18253503739833832, |
|
"logps/chosen": -661.0505981445312, |
|
"logps/rejected": -834.158203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.791834831237793, |
|
"rewards/margins": 28.233205795288086, |
|
"rewards/rejected": -18.441370010375977, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.1626016260162602, |
|
"grad_norm": 8.045230060815811e-05, |
|
"learning_rate": 0.00016783118362696163, |
|
"logits/chosen": 0.24465110898017883, |
|
"logits/rejected": 0.2313007265329361, |
|
"logps/chosen": -715.2831420898438, |
|
"logps/rejected": -1050.01171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.176504611968994, |
|
"rewards/margins": 19.875812530517578, |
|
"rewards/rejected": -15.699307441711426, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.1788617886178863, |
|
"grad_norm": 5.927664005866973e-06, |
|
"learning_rate": 0.00016684708349481804, |
|
"logits/chosen": 1.5342342853546143, |
|
"logits/rejected": 2.0414443016052246, |
|
"logps/chosen": -1195.0989990234375, |
|
"logps/rejected": -652.9114990234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.883450508117676, |
|
"rewards/margins": 19.403560638427734, |
|
"rewards/rejected": -10.520109176635742, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.1951219512195121, |
|
"grad_norm": 1.7679340089671314e-05, |
|
"learning_rate": 0.00016585113790650388, |
|
"logits/chosen": 0.13918209075927734, |
|
"logits/rejected": 0.21283580362796783, |
|
"logps/chosen": -937.8267211914062, |
|
"logps/rejected": -958.693115234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.578910827636719, |
|
"rewards/margins": 31.493125915527344, |
|
"rewards/rejected": -21.914215087890625, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.2113821138211383, |
|
"grad_norm": 9.838218102231622e-05, |
|
"learning_rate": 0.00016484352334585653, |
|
"logits/chosen": 1.7902581691741943, |
|
"logits/rejected": 1.8008999824523926, |
|
"logps/chosen": -898.8333740234375, |
|
"logps/rejected": -869.8264770507812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.36214828491211, |
|
"rewards/margins": 23.546051025390625, |
|
"rewards/rejected": -15.183902740478516, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.2276422764227641, |
|
"grad_norm": 0.00042859543464146554, |
|
"learning_rate": 0.00016382441836448202, |
|
"logits/chosen": 0.40593788027763367, |
|
"logits/rejected": 0.24162518978118896, |
|
"logps/chosen": -713.95263671875, |
|
"logps/rejected": -873.909423828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.870103359222412, |
|
"rewards/margins": 17.166872024536133, |
|
"rewards/rejected": -13.296768188476562, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.2439024390243902, |
|
"grad_norm": 0.0007489994168281555, |
|
"learning_rate": 0.0001627940035501152, |
|
"logits/chosen": 1.2316575050354004, |
|
"logits/rejected": 1.2072526216506958, |
|
"logps/chosen": -961.4344482421875, |
|
"logps/rejected": -1073.3685302734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.6541852951049805, |
|
"rewards/margins": 27.57451057434082, |
|
"rewards/rejected": -20.920326232910156, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.2601626016260163, |
|
"grad_norm": 3.269678200013004e-05, |
|
"learning_rate": 0.0001617524614946192, |
|
"logits/chosen": 0.06140974164009094, |
|
"logits/rejected": 0.11881747841835022, |
|
"logps/chosen": -900.48876953125, |
|
"logps/rejected": -1085.7061767578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6411392688751221, |
|
"rewards/margins": 19.955745697021484, |
|
"rewards/rejected": -19.314605712890625, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.2764227642276422, |
|
"grad_norm": 3.813441480815527e-06, |
|
"learning_rate": 0.0001606999767616298, |
|
"logits/chosen": 1.1457127332687378, |
|
"logits/rejected": 0.8977339267730713, |
|
"logps/chosen": -757.8355712890625, |
|
"logps/rejected": -838.0936279296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.651698112487793, |
|
"rewards/margins": 31.715707778930664, |
|
"rewards/rejected": -23.064010620117188, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.2926829268292683, |
|
"grad_norm": 2.5300651032011956e-05, |
|
"learning_rate": 0.00015963673585385016, |
|
"logits/chosen": -0.5050560235977173, |
|
"logits/rejected": -0.5818659067153931, |
|
"logps/chosen": -833.4871826171875, |
|
"logps/rejected": -1177.144287109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1878601312637329, |
|
"rewards/margins": 28.51848602294922, |
|
"rewards/rejected": -28.330625534057617, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.3089430894308944, |
|
"grad_norm": 6.81912133586593e-05, |
|
"learning_rate": 0.00015856292718000235, |
|
"logits/chosen": 1.6245973110198975, |
|
"logits/rejected": 1.942758560180664, |
|
"logps/chosen": -925.15966796875, |
|
"logps/rejected": -746.8193969726562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.29654598236084, |
|
"rewards/margins": 26.77484893798828, |
|
"rewards/rejected": -17.478303909301758, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.3252032520325203, |
|
"grad_norm": 1.1350484783179127e-06, |
|
"learning_rate": 0.0001574787410214407, |
|
"logits/chosen": 0.8831353187561035, |
|
"logits/rejected": 1.1747808456420898, |
|
"logps/chosen": -812.7021484375, |
|
"logps/rejected": -1058.893310546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.832669258117676, |
|
"rewards/margins": 33.81871795654297, |
|
"rewards/rejected": -29.986047744750977, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.3414634146341464, |
|
"grad_norm": 7.43222301480273e-07, |
|
"learning_rate": 0.0001563843694984336, |
|
"logits/chosen": 1.199593424797058, |
|
"logits/rejected": 1.2259372472763062, |
|
"logps/chosen": -846.8779296875, |
|
"logps/rejected": -1035.00244140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.645470142364502, |
|
"rewards/margins": 35.18595886230469, |
|
"rewards/rejected": -30.540489196777344, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.3577235772357723, |
|
"grad_norm": 4.4819596951128915e-05, |
|
"learning_rate": 0.00015528000653611935, |
|
"logits/chosen": 1.7928721904754639, |
|
"logits/rejected": 2.1661128997802734, |
|
"logps/chosen": -932.3726806640625, |
|
"logps/rejected": -844.2169189453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.103044509887695, |
|
"rewards/margins": 21.569711685180664, |
|
"rewards/rejected": -17.4666690826416, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.3739837398373984, |
|
"grad_norm": 7.042069594120903e-09, |
|
"learning_rate": 0.0001541658478301421, |
|
"logits/chosen": 0.2531038522720337, |
|
"logits/rejected": 0.2639998197555542, |
|
"logps/chosen": -1010.8427734375, |
|
"logps/rejected": -1247.974609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7464678287506104, |
|
"rewards/margins": 30.038406372070312, |
|
"rewards/rejected": -29.291942596435547, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.3902439024390243, |
|
"grad_norm": 2.4762075057083166e-08, |
|
"learning_rate": 0.00015304209081197425, |
|
"logits/chosen": 2.228158473968506, |
|
"logits/rejected": 2.7146129608154297, |
|
"logps/chosen": -1221.494384765625, |
|
"logps/rejected": -882.4944458007812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.98241901397705, |
|
"rewards/margins": 33.62451171875, |
|
"rewards/rejected": -19.642091751098633, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.4065040650406504, |
|
"grad_norm": 3.7480401715583866e-06, |
|
"learning_rate": 0.00015190893461393108, |
|
"logits/chosen": 1.5811924934387207, |
|
"logits/rejected": 2.0754153728485107, |
|
"logps/chosen": -958.1056518554688, |
|
"logps/rejected": -741.9910278320312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.536327362060547, |
|
"rewards/margins": 32.516456604003906, |
|
"rewards/rejected": -17.980131149291992, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.4227642276422765, |
|
"grad_norm": 1.9098067696177168e-06, |
|
"learning_rate": 0.000150766580033884, |
|
"logits/chosen": 1.6907765865325928, |
|
"logits/rejected": 1.9654494524002075, |
|
"logps/chosen": -1132.77978515625, |
|
"logps/rejected": -908.571044921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.22573709487915, |
|
"rewards/margins": 34.5124626159668, |
|
"rewards/rejected": -29.286724090576172, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.4390243902439024, |
|
"grad_norm": 1.1447126780694816e-05, |
|
"learning_rate": 0.00014961522949967886, |
|
"logits/chosen": 0.9937865734100342, |
|
"logits/rejected": 1.2049672603607178, |
|
"logps/chosen": -739.3209838867188, |
|
"logps/rejected": -1007.2611083984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.235821723937988, |
|
"rewards/margins": 34.75508499145508, |
|
"rewards/rejected": -24.51926040649414, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.4552845528455285, |
|
"grad_norm": 1.5996234026260936e-07, |
|
"learning_rate": 0.00014845508703326504, |
|
"logits/chosen": 1.005773663520813, |
|
"logits/rejected": 0.9975143671035767, |
|
"logps/chosen": -912.9910278320312, |
|
"logps/rejected": -1205.926513671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.948190212249756, |
|
"rewards/margins": 31.25839614868164, |
|
"rewards/rejected": -28.310203552246094, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.4715447154471546, |
|
"grad_norm": 1.9003784473170526e-05, |
|
"learning_rate": 0.00014728635821454255, |
|
"logits/chosen": 2.574889659881592, |
|
"logits/rejected": 2.5759711265563965, |
|
"logps/chosen": -915.0121459960938, |
|
"logps/rejected": -623.8654174804688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.099142074584961, |
|
"rewards/margins": 31.881959915161133, |
|
"rewards/rejected": -16.782817840576172, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.4878048780487805, |
|
"grad_norm": 4.1650441318097364e-08, |
|
"learning_rate": 0.0001461092501449326, |
|
"logits/chosen": 1.0031987428665161, |
|
"logits/rejected": 1.2941582202911377, |
|
"logps/chosen": -823.1492309570312, |
|
"logps/rejected": -1055.567626953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4376673698425293, |
|
"rewards/margins": 26.05483055114746, |
|
"rewards/rejected": -23.617162704467773, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.5040650406504064, |
|
"grad_norm": 4.165614697626552e-08, |
|
"learning_rate": 0.00014492397141067887, |
|
"logits/chosen": 0.8133536577224731, |
|
"logits/rejected": 1.0407506227493286, |
|
"logps/chosen": -961.2422485351562, |
|
"logps/rejected": -1156.6856689453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8701601028442383, |
|
"rewards/margins": 33.655277252197266, |
|
"rewards/rejected": -31.785114288330078, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.5203252032520327, |
|
"grad_norm": 3.824939540209016e-06, |
|
"learning_rate": 0.00014373073204588556, |
|
"logits/chosen": 2.6779818534851074, |
|
"logits/rejected": 2.7686123847961426, |
|
"logps/chosen": -1121.3564453125, |
|
"logps/rejected": -698.586669921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.171032905578613, |
|
"rewards/margins": 27.788890838623047, |
|
"rewards/rejected": -17.617855072021484, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.5365853658536586, |
|
"grad_norm": 3.954168641939759e-05, |
|
"learning_rate": 0.0001425297434952987, |
|
"logits/chosen": 0.22321929037570953, |
|
"logits/rejected": 0.2271191030740738, |
|
"logps/chosen": -671.6175537109375, |
|
"logps/rejected": -1141.6953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.185655355453491, |
|
"rewards/margins": 26.3375301361084, |
|
"rewards/rejected": -28.52318572998047, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.5528455284552845, |
|
"grad_norm": 6.408844566152538e-10, |
|
"learning_rate": 0.00014132121857683783, |
|
"logits/chosen": 1.1100516319274902, |
|
"logits/rejected": 1.0310027599334717, |
|
"logps/chosen": -995.9828491210938, |
|
"logps/rejected": -1024.00244140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.543378829956055, |
|
"rewards/margins": 33.411643981933594, |
|
"rewards/rejected": -24.868263244628906, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.5691056910569106, |
|
"grad_norm": 6.710484399263805e-07, |
|
"learning_rate": 0.00014010537144388416, |
|
"logits/chosen": 0.19941049814224243, |
|
"logits/rejected": 0.2904074490070343, |
|
"logps/chosen": -580.1328125, |
|
"logps/rejected": -1122.187744140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.563772439956665, |
|
"rewards/margins": 23.33687400817871, |
|
"rewards/rejected": -23.900646209716797, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.5853658536585367, |
|
"grad_norm": 2.6136473252336145e-07, |
|
"learning_rate": 0.00013888241754733208, |
|
"logits/chosen": 0.8143081665039062, |
|
"logits/rejected": 1.183271050453186, |
|
"logps/chosen": -973.23583984375, |
|
"logps/rejected": -904.20556640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.3894622325897217, |
|
"rewards/margins": 23.915855407714844, |
|
"rewards/rejected": -20.526391983032227, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.6016260162601625, |
|
"grad_norm": 1.735031582938973e-05, |
|
"learning_rate": 0.00013765257359741063, |
|
"logits/chosen": 0.8897725343704224, |
|
"logits/rejected": 0.8052040338516235, |
|
"logps/chosen": -771.9832763671875, |
|
"logps/rejected": -874.3773193359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.943796157836914, |
|
"rewards/margins": 29.497058868408203, |
|
"rewards/rejected": -22.55326271057129, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.6178861788617886, |
|
"grad_norm": 1.2570103535836097e-07, |
|
"learning_rate": 0.00013641605752528224, |
|
"logits/chosen": 1.0415421724319458, |
|
"logits/rejected": 1.3014307022094727, |
|
"logps/chosen": -918.8525390625, |
|
"logps/rejected": -955.0538330078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.44915771484375, |
|
"rewards/margins": 33.4973258972168, |
|
"rewards/rejected": -26.04817008972168, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6341463414634148, |
|
"grad_norm": 3.719053154327412e-07, |
|
"learning_rate": 0.0001351730884444245, |
|
"logits/chosen": 0.4167521595954895, |
|
"logits/rejected": 0.3483416438102722, |
|
"logps/chosen": -604.3650512695312, |
|
"logps/rejected": -1362.02587890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4617691040039062, |
|
"rewards/margins": 44.77275466918945, |
|
"rewards/rejected": -47.23452377319336, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.6504065040650406, |
|
"grad_norm": 1.487089633656069e-07, |
|
"learning_rate": 0.00013392388661180303, |
|
"logits/chosen": 0.9698238968849182, |
|
"logits/rejected": 1.1324440240859985, |
|
"logps/chosen": -742.9386596679688, |
|
"logps/rejected": -905.581298828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.503021717071533, |
|
"rewards/margins": 32.864501953125, |
|
"rewards/rejected": -27.361482620239258, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.00015168750542216003, |
|
"learning_rate": 0.0001326686733888413, |
|
"logits/chosen": 2.734503746032715, |
|
"logits/rejected": 2.7868616580963135, |
|
"logps/chosen": -845.9635009765625, |
|
"logps/rejected": -674.9261474609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.455021858215332, |
|
"rewards/margins": 21.768619537353516, |
|
"rewards/rejected": -15.3135986328125, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.6829268292682928, |
|
"grad_norm": 5.236762717686361e-06, |
|
"learning_rate": 0.0001314076712021949, |
|
"logits/chosen": 0.8474237322807312, |
|
"logits/rejected": 1.0795999765396118, |
|
"logps/chosen": -844.8881225585938, |
|
"logps/rejected": -1026.413818359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.01052474975586, |
|
"rewards/margins": 34.12953186035156, |
|
"rewards/rejected": -25.119007110595703, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.6991869918699187, |
|
"grad_norm": 4.3044991571150604e-08, |
|
"learning_rate": 0.000130141103504337, |
|
"logits/chosen": 1.0104427337646484, |
|
"logits/rejected": 0.809540867805481, |
|
"logps/chosen": -806.0650634765625, |
|
"logps/rejected": -1019.7612915039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.093156814575195, |
|
"rewards/margins": 29.144248962402344, |
|
"rewards/rejected": -22.051090240478516, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.7154471544715446, |
|
"grad_norm": 6.236035243745164e-09, |
|
"learning_rate": 0.0001288691947339621, |
|
"logits/chosen": 0.26283663511276245, |
|
"logits/rejected": 0.21620601415634155, |
|
"logps/chosen": -764.7117919921875, |
|
"logps/rejected": -1384.037353515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5661294460296631, |
|
"rewards/margins": 35.904212951660156, |
|
"rewards/rejected": -36.470340728759766, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.7317073170731707, |
|
"grad_norm": 0.0002312189608346671, |
|
"learning_rate": 0.00012759217027621505, |
|
"logits/chosen": 0.8271576166152954, |
|
"logits/rejected": 0.8352835178375244, |
|
"logps/chosen": -639.9276123046875, |
|
"logps/rejected": -721.3944702148438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.1902108192443848, |
|
"rewards/margins": 19.32707977294922, |
|
"rewards/rejected": -16.13686752319336, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.7479674796747968, |
|
"grad_norm": 5.53435963723814e-09, |
|
"learning_rate": 0.00012631025642275212, |
|
"logits/chosen": 0.9540997743606567, |
|
"logits/rejected": 1.0216646194458008, |
|
"logps/chosen": -920.1544189453125, |
|
"logps/rejected": -919.189453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.917628288269043, |
|
"rewards/margins": 31.62308692932129, |
|
"rewards/rejected": -22.705459594726562, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.7642276422764227, |
|
"grad_norm": 5.7604488290508016e-08, |
|
"learning_rate": 0.00012502368033164176, |
|
"logits/chosen": 1.9378834962844849, |
|
"logits/rejected": 2.0527262687683105, |
|
"logps/chosen": -616.1436767578125, |
|
"logps/rejected": -781.5704956054688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.269429683685303, |
|
"rewards/margins": 27.761857986450195, |
|
"rewards/rejected": -23.492429733276367, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.7804878048780488, |
|
"grad_norm": 3.0333463740817024e-08, |
|
"learning_rate": 0.0001237326699871115, |
|
"logits/chosen": 0.784665584564209, |
|
"logits/rejected": 1.0081039667129517, |
|
"logps/chosen": -864.7948608398438, |
|
"logps/rejected": -946.906982421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.097116470336914, |
|
"rewards/margins": 30.87978172302246, |
|
"rewards/rejected": -24.78266716003418, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.796747967479675, |
|
"grad_norm": 3.1582476367475465e-07, |
|
"learning_rate": 0.00012243745415914883, |
|
"logits/chosen": -0.5353690385818481, |
|
"logits/rejected": -0.6592149138450623, |
|
"logps/chosen": -722.5419921875, |
|
"logps/rejected": -1070.7403564453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3367981910705566, |
|
"rewards/margins": 27.85375213623047, |
|
"rewards/rejected": -29.190549850463867, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.8130081300813008, |
|
"grad_norm": 2.334864745989762e-07, |
|
"learning_rate": 0.00012113826236296244, |
|
"logits/chosen": 1.986028790473938, |
|
"logits/rejected": 2.0000312328338623, |
|
"logps/chosen": -1034.116455078125, |
|
"logps/rejected": -924.2823486328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.337306022644043, |
|
"rewards/margins": 34.88032531738281, |
|
"rewards/rejected": -25.54302215576172, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.8292682926829267, |
|
"grad_norm": 1.956110463652294e-05, |
|
"learning_rate": 0.0001198353248183118, |
|
"logits/chosen": 1.1676946878433228, |
|
"logits/rejected": 1.3392938375473022, |
|
"logps/chosen": -839.8267211914062, |
|
"logps/rejected": -966.1685180664062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.940967082977295, |
|
"rewards/margins": 33.268653869628906, |
|
"rewards/rejected": -28.327686309814453, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.845528455284553, |
|
"grad_norm": 1.2582788144754886e-07, |
|
"learning_rate": 0.00011852887240871145, |
|
"logits/chosen": 1.7121946811676025, |
|
"logits/rejected": 1.834307074546814, |
|
"logps/chosen": -825.6591796875, |
|
"logps/rejected": -910.5638427734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.057826519012451, |
|
"rewards/margins": 26.722637176513672, |
|
"rewards/rejected": -21.664812088012695, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.8617886178861789, |
|
"grad_norm": 3.8171506275830325e-06, |
|
"learning_rate": 0.00011721913664051813, |
|
"logits/chosen": 0.09213051199913025, |
|
"logits/rejected": 0.2805327773094177, |
|
"logps/chosen": -785.7156982421875, |
|
"logps/rejected": -1021.4864501953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.823834240436554, |
|
"rewards/margins": 25.152664184570312, |
|
"rewards/rejected": -24.32883071899414, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.8780487804878048, |
|
"grad_norm": 2.6529932029006886e-08, |
|
"learning_rate": 0.00011590634960190721, |
|
"logits/chosen": -0.5069230198860168, |
|
"logits/rejected": -0.5888826847076416, |
|
"logps/chosen": -707.7698974609375, |
|
"logps/rejected": -1266.01904296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.027275919914245605, |
|
"rewards/margins": 27.478078842163086, |
|
"rewards/rejected": -27.450803756713867, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.8943089430894309, |
|
"grad_norm": 9.935014304573997e-07, |
|
"learning_rate": 0.00011459074392174618, |
|
"logits/chosen": 1.5636107921600342, |
|
"logits/rejected": 1.8575186729431152, |
|
"logps/chosen": -1191.93359375, |
|
"logps/rejected": -990.843505859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.92037582397461, |
|
"rewards/margins": 39.89407730102539, |
|
"rewards/rejected": -26.973697662353516, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.910569105691057, |
|
"grad_norm": 1.2037819942634087e-05, |
|
"learning_rate": 0.00011327255272837221, |
|
"logits/chosen": 1.0499224662780762, |
|
"logits/rejected": 0.9787989854812622, |
|
"logps/chosen": -971.0214233398438, |
|
"logps/rejected": -877.3848876953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.003582715988159, |
|
"rewards/margins": 20.236526489257812, |
|
"rewards/rejected": -18.23294448852539, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.9268292682926829, |
|
"grad_norm": 1.8166872450819938e-06, |
|
"learning_rate": 0.00011195200960828139, |
|
"logits/chosen": 1.6961169242858887, |
|
"logits/rejected": 2.2738733291625977, |
|
"logps/chosen": -1074.953369140625, |
|
"logps/rejected": -778.5762939453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.411404609680176, |
|
"rewards/margins": 25.984111785888672, |
|
"rewards/rejected": -17.57270622253418, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.943089430894309, |
|
"grad_norm": 0.002434302121400833, |
|
"learning_rate": 0.00011062934856473655, |
|
"logits/chosen": 0.24992449581623077, |
|
"logits/rejected": 0.18503600358963013, |
|
"logps/chosen": -811.4505615234375, |
|
"logps/rejected": -1088.271240234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.826874017715454, |
|
"rewards/margins": 32.1160888671875, |
|
"rewards/rejected": -29.289215087890625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.959349593495935, |
|
"grad_norm": 3.818647797970698e-08, |
|
"learning_rate": 0.00010930480397630145, |
|
"logits/chosen": 1.889555811882019, |
|
"logits/rejected": 2.055070400238037, |
|
"logps/chosen": -1008.6806640625, |
|
"logps/rejected": -997.8306884765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.727387428283691, |
|
"rewards/margins": 32.15311813354492, |
|
"rewards/rejected": -27.42573356628418, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.975609756097561, |
|
"grad_norm": 4.203374359690315e-08, |
|
"learning_rate": 0.00010797861055530831, |
|
"logits/chosen": 0.33176711201667786, |
|
"logits/rejected": 0.2883341312408447, |
|
"logps/chosen": -764.9257202148438, |
|
"logps/rejected": -1157.33642578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.931965708732605, |
|
"rewards/margins": 29.445417404174805, |
|
"rewards/rejected": -30.377384185791016, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.9918699186991868, |
|
"grad_norm": 0.0003661888767965138, |
|
"learning_rate": 0.00010665100330626625, |
|
"logits/chosen": 2.023690700531006, |
|
"logits/rejected": 2.543468475341797, |
|
"logps/chosen": -1341.046875, |
|
"logps/rejected": -852.0292358398438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.60735034942627, |
|
"rewards/margins": 33.2912483215332, |
|
"rewards/rejected": -19.68389892578125, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.4813576854066923e-07, |
|
"learning_rate": 0.00010532221748421787, |
|
"logits/chosen": 2.4457969665527344, |
|
"logits/rejected": 2.6656110286712646, |
|
"logps/chosen": -1094.49560546875, |
|
"logps/rejected": -546.4738159179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.480463027954102, |
|
"rewards/margins": 21.069480895996094, |
|
"rewards/rejected": -8.589018821716309, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 2.016260162601626, |
|
"grad_norm": 1.126546635532577e-06, |
|
"learning_rate": 0.00010399248855305176, |
|
"logits/chosen": 2.4012436866760254, |
|
"logits/rejected": 2.676316022872925, |
|
"logps/chosen": -1016.7650756835938, |
|
"logps/rejected": -629.0308227539062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.325331687927246, |
|
"rewards/margins": 25.8978214263916, |
|
"rewards/rejected": -15.572492599487305, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.032520325203252, |
|
"grad_norm": 3.7227684401841543e-07, |
|
"learning_rate": 0.00010266205214377748, |
|
"logits/chosen": 0.39638862013816833, |
|
"logits/rejected": 0.4992075562477112, |
|
"logps/chosen": -648.75, |
|
"logps/rejected": -1030.2962646484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0494887828826904, |
|
"rewards/margins": 27.84441566467285, |
|
"rewards/rejected": -28.893905639648438, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.048780487804878, |
|
"grad_norm": 8.69819905346958e-06, |
|
"learning_rate": 0.00010133114401277139, |
|
"logits/chosen": 1.1746121644973755, |
|
"logits/rejected": 1.2504253387451172, |
|
"logps/chosen": -591.2756958007812, |
|
"logps/rejected": -956.6802978515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.541916370391846, |
|
"rewards/margins": 27.245861053466797, |
|
"rewards/rejected": -20.70394515991211, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.065040650406504, |
|
"grad_norm": 8.625072211998486e-08, |
|
"learning_rate": 0.0001, |
|
"logits/chosen": 0.2615965008735657, |
|
"logits/rejected": 0.2532449960708618, |
|
"logps/chosen": -716.9295654296875, |
|
"logps/rejected": -1199.100830078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7087082862854004, |
|
"rewards/margins": 39.123931884765625, |
|
"rewards/rejected": -36.415225982666016, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 2.08130081300813, |
|
"grad_norm": 1.545291006266325e-08, |
|
"learning_rate": 9.866885598722863e-05, |
|
"logits/chosen": 0.8479726314544678, |
|
"logits/rejected": 0.9798691272735596, |
|
"logps/chosen": -1156.03271484375, |
|
"logps/rejected": -1160.611572265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.804194450378418, |
|
"rewards/margins": 37.919864654541016, |
|
"rewards/rejected": -32.11566925048828, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.097560975609756, |
|
"grad_norm": 2.0759840481332503e-05, |
|
"learning_rate": 9.733794785622253e-05, |
|
"logits/chosen": 1.8465713262557983, |
|
"logits/rejected": 1.999639868736267, |
|
"logps/chosen": -1016.758056640625, |
|
"logps/rejected": -908.3006591796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.583747863769531, |
|
"rewards/margins": 40.76252746582031, |
|
"rewards/rejected": -27.178781509399414, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.113821138211382, |
|
"grad_norm": 9.728922805152251e-07, |
|
"learning_rate": 9.600751144694827e-05, |
|
"logits/chosen": 0.35091227293014526, |
|
"logits/rejected": 0.1413639485836029, |
|
"logps/chosen": -736.62158203125, |
|
"logps/rejected": -1333.1005859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6688979268074036, |
|
"rewards/margins": 32.4841423034668, |
|
"rewards/rejected": -33.153038024902344, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 2.130081300813008, |
|
"grad_norm": 8.801747242159763e-08, |
|
"learning_rate": 9.467778251578217e-05, |
|
"logits/chosen": 0.14253884553909302, |
|
"logits/rejected": 0.12810415029525757, |
|
"logps/chosen": -657.0384521484375, |
|
"logps/rejected": -1078.23388671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.2970056533813477, |
|
"rewards/margins": 37.40379333496094, |
|
"rewards/rejected": -35.106788635253906, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.1463414634146343, |
|
"grad_norm": 1.7610488067809627e-10, |
|
"learning_rate": 9.334899669373379e-05, |
|
"logits/chosen": 1.6143238544464111, |
|
"logits/rejected": 1.877280354499817, |
|
"logps/chosen": -1136.3955078125, |
|
"logps/rejected": -927.5528564453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.586950302124023, |
|
"rewards/margins": 33.43904113769531, |
|
"rewards/rejected": -25.852088928222656, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 2.16260162601626, |
|
"grad_norm": 1.4042621288012924e-08, |
|
"learning_rate": 9.202138944469168e-05, |
|
"logits/chosen": 0.2330748736858368, |
|
"logits/rejected": 0.10119885206222534, |
|
"logps/chosen": -655.632568359375, |
|
"logps/rejected": -1187.6663818359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.547595024108887, |
|
"rewards/margins": 44.532859802246094, |
|
"rewards/rejected": -39.985267639160156, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.178861788617886, |
|
"grad_norm": 5.396844926508493e-07, |
|
"learning_rate": 9.069519602369856e-05, |
|
"logits/chosen": 0.9299556016921997, |
|
"logits/rejected": 1.2056376934051514, |
|
"logps/chosen": -1106.3253173828125, |
|
"logps/rejected": -1032.9913330078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.694305419921875, |
|
"rewards/margins": 29.57136344909668, |
|
"rewards/rejected": -21.877056121826172, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.1951219512195124, |
|
"grad_norm": 4.877493847743608e-05, |
|
"learning_rate": 8.937065143526347e-05, |
|
"logits/chosen": 0.9594597816467285, |
|
"logits/rejected": 1.179040551185608, |
|
"logps/chosen": -1040.9154052734375, |
|
"logps/rejected": -1039.5325927734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.09385871887207, |
|
"rewards/margins": 31.479862213134766, |
|
"rewards/rejected": -22.386003494262695, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.2113821138211383, |
|
"grad_norm": 2.6771798111724365e-09, |
|
"learning_rate": 8.804799039171863e-05, |
|
"logits/chosen": 1.9819426536560059, |
|
"logits/rejected": 2.158479690551758, |
|
"logps/chosen": -1134.637451171875, |
|
"logps/rejected": -965.3215942382812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.446025371551514, |
|
"rewards/margins": 35.7391357421875, |
|
"rewards/rejected": -29.293109893798828, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 2.227642276422764, |
|
"grad_norm": 1.1452775652287528e-06, |
|
"learning_rate": 8.672744727162781e-05, |
|
"logits/chosen": 0.8104963302612305, |
|
"logits/rejected": 0.8570412993431091, |
|
"logps/chosen": -1031.75634765625, |
|
"logps/rejected": -923.9554443359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.884162902832031, |
|
"rewards/margins": 38.34416198730469, |
|
"rewards/rejected": -25.459999084472656, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.2439024390243905, |
|
"grad_norm": 6.028212928832488e-10, |
|
"learning_rate": 8.540925607825384e-05, |
|
"logits/chosen": 0.17743420600891113, |
|
"logits/rejected": 0.07549530267715454, |
|
"logps/chosen": -991.336669921875, |
|
"logps/rejected": -1199.3358154296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.6160173416137695, |
|
"rewards/margins": 32.7667236328125, |
|
"rewards/rejected": -26.150705337524414, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 2.2601626016260163, |
|
"grad_norm": 2.8898223263240652e-06, |
|
"learning_rate": 8.409365039809281e-05, |
|
"logits/chosen": 0.33150625228881836, |
|
"logits/rejected": 0.3002138137817383, |
|
"logps/chosen": -775.9059448242188, |
|
"logps/rejected": -1114.199462890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.3382678031921387, |
|
"rewards/margins": 34.20747375488281, |
|
"rewards/rejected": -30.86920738220215, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.2764227642276422, |
|
"grad_norm": 4.3099689719383605e-06, |
|
"learning_rate": 8.27808633594819e-05, |
|
"logits/chosen": 0.7698372602462769, |
|
"logits/rejected": 1.1860891580581665, |
|
"logps/chosen": -843.12646484375, |
|
"logps/rejected": -918.1942749023438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.282138347625732, |
|
"rewards/margins": 23.585163116455078, |
|
"rewards/rejected": -19.303022384643555, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.292682926829268, |
|
"grad_norm": 3.220544385840185e-06, |
|
"learning_rate": 8.147112759128859e-05, |
|
"logits/chosen": 0.8874784708023071, |
|
"logits/rejected": 0.9459190368652344, |
|
"logps/chosen": -1038.4764404296875, |
|
"logps/rejected": -1069.7886962890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.8034682273864746, |
|
"rewards/margins": 26.194406509399414, |
|
"rewards/rejected": -22.390939712524414, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.3089430894308944, |
|
"grad_norm": 0.00022328611521515995, |
|
"learning_rate": 8.016467518168821e-05, |
|
"logits/chosen": 2.493546724319458, |
|
"logits/rejected": 2.539395332336426, |
|
"logps/chosen": -893.9352416992188, |
|
"logps/rejected": -696.1506958007812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.509476661682129, |
|
"rewards/margins": 21.499731063842773, |
|
"rewards/rejected": -12.990255355834961, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.3252032520325203, |
|
"grad_norm": 0.00013990582374390215, |
|
"learning_rate": 7.886173763703757e-05, |
|
"logits/chosen": 0.21920743584632874, |
|
"logits/rejected": 0.28335481882095337, |
|
"logps/chosen": -728.2202758789062, |
|
"logps/rejected": -1100.657958984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.098618507385254, |
|
"rewards/margins": 33.223487854003906, |
|
"rewards/rejected": -28.124868392944336, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.341463414634146, |
|
"grad_norm": 2.5570125217200257e-05, |
|
"learning_rate": 7.756254584085121e-05, |
|
"logits/chosen": 1.576183557510376, |
|
"logits/rejected": 2.116095542907715, |
|
"logps/chosen": -1211.36767578125, |
|
"logps/rejected": -841.2113037109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.20867919921875, |
|
"rewards/margins": 23.45158576965332, |
|
"rewards/rejected": -15.242904663085938, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.3577235772357725, |
|
"grad_norm": 1.5557947818933826e-08, |
|
"learning_rate": 7.626733001288851e-05, |
|
"logits/chosen": 1.017463207244873, |
|
"logits/rejected": 1.2662559747695923, |
|
"logps/chosen": -1075.69677734375, |
|
"logps/rejected": -1051.0823974609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.859679937362671, |
|
"rewards/margins": 33.41606521606445, |
|
"rewards/rejected": -30.556386947631836, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 2.3739837398373984, |
|
"grad_norm": 1.1387073506341494e-08, |
|
"learning_rate": 7.497631966835828e-05, |
|
"logits/chosen": 1.214647889137268, |
|
"logits/rejected": 0.9382815957069397, |
|
"logps/chosen": -861.36181640625, |
|
"logps/rejected": -860.1260375976562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.3777055740356445, |
|
"rewards/margins": 31.344114303588867, |
|
"rewards/rejected": -23.966407775878906, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.3902439024390243, |
|
"grad_norm": 1.4444401131186169e-05, |
|
"learning_rate": 7.368974357724789e-05, |
|
"logits/chosen": 1.4694726467132568, |
|
"logits/rejected": 1.837304711341858, |
|
"logps/chosen": -828.1371459960938, |
|
"logps/rejected": -890.37548828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.28642868995666504, |
|
"rewards/margins": 23.24945068359375, |
|
"rewards/rejected": -22.963022232055664, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.40650406504065, |
|
"grad_norm": 8.854440380900996e-08, |
|
"learning_rate": 7.240782972378496e-05, |
|
"logits/chosen": 0.38753101229667664, |
|
"logits/rejected": 0.24646523594856262, |
|
"logps/chosen": -710.2447509765625, |
|
"logps/rejected": -1220.842041015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22469329833984375, |
|
"rewards/margins": 27.240110397338867, |
|
"rewards/rejected": -27.464805603027344, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 2.4227642276422765, |
|
"grad_norm": 0.0004863929934799671, |
|
"learning_rate": 7.113080526603792e-05, |
|
"logits/chosen": 0.851685106754303, |
|
"logits/rejected": 0.6417226195335388, |
|
"logps/chosen": -741.8690795898438, |
|
"logps/rejected": -1010.4365234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.342030048370361, |
|
"rewards/margins": 33.09426498413086, |
|
"rewards/rejected": -26.752235412597656, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 5.4216638091020286e-05, |
|
"learning_rate": 6.985889649566305e-05, |
|
"logits/chosen": 1.0506223440170288, |
|
"logits/rejected": 0.997691810131073, |
|
"logps/chosen": -695.2083740234375, |
|
"logps/rejected": -622.5052490234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.0346758365631104, |
|
"rewards/margins": 23.93063735961914, |
|
"rewards/rejected": -20.89596176147461, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 2.4552845528455283, |
|
"grad_norm": 1.0896185813180637e-05, |
|
"learning_rate": 6.859232879780515e-05, |
|
"logits/chosen": 0.6958073377609253, |
|
"logits/rejected": 0.7431595325469971, |
|
"logps/chosen": -946.8716430664062, |
|
"logps/rejected": -869.7786865234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.730717420578003, |
|
"rewards/margins": 25.248491287231445, |
|
"rewards/rejected": -22.517772674560547, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.4715447154471546, |
|
"grad_norm": 7.235275489847481e-08, |
|
"learning_rate": 6.73313266111587e-05, |
|
"logits/chosen": 1.8724164962768555, |
|
"logits/rejected": 2.186227560043335, |
|
"logps/chosen": -961.348876953125, |
|
"logps/rejected": -889.3941040039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.482477188110352, |
|
"rewards/margins": 33.20310974121094, |
|
"rewards/rejected": -24.720630645751953, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.4878048780487805, |
|
"grad_norm": 5.680619324266445e-06, |
|
"learning_rate": 6.607611338819697e-05, |
|
"logits/chosen": 0.2374384105205536, |
|
"logits/rejected": 0.2661726474761963, |
|
"logps/chosen": -884.477783203125, |
|
"logps/rejected": -1196.705810546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1550889015197754, |
|
"rewards/margins": 33.60582733154297, |
|
"rewards/rejected": -31.450740814208984, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.5040650406504064, |
|
"grad_norm": 0.00021473168453667313, |
|
"learning_rate": 6.48269115555755e-05, |
|
"logits/chosen": 1.6578993797302246, |
|
"logits/rejected": 1.9648597240447998, |
|
"logps/chosen": -1154.904541015625, |
|
"logps/rejected": -830.4815673828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.426295280456543, |
|
"rewards/margins": 29.979768753051758, |
|
"rewards/rejected": -20.5534725189209, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.5203252032520327, |
|
"grad_norm": 1.3903934359404957e-06, |
|
"learning_rate": 6.358394247471778e-05, |
|
"logits/chosen": 1.9553877115249634, |
|
"logits/rejected": 1.973337173461914, |
|
"logps/chosen": -982.8421630859375, |
|
"logps/rejected": -899.3438110351562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.616971969604492, |
|
"rewards/margins": 27.25063133239746, |
|
"rewards/rejected": -22.6336612701416, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.5365853658536586, |
|
"grad_norm": 4.822657047043322e-06, |
|
"learning_rate": 6.234742640258938e-05, |
|
"logits/chosen": 0.8568439483642578, |
|
"logits/rejected": 0.8998463749885559, |
|
"logps/chosen": -699.6088256835938, |
|
"logps/rejected": -1193.45751953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.211078643798828, |
|
"rewards/margins": 35.346927642822266, |
|
"rewards/rejected": -28.135848999023438, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 2.5528455284552845, |
|
"grad_norm": 1.5767127881094467e-10, |
|
"learning_rate": 6.111758245266794e-05, |
|
"logits/chosen": 0.2673335671424866, |
|
"logits/rejected": 0.40638232231140137, |
|
"logps/chosen": -872.9669189453125, |
|
"logps/rejected": -1310.6427001953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.302719116210938, |
|
"rewards/margins": 70.62458801269531, |
|
"rewards/rejected": -53.321868896484375, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 2.569105691056911, |
|
"grad_norm": 0.00041443470399826765, |
|
"learning_rate": 5.9894628556115854e-05, |
|
"logits/chosen": 0.14544445276260376, |
|
"logits/rejected": 0.3626626133918762, |
|
"logps/chosen": -622.1597900390625, |
|
"logps/rejected": -962.1544799804688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17218637466430664, |
|
"rewards/margins": 21.543460845947266, |
|
"rewards/rejected": -21.715648651123047, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.5853658536585367, |
|
"grad_norm": 2.103996763480609e-07, |
|
"learning_rate": 5.867878142316221e-05, |
|
"logits/chosen": 1.6551589965820312, |
|
"logits/rejected": 1.5491437911987305, |
|
"logps/chosen": -1024.2724609375, |
|
"logps/rejected": -868.7474975585938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.687625885009766, |
|
"rewards/margins": 29.73490333557129, |
|
"rewards/rejected": -21.047279357910156, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.6016260162601625, |
|
"grad_norm": 4.0969604242491187e-07, |
|
"learning_rate": 5.7470256504701347e-05, |
|
"logits/chosen": 1.521755576133728, |
|
"logits/rejected": 1.847412109375, |
|
"logps/chosen": -1056.821533203125, |
|
"logps/rejected": -826.6946411132812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.350458145141602, |
|
"rewards/margins": 27.10157012939453, |
|
"rewards/rejected": -17.751113891601562, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 2.617886178861789, |
|
"grad_norm": 5.504219870999805e-07, |
|
"learning_rate": 5.626926795411447e-05, |
|
"logits/chosen": 0.2913011908531189, |
|
"logits/rejected": 0.4079492688179016, |
|
"logps/chosen": -718.0723876953125, |
|
"logps/rejected": -1118.736083984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.049485206604004, |
|
"rewards/margins": 43.513614654541016, |
|
"rewards/rejected": -40.46412658691406, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.6341463414634148, |
|
"grad_norm": 7.391007805779282e-10, |
|
"learning_rate": 5.507602858932113e-05, |
|
"logits/chosen": 0.13623125851154327, |
|
"logits/rejected": 0.14287753403186798, |
|
"logps/chosen": -709.7506103515625, |
|
"logps/rejected": -943.9478759765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.784420967102051, |
|
"rewards/margins": 28.368255615234375, |
|
"rewards/rejected": -24.583837509155273, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 2.6504065040650406, |
|
"grad_norm": 2.608588545172097e-07, |
|
"learning_rate": 5.38907498550674e-05, |
|
"logits/chosen": 0.3549523949623108, |
|
"logits/rejected": 0.2945078909397125, |
|
"logps/chosen": -627.5148315429688, |
|
"logps/rejected": -970.0422973632812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.014554023742676, |
|
"rewards/margins": 28.548900604248047, |
|
"rewards/rejected": -24.534347534179688, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 2.4691764188844445e-09, |
|
"learning_rate": 5.27136417854575e-05, |
|
"logits/chosen": 0.393886923789978, |
|
"logits/rejected": 0.25684821605682373, |
|
"logps/chosen": -773.8262329101562, |
|
"logps/rejected": -1119.12060546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5616737008094788, |
|
"rewards/margins": 27.010391235351562, |
|
"rewards/rejected": -26.448719024658203, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.682926829268293, |
|
"grad_norm": 1.6074091035989113e-05, |
|
"learning_rate": 5.1544912966734994e-05, |
|
"logits/chosen": 1.0595850944519043, |
|
"logits/rejected": 1.1324055194854736, |
|
"logps/chosen": -1086.4296875, |
|
"logps/rejected": -1205.9815673828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2086625099182129, |
|
"rewards/margins": 30.370914459228516, |
|
"rewards/rejected": -30.16225242614746, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 2.6991869918699187, |
|
"grad_norm": 4.716870535048656e-06, |
|
"learning_rate": 5.0384770500321176e-05, |
|
"logits/chosen": 0.7150585651397705, |
|
"logits/rejected": 1.0305664539337158, |
|
"logps/chosen": -949.9681396484375, |
|
"logps/rejected": -1113.91015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.314611911773682, |
|
"rewards/margins": 30.07944107055664, |
|
"rewards/rejected": -23.764827728271484, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 2.7154471544715446, |
|
"grad_norm": 3.2816437851579394e-06, |
|
"learning_rate": 4.9233419966116036e-05, |
|
"logits/chosen": 1.9386444091796875, |
|
"logits/rejected": 2.0223605632781982, |
|
"logps/chosen": -868.1651000976562, |
|
"logps/rejected": -765.9869995117188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.12423038482666, |
|
"rewards/margins": 30.5165958404541, |
|
"rewards/rejected": -21.392364501953125, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.7317073170731705, |
|
"grad_norm": 2.4390756152570248e-05, |
|
"learning_rate": 4.809106538606896e-05, |
|
"logits/chosen": 0.955643355846405, |
|
"logits/rejected": 1.1507562398910522, |
|
"logps/chosen": -1002.4882202148438, |
|
"logps/rejected": -1020.2136840820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6517884731292725, |
|
"rewards/margins": 26.767532348632812, |
|
"rewards/rejected": -25.115745544433594, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 2.747967479674797, |
|
"grad_norm": 0.00012876000255346298, |
|
"learning_rate": 4.695790918802576e-05, |
|
"logits/chosen": 2.1373488903045654, |
|
"logits/rejected": 1.845626950263977, |
|
"logps/chosen": -643.7026977539062, |
|
"logps/rejected": -862.6270751953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4644973278045654, |
|
"rewards/margins": 26.4927978515625, |
|
"rewards/rejected": -24.028301239013672, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.7642276422764227, |
|
"grad_norm": 8.289234392577782e-05, |
|
"learning_rate": 4.58341521698579e-05, |
|
"logits/chosen": 0.25596243143081665, |
|
"logits/rejected": -0.03055526316165924, |
|
"logps/chosen": -614.50244140625, |
|
"logps/rejected": -1223.715576171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.4099273681640625, |
|
"rewards/margins": 31.352651596069336, |
|
"rewards/rejected": -26.942724227905273, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.7804878048780486, |
|
"grad_norm": 3.854520969071018e-08, |
|
"learning_rate": 4.47199934638807e-05, |
|
"logits/chosen": 0.8832861185073853, |
|
"logits/rejected": 0.8490067720413208, |
|
"logps/chosen": -775.900634765625, |
|
"logps/rejected": -1054.091796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.442215442657471, |
|
"rewards/margins": 29.371417999267578, |
|
"rewards/rejected": -22.929203033447266, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.796747967479675, |
|
"grad_norm": 3.370180934325617e-08, |
|
"learning_rate": 4.3615630501566384e-05, |
|
"logits/chosen": 1.1688926219940186, |
|
"logits/rejected": 1.1840847730636597, |
|
"logps/chosen": -789.5611572265625, |
|
"logps/rejected": -892.3736572265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.048530578613281, |
|
"rewards/margins": 35.47740173339844, |
|
"rewards/rejected": -31.428869247436523, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 2.813008130081301, |
|
"grad_norm": 6.220017439773073e-06, |
|
"learning_rate": 4.252125897855932e-05, |
|
"logits/chosen": 0.24903741478919983, |
|
"logits/rejected": 0.07388614118099213, |
|
"logps/chosen": -845.9579467773438, |
|
"logps/rejected": -1296.85400390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.9718475341796875, |
|
"rewards/margins": 31.60814094543457, |
|
"rewards/rejected": -34.57999038696289, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.8292682926829267, |
|
"grad_norm": 4.538567566214624e-07, |
|
"learning_rate": 4.143707281999767e-05, |
|
"logits/chosen": 1.117840051651001, |
|
"logits/rejected": 1.1794054508209229, |
|
"logps/chosen": -692.6531372070312, |
|
"logps/rejected": -1131.69970703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.421784400939941, |
|
"rewards/margins": 30.24844741821289, |
|
"rewards/rejected": -22.826662063598633, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.845528455284553, |
|
"grad_norm": 1.9607491594797466e-06, |
|
"learning_rate": 4.036326414614985e-05, |
|
"logits/chosen": 1.117968201637268, |
|
"logits/rejected": 1.3285045623779297, |
|
"logps/chosen": -915.8657836914062, |
|
"logps/rejected": -880.1917724609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.270617485046387, |
|
"rewards/margins": 27.518800735473633, |
|
"rewards/rejected": -22.248184204101562, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 2.861788617886179, |
|
"grad_norm": 2.6408181952319865e-07, |
|
"learning_rate": 3.930002323837025e-05, |
|
"logits/chosen": 0.2848118543624878, |
|
"logits/rejected": 0.30847471952438354, |
|
"logps/chosen": -777.3819580078125, |
|
"logps/rejected": -1265.9404296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.468026161193848, |
|
"rewards/margins": 30.405376434326172, |
|
"rewards/rejected": -34.8734016418457, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 2.8780487804878048, |
|
"grad_norm": 5.149066055309959e-06, |
|
"learning_rate": 3.824753850538082e-05, |
|
"logits/chosen": -0.513633131980896, |
|
"logits/rejected": -0.5264861583709717, |
|
"logps/chosen": -658.2607421875, |
|
"logps/rejected": -1306.8682861328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.874265670776367, |
|
"rewards/margins": 48.48944091796875, |
|
"rewards/rejected": -43.615177154541016, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 2.894308943089431, |
|
"grad_norm": 0.0007087494013831019, |
|
"learning_rate": 3.720599644988482e-05, |
|
"logits/chosen": 0.9137465357780457, |
|
"logits/rejected": 1.133833885192871, |
|
"logps/chosen": -883.857177734375, |
|
"logps/rejected": -836.129638671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.139035224914551, |
|
"rewards/margins": 25.803987503051758, |
|
"rewards/rejected": -22.664953231811523, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 2.910569105691057, |
|
"grad_norm": 3.135071528959088e-05, |
|
"learning_rate": 3.617558163551802e-05, |
|
"logits/chosen": 0.9635988473892212, |
|
"logits/rejected": 1.133531093597412, |
|
"logps/chosen": -889.0616455078125, |
|
"logps/rejected": -834.8280029296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.593743920326233, |
|
"rewards/margins": 22.950916290283203, |
|
"rewards/rejected": -21.3571720123291, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.926829268292683, |
|
"grad_norm": 9.376124580739997e-06, |
|
"learning_rate": 3.5156476654143497e-05, |
|
"logits/chosen": 0.21040788292884827, |
|
"logits/rejected": 0.14262419939041138, |
|
"logps/chosen": -848.9990844726562, |
|
"logps/rejected": -1117.9007568359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.15429675579071045, |
|
"rewards/margins": 29.727014541625977, |
|
"rewards/rejected": -29.57271957397461, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.943089430894309, |
|
"grad_norm": 5.8795808399736416e-06, |
|
"learning_rate": 3.414886209349615e-05, |
|
"logits/chosen": 1.1507726907730103, |
|
"logits/rejected": 0.9590345025062561, |
|
"logps/chosen": -977.4312744140625, |
|
"logps/rejected": -943.8434448242188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.495950222015381, |
|
"rewards/margins": 23.74968719482422, |
|
"rewards/rejected": -21.253738403320312, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 2.959349593495935, |
|
"grad_norm": 3.5330920411524858e-09, |
|
"learning_rate": 3.315291650518197e-05, |
|
"logits/chosen": 1.0992462635040283, |
|
"logits/rejected": 1.1924934387207031, |
|
"logps/chosen": -962.3739624023438, |
|
"logps/rejected": -1141.202880859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.505153179168701, |
|
"rewards/margins": 32.49464416503906, |
|
"rewards/rejected": -28.989490509033203, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 2.975609756097561, |
|
"grad_norm": 0.00035440587089397013, |
|
"learning_rate": 3.216881637303839e-05, |
|
"logits/chosen": 0.8002848625183105, |
|
"logits/rejected": 1.1536259651184082, |
|
"logps/chosen": -1330.277099609375, |
|
"logps/rejected": -1155.875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3375800848007202, |
|
"rewards/margins": 29.2307186126709, |
|
"rewards/rejected": -27.893136978149414, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.991869918699187, |
|
"grad_norm": 4.985774285160005e-05, |
|
"learning_rate": 3.119673608186085e-05, |
|
"logits/chosen": 1.2516355514526367, |
|
"logits/rejected": 1.7440040111541748, |
|
"logps/chosen": -1085.0638427734375, |
|
"logps/rejected": -953.7195434570312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.24714183807373, |
|
"rewards/margins": 41.917320251464844, |
|
"rewards/rejected": -29.67017936706543, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 5.4140009808634204e-08, |
|
"learning_rate": 3.0236847886501542e-05, |
|
"logits/chosen": 2.206167697906494, |
|
"logits/rejected": 2.992643117904663, |
|
"logps/chosen": -1038.874267578125, |
|
"logps/rejected": -695.817626953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.593250274658203, |
|
"rewards/margins": 23.8295841217041, |
|
"rewards/rejected": -15.236334800720215, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 3.016260162601626, |
|
"grad_norm": 9.61216301220702e-06, |
|
"learning_rate": 2.9289321881345254e-05, |
|
"logits/chosen": 0.9993420243263245, |
|
"logits/rejected": 1.1457020044326782, |
|
"logps/chosen": -1117.407958984375, |
|
"logps/rejected": -936.1728515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.916309833526611, |
|
"rewards/margins": 31.818635940551758, |
|
"rewards/rejected": -23.902324676513672, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 3.032520325203252, |
|
"grad_norm": 2.3071846953826025e-05, |
|
"learning_rate": 2.8354325970168484e-05, |
|
"logits/chosen": 2.772648811340332, |
|
"logits/rejected": 2.744749069213867, |
|
"logps/chosen": -768.599609375, |
|
"logps/rejected": -593.22265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.238020420074463, |
|
"rewards/margins": 21.210569381713867, |
|
"rewards/rejected": -15.97254753112793, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 3.048780487804878, |
|
"grad_norm": 2.7818750822916627e-06, |
|
"learning_rate": 2.743202583638641e-05, |
|
"logits/chosen": 1.0377551317214966, |
|
"logits/rejected": 1.1594995260238647, |
|
"logps/chosen": -898.0354614257812, |
|
"logps/rejected": -1189.0675048828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.734022617340088, |
|
"rewards/margins": 34.13422775268555, |
|
"rewards/rejected": -29.40020179748535, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 3.065040650406504, |
|
"grad_norm": 8.155032992362976e-05, |
|
"learning_rate": 2.6522584913693294e-05, |
|
"logits/chosen": 0.19498001039028168, |
|
"logits/rejected": 0.3026728332042694, |
|
"logps/chosen": -835.2607421875, |
|
"logps/rejected": -1164.824951171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.8581042289733887, |
|
"rewards/margins": 35.51533508300781, |
|
"rewards/rejected": -31.657230377197266, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.08130081300813, |
|
"grad_norm": 2.616638017371997e-09, |
|
"learning_rate": 2.5626164357101857e-05, |
|
"logits/chosen": 0.9281441569328308, |
|
"logits/rejected": 0.9870262145996094, |
|
"logps/chosen": -877.86865234375, |
|
"logps/rejected": -1065.238037109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.903160095214844, |
|
"rewards/margins": 35.91914367675781, |
|
"rewards/rejected": -30.01598358154297, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 3.097560975609756, |
|
"grad_norm": 4.8233854613499716e-05, |
|
"learning_rate": 2.4742923014386156e-05, |
|
"logits/chosen": 0.8129276037216187, |
|
"logits/rejected": 0.8291976451873779, |
|
"logps/chosen": -783.6571044921875, |
|
"logps/rejected": -1073.9425048828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.485188961029053, |
|
"rewards/margins": 33.823997497558594, |
|
"rewards/rejected": -26.33880615234375, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.113821138211382, |
|
"grad_norm": 8.640755368105602e-06, |
|
"learning_rate": 2.3873017397933327e-05, |
|
"logits/chosen": 1.2895498275756836, |
|
"logits/rejected": 1.3123798370361328, |
|
"logps/chosen": -966.8514404296875, |
|
"logps/rejected": -899.7991943359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12065728008747101, |
|
"rewards/margins": 23.542198181152344, |
|
"rewards/rejected": -23.42154312133789, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 3.130081300813008, |
|
"grad_norm": 8.55558255352662e-08, |
|
"learning_rate": 2.301660165700936e-05, |
|
"logits/chosen": 1.8061244487762451, |
|
"logits/rejected": 1.917268991470337, |
|
"logps/chosen": -1155.9625244140625, |
|
"logps/rejected": -948.8958740234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.420581817626953, |
|
"rewards/margins": 35.871253967285156, |
|
"rewards/rejected": -25.45067024230957, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 3.1463414634146343, |
|
"grad_norm": 1.6171676975318405e-07, |
|
"learning_rate": 2.2173827550443417e-05, |
|
"logits/chosen": 0.964035153388977, |
|
"logits/rejected": 1.110016942024231, |
|
"logps/chosen": -945.4276733398438, |
|
"logps/rejected": -1273.5848388671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.112401008605957, |
|
"rewards/margins": 36.80622100830078, |
|
"rewards/rejected": -31.693822860717773, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 3.16260162601626, |
|
"grad_norm": 8.99770640216957e-08, |
|
"learning_rate": 2.1344844419735755e-05, |
|
"logits/chosen": 1.1494569778442383, |
|
"logits/rejected": 1.1893397569656372, |
|
"logps/chosen": -973.5465087890625, |
|
"logps/rejected": -926.6387329101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02785491943359375, |
|
"rewards/margins": 23.685792922973633, |
|
"rewards/rejected": -23.65793800354004, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 3.178861788617886, |
|
"grad_norm": 8.178641763834094e-08, |
|
"learning_rate": 2.0529799162594244e-05, |
|
"logits/chosen": 1.756314992904663, |
|
"logits/rejected": 1.7245032787322998, |
|
"logps/chosen": -897.562255859375, |
|
"logps/rejected": -843.6610717773438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.256314277648926, |
|
"rewards/margins": 28.20868682861328, |
|
"rewards/rejected": -16.95237159729004, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 3.1951219512195124, |
|
"grad_norm": 2.262528141727671e-06, |
|
"learning_rate": 1.9728836206903656e-05, |
|
"logits/chosen": 1.218475341796875, |
|
"logits/rejected": 1.4999449253082275, |
|
"logps/chosen": -1005.2973022460938, |
|
"logps/rejected": -1140.7867431640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.491312503814697, |
|
"rewards/margins": 28.96997833251953, |
|
"rewards/rejected": -23.478666305541992, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 3.2113821138211383, |
|
"grad_norm": 5.2778304961975664e-05, |
|
"learning_rate": 1.8942097485132626e-05, |
|
"logits/chosen": 1.8117187023162842, |
|
"logits/rejected": 1.923075556755066, |
|
"logps/chosen": -923.42041015625, |
|
"logps/rejected": -912.8529052734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.853033065795898, |
|
"rewards/margins": 27.288352966308594, |
|
"rewards/rejected": -20.435319900512695, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 3.227642276422764, |
|
"grad_norm": 1.4666602510260418e-07, |
|
"learning_rate": 1.8169722409183097e-05, |
|
"logits/chosen": 1.0807545185089111, |
|
"logits/rejected": 1.1661359071731567, |
|
"logps/chosen": -952.448486328125, |
|
"logps/rejected": -1058.0380859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.936010360717773, |
|
"rewards/margins": 31.115032196044922, |
|
"rewards/rejected": -22.17902374267578, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.2439024390243905, |
|
"grad_norm": 3.001681747605289e-08, |
|
"learning_rate": 1.741184784568608e-05, |
|
"logits/chosen": 1.1533608436584473, |
|
"logits/rejected": 1.2508865594863892, |
|
"logps/chosen": -928.683349609375, |
|
"logps/rejected": -1097.2528076171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.812358021736145, |
|
"rewards/margins": 29.502267837524414, |
|
"rewards/rejected": -28.689908981323242, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.2601626016260163, |
|
"grad_norm": 0.00038864457746967673, |
|
"learning_rate": 1.6668608091748495e-05, |
|
"logits/chosen": 1.489478349685669, |
|
"logits/rejected": 1.9679566621780396, |
|
"logps/chosen": -757.9615478515625, |
|
"logps/rejected": -894.6292114257812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.9130539894104, |
|
"rewards/margins": 24.963455200195312, |
|
"rewards/rejected": -18.050397872924805, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 3.2764227642276422, |
|
"grad_norm": 4.8542842705501243e-05, |
|
"learning_rate": 1.5940134851155697e-05, |
|
"logits/chosen": -0.526631772518158, |
|
"logits/rejected": -0.6513290405273438, |
|
"logps/chosen": -715.877685546875, |
|
"logps/rejected": -1226.02197265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8326917886734009, |
|
"rewards/margins": 29.091434478759766, |
|
"rewards/rejected": -29.924123764038086, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 3.292682926829268, |
|
"grad_norm": 4.5316621566371396e-08, |
|
"learning_rate": 1.522655721103291e-05, |
|
"logits/chosen": 1.6182302236557007, |
|
"logits/rejected": 1.5821877717971802, |
|
"logps/chosen": -1175.639404296875, |
|
"logps/rejected": -971.0200805664062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.991975784301758, |
|
"rewards/margins": 32.31345748901367, |
|
"rewards/rejected": -24.321483612060547, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 3.3089430894308944, |
|
"grad_norm": 0.0004193031636532396, |
|
"learning_rate": 1.4528001618970966e-05, |
|
"logits/chosen": 0.8675569295883179, |
|
"logits/rejected": 0.6923835873603821, |
|
"logps/chosen": -937.3357543945312, |
|
"logps/rejected": -1099.741943359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.40576171875, |
|
"rewards/margins": 45.40290069580078, |
|
"rewards/rejected": -35.99713897705078, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 3.3252032520325203, |
|
"grad_norm": 2.007274702009454e-08, |
|
"learning_rate": 1.3844591860619383e-05, |
|
"logits/chosen": 1.104245901107788, |
|
"logits/rejected": 1.0692744255065918, |
|
"logps/chosen": -1037.014892578125, |
|
"logps/rejected": -978.7286376953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5484957695007324, |
|
"rewards/margins": 29.905384063720703, |
|
"rewards/rejected": -27.356887817382812, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 3.341463414634146, |
|
"grad_norm": 2.191713255328409e-09, |
|
"learning_rate": 1.3176449037751293e-05, |
|
"logits/chosen": 1.7502235174179077, |
|
"logits/rejected": 1.8861641883850098, |
|
"logps/chosen": -939.8538818359375, |
|
"logps/rejected": -893.7095336914062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.98280143737793, |
|
"rewards/margins": 59.06371307373047, |
|
"rewards/rejected": -38.080909729003906, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 3.3577235772357725, |
|
"grad_norm": 2.75520211090452e-08, |
|
"learning_rate": 1.2523691546803873e-05, |
|
"logits/chosen": -0.5331703424453735, |
|
"logits/rejected": -0.6084608435630798, |
|
"logps/chosen": -589.6011352539062, |
|
"logps/rejected": -1088.550048828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4032670259475708, |
|
"rewards/margins": 31.809803009033203, |
|
"rewards/rejected": -31.406536102294922, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 3.3739837398373984, |
|
"grad_norm": 9.301492536906153e-05, |
|
"learning_rate": 1.1886435057898337e-05, |
|
"logits/chosen": 1.1433031558990479, |
|
"logits/rejected": 1.2694740295410156, |
|
"logps/chosen": -558.0299682617188, |
|
"logps/rejected": -707.3845825195312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6971948146820068, |
|
"rewards/margins": 19.07242774963379, |
|
"rewards/rejected": -17.375232696533203, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 3.3902439024390243, |
|
"grad_norm": 0.0010420983890071511, |
|
"learning_rate": 1.1264792494342857e-05, |
|
"logits/chosen": 1.0887360572814941, |
|
"logits/rejected": 1.2838869094848633, |
|
"logps/chosen": -835.1876220703125, |
|
"logps/rejected": -818.43603515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0367493629455566, |
|
"rewards/margins": 24.39901351928711, |
|
"rewards/rejected": -23.362262725830078, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.40650406504065, |
|
"grad_norm": 1.8891978470492177e-06, |
|
"learning_rate": 1.0658874012622244e-05, |
|
"logits/chosen": 1.01885986328125, |
|
"logits/rejected": 1.0112289190292358, |
|
"logps/chosen": -871.6119384765625, |
|
"logps/rejected": -1098.082275390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.956085205078125, |
|
"rewards/margins": 35.5787353515625, |
|
"rewards/rejected": -26.62265396118164, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 3.4227642276422765, |
|
"grad_norm": 8.151694146363297e-07, |
|
"learning_rate": 1.0068786982878087e-05, |
|
"logits/chosen": 0.14928454160690308, |
|
"logits/rejected": 0.2887648940086365, |
|
"logps/chosen": -933.3944091796875, |
|
"logps/rejected": -1240.23681640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.421821594238281, |
|
"rewards/margins": 40.01603698730469, |
|
"rewards/rejected": -34.594215393066406, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.4390243902439024, |
|
"grad_norm": 0.00020665739430114627, |
|
"learning_rate": 9.494635969882426e-06, |
|
"logits/chosen": 0.8889873027801514, |
|
"logits/rejected": 0.9832445383071899, |
|
"logps/chosen": -601.9386596679688, |
|
"logps/rejected": -856.8861083984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.8855957984924316, |
|
"rewards/margins": 23.182449340820312, |
|
"rewards/rejected": -19.29685401916504, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 3.4552845528455283, |
|
"grad_norm": 1.000452058974588e-07, |
|
"learning_rate": 8.936522714508678e-06, |
|
"logits/chosen": 2.5088908672332764, |
|
"logits/rejected": 2.547111749649048, |
|
"logps/chosen": -1105.48828125, |
|
"logps/rejected": -805.77587890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.696690559387207, |
|
"rewards/margins": 27.416324615478516, |
|
"rewards/rejected": -19.719633102416992, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 3.4715447154471546, |
|
"grad_norm": 4.656814326153835e-06, |
|
"learning_rate": 8.394546115702928e-06, |
|
"logits/chosen": 0.8327282071113586, |
|
"logits/rejected": 1.2966117858886719, |
|
"logps/chosen": -679.051513671875, |
|
"logps/rejected": -887.1991577148438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.440448760986328, |
|
"rewards/margins": 28.49188995361328, |
|
"rewards/rejected": -25.051441192626953, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.4878048780487805, |
|
"grad_norm": 3.2379211916122586e-05, |
|
"learning_rate": 7.868802212958703e-06, |
|
"logits/chosen": 1.9742733240127563, |
|
"logits/rejected": 2.294674873352051, |
|
"logps/chosen": -1208.1063232421875, |
|
"logps/rejected": -637.0113525390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.201011657714844, |
|
"rewards/margins": 20.031538009643555, |
|
"rewards/rejected": -12.830526351928711, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 3.5040650406504064, |
|
"grad_norm": 7.747532393409529e-09, |
|
"learning_rate": 7.359384169298744e-06, |
|
"logits/chosen": 1.9279037714004517, |
|
"logits/rejected": 1.9304057359695435, |
|
"logps/chosen": -1136.0579833984375, |
|
"logps/rejected": -904.9140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.872076988220215, |
|
"rewards/margins": 38.54069137573242, |
|
"rewards/rejected": -27.66861343383789, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 3.5203252032520327, |
|
"grad_norm": 5.556800020123376e-10, |
|
"learning_rate": 6.866382254766157e-06, |
|
"logits/chosen": -0.5023067593574524, |
|
"logits/rejected": -0.5689560174942017, |
|
"logps/chosen": -463.14056396484375, |
|
"logps/rejected": -1160.8194580078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.831999778747559, |
|
"rewards/margins": 47.75160217285156, |
|
"rewards/rejected": -41.91960144042969, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 3.5365853658536586, |
|
"grad_norm": 1.6526299077668227e-05, |
|
"learning_rate": 6.3898838304284e-06, |
|
"logits/chosen": 1.8988527059555054, |
|
"logits/rejected": 2.0755226612091064, |
|
"logps/chosen": -858.6326293945312, |
|
"logps/rejected": -779.324462890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.013715744018555, |
|
"rewards/margins": 29.005504608154297, |
|
"rewards/rejected": -18.991790771484375, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 3.5528455284552845, |
|
"grad_norm": 3.1803594424673065e-07, |
|
"learning_rate": 5.929973332896677e-06, |
|
"logits/chosen": 0.3545091152191162, |
|
"logits/rejected": 0.2864121198654175, |
|
"logps/chosen": -815.6988525390625, |
|
"logps/rejected": -1193.6893310546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8741790056228638, |
|
"rewards/margins": 25.383888244628906, |
|
"rewards/rejected": -26.258068084716797, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.569105691056911, |
|
"grad_norm": 4.157168689289392e-07, |
|
"learning_rate": 5.486732259363647e-06, |
|
"logits/chosen": 0.30699625611305237, |
|
"logits/rejected": 0.22978034615516663, |
|
"logps/chosen": -628.720703125, |
|
"logps/rejected": -1157.9332275390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.1703996658325195, |
|
"rewards/margins": 41.45426559448242, |
|
"rewards/rejected": -36.28386306762695, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 3.5853658536585367, |
|
"grad_norm": 2.4077553462120704e-06, |
|
"learning_rate": 5.060239153161872e-06, |
|
"logits/chosen": 0.36212480068206787, |
|
"logits/rejected": 0.43432360887527466, |
|
"logps/chosen": -796.969482421875, |
|
"logps/rejected": -1134.615478515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.9879493713378906, |
|
"rewards/margins": 24.588518142700195, |
|
"rewards/rejected": -28.57646942138672, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 3.6016260162601625, |
|
"grad_norm": 0.00031399927684105933, |
|
"learning_rate": 4.6505695898457655e-06, |
|
"logits/chosen": 1.832968831062317, |
|
"logits/rejected": 2.070023775100708, |
|
"logps/chosen": -956.5606689453125, |
|
"logps/rejected": -1024.6470947265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.057786464691162, |
|
"rewards/margins": 32.76300048828125, |
|
"rewards/rejected": -26.705215454101562, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 3.617886178861789, |
|
"grad_norm": 0.0001437750761397183, |
|
"learning_rate": 4.257796163799455e-06, |
|
"logits/chosen": -0.5872640609741211, |
|
"logits/rejected": -0.5590543150901794, |
|
"logps/chosen": -966.5204467773438, |
|
"logps/rejected": -1230.2716064453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.602821350097656, |
|
"rewards/margins": 28.86246681213379, |
|
"rewards/rejected": -33.46529006958008, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 3.6341463414634148, |
|
"grad_norm": 1.4342627707719657e-07, |
|
"learning_rate": 3.8819884753728665e-06, |
|
"logits/chosen": 1.0317366123199463, |
|
"logits/rejected": 1.058630108833313, |
|
"logps/chosen": -919.435791015625, |
|
"logps/rejected": -1093.8701171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.276484727859497, |
|
"rewards/margins": 29.283370971679688, |
|
"rewards/rejected": -26.006885528564453, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.6504065040650406, |
|
"grad_norm": 2.9189145607233513e-06, |
|
"learning_rate": 3.5232131185484076e-06, |
|
"logits/chosen": 1.0348219871520996, |
|
"logits/rejected": 1.0469154119491577, |
|
"logps/chosen": -804.0462646484375, |
|
"logps/rejected": -901.7625122070312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.819074630737305, |
|
"rewards/margins": 37.26897430419922, |
|
"rewards/rejected": -26.449901580810547, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 3.6666666666666665, |
|
"grad_norm": 7.434827864472027e-08, |
|
"learning_rate": 3.181533669140346e-06, |
|
"logits/chosen": 2.3163633346557617, |
|
"logits/rejected": 2.1558704376220703, |
|
"logps/chosen": -1330.4156494140625, |
|
"logps/rejected": -734.6536254882812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.676055908203125, |
|
"rewards/margins": 32.37335968017578, |
|
"rewards/rejected": -19.697303771972656, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 3.682926829268293, |
|
"grad_norm": 5.519868118142313e-09, |
|
"learning_rate": 2.857010673529015e-06, |
|
"logits/chosen": 0.7554388046264648, |
|
"logits/rejected": 1.0454837083816528, |
|
"logps/chosen": -1061.048583984375, |
|
"logps/rejected": -1125.9661865234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.605961799621582, |
|
"rewards/margins": 34.83687973022461, |
|
"rewards/rejected": -29.230918884277344, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 3.6991869918699187, |
|
"grad_norm": 2.5435662109885016e-07, |
|
"learning_rate": 2.5497016379318894e-06, |
|
"logits/chosen": 1.1780487298965454, |
|
"logits/rejected": 0.9616645574569702, |
|
"logps/chosen": -874.20654296875, |
|
"logps/rejected": -1001.5404052734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.586102485656738, |
|
"rewards/margins": 28.590946197509766, |
|
"rewards/rejected": -24.004844665527344, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 3.7154471544715446, |
|
"grad_norm": 7.842224647447438e-08, |
|
"learning_rate": 2.259661018213333e-06, |
|
"logits/chosen": 1.4015605449676514, |
|
"logits/rejected": 1.8417150974273682, |
|
"logps/chosen": -1290.88134765625, |
|
"logps/rejected": -1013.3934936523438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.242486953735352, |
|
"rewards/margins": 27.657352447509766, |
|
"rewards/rejected": -21.414867401123047, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.7317073170731705, |
|
"grad_norm": 2.204809561590082e-06, |
|
"learning_rate": 1.986940210234922e-06, |
|
"logits/chosen": -0.4887985587120056, |
|
"logits/rejected": -0.6181695461273193, |
|
"logps/chosen": -587.0228271484375, |
|
"logps/rejected": -1153.0972900390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6479713916778564, |
|
"rewards/margins": 28.618911743164062, |
|
"rewards/rejected": -31.266887664794922, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 3.747967479674797, |
|
"grad_norm": 3.265151008235989e-06, |
|
"learning_rate": 1.7315875407479032e-06, |
|
"logits/chosen": 1.886859655380249, |
|
"logits/rejected": 1.951560378074646, |
|
"logps/chosen": -1151.87451171875, |
|
"logps/rejected": -919.1624755859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.187823295593262, |
|
"rewards/margins": 33.495697021484375, |
|
"rewards/rejected": -24.307870864868164, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 3.7642276422764227, |
|
"grad_norm": 0.0006769644096493721, |
|
"learning_rate": 1.493648258829694e-06, |
|
"logits/chosen": 1.5636029243469238, |
|
"logits/rejected": 2.0519399642944336, |
|
"logps/chosen": -962.296630859375, |
|
"logps/rejected": -760.23583984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.352012634277344, |
|
"rewards/margins": 21.704378128051758, |
|
"rewards/rejected": -17.352365493774414, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 3.7804878048780486, |
|
"grad_norm": 2.2523332518176176e-05, |
|
"learning_rate": 1.2731645278655445e-06, |
|
"logits/chosen": 0.9352502226829529, |
|
"logits/rejected": 1.0311282873153687, |
|
"logps/chosen": -811.5540771484375, |
|
"logps/rejected": -969.5977172851562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.795368194580078, |
|
"rewards/margins": 23.98063850402832, |
|
"rewards/rejected": -19.18526840209961, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 3.796747967479675, |
|
"grad_norm": 4.502208028611676e-08, |
|
"learning_rate": 1.0701754180771462e-06, |
|
"logits/chosen": 0.2641603350639343, |
|
"logits/rejected": 0.31472957134246826, |
|
"logps/chosen": -848.6556396484375, |
|
"logps/rejected": -1213.4002685546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.694286346435547, |
|
"rewards/margins": 30.445655822753906, |
|
"rewards/rejected": -27.75136947631836, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 3.813008130081301, |
|
"grad_norm": 6.32426554147969e-06, |
|
"learning_rate": 8.847168995992916e-07, |
|
"logits/chosen": 0.1992824822664261, |
|
"logits/rejected": 0.19052676856517792, |
|
"logps/chosen": -401.17205810546875, |
|
"logps/rejected": -1125.676025390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.007885932922363, |
|
"rewards/margins": 24.954639434814453, |
|
"rewards/rejected": -31.9625244140625, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 3.8292682926829267, |
|
"grad_norm": 5.827480435982579e-06, |
|
"learning_rate": 7.16821836105841e-07, |
|
"logits/chosen": 0.20779013633728027, |
|
"logits/rejected": 0.3515350818634033, |
|
"logps/chosen": -841.5047607421875, |
|
"logps/rejected": -1172.7518310546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.262989044189453, |
|
"rewards/margins": 30.74886703491211, |
|
"rewards/rejected": -28.485877990722656, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 3.845528455284553, |
|
"grad_norm": 5.810121820104541e-06, |
|
"learning_rate": 5.665199789862907e-07, |
|
"logits/chosen": 1.4595049619674683, |
|
"logits/rejected": 2.075129747390747, |
|
"logps/chosen": -1167.7393798828125, |
|
"logps/rejected": -774.719970703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.907793998718262, |
|
"rewards/margins": 26.305692672729492, |
|
"rewards/rejected": -16.397899627685547, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 3.861788617886179, |
|
"grad_norm": 0.0003194608143530786, |
|
"learning_rate": 4.3383796207365766e-07, |
|
"logits/chosen": 1.5111838579177856, |
|
"logits/rejected": 1.4651854038238525, |
|
"logps/chosen": -832.2733154296875, |
|
"logps/rejected": -927.6607666015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.360931396484375, |
|
"rewards/margins": 45.037559509277344, |
|
"rewards/rejected": -28.676633834838867, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 3.8780487804878048, |
|
"grad_norm": 9.628876540546116e-08, |
|
"learning_rate": 3.1879929692498757e-07, |
|
"logits/chosen": 2.7370991706848145, |
|
"logits/rejected": 2.8850603103637695, |
|
"logps/chosen": -1059.6279296875, |
|
"logps/rejected": -725.737060546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.765009880065918, |
|
"rewards/margins": 29.055585861206055, |
|
"rewards/rejected": -18.290576934814453, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.894308943089431, |
|
"grad_norm": 1.8444471550083108e-07, |
|
"learning_rate": 2.2142436865499882e-07, |
|
"logits/chosen": 0.2767738699913025, |
|
"logits/rejected": 0.3400687575340271, |
|
"logps/chosen": -803.11669921875, |
|
"logps/rejected": -1104.4150390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12649095058441162, |
|
"rewards/margins": 24.231075286865234, |
|
"rewards/rejected": -24.10458755493164, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 3.910569105691057, |
|
"grad_norm": 1.051975505106384e-05, |
|
"learning_rate": 1.4173043232380557e-07, |
|
"logits/chosen": 0.13623979687690735, |
|
"logits/rejected": 0.2743992805480957, |
|
"logps/chosen": -830.56396484375, |
|
"logps/rejected": -930.9827880859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.407852649688721, |
|
"rewards/margins": 27.83668327331543, |
|
"rewards/rejected": -23.428829193115234, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 3.926829268292683, |
|
"grad_norm": 1.354993361957213e-08, |
|
"learning_rate": 7.973160987931883e-08, |
|
"logits/chosen": 0.9562588930130005, |
|
"logits/rejected": 1.137865424156189, |
|
"logps/chosen": -867.230224609375, |
|
"logps/rejected": -1033.2408447265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.331739902496338, |
|
"rewards/margins": 28.258647918701172, |
|
"rewards/rejected": -24.926908493041992, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 3.943089430894309, |
|
"grad_norm": 2.2354779503075406e-05, |
|
"learning_rate": 3.5438887654737355e-08, |
|
"logits/chosen": 2.4352188110351562, |
|
"logits/rejected": 2.6551947593688965, |
|
"logps/chosen": -945.0474853515625, |
|
"logps/rejected": -577.4002685546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.421252727508545, |
|
"rewards/margins": 22.539770126342773, |
|
"rewards/rejected": -15.11851692199707, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.959349593495935, |
|
"grad_norm": 1.6402739788645704e-07, |
|
"learning_rate": 8.860114421826993e-09, |
|
"logits/chosen": 0.30544334650039673, |
|
"logits/rejected": 0.3768209218978882, |
|
"logps/chosen": -978.500244140625, |
|
"logps/rejected": -1139.66015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.327483892440796, |
|
"rewards/margins": 28.7570858001709, |
|
"rewards/rejected": -30.084569931030273, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 3.975609756097561, |
|
"grad_norm": 4.3748215716732375e-08, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 1.4252970218658447, |
|
"logits/rejected": 1.7851338386535645, |
|
"logps/chosen": -1204.9351806640625, |
|
"logps/rejected": -901.27197265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.897351264953613, |
|
"rewards/margins": 32.149784088134766, |
|
"rewards/rejected": -25.252431869506836, |
|
"step": 246 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 246, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 62, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|