|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 62, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016260162601626018, |
|
"grad_norm": 18.177886962890625, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": -0.3472236394882202, |
|
"logits/rejected": -0.13716036081314087, |
|
"logps/chosen": -780.8181762695312, |
|
"logps/rejected": -909.20263671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.032520325203252036, |
|
"grad_norm": 23.274246215820312, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -0.2127760350704193, |
|
"logits/rejected": -0.08323362469673157, |
|
"logps/chosen": -583.0169067382812, |
|
"logps/rejected": -715.5615234375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.04878048780487805, |
|
"grad_norm": 20.149507522583008, |
|
"learning_rate": 6e-05, |
|
"logits/chosen": -0.18167662620544434, |
|
"logits/rejected": -0.04478086531162262, |
|
"logps/chosen": -941.0387573242188, |
|
"logps/rejected": -825.662841796875, |
|
"loss": 0.6976, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.025517277419567108, |
|
"rewards/margins": 0.022285467013716698, |
|
"rewards/rejected": 0.0032318076118826866, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.06504065040650407, |
|
"grad_norm": 16.67251205444336, |
|
"learning_rate": 8e-05, |
|
"logits/chosen": 0.6866837739944458, |
|
"logits/rejected": 0.971089243888855, |
|
"logps/chosen": -999.306640625, |
|
"logps/rejected": -386.5375671386719, |
|
"loss": 0.563, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2688583433628082, |
|
"rewards/margins": 0.3312031030654907, |
|
"rewards/rejected": -0.062344741076231, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.08130081300813008, |
|
"grad_norm": 15.646084785461426, |
|
"learning_rate": 0.0001, |
|
"logits/chosen": 0.5107800364494324, |
|
"logits/rejected": 0.5942208766937256, |
|
"logps/chosen": -1051.1270751953125, |
|
"logps/rejected": -745.8003540039062, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.3622299134731293, |
|
"rewards/margins": 0.34313660860061646, |
|
"rewards/rejected": 0.01909332349896431, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0975609756097561, |
|
"grad_norm": 38.70280456542969, |
|
"learning_rate": 0.00012, |
|
"logits/chosen": -0.31406939029693604, |
|
"logits/rejected": -0.24293695390224457, |
|
"logps/chosen": -845.9321899414062, |
|
"logps/rejected": -932.499755859375, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5435073971748352, |
|
"rewards/margins": 0.47774890065193176, |
|
"rewards/rejected": 0.06575851887464523, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.11382113821138211, |
|
"grad_norm": 23.665071487426758, |
|
"learning_rate": 0.00014, |
|
"logits/chosen": -0.2646118402481079, |
|
"logits/rejected": -0.11520399153232574, |
|
"logps/chosen": -866.503173828125, |
|
"logps/rejected": -975.55126953125, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.6112838387489319, |
|
"rewards/margins": 0.4790405333042145, |
|
"rewards/rejected": 0.1322433352470398, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.13008130081300814, |
|
"grad_norm": 15.794047355651855, |
|
"learning_rate": 0.00016, |
|
"logits/chosen": -0.8256000876426697, |
|
"logits/rejected": -0.8912097811698914, |
|
"logps/chosen": -523.3858032226562, |
|
"logps/rejected": -1084.9468994140625, |
|
"loss": 0.4442, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.5804435610771179, |
|
"rewards/margins": 0.24081651866436005, |
|
"rewards/rejected": 0.33962705731391907, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.14634146341463414, |
|
"grad_norm": 13.538564682006836, |
|
"learning_rate": 0.00018, |
|
"logits/chosen": -0.11683523654937744, |
|
"logits/rejected": -0.0632472038269043, |
|
"logps/chosen": -652.114501953125, |
|
"logps/rejected": -551.6069946289062, |
|
"loss": 0.1564, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6716469526290894, |
|
"rewards/margins": 2.151698350906372, |
|
"rewards/rejected": -0.4800514578819275, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.16260162601626016, |
|
"grad_norm": 3.9652626514434814, |
|
"learning_rate": 0.0002, |
|
"logits/chosen": 0.4062778949737549, |
|
"logits/rejected": 0.5438919067382812, |
|
"logps/chosen": -771.1934814453125, |
|
"logps/rejected": -616.55908203125, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.8721909523010254, |
|
"rewards/margins": 5.208758354187012, |
|
"rewards/rejected": -1.3365669250488281, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.17886178861788618, |
|
"grad_norm": 0.18261243402957916, |
|
"learning_rate": 0.0001999911398855782, |
|
"logits/chosen": -0.7774271965026855, |
|
"logits/rejected": -0.8629493117332458, |
|
"logps/chosen": -601.1015014648438, |
|
"logps/rejected": -1039.275146484375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0800025463104248, |
|
"rewards/margins": 6.853862762451172, |
|
"rewards/rejected": -5.773860454559326, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.1951219512195122, |
|
"grad_norm": 0.1421748697757721, |
|
"learning_rate": 0.00019996456111234527, |
|
"logits/chosen": 0.7899215817451477, |
|
"logits/rejected": 1.119359016418457, |
|
"logps/chosen": -1416.412353515625, |
|
"logps/rejected": -827.2066650390625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.7505874633789062, |
|
"rewards/margins": 15.09115982055664, |
|
"rewards/rejected": -11.340574264526367, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.21138211382113822, |
|
"grad_norm": 3.4406840801239014, |
|
"learning_rate": 0.00019992026839012067, |
|
"logits/chosen": -0.8033453226089478, |
|
"logits/rejected": -0.877557098865509, |
|
"logps/chosen": -514.6026611328125, |
|
"logps/rejected": -1206.25537109375, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7983558177947998, |
|
"rewards/margins": 23.49526596069336, |
|
"rewards/rejected": -21.696908950805664, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.22764227642276422, |
|
"grad_norm": 0.19398577511310577, |
|
"learning_rate": 0.0001998582695676762, |
|
"logits/chosen": 0.9254277944564819, |
|
"logits/rejected": 1.1634798049926758, |
|
"logps/chosen": -1028.993408203125, |
|
"logps/rejected": -955.4432983398438, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5009795427322388, |
|
"rewards/margins": 17.867931365966797, |
|
"rewards/rejected": -18.368911743164062, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 0.00010074722376884893, |
|
"learning_rate": 0.000199778575631345, |
|
"logits/chosen": 0.3904605507850647, |
|
"logits/rejected": 0.3719422519207001, |
|
"logps/chosen": -884.9620361328125, |
|
"logps/rejected": -1075.615966796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.482113838195801, |
|
"rewards/margins": 21.95424461364746, |
|
"rewards/rejected": -24.436357498168945, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2601626016260163, |
|
"grad_norm": 3.7136353057576343e-05, |
|
"learning_rate": 0.000199681200703075, |
|
"logits/chosen": 0.2578551769256592, |
|
"logits/rejected": 0.5335351824760437, |
|
"logps/chosen": -1073.548828125, |
|
"logps/rejected": -992.4033813476562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.9434356689453125, |
|
"rewards/margins": 20.854663848876953, |
|
"rewards/rejected": -23.798099517822266, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.2764227642276423, |
|
"grad_norm": 8.596338147981442e-07, |
|
"learning_rate": 0.00019956616203792635, |
|
"logits/chosen": 0.5267460346221924, |
|
"logits/rejected": 0.4893237352371216, |
|
"logps/chosen": -987.3567504882812, |
|
"logps/rejected": -1127.171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0684036016464233, |
|
"rewards/margins": 32.558319091796875, |
|
"rewards/rejected": -33.62671661376953, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.2926829268292683, |
|
"grad_norm": 0.004051027819514275, |
|
"learning_rate": 0.00019943348002101371, |
|
"logits/chosen": 1.0484071969985962, |
|
"logits/rejected": 1.1081664562225342, |
|
"logps/chosen": -1105.1634521484375, |
|
"logps/rejected": -898.9759521484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1622314453125, |
|
"rewards/margins": 23.434669494628906, |
|
"rewards/rejected": -26.596900939941406, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.3089430894308943, |
|
"grad_norm": 0.003306547412648797, |
|
"learning_rate": 0.00019928317816389417, |
|
"logits/chosen": 0.5566614866256714, |
|
"logits/rejected": 0.6963181495666504, |
|
"logps/chosen": -932.650390625, |
|
"logps/rejected": -1061.4989013671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.36033821105957, |
|
"rewards/margins": 30.25779914855957, |
|
"rewards/rejected": -34.61813735961914, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.3252032520325203, |
|
"grad_norm": 1.3893560968369911e-08, |
|
"learning_rate": 0.00019911528310040074, |
|
"logits/chosen": 1.239579200744629, |
|
"logits/rejected": 1.046311855316162, |
|
"logps/chosen": -1079.0159912109375, |
|
"logps/rejected": -1033.2017822265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.044548749923706, |
|
"rewards/margins": 41.88936233520508, |
|
"rewards/rejected": -40.844810485839844, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.34146341463414637, |
|
"grad_norm": 4.666223851756968e-09, |
|
"learning_rate": 0.00019892982458192288, |
|
"logits/chosen": 0.2726232409477234, |
|
"logits/rejected": 0.14665402472019196, |
|
"logps/chosen": -978.7222900390625, |
|
"logps/rejected": -1133.2047119140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.054238319396973, |
|
"rewards/margins": 54.86410140991211, |
|
"rewards/rejected": -43.80986404418945, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.35772357723577236, |
|
"grad_norm": 4.876813477494579e-07, |
|
"learning_rate": 0.00019872683547213446, |
|
"logits/chosen": -0.16925190389156342, |
|
"logits/rejected": -0.19759103655815125, |
|
"logps/chosen": -965.187255859375, |
|
"logps/rejected": -1239.143798828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.977485656738281, |
|
"rewards/margins": 29.40732765197754, |
|
"rewards/rejected": -44.38481140136719, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.37398373983739835, |
|
"grad_norm": 37.638973236083984, |
|
"learning_rate": 0.00019850635174117033, |
|
"logits/chosen": 0.437714159488678, |
|
"logits/rejected": 0.4761970639228821, |
|
"logps/chosen": -1137.6966552734375, |
|
"logps/rejected": -1166.5640869140625, |
|
"loss": 0.4393, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.159793853759766, |
|
"rewards/margins": 32.14189529418945, |
|
"rewards/rejected": -43.301692962646484, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.3902439024390244, |
|
"grad_norm": 1.8173747229344173e-11, |
|
"learning_rate": 0.00019826841245925212, |
|
"logits/chosen": -0.7153763175010681, |
|
"logits/rejected": -0.6940470933914185, |
|
"logps/chosen": -938.263916015625, |
|
"logps/rejected": -1608.4205322265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -24.817350387573242, |
|
"rewards/margins": 34.095001220703125, |
|
"rewards/rejected": -58.912349700927734, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"grad_norm": 83.79772186279297, |
|
"learning_rate": 0.0001980130597897651, |
|
"logits/chosen": 1.1592888832092285, |
|
"logits/rejected": 1.1738824844360352, |
|
"logps/chosen": -948.4622802734375, |
|
"logps/rejected": -865.396728515625, |
|
"loss": 0.3825, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.343675374984741, |
|
"rewards/margins": 26.49417495727539, |
|
"rewards/rejected": -29.837852478027344, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.42276422764227645, |
|
"grad_norm": 2.6143006834900007e-06, |
|
"learning_rate": 0.00019774033898178667, |
|
"logits/chosen": 0.5444796085357666, |
|
"logits/rejected": 0.47586876153945923, |
|
"logps/chosen": -932.6605834960938, |
|
"logps/rejected": -1091.639892578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.2753777503967285, |
|
"rewards/margins": 34.133514404296875, |
|
"rewards/rejected": -38.40888977050781, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.43902439024390244, |
|
"grad_norm": 0.0003061926399823278, |
|
"learning_rate": 0.00019745029836206813, |
|
"logits/chosen": -0.6794779896736145, |
|
"logits/rejected": -0.8602011203765869, |
|
"logps/chosen": -894.3270263671875, |
|
"logps/rejected": -1067.5921630859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.433198928833008, |
|
"rewards/margins": 17.333955764770508, |
|
"rewards/rejected": -30.767154693603516, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.45528455284552843, |
|
"grad_norm": 3.805017101399244e-08, |
|
"learning_rate": 0.00019714298932647098, |
|
"logits/chosen": 0.4980026185512543, |
|
"logits/rejected": 0.6999194025993347, |
|
"logps/chosen": -911.8473510742188, |
|
"logps/rejected": -1126.07421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5412168502807617, |
|
"rewards/margins": 29.520708084106445, |
|
"rewards/rejected": -30.06192398071289, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.4715447154471545, |
|
"grad_norm": 5.17633900187775e-08, |
|
"learning_rate": 0.00019681846633085967, |
|
"logits/chosen": -0.5973828434944153, |
|
"logits/rejected": -0.8376109600067139, |
|
"logps/chosen": -711.66259765625, |
|
"logps/rejected": -1186.1884765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.467390537261963, |
|
"rewards/margins": 25.050704956054688, |
|
"rewards/rejected": -27.518096923828125, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 0.00011633769463514909, |
|
"learning_rate": 0.0001964767868814516, |
|
"logits/chosen": 1.3797093629837036, |
|
"logits/rejected": 1.5397391319274902, |
|
"logps/chosen": -877.42333984375, |
|
"logps/rejected": -1003.4732666015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.624107360839844, |
|
"rewards/margins": 29.784557342529297, |
|
"rewards/rejected": -25.160449981689453, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5040650406504065, |
|
"grad_norm": 6.257723228486611e-09, |
|
"learning_rate": 0.00019611801152462715, |
|
"logits/chosen": 1.2731826305389404, |
|
"logits/rejected": 1.6379995346069336, |
|
"logps/chosen": -1053.573486328125, |
|
"logps/rejected": -1010.915283203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.018058776855469, |
|
"rewards/margins": 32.15219497680664, |
|
"rewards/rejected": -21.13413429260254, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.5203252032520326, |
|
"grad_norm": 0.00035472630406729877, |
|
"learning_rate": 0.00019574220383620055, |
|
"logits/chosen": 0.6649560928344727, |
|
"logits/rejected": 0.983564019203186, |
|
"logps/chosen": -872.1873168945312, |
|
"logps/rejected": -965.9480590820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.504961967468262, |
|
"rewards/margins": 23.669071197509766, |
|
"rewards/rejected": -18.164108276367188, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.5365853658536586, |
|
"grad_norm": 3.0934195820009336e-05, |
|
"learning_rate": 0.00019534943041015423, |
|
"logits/chosen": 0.49574941396713257, |
|
"logits/rejected": 0.5190873742103577, |
|
"logps/chosen": -708.9269409179688, |
|
"logps/rejected": -842.974365234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.209194660186768, |
|
"rewards/margins": 20.690357208251953, |
|
"rewards/rejected": -13.48116397857666, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.5528455284552846, |
|
"grad_norm": 0.0006856573163531721, |
|
"learning_rate": 0.00019493976084683813, |
|
"logits/chosen": 0.992796778678894, |
|
"logits/rejected": 1.1291236877441406, |
|
"logps/chosen": -673.6188354492188, |
|
"logps/rejected": -723.4482421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.3715057373046875, |
|
"rewards/margins": 19.963485717773438, |
|
"rewards/rejected": -14.591980934143066, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.5691056910569106, |
|
"grad_norm": 5.983891969663091e-05, |
|
"learning_rate": 0.00019451326774063636, |
|
"logits/chosen": 0.7630600929260254, |
|
"logits/rejected": 0.910960853099823, |
|
"logps/chosen": -993.23828125, |
|
"logps/rejected": -1011.3184204101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.109509468078613, |
|
"rewards/margins": 24.603878021240234, |
|
"rewards/rejected": -17.494367599487305, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5853658536585366, |
|
"grad_norm": 1.9749455532291904e-05, |
|
"learning_rate": 0.00019407002666710336, |
|
"logits/chosen": 1.8401339054107666, |
|
"logits/rejected": 1.9955703020095825, |
|
"logps/chosen": -1152.950927734375, |
|
"logps/rejected": -827.0269775390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.768245697021484, |
|
"rewards/margins": 38.1776123046875, |
|
"rewards/rejected": -22.40936851501465, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.6016260162601627, |
|
"grad_norm": 0.0017285533249378204, |
|
"learning_rate": 0.00019361011616957164, |
|
"logits/chosen": 2.153351306915283, |
|
"logits/rejected": 2.235447883605957, |
|
"logps/chosen": -1090.1943359375, |
|
"logps/rejected": -682.7992553710938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.726329803466797, |
|
"rewards/margins": 24.018630981445312, |
|
"rewards/rejected": -12.292303085327148, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.6178861788617886, |
|
"grad_norm": 0.00919501855969429, |
|
"learning_rate": 0.00019313361774523385, |
|
"logits/chosen": 0.47314736247062683, |
|
"logits/rejected": 0.557833731174469, |
|
"logps/chosen": -691.4217529296875, |
|
"logps/rejected": -673.1847534179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.087795257568359, |
|
"rewards/margins": 12.628225326538086, |
|
"rewards/rejected": -6.540430068969727, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.6341463414634146, |
|
"grad_norm": 0.002680833451449871, |
|
"learning_rate": 0.00019264061583070127, |
|
"logits/chosen": 0.20066705346107483, |
|
"logits/rejected": 0.2085224837064743, |
|
"logps/chosen": -693.7376098632812, |
|
"logps/rejected": -982.19091796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.779763221740723, |
|
"rewards/margins": 22.904094696044922, |
|
"rewards/rejected": -15.124334335327148, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.6504065040650406, |
|
"grad_norm": 8.798202907200903e-05, |
|
"learning_rate": 0.00019213119778704128, |
|
"logits/chosen": 1.3898746967315674, |
|
"logits/rejected": 1.5520107746124268, |
|
"logps/chosen": -1247.770263671875, |
|
"logps/rejected": -916.4830322265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.276836395263672, |
|
"rewards/margins": 34.69191360473633, |
|
"rewards/rejected": -19.415077209472656, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.0009758697124198079, |
|
"learning_rate": 0.00019160545388429708, |
|
"logits/chosen": 2.345059633255005, |
|
"logits/rejected": 2.5746054649353027, |
|
"logps/chosen": -1102.5548095703125, |
|
"logps/rejected": -722.4332885742188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.800348281860352, |
|
"rewards/margins": 32.747169494628906, |
|
"rewards/rejected": -18.946823120117188, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.6829268292682927, |
|
"grad_norm": 0.0016077810432761908, |
|
"learning_rate": 0.00019106347728549135, |
|
"logits/chosen": 0.9104095697402954, |
|
"logits/rejected": 0.9921329021453857, |
|
"logps/chosen": -753.8040771484375, |
|
"logps/rejected": -886.5813598632812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.367500305175781, |
|
"rewards/margins": 27.856563568115234, |
|
"rewards/rejected": -16.489063262939453, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.6991869918699187, |
|
"grad_norm": 0.0004074655589647591, |
|
"learning_rate": 0.0001905053640301176, |
|
"logits/chosen": 0.5256392955780029, |
|
"logits/rejected": 0.4733426570892334, |
|
"logps/chosen": -715.4669189453125, |
|
"logps/rejected": -565.0441284179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.25009822845459, |
|
"rewards/margins": 21.391075134277344, |
|
"rewards/rejected": -15.14097785949707, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.7154471544715447, |
|
"grad_norm": 0.013145952485501766, |
|
"learning_rate": 0.00018993121301712193, |
|
"logits/chosen": 0.9358551502227783, |
|
"logits/rejected": 0.8306156992912292, |
|
"logps/chosen": -867.1063232421875, |
|
"logps/rejected": -973.7214965820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.3925018310546875, |
|
"rewards/margins": 21.35105323791504, |
|
"rewards/rejected": -13.958552360534668, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 8.829876605886966e-05, |
|
"learning_rate": 0.00018934112598737777, |
|
"logits/chosen": 2.2844998836517334, |
|
"logits/rejected": 2.831254482269287, |
|
"logps/chosen": -1142.8726806640625, |
|
"logps/rejected": -776.1110229492188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.17538833618164, |
|
"rewards/margins": 33.72625732421875, |
|
"rewards/rejected": -16.550867080688477, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.7479674796747967, |
|
"grad_norm": 0.02624354511499405, |
|
"learning_rate": 0.00018873520750565718, |
|
"logits/chosen": 0.1806122362613678, |
|
"logits/rejected": 0.31054702401161194, |
|
"logps/chosen": -692.7060546875, |
|
"logps/rejected": -1032.708740234375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.434965133666992, |
|
"rewards/margins": 16.74932098388672, |
|
"rewards/rejected": -10.314356803894043, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.7642276422764228, |
|
"grad_norm": 4.268178963684477e-05, |
|
"learning_rate": 0.00018811356494210165, |
|
"logits/chosen": 1.1679103374481201, |
|
"logits/rejected": 1.0418663024902344, |
|
"logps/chosen": -720.220703125, |
|
"logps/rejected": -911.58837890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.991888523101807, |
|
"rewards/margins": 21.064565658569336, |
|
"rewards/rejected": -13.072675704956055, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.7804878048780488, |
|
"grad_norm": 0.0009461237932555377, |
|
"learning_rate": 0.00018747630845319612, |
|
"logits/chosen": 0.13339552283287048, |
|
"logits/rejected": 0.3655449151992798, |
|
"logps/chosen": -420.11431884765625, |
|
"logps/rejected": -786.4783325195312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.16606330871582, |
|
"rewards/margins": 30.41803741455078, |
|
"rewards/rejected": -19.251976013183594, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.7967479674796748, |
|
"grad_norm": 0.0033115639816969633, |
|
"learning_rate": 0.00018682355096224872, |
|
"logits/chosen": 0.4472777247428894, |
|
"logits/rejected": 0.3390260934829712, |
|
"logps/chosen": -536.7960205078125, |
|
"logps/rejected": -901.3749389648438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.887458801269531, |
|
"rewards/margins": 27.701595306396484, |
|
"rewards/rejected": -16.814136505126953, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"grad_norm": 0.01153454091399908, |
|
"learning_rate": 0.0001861554081393806, |
|
"logits/chosen": 0.6489148139953613, |
|
"logits/rejected": 0.689254105091095, |
|
"logps/chosen": -738.5593872070312, |
|
"logps/rejected": -755.362060546875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.205413818359375, |
|
"rewards/margins": 16.344358444213867, |
|
"rewards/rejected": -6.138944625854492, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8292682926829268, |
|
"grad_norm": 0.001985176932066679, |
|
"learning_rate": 0.00018547199838102904, |
|
"logits/chosen": 0.144524484872818, |
|
"logits/rejected": 0.26266002655029297, |
|
"logps/chosen": -893.19482421875, |
|
"logps/rejected": -1031.27294921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.087849617004395, |
|
"rewards/margins": 23.393884658813477, |
|
"rewards/rejected": -14.306035041809082, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.8455284552845529, |
|
"grad_norm": 0.00042794409091584384, |
|
"learning_rate": 0.0001847734427889671, |
|
"logits/chosen": 0.5121033191680908, |
|
"logits/rejected": 1.0676312446594238, |
|
"logps/chosen": -987.8340454101562, |
|
"logps/rejected": -830.7366943359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.409669876098633, |
|
"rewards/margins": 19.569660186767578, |
|
"rewards/rejected": -8.159988403320312, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.8617886178861789, |
|
"grad_norm": 0.0011688657104969025, |
|
"learning_rate": 0.00018405986514884434, |
|
"logits/chosen": 1.793473243713379, |
|
"logits/rejected": 1.9872632026672363, |
|
"logps/chosen": -926.424560546875, |
|
"logps/rejected": -618.4228515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.011417388916016, |
|
"rewards/margins": 22.01776123046875, |
|
"rewards/rejected": -11.006343841552734, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.8780487804878049, |
|
"grad_norm": 0.005157554987818003, |
|
"learning_rate": 0.0001833313919082515, |
|
"logits/chosen": -0.02910199761390686, |
|
"logits/rejected": 0.14243453741073608, |
|
"logps/chosen": -725.36376953125, |
|
"logps/rejected": -997.5311279296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.557222366333008, |
|
"rewards/margins": 15.359309196472168, |
|
"rewards/rejected": -9.802087783813477, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.8943089430894309, |
|
"grad_norm": 0.005044507794082165, |
|
"learning_rate": 0.00018258815215431396, |
|
"logits/chosen": 0.17898443341255188, |
|
"logits/rejected": 0.09989897906780243, |
|
"logps/chosen": -803.9798583984375, |
|
"logps/rejected": -925.3179321289062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.798739433288574, |
|
"rewards/margins": 17.492319107055664, |
|
"rewards/rejected": -10.69357967376709, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.9105691056910569, |
|
"grad_norm": 0.0031374047975987196, |
|
"learning_rate": 0.0001818302775908169, |
|
"logits/chosen": 1.017639398574829, |
|
"logits/rejected": 1.2823631763458252, |
|
"logps/chosen": -824.6445922851562, |
|
"logps/rejected": -860.8942260742188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.019498825073242, |
|
"rewards/margins": 16.16924285888672, |
|
"rewards/rejected": -10.149742126464844, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.926829268292683, |
|
"grad_norm": 0.00014241511235013604, |
|
"learning_rate": 0.0001810579025148674, |
|
"logits/chosen": 1.0959478616714478, |
|
"logits/rejected": 0.9008815288543701, |
|
"logps/chosen": -782.0526123046875, |
|
"logps/rejected": -916.8338623046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.443077087402344, |
|
"rewards/margins": 24.263744354248047, |
|
"rewards/rejected": -15.820667266845703, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.943089430894309, |
|
"grad_norm": 5.913816494285129e-05, |
|
"learning_rate": 0.00018027116379309638, |
|
"logits/chosen": 0.2709883153438568, |
|
"logits/rejected": 0.29769933223724365, |
|
"logps/chosen": -735.5257568359375, |
|
"logps/rejected": -1044.0601806640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.65300178527832, |
|
"rewards/margins": 18.755083084106445, |
|
"rewards/rejected": -10.102080345153809, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.959349593495935, |
|
"grad_norm": 0.01578771322965622, |
|
"learning_rate": 0.00017947020083740575, |
|
"logits/chosen": 1.5522100925445557, |
|
"logits/rejected": 1.7518442869186401, |
|
"logps/chosen": -1019.1099853515625, |
|
"logps/rejected": -624.6131591796875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.32003402709961, |
|
"rewards/margins": 23.75770378112793, |
|
"rewards/rejected": -13.43766975402832, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 0.0010152229806408286, |
|
"learning_rate": 0.00017865515558026428, |
|
"logits/chosen": 0.8601479530334473, |
|
"logits/rejected": 0.819040060043335, |
|
"logps/chosen": -763.342041015625, |
|
"logps/rejected": -817.870849609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.2501859664917, |
|
"rewards/margins": 16.491539001464844, |
|
"rewards/rejected": -8.241353034973145, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.991869918699187, |
|
"grad_norm": 0.008696873672306538, |
|
"learning_rate": 0.0001778261724495566, |
|
"logits/chosen": 0.7409014701843262, |
|
"logits/rejected": 0.9245580434799194, |
|
"logps/chosen": -888.8350830078125, |
|
"logps/rejected": -796.002685546875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.07230281829834, |
|
"rewards/margins": 22.53582000732422, |
|
"rewards/rejected": -11.463518142700195, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.3132517526391894e-05, |
|
"learning_rate": 0.00017698339834299061, |
|
"logits/chosen": 0.962340772151947, |
|
"logits/rejected": 1.369040608406067, |
|
"logps/chosen": -843.8861083984375, |
|
"logps/rejected": -833.0137329101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.60971736907959, |
|
"rewards/margins": 22.649456024169922, |
|
"rewards/rejected": -15.039739608764648, |
|
"step": 62 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 246, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 62, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|