nicoboss's picture
Upload folder using huggingface_hub
5fadd63 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9918699186991868,
"eval_steps": 500,
"global_step": 123,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016260162601626018,
"grad_norm": 19.880552291870117,
"learning_rate": 2e-05,
"logits/chosen": 0.20684528350830078,
"logits/rejected": 0.4346590042114258,
"logps/chosen": -777.121826171875,
"logps/rejected": -997.1637573242188,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.032520325203252036,
"grad_norm": 20.27885627746582,
"learning_rate": 4e-05,
"logits/chosen": 0.12451896071434021,
"logits/rejected": 0.3398062586784363,
"logps/chosen": -841.6675415039062,
"logps/rejected": -988.1629638671875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 2
},
{
"epoch": 0.04878048780487805,
"grad_norm": 390.8882141113281,
"learning_rate": 6e-05,
"logits/chosen": 0.14335429668426514,
"logits/rejected": 0.32437634468078613,
"logps/chosen": -876.8231811523438,
"logps/rejected": -1356.0509033203125,
"loss": 0.6706,
"rewards/accuracies": 0.25,
"rewards/chosen": -0.12680970132350922,
"rewards/margins": -0.06611938774585724,
"rewards/rejected": -0.06069030612707138,
"step": 3
},
{
"epoch": 0.06504065040650407,
"grad_norm": 21.47028923034668,
"learning_rate": 8e-05,
"logits/chosen": 0.7833376526832581,
"logits/rejected": 1.1811182498931885,
"logps/chosen": -1178.9454345703125,
"logps/rejected": -974.9606323242188,
"loss": 0.6883,
"rewards/accuracies": 0.25,
"rewards/chosen": -0.11406403034925461,
"rewards/margins": -0.005326844751834869,
"rewards/rejected": -0.10873718559741974,
"step": 4
},
{
"epoch": 0.08130081300813008,
"grad_norm": 40.24486541748047,
"learning_rate": 0.0001,
"logits/chosen": -0.44922593235969543,
"logits/rejected": -0.6411373019218445,
"logps/chosen": -559.5548706054688,
"logps/rejected": -1254.8680419921875,
"loss": 0.4832,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.34520798921585083,
"rewards/margins": 0.4895774722099304,
"rewards/rejected": -0.834785521030426,
"step": 5
},
{
"epoch": 0.0975609756097561,
"grad_norm": 16.58538818359375,
"learning_rate": 0.00012,
"logits/chosen": 0.9809624552726746,
"logits/rejected": 1.187626838684082,
"logps/chosen": -757.462158203125,
"logps/rejected": -1020.3145141601562,
"loss": 0.4292,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2485191375017166,
"rewards/margins": 0.7915412783622742,
"rewards/rejected": -1.0400605201721191,
"step": 6
},
{
"epoch": 0.11382113821138211,
"grad_norm": 18.358051300048828,
"learning_rate": 0.00014,
"logits/chosen": 1.6894466876983643,
"logits/rejected": 1.6828027963638306,
"logps/chosen": -1125.97412109375,
"logps/rejected": -877.0285034179688,
"loss": 0.3812,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.9222716689109802,
"rewards/margins": 0.32721251249313354,
"rewards/rejected": -1.2494843006134033,
"step": 7
},
{
"epoch": 0.13008130081300814,
"grad_norm": 163.26919555664062,
"learning_rate": 0.00016,
"logits/chosen": -0.45762500166893005,
"logits/rejected": -0.5206366777420044,
"logps/chosen": -705.5869750976562,
"logps/rejected": -1347.400390625,
"loss": 0.288,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.067340850830078,
"rewards/margins": 3.900920867919922,
"rewards/rejected": -6.968262195587158,
"step": 8
},
{
"epoch": 0.14634146341463414,
"grad_norm": 5.863889217376709,
"learning_rate": 0.00018,
"logits/chosen": 0.2462751269340515,
"logits/rejected": 0.21955497562885284,
"logps/chosen": -619.6600341796875,
"logps/rejected": -1208.003662109375,
"loss": 0.0717,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.7182769775390625,
"rewards/margins": 8.603934288024902,
"rewards/rejected": -11.322211265563965,
"step": 9
},
{
"epoch": 0.16260162601626016,
"grad_norm": 0.6885181665420532,
"learning_rate": 0.0002,
"logits/chosen": 1.1071248054504395,
"logits/rejected": 1.1347391605377197,
"logps/chosen": -877.805419921875,
"logps/rejected": -1244.745849609375,
"loss": 0.0068,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.3332839012146,
"rewards/margins": 10.358970642089844,
"rewards/rejected": -15.692255020141602,
"step": 10
},
{
"epoch": 0.17886178861788618,
"grad_norm": 2.558082103729248,
"learning_rate": 0.00019996135574945544,
"logits/chosen": 0.24951541423797607,
"logits/rejected": 0.2528836727142334,
"logps/chosen": -740.1439208984375,
"logps/rejected": -1265.59814453125,
"loss": 0.0097,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.989352226257324,
"rewards/margins": 19.463153839111328,
"rewards/rejected": -27.45250701904297,
"step": 11
},
{
"epoch": 0.1951219512195122,
"grad_norm": 0.0005222362815402448,
"learning_rate": 0.0001998454528653836,
"logits/chosen": 0.6122381687164307,
"logits/rejected": 0.8588502407073975,
"logps/chosen": -879.779296875,
"logps/rejected": -1585.720947265625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -18.228717803955078,
"rewards/margins": 32.099365234375,
"rewards/rejected": -50.32808303833008,
"step": 12
},
{
"epoch": 0.21138211382113822,
"grad_norm": 3.927712168660946e-05,
"learning_rate": 0.00019965238092738643,
"logits/chosen": 1.1087465286254883,
"logits/rejected": 1.5179497003555298,
"logps/chosen": -1257.50830078125,
"logps/rejected": -1163.919677734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -15.7935791015625,
"rewards/margins": 20.931385040283203,
"rewards/rejected": -36.72496032714844,
"step": 13
},
{
"epoch": 0.22764227642276422,
"grad_norm": 0.21046003699302673,
"learning_rate": 0.0001993822891578708,
"logits/chosen": 0.23910227417945862,
"logits/rejected": 0.31048309803009033,
"logps/chosen": -1491.3905029296875,
"logps/rejected": -2108.9990234375,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": -56.71916198730469,
"rewards/margins": 42.71849822998047,
"rewards/rejected": -99.43765258789062,
"step": 14
},
{
"epoch": 0.24390243902439024,
"grad_norm": 591.9841918945312,
"learning_rate": 0.0001990353863067169,
"logits/chosen": 0.5623903870582581,
"logits/rejected": 0.6063950061798096,
"logps/chosen": -1970.40576171875,
"logps/rejected": -2018.9765625,
"loss": 0.5538,
"rewards/accuracies": 0.75,
"rewards/chosen": -86.55944061279297,
"rewards/margins": 29.65001106262207,
"rewards/rejected": -116.2094497680664,
"step": 15
},
{
"epoch": 0.2601626016260163,
"grad_norm": 90.19036865234375,
"learning_rate": 0.00019861194048993863,
"logits/chosen": 0.6143627166748047,
"logits/rejected": 0.7420700788497925,
"logps/chosen": -1821.3201904296875,
"logps/rejected": -1930.827880859375,
"loss": 1.0906,
"rewards/accuracies": 0.75,
"rewards/chosen": -76.42454528808594,
"rewards/margins": 28.595970153808594,
"rewards/rejected": -105.02052307128906,
"step": 16
},
{
"epoch": 0.2764227642276423,
"grad_norm": 0.0009420510032214224,
"learning_rate": 0.0001981122789824607,
"logits/chosen": 0.20949414372444153,
"logits/rejected": 0.1935410499572754,
"logps/chosen": -1610.02783203125,
"logps/rejected": -2431.318359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -65.77059936523438,
"rewards/margins": 73.17414855957031,
"rewards/rejected": -138.94476318359375,
"step": 17
},
{
"epoch": 0.2926829268292683,
"grad_norm": 132.33953857421875,
"learning_rate": 0.00019753678796517282,
"logits/chosen": 0.728495717048645,
"logits/rejected": 1.0449868440628052,
"logps/chosen": -1515.9527587890625,
"logps/rejected": -1517.2254638671875,
"loss": 2.6435,
"rewards/accuracies": 0.5,
"rewards/chosen": -61.27394104003906,
"rewards/margins": 20.481342315673828,
"rewards/rejected": -81.75528717041016,
"step": 18
},
{
"epoch": 0.3089430894308943,
"grad_norm": 0.00032979066600091755,
"learning_rate": 0.00019688591222645607,
"logits/chosen": 0.8106945753097534,
"logits/rejected": 0.6099438071250916,
"logps/chosen": -1138.11767578125,
"logps/rejected": -1558.903076171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -46.01788330078125,
"rewards/margins": 41.312171936035156,
"rewards/rejected": -87.33006286621094,
"step": 19
},
{
"epoch": 0.3252032520325203,
"grad_norm": 0.22872093319892883,
"learning_rate": 0.0001961601548184129,
"logits/chosen": -0.05689544230699539,
"logits/rejected": 0.0633389949798584,
"logps/chosen": -1466.4468994140625,
"logps/rejected": -2267.798828125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -76.84449005126953,
"rewards/margins": 48.28419494628906,
"rewards/rejected": -125.12869262695312,
"step": 20
},
{
"epoch": 0.34146341463414637,
"grad_norm": 1.10204017162323,
"learning_rate": 0.00019536007666806556,
"logits/chosen": 0.5605583786964417,
"logits/rejected": 0.45388907194137573,
"logps/chosen": -1369.92529296875,
"logps/rejected": -1706.2607421875,
"loss": 0.003,
"rewards/accuracies": 1.0,
"rewards/chosen": -33.74466323852539,
"rewards/margins": 45.32139587402344,
"rewards/rejected": -79.06605529785156,
"step": 21
},
{
"epoch": 0.35772357723577236,
"grad_norm": 0.7084241509437561,
"learning_rate": 0.0001944862961438239,
"logits/chosen": 0.7291379570960999,
"logits/rejected": 0.9067746996879578,
"logps/chosen": -998.4527587890625,
"logps/rejected": -1456.096923828125,
"loss": 0.0025,
"rewards/accuracies": 1.0,
"rewards/chosen": -19.574996948242188,
"rewards/margins": 45.93708038330078,
"rewards/rejected": -65.51207733154297,
"step": 22
},
{
"epoch": 0.37398373983739835,
"grad_norm": 3.134854793548584,
"learning_rate": 0.00019353948857755803,
"logits/chosen": 0.9795281887054443,
"logits/rejected": 0.8698853850364685,
"logps/chosen": -1127.320068359375,
"logps/rejected": -1399.870849609375,
"loss": 0.0096,
"rewards/accuracies": 1.0,
"rewards/chosen": -28.826623916625977,
"rewards/margins": 29.93848419189453,
"rewards/rejected": -58.765106201171875,
"step": 23
},
{
"epoch": 0.3902439024390244,
"grad_norm": 2.085594654083252,
"learning_rate": 0.00019252038574264405,
"logits/chosen": 0.17023050785064697,
"logits/rejected": -0.1173945814371109,
"logps/chosen": -1615.32568359375,
"logps/rejected": -2291.47509765625,
"loss": 0.0021,
"rewards/accuracies": 1.0,
"rewards/chosen": -82.27009582519531,
"rewards/margins": 44.62742614746094,
"rewards/rejected": -126.89752197265625,
"step": 24
},
{
"epoch": 0.4065040650406504,
"grad_norm": 7.152135367505252e-05,
"learning_rate": 0.00019142977528838762,
"logits/chosen": 0.6659821271896362,
"logits/rejected": 0.6975608468055725,
"logps/chosen": -1023.6649169921875,
"logps/rejected": -1710.140380859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -33.36669921875,
"rewards/margins": 49.14038848876953,
"rewards/rejected": -82.50708770751953,
"step": 25
},
{
"epoch": 0.42276422764227645,
"grad_norm": 2.22769040192361e-06,
"learning_rate": 0.00019026850013126157,
"logits/chosen": -0.624580442905426,
"logits/rejected": -0.42581236362457275,
"logps/chosen": -1117.0599365234375,
"logps/rejected": -2134.2626953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -57.8393669128418,
"rewards/margins": 44.58246994018555,
"rewards/rejected": -102.42182922363281,
"step": 26
},
{
"epoch": 0.43902439024390244,
"grad_norm": 0.7476986050605774,
"learning_rate": 0.00018903745780342839,
"logits/chosen": 0.17943906784057617,
"logits/rejected": 0.21112221479415894,
"logps/chosen": -1208.960205078125,
"logps/rejected": -1999.635009765625,
"loss": 0.0018,
"rewards/accuracies": 1.0,
"rewards/chosen": -55.38972473144531,
"rewards/margins": 40.17228317260742,
"rewards/rejected": -95.56201171875,
"step": 27
},
{
"epoch": 0.45528455284552843,
"grad_norm": 0.6162808537483215,
"learning_rate": 0.00018773759975905098,
"logits/chosen": 0.15270072221755981,
"logits/rejected": 0.32134106755256653,
"logps/chosen": -1206.7701416015625,
"logps/rejected": -2007.0269775390625,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/chosen": -38.11735916137695,
"rewards/margins": 50.446754455566406,
"rewards/rejected": -88.5641098022461,
"step": 28
},
{
"epoch": 0.4715447154471545,
"grad_norm": 8.754213354222884e-07,
"learning_rate": 0.0001863699306389282,
"logits/chosen": 0.8678311109542847,
"logits/rejected": 0.8028951287269592,
"logps/chosen": -1161.56591796875,
"logps/rejected": -1967.0069580078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -15.882237434387207,
"rewards/margins": 65.84603881835938,
"rewards/rejected": -81.72827911376953,
"step": 29
},
{
"epoch": 0.4878048780487805,
"grad_norm": 0.0023462281096726656,
"learning_rate": 0.00018493550749402278,
"logits/chosen": 1.54906165599823,
"logits/rejected": 1.6790410280227661,
"logps/chosen": -951.4666748046875,
"logps/rejected": -1339.60107421875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.993054389953613,
"rewards/margins": 40.59773635864258,
"rewards/rejected": -47.590789794921875,
"step": 30
},
{
"epoch": 0.5040650406504065,
"grad_norm": 0.00014203626778908074,
"learning_rate": 0.00018343543896848273,
"logits/chosen": 1.832588791847229,
"logits/rejected": 1.6241607666015625,
"logps/chosen": -1032.7232666015625,
"logps/rejected": -1197.1595458984375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -14.2398042678833,
"rewards/margins": 28.274524688720703,
"rewards/rejected": -42.51432800292969,
"step": 31
},
{
"epoch": 0.5203252032520326,
"grad_norm": 2.814833402633667,
"learning_rate": 0.00018187088444278674,
"logits/chosen": 2.1444239616394043,
"logits/rejected": 1.8101916313171387,
"logps/chosen": -874.6080322265625,
"logps/rejected": -1012.015625,
"loss": 0.0062,
"rewards/accuracies": 1.0,
"rewards/chosen": -13.471307754516602,
"rewards/margins": 20.194053649902344,
"rewards/rejected": -33.66536331176758,
"step": 32
},
{
"epoch": 0.5365853658536586,
"grad_norm": 0.06849005818367004,
"learning_rate": 0.00018024305313767646,
"logits/chosen": 1.9995535612106323,
"logits/rejected": 1.8331811428070068,
"logps/chosen": -1230.6785888671875,
"logps/rejected": -1346.717041015625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -10.62438678741455,
"rewards/margins": 31.655826568603516,
"rewards/rejected": -42.280216217041016,
"step": 33
},
{
"epoch": 0.5528455284552846,
"grad_norm": 0.01905296929180622,
"learning_rate": 0.00017855320317956784,
"logits/chosen": 1.1833341121673584,
"logits/rejected": 1.240072250366211,
"logps/chosen": -841.6439208984375,
"logps/rejected": -1193.967041015625,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -15.020572662353516,
"rewards/margins": 28.115928649902344,
"rewards/rejected": -43.136505126953125,
"step": 34
},
{
"epoch": 0.5691056910569106,
"grad_norm": 1.866630009317305e-05,
"learning_rate": 0.0001768026406281642,
"logits/chosen": 1.0859436988830566,
"logits/rejected": 1.226615309715271,
"logps/chosen": -1046.376708984375,
"logps/rejected": -1418.09228515625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -13.104580879211426,
"rewards/margins": 34.29302978515625,
"rewards/rejected": -47.397613525390625,
"step": 35
},
{
"epoch": 0.5853658536585366,
"grad_norm": 0.0032898751087486744,
"learning_rate": 0.00017499271846702213,
"logits/chosen": -0.23074638843536377,
"logits/rejected": -0.09211879968643188,
"logps/chosen": -1246.923095703125,
"logps/rejected": -2060.51123046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -44.84193801879883,
"rewards/margins": 45.95753479003906,
"rewards/rejected": -90.79946899414062,
"step": 36
},
{
"epoch": 0.6016260162601627,
"grad_norm": 0.008372440002858639,
"learning_rate": 0.00017312483555785086,
"logits/chosen": 0.5074482560157776,
"logits/rejected": 0.48830437660217285,
"logps/chosen": -920.7339477539062,
"logps/rejected": -1666.024658203125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -18.29103660583496,
"rewards/margins": 32.98884582519531,
"rewards/rejected": -51.27988052368164,
"step": 37
},
{
"epoch": 0.6178861788617886,
"grad_norm": 0.0008834120817482471,
"learning_rate": 0.00017120043555935298,
"logits/chosen": 1.3600270748138428,
"logits/rejected": 1.2087562084197998,
"logps/chosen": -1251.687744140625,
"logps/rejected": -1775.605224609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -19.525299072265625,
"rewards/margins": 45.839603424072266,
"rewards/rejected": -65.36489868164062,
"step": 38
},
{
"epoch": 0.6341463414634146,
"grad_norm": 9.272828901885077e-05,
"learning_rate": 0.00016922100581144228,
"logits/chosen": 1.4009983539581299,
"logits/rejected": 1.2046518325805664,
"logps/chosen": -1155.6650390625,
"logps/rejected": -1281.83740234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -16.521747589111328,
"rewards/margins": 24.7418155670166,
"rewards/rejected": -41.2635612487793,
"step": 39
},
{
"epoch": 0.6504065040650406,
"grad_norm": 0.0009182749781757593,
"learning_rate": 0.00016718807618570106,
"logits/chosen": 1.3781325817108154,
"logits/rejected": 1.565840244293213,
"logps/chosen": -1133.72216796875,
"logps/rejected": -1346.7265625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -9.05687427520752,
"rewards/margins": 18.654136657714844,
"rewards/rejected": -27.711009979248047,
"step": 40
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.004382506478577852,
"learning_rate": 0.00016510321790296525,
"logits/chosen": 1.1266183853149414,
"logits/rejected": 1.2493317127227783,
"logps/chosen": -926.239501953125,
"logps/rejected": -1293.30322265625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -11.177988052368164,
"rewards/margins": 22.40888786315918,
"rewards/rejected": -33.586875915527344,
"step": 41
},
{
"epoch": 0.6829268292682927,
"grad_norm": 0.15565475821495056,
"learning_rate": 0.00016296804231895142,
"logits/chosen": 1.099910020828247,
"logits/rejected": 0.820236086845398,
"logps/chosen": -626.5668334960938,
"logps/rejected": -1386.260498046875,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/chosen": -10.778373718261719,
"rewards/margins": 27.383846282958984,
"rewards/rejected": -38.16221618652344,
"step": 42
},
{
"epoch": 0.6991869918699187,
"grad_norm": 3.971878322772682e-05,
"learning_rate": 0.00016078419967886402,
"logits/chosen": 1.4016125202178955,
"logits/rejected": 1.5134223699569702,
"logps/chosen": -1066.9713134765625,
"logps/rejected": -1517.39208984375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -11.4629487991333,
"rewards/margins": 27.75263214111328,
"rewards/rejected": -39.215576171875,
"step": 43
},
{
"epoch": 0.7154471544715447,
"grad_norm": 0.004684010986238718,
"learning_rate": 0.00015855337784194577,
"logits/chosen": 1.989326000213623,
"logits/rejected": 2.3816940784454346,
"logps/chosen": -956.5921630859375,
"logps/rejected": -1014.5316162109375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.150079727172852,
"rewards/margins": 12.83597183227539,
"rewards/rejected": -18.986051559448242,
"step": 44
},
{
"epoch": 0.7317073170731707,
"grad_norm": 0.03292777016758919,
"learning_rate": 0.00015627730097695638,
"logits/chosen": 2.072270631790161,
"logits/rejected": 2.0922999382019043,
"logps/chosen": -1218.990478515625,
"logps/rejected": -1251.8980712890625,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.599820137023926,
"rewards/margins": 19.980201721191406,
"rewards/rejected": -27.580020904541016,
"step": 45
},
{
"epoch": 0.7479674796747967,
"grad_norm": 0.06399545818567276,
"learning_rate": 0.00015395772822958845,
"logits/chosen": 1.245821475982666,
"logits/rejected": 1.3717162609100342,
"logps/chosen": -960.6263427734375,
"logps/rejected": -1502.2239990234375,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": -8.884254455566406,
"rewards/margins": 28.055803298950195,
"rewards/rejected": -36.94005584716797,
"step": 46
},
{
"epoch": 0.7642276422764228,
"grad_norm": 0.022615160793066025,
"learning_rate": 0.0001515964523628501,
"logits/chosen": 1.4772993326187134,
"logits/rejected": 1.3233076333999634,
"logps/chosen": -900.41552734375,
"logps/rejected": -1422.0224609375,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -8.169479370117188,
"rewards/margins": 29.0593204498291,
"rewards/rejected": -37.228797912597656,
"step": 47
},
{
"epoch": 0.7804878048780488,
"grad_norm": 0.7834580540657043,
"learning_rate": 0.00014919529837146528,
"logits/chosen": 2.019958019256592,
"logits/rejected": 2.0058090686798096,
"logps/chosen": -908.94970703125,
"logps/rejected": -1153.9830322265625,
"loss": 0.004,
"rewards/accuracies": 1.0,
"rewards/chosen": -10.564983367919922,
"rewards/margins": 15.311219215393066,
"rewards/rejected": -25.87619972229004,
"step": 48
},
{
"epoch": 0.7967479674796748,
"grad_norm": 0.0006066004862077534,
"learning_rate": 0.0001467561220713628,
"logits/chosen": 1.297697901725769,
"logits/rejected": 1.5303912162780762,
"logps/chosen": -1167.181640625,
"logps/rejected": -1485.501953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -11.699865341186523,
"rewards/margins": 47.49958801269531,
"rewards/rejected": -59.19945526123047,
"step": 49
},
{
"epoch": 0.8130081300813008,
"grad_norm": 0.03268749639391899,
"learning_rate": 0.00014428080866534396,
"logits/chosen": 0.707965612411499,
"logits/rejected": 0.7305536866188049,
"logps/chosen": -1051.2691650390625,
"logps/rejected": -1463.647705078125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -14.360027313232422,
"rewards/margins": 24.690279006958008,
"rewards/rejected": -39.05030822753906,
"step": 50
},
{
"epoch": 0.8292682926829268,
"grad_norm": 0.06594517827033997,
"learning_rate": 0.00014177127128603745,
"logits/chosen": 1.219120740890503,
"logits/rejected": 1.2810195684432983,
"logps/chosen": -1020.8298950195312,
"logps/rejected": -1290.2015380859375,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -12.565038681030273,
"rewards/margins": 20.74908447265625,
"rewards/rejected": -33.314125061035156,
"step": 51
},
{
"epoch": 0.8455284552845529,
"grad_norm": 0.008960689418017864,
"learning_rate": 0.0001392294495172681,
"logits/chosen": 0.49424344301223755,
"logits/rejected": 0.4817698895931244,
"logps/chosen": -988.3806762695312,
"logps/rejected": -1388.4130859375,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -14.987248420715332,
"rewards/margins": 38.28583908081055,
"rewards/rejected": -53.27308654785156,
"step": 52
},
{
"epoch": 0.8617886178861789,
"grad_norm": 4.988933142158203e-07,
"learning_rate": 0.0001366573078949813,
"logits/chosen": -0.09240919351577759,
"logits/rejected": -0.1942935436964035,
"logps/chosen": -863.5594482421875,
"logps/rejected": -1951.684814453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -21.636280059814453,
"rewards/margins": 39.47431182861328,
"rewards/rejected": -61.110591888427734,
"step": 53
},
{
"epoch": 0.8780487804878049,
"grad_norm": 0.36996814608573914,
"learning_rate": 0.00013405683438888282,
"logits/chosen": 1.8010693788528442,
"logits/rejected": 1.9799494743347168,
"logps/chosen": -1090.9835205078125,
"logps/rejected": -1244.3988037109375,
"loss": 0.0019,
"rewards/accuracies": 1.0,
"rewards/chosen": -10.118224143981934,
"rewards/margins": 23.42540740966797,
"rewards/rejected": -33.54362869262695,
"step": 54
},
{
"epoch": 0.8943089430894309,
"grad_norm": 0.0004369132802821696,
"learning_rate": 0.00013143003886596669,
"logits/chosen": 1.255205750465393,
"logits/rejected": 1.1578245162963867,
"logps/chosen": -1015.79541015625,
"logps/rejected": -1361.6103515625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -18.066598892211914,
"rewards/margins": 27.31325340270996,
"rewards/rejected": -45.379852294921875,
"step": 55
},
{
"epoch": 0.9105691056910569,
"grad_norm": 3.5815644423564663e-06,
"learning_rate": 0.00012877895153711935,
"logits/chosen": 0.5448588132858276,
"logits/rejected": 0.6314257383346558,
"logps/chosen": -1082.805908203125,
"logps/rejected": -1538.261962890625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -23.810945510864258,
"rewards/margins": 29.520732879638672,
"rewards/rejected": -53.3316764831543,
"step": 56
},
{
"epoch": 0.926829268292683,
"grad_norm": 58.86332702636719,
"learning_rate": 0.00012610562138799978,
"logits/chosen": 1.9793856143951416,
"logits/rejected": 2.0082552433013916,
"logps/chosen": -1352.8492431640625,
"logps/rejected": -1265.2257080078125,
"loss": 0.3774,
"rewards/accuracies": 0.75,
"rewards/chosen": -20.378952026367188,
"rewards/margins": 17.73773193359375,
"rewards/rejected": -38.1166877746582,
"step": 57
},
{
"epoch": 0.943089430894309,
"grad_norm": 5.57162458392213e-08,
"learning_rate": 0.0001234121145954094,
"logits/chosen": 0.7738958597183228,
"logits/rejected": 0.6971035599708557,
"logps/chosen": -927.3837280273438,
"logps/rejected": -1710.65771484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -17.810049057006836,
"rewards/margins": 38.65287780761719,
"rewards/rejected": -56.462928771972656,
"step": 58
},
{
"epoch": 0.959349593495935,
"grad_norm": 0.10466321557760239,
"learning_rate": 0.00012070051293037492,
"logits/chosen": 1.3470133543014526,
"logits/rejected": 1.3975563049316406,
"logps/chosen": -1097.9437255859375,
"logps/rejected": -1693.154541015625,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": -20.652606964111328,
"rewards/margins": 36.89767074584961,
"rewards/rejected": -57.55027770996094,
"step": 59
},
{
"epoch": 0.975609756097561,
"grad_norm": 2.4582501282566227e-05,
"learning_rate": 0.00011797291214917881,
"logits/chosen": 1.379901647567749,
"logits/rejected": 1.2993323802947998,
"logps/chosen": -1204.1943359375,
"logps/rejected": -1411.241455078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -19.423160552978516,
"rewards/margins": 26.866172790527344,
"rewards/rejected": -46.28933334350586,
"step": 60
},
{
"epoch": 0.991869918699187,
"grad_norm": 7.934165478218347e-05,
"learning_rate": 0.0001152314203735805,
"logits/chosen": 1.951298713684082,
"logits/rejected": 2.0110878944396973,
"logps/chosen": -1275.750732421875,
"logps/rejected": -1257.931640625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -16.708940505981445,
"rewards/margins": 21.205249786376953,
"rewards/rejected": -37.914188385009766,
"step": 61
},
{
"epoch": 1.0,
"grad_norm": 2.9418702141015274e-08,
"learning_rate": 0.00011247815646148087,
"logits/chosen": 1.219478964805603,
"logits/rejected": 1.4597835540771484,
"logps/chosen": -1298.3076171875,
"logps/rejected": -1700.546142578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -26.570446014404297,
"rewards/margins": 39.88042449951172,
"rewards/rejected": -66.45086669921875,
"step": 62
},
{
"epoch": 1.016260162601626,
"grad_norm": 0.0003046558704227209,
"learning_rate": 0.0001097152483692886,
"logits/chosen": 1.216448187828064,
"logits/rejected": 1.2576086521148682,
"logps/chosen": -1297.49267578125,
"logps/rejected": -1655.1431884765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -27.540584564208984,
"rewards/margins": 25.584327697753906,
"rewards/rejected": -53.12491226196289,
"step": 63
},
{
"epoch": 1.032520325203252,
"grad_norm": 5.492000604290226e-11,
"learning_rate": 0.00010694483150725458,
"logits/chosen": 0.5165296196937561,
"logits/rejected": 0.5458570122718811,
"logps/chosen": -1003.1471557617188,
"logps/rejected": -1591.346435546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -11.605949401855469,
"rewards/margins": 46.321319580078125,
"rewards/rejected": -57.92727279663086,
"step": 64
},
{
"epoch": 1.048780487804878,
"grad_norm": 0.0003143485519103706,
"learning_rate": 0.00010416904708904548,
"logits/chosen": 0.6694925427436829,
"logits/rejected": 0.6114668846130371,
"logps/chosen": -812.6236572265625,
"logps/rejected": -1500.825439453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -17.084518432617188,
"rewards/margins": 35.370384216308594,
"rewards/rejected": -52.45490264892578,
"step": 65
},
{
"epoch": 1.065040650406504,
"grad_norm": 5.148892228135082e-07,
"learning_rate": 0.00010139004047683151,
"logits/chosen": 1.3868217468261719,
"logits/rejected": 1.2723997831344604,
"logps/chosen": -1227.2484130859375,
"logps/rejected": -1608.285400390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -24.8009033203125,
"rewards/margins": 34.73870086669922,
"rewards/rejected": -59.53960418701172,
"step": 66
},
{
"epoch": 1.08130081300813,
"grad_norm": 0.005973002407699823,
"learning_rate": 9.860995952316851e-05,
"logits/chosen": 0.5520488023757935,
"logits/rejected": 1.013694405555725,
"logps/chosen": -918.3431396484375,
"logps/rejected": -1930.933349609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -17.301834106445312,
"rewards/margins": 54.176063537597656,
"rewards/rejected": -71.4779052734375,
"step": 67
},
{
"epoch": 1.0975609756097562,
"grad_norm": 0.0016096890904009342,
"learning_rate": 9.583095291095453e-05,
"logits/chosen": 1.927367925643921,
"logits/rejected": 2.1797337532043457,
"logps/chosen": -1027.62255859375,
"logps/rejected": -1242.6591796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -10.128509521484375,
"rewards/margins": 44.30337905883789,
"rewards/rejected": -54.431888580322266,
"step": 68
},
{
"epoch": 1.113821138211382,
"grad_norm": 0.00028535688761621714,
"learning_rate": 9.305516849274541e-05,
"logits/chosen": 0.9750661849975586,
"logits/rejected": 1.2060834169387817,
"logps/chosen": -1015.9608154296875,
"logps/rejected": -1445.724609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -13.628022193908691,
"rewards/margins": 35.57917785644531,
"rewards/rejected": -49.20719909667969,
"step": 69
},
{
"epoch": 1.1300813008130082,
"grad_norm": 0.5866624712944031,
"learning_rate": 9.028475163071141e-05,
"logits/chosen": 1.4004566669464111,
"logits/rejected": 1.3820116519927979,
"logps/chosen": -1156.070556640625,
"logps/rejected": -1605.488525390625,
"loss": 0.0021,
"rewards/accuracies": 1.0,
"rewards/chosen": -29.29137420654297,
"rewards/margins": 34.68971633911133,
"rewards/rejected": -63.9810905456543,
"step": 70
},
{
"epoch": 1.146341463414634,
"grad_norm": 0.002478301292285323,
"learning_rate": 8.752184353851916e-05,
"logits/chosen": 0.6324145197868347,
"logits/rejected": 0.6125429272651672,
"logps/chosen": -836.22900390625,
"logps/rejected": -1863.617919921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -19.06183433532715,
"rewards/margins": 52.36142349243164,
"rewards/rejected": -71.42325592041016,
"step": 71
},
{
"epoch": 1.1626016260162602,
"grad_norm": 1.2947886034453404e-06,
"learning_rate": 8.47685796264195e-05,
"logits/chosen": 1.245481014251709,
"logits/rejected": 1.2732493877410889,
"logps/chosen": -1120.00146484375,
"logps/rejected": -1680.321533203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -20.079360961914062,
"rewards/margins": 38.847572326660156,
"rewards/rejected": -58.92693328857422,
"step": 72
},
{
"epoch": 1.1788617886178863,
"grad_norm": 7.430622645188123e-05,
"learning_rate": 8.202708785082121e-05,
"logits/chosen": 1.3398401737213135,
"logits/rejected": 1.310295820236206,
"logps/chosen": -979.2159423828125,
"logps/rejected": -1660.695068359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -17.719205856323242,
"rewards/margins": 44.77515411376953,
"rewards/rejected": -62.494354248046875,
"step": 73
},
{
"epoch": 1.1951219512195121,
"grad_norm": 0.008477458730340004,
"learning_rate": 7.929948706962508e-05,
"logits/chosen": 1.2300162315368652,
"logits/rejected": 1.4617760181427002,
"logps/chosen": -1189.85791015625,
"logps/rejected": -1378.9652099609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -14.7158842086792,
"rewards/margins": 37.057861328125,
"rewards/rejected": -51.77375030517578,
"step": 74
},
{
"epoch": 1.2113821138211383,
"grad_norm": 2.7032048819819465e-05,
"learning_rate": 7.658788540459062e-05,
"logits/chosen": 0.43838104605674744,
"logits/rejected": 0.5289822220802307,
"logps/chosen": -988.083251953125,
"logps/rejected": -1331.2569580078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -17.296829223632812,
"rewards/margins": 34.85190963745117,
"rewards/rejected": -52.14873504638672,
"step": 75
},
{
"epoch": 1.2276422764227641,
"grad_norm": 4.829147570717396e-08,
"learning_rate": 7.389437861200024e-05,
"logits/chosen": 1.997933030128479,
"logits/rejected": 1.9013891220092773,
"logps/chosen": -1068.2757568359375,
"logps/rejected": -1249.0604248046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -14.518118858337402,
"rewards/margins": 28.58959197998047,
"rewards/rejected": -43.10770797729492,
"step": 76
},
{
"epoch": 1.2439024390243902,
"grad_norm": 2.3297241913411426e-10,
"learning_rate": 7.122104846288064e-05,
"logits/chosen": 1.2531983852386475,
"logits/rejected": 1.4057786464691162,
"logps/chosen": -1080.928466796875,
"logps/rejected": -1503.05615234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -14.961380958557129,
"rewards/margins": 36.710487365722656,
"rewards/rejected": -51.67186737060547,
"step": 77
},
{
"epoch": 1.2601626016260163,
"grad_norm": 3.4512660931795835e-05,
"learning_rate": 6.85699611340333e-05,
"logits/chosen": 1.8900461196899414,
"logits/rejected": 2.0945119857788086,
"logps/chosen": -1128.474365234375,
"logps/rejected": -1140.455810546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -12.547296524047852,
"rewards/margins": 22.667064666748047,
"rewards/rejected": -35.214359283447266,
"step": 78
},
{
"epoch": 1.2764227642276422,
"grad_norm": 9.897094059851952e-06,
"learning_rate": 6.594316561111724e-05,
"logits/chosen": 1.3735342025756836,
"logits/rejected": 1.4095773696899414,
"logps/chosen": -899.8128662109375,
"logps/rejected": -1251.731689453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -17.026573181152344,
"rewards/margins": 29.826189041137695,
"rewards/rejected": -46.85276412963867,
"step": 79
},
{
"epoch": 1.2926829268292683,
"grad_norm": 1.6814607079140842e-05,
"learning_rate": 6.334269210501875e-05,
"logits/chosen": 0.5582981705665588,
"logits/rejected": 0.6065884232521057,
"logps/chosen": -1002.4566650390625,
"logps/rejected": -1512.957275390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -22.382816314697266,
"rewards/margins": 31.659029006958008,
"rewards/rejected": -54.041847229003906,
"step": 80
},
{
"epoch": 1.3089430894308944,
"grad_norm": 2.0822379156015813e-05,
"learning_rate": 6.0770550482731924e-05,
"logits/chosen": 0.5204108357429504,
"logits/rejected": 0.6756694912910461,
"logps/chosen": -1329.38134765625,
"logps/rejected": -1816.52392578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -36.05492401123047,
"rewards/margins": 34.550933837890625,
"rewards/rejected": -70.6058578491211,
"step": 81
},
{
"epoch": 1.3252032520325203,
"grad_norm": 3.052237573797356e-08,
"learning_rate": 5.8228728713962543e-05,
"logits/chosen": 0.6427198648452759,
"logits/rejected": 0.7359005212783813,
"logps/chosen": -989.2234497070312,
"logps/rejected": -2282.662841796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -18.713542938232422,
"rewards/margins": 77.4079360961914,
"rewards/rejected": -96.1214828491211,
"step": 82
},
{
"epoch": 1.3414634146341464,
"grad_norm": 0.0013960793148726225,
"learning_rate": 5.571919133465605e-05,
"logits/chosen": 2.0142054557800293,
"logits/rejected": 1.9838088750839233,
"logps/chosen": -1325.515380859375,
"logps/rejected": -1202.38134765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -18.17080307006836,
"rewards/margins": 22.907329559326172,
"rewards/rejected": -41.07813262939453,
"step": 83
},
{
"epoch": 1.3577235772357723,
"grad_norm": 7.671826460864395e-05,
"learning_rate": 5.324387792863719e-05,
"logits/chosen": 1.3578662872314453,
"logits/rejected": 2.439218044281006,
"logps/chosen": -757.6051635742188,
"logps/rejected": -1135.0416259765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.389976739883423,
"rewards/margins": 42.346309661865234,
"rewards/rejected": -38.95633316040039,
"step": 84
},
{
"epoch": 1.3739837398373984,
"grad_norm": 3.062094037886709e-06,
"learning_rate": 5.080470162853472e-05,
"logits/chosen": 1.2051855325698853,
"logits/rejected": 1.2651633024215698,
"logps/chosen": -1020.686767578125,
"logps/rejected": -1463.1270751953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -10.808335304260254,
"rewards/margins": 38.411285400390625,
"rewards/rejected": -49.21961975097656,
"step": 85
},
{
"epoch": 1.3902439024390243,
"grad_norm": 0.00018378288950771093,
"learning_rate": 4.840354763714991e-05,
"logits/chosen": 0.03289281576871872,
"logits/rejected": 0.014516504481434822,
"logps/chosen": -995.1809692382812,
"logps/rejected": -2124.506591796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -32.061710357666016,
"rewards/margins": 57.61822509765625,
"rewards/rejected": -89.67993927001953,
"step": 86
},
{
"epoch": 1.4065040650406504,
"grad_norm": 5.109325866214931e-05,
"learning_rate": 4.604227177041156e-05,
"logits/chosen": 1.2230056524276733,
"logits/rejected": 1.476953387260437,
"logps/chosen": -1030.1702880859375,
"logps/rejected": -1326.158935546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -13.08495044708252,
"rewards/margins": 34.212921142578125,
"rewards/rejected": -47.29787063598633,
"step": 87
},
{
"epoch": 1.4227642276422765,
"grad_norm": 1.226226800099539e-07,
"learning_rate": 4.372269902304363e-05,
"logits/chosen": 2.002579689025879,
"logits/rejected": 2.0382652282714844,
"logps/chosen": -1250.2037353515625,
"logps/rejected": -1071.18896484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -11.541341781616211,
"rewards/margins": 32.357688903808594,
"rewards/rejected": -43.89903259277344,
"step": 88
},
{
"epoch": 1.4390243902439024,
"grad_norm": 6.719565863022581e-05,
"learning_rate": 4.144662215805426e-05,
"logits/chosen": 2.3775994777679443,
"logits/rejected": 2.751979351043701,
"logps/chosen": -828.1460571289062,
"logps/rejected": -906.63037109375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.038515090942383,
"rewards/margins": 18.016881942749023,
"rewards/rejected": -23.055395126342773,
"step": 89
},
{
"epoch": 1.4552845528455285,
"grad_norm": 0.003350652754306793,
"learning_rate": 3.921580032113602e-05,
"logits/chosen": 2.568944215774536,
"logits/rejected": 2.653423547744751,
"logps/chosen": -1348.401123046875,
"logps/rejected": -1087.044921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -8.072247505187988,
"rewards/margins": 23.256484985351562,
"rewards/rejected": -31.328731536865234,
"step": 90
},
{
"epoch": 1.4715447154471546,
"grad_norm": 1.6966988596323063e-06,
"learning_rate": 3.7031957681048604e-05,
"logits/chosen": 0.7617810964584351,
"logits/rejected": 0.810763418674469,
"logps/chosen": -818.6165161132812,
"logps/rejected": -1948.71728515625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.259980201721191,
"rewards/margins": 87.85292053222656,
"rewards/rejected": -95.1128921508789,
"step": 91
},
{
"epoch": 1.4878048780487805,
"grad_norm": 1.3153041322766512e-07,
"learning_rate": 3.489678209703475e-05,
"logits/chosen": 0.7253928780555725,
"logits/rejected": 0.7696207761764526,
"logps/chosen": -1109.42919921875,
"logps/rejected": -1995.980712890625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -18.064022064208984,
"rewards/margins": 62.025482177734375,
"rewards/rejected": -80.08950805664062,
"step": 92
},
{
"epoch": 1.5040650406504064,
"grad_norm": 7.262394319695886e-06,
"learning_rate": 3.281192381429894e-05,
"logits/chosen": 1.3864871263504028,
"logits/rejected": 1.5070679187774658,
"logps/chosen": -1201.9698486328125,
"logps/rejected": -1620.9224853515625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -16.929353713989258,
"rewards/margins": 49.26674270629883,
"rewards/rejected": -66.19609069824219,
"step": 93
},
{
"epoch": 1.5203252032520327,
"grad_norm": 6.851015768916113e-06,
"learning_rate": 3.077899418855772e-05,
"logits/chosen": 0.7263829112052917,
"logits/rejected": 0.6369051337242126,
"logps/chosen": -747.6914672851562,
"logps/rejected": -1705.2852783203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -15.3454008102417,
"rewards/margins": 49.285179138183594,
"rewards/rejected": -64.63057708740234,
"step": 94
},
{
"epoch": 1.5365853658536586,
"grad_norm": 0.0002986456092912704,
"learning_rate": 2.879956444064703e-05,
"logits/chosen": 1.4310306310653687,
"logits/rejected": 1.2261309623718262,
"logps/chosen": -936.9393310546875,
"logps/rejected": -1461.7275390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -13.54560661315918,
"rewards/margins": 38.0745735168457,
"rewards/rejected": -51.62017822265625,
"step": 95
},
{
"epoch": 1.5528455284552845,
"grad_norm": 5.264350306788401e-07,
"learning_rate": 2.6875164442149147e-05,
"logits/chosen": 0.5105292797088623,
"logits/rejected": 0.7118083834648132,
"logps/chosen": -936.799560546875,
"logps/rejected": -1879.8419189453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -16.81096649169922,
"rewards/margins": 43.707740783691406,
"rewards/rejected": -60.518707275390625,
"step": 96
},
{
"epoch": 1.5691056910569106,
"grad_norm": 0.00016159842198248953,
"learning_rate": 2.500728153297788e-05,
"logits/chosen": 1.8368278741836548,
"logits/rejected": 2.204590082168579,
"logps/chosen": -1461.580078125,
"logps/rejected": -1380.7667236328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -13.631231307983398,
"rewards/margins": 26.685359954833984,
"rewards/rejected": -40.316593170166016,
"step": 97
},
{
"epoch": 1.5853658536585367,
"grad_norm": 0.00013451933045871556,
"learning_rate": 2.3197359371835802e-05,
"logits/chosen": 1.1100133657455444,
"logits/rejected": 1.2370729446411133,
"logps/chosen": -948.371826171875,
"logps/rejected": -1276.979248046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -9.95567512512207,
"rewards/margins": 37.89854431152344,
"rewards/rejected": -47.854225158691406,
"step": 98
},
{
"epoch": 1.6016260162601625,
"grad_norm": 0.00024462357396259904,
"learning_rate": 2.1446796820432167e-05,
"logits/chosen": 1.7180746793746948,
"logits/rejected": 2.153879404067993,
"logps/chosen": -1276.5830078125,
"logps/rejected": -1113.281494140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -14.072443008422852,
"rewards/margins": 17.009380340576172,
"rewards/rejected": -31.081825256347656,
"step": 99
},
{
"epoch": 1.6178861788617886,
"grad_norm": 1.6178487882712034e-08,
"learning_rate": 1.9756946862323535e-05,
"logits/chosen": 1.3304284811019897,
"logits/rejected": 1.1570796966552734,
"logps/chosen": -1224.40380859375,
"logps/rejected": -1765.047119140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -16.283369064331055,
"rewards/margins": 56.30316925048828,
"rewards/rejected": -72.58653259277344,
"step": 100
},
{
"epoch": 1.6341463414634148,
"grad_norm": 1.8081759378674178e-07,
"learning_rate": 1.8129115557213262e-05,
"logits/chosen": 0.5725196599960327,
"logits/rejected": 0.7406933903694153,
"logps/chosen": -808.1942138671875,
"logps/rejected": -1623.4114990234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -17.64067840576172,
"rewards/margins": 40.391014099121094,
"rewards/rejected": -58.03169250488281,
"step": 101
},
{
"epoch": 1.6504065040650406,
"grad_norm": 0.00023044626868795604,
"learning_rate": 1.656456103151728e-05,
"logits/chosen": 2.142577886581421,
"logits/rejected": 2.108786106109619,
"logps/chosen": -951.4678955078125,
"logps/rejected": -1318.56201171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.911703109741211,
"rewards/margins": 40.60116958618164,
"rewards/rejected": -47.512874603271484,
"step": 102
},
{
"epoch": 1.6666666666666665,
"grad_norm": 2.5419683424843242e-06,
"learning_rate": 1.5064492505977234e-05,
"logits/chosen": 1.2146611213684082,
"logits/rejected": 1.1194839477539062,
"logps/chosen": -994.2359619140625,
"logps/rejected": -1273.3843994140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -9.964194297790527,
"rewards/margins": 37.999244689941406,
"rewards/rejected": -47.963443756103516,
"step": 103
},
{
"epoch": 1.6829268292682928,
"grad_norm": 2.680222932482934e-09,
"learning_rate": 1.363006936107183e-05,
"logits/chosen": 1.9312256574630737,
"logits/rejected": 1.8441157341003418,
"logps/chosen": -984.7633666992188,
"logps/rejected": -1123.7462158203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.190778732299805,
"rewards/margins": 35.19913864135742,
"rewards/rejected": -42.389915466308594,
"step": 104
},
{
"epoch": 1.6991869918699187,
"grad_norm": 1.2424061424098909e-05,
"learning_rate": 1.2262400240949023e-05,
"logits/chosen": 1.6461536884307861,
"logits/rejected": 1.8136305809020996,
"logps/chosen": -904.748291015625,
"logps/rejected": -1393.095947265625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.034971237182617,
"rewards/margins": 42.80604553222656,
"rewards/rejected": -47.84101867675781,
"step": 105
},
{
"epoch": 1.7154471544715446,
"grad_norm": 4.1589805732655805e-07,
"learning_rate": 1.0962542196571634e-05,
"logits/chosen": 1.3145643472671509,
"logits/rejected": 1.1997283697128296,
"logps/chosen": -939.1678466796875,
"logps/rejected": -1638.798583984375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -14.597799301147461,
"rewards/margins": 44.598976135253906,
"rewards/rejected": -59.19677734375,
"step": 106
},
{
"epoch": 1.7317073170731707,
"grad_norm": 6.540443564517773e-08,
"learning_rate": 9.731499868738447e-06,
"logits/chosen": 2.1823389530181885,
"logits/rejected": 2.301424264907837,
"logps/chosen": -1150.3404541015625,
"logps/rejected": -1366.84814453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -12.673786163330078,
"rewards/margins": 34.13035202026367,
"rewards/rejected": -46.804134368896484,
"step": 107
},
{
"epoch": 1.7479674796747968,
"grad_norm": 4.622437700163573e-05,
"learning_rate": 8.570224711612385e-06,
"logits/chosen": 0.4944400489330292,
"logits/rejected": 0.5377110242843628,
"logps/chosen": -945.9273681640625,
"logps/rejected": -1679.0079345703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -17.38947296142578,
"rewards/margins": 47.88871383666992,
"rewards/rejected": -65.27819061279297,
"step": 108
},
{
"epoch": 1.7642276422764227,
"grad_norm": 3.809813506450155e-06,
"learning_rate": 7.479614257355971e-06,
"logits/chosen": 1.2999298572540283,
"logits/rejected": 1.300133228302002,
"logps/chosen": -1008.9362182617188,
"logps/rejected": -1288.076416015625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -9.351741790771484,
"rewards/margins": 42.22937774658203,
"rewards/rejected": -51.581119537353516,
"step": 109
},
{
"epoch": 1.7804878048780488,
"grad_norm": 0.007235921919345856,
"learning_rate": 6.460511422441984e-06,
"logits/chosen": 1.9115304946899414,
"logits/rejected": 2.1205523014068604,
"logps/chosen": -1132.468017578125,
"logps/rejected": -1027.97802734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -13.733047485351562,
"rewards/margins": 16.740474700927734,
"rewards/rejected": -30.47352409362793,
"step": 110
},
{
"epoch": 1.796747967479675,
"grad_norm": 1.4731797364220256e-06,
"learning_rate": 5.5137038561761115e-06,
"logits/chosen": 0.6670889854431152,
"logits/rejected": 0.6521254181861877,
"logps/chosen": -742.6629638671875,
"logps/rejected": -1944.6416015625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -14.560412406921387,
"rewards/margins": 63.10647964477539,
"rewards/rejected": -77.6668930053711,
"step": 111
},
{
"epoch": 1.8130081300813008,
"grad_norm": 5.7062050473177806e-05,
"learning_rate": 4.639923331934471e-06,
"logits/chosen": 0.9131884574890137,
"logits/rejected": 1.1928483247756958,
"logps/chosen": -1271.8701171875,
"logps/rejected": -1448.082763671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -16.25135040283203,
"rewards/margins": 34.5776252746582,
"rewards/rejected": -50.82897186279297,
"step": 112
},
{
"epoch": 1.8292682926829267,
"grad_norm": 2.0286324797780253e-05,
"learning_rate": 3.839845181587098e-06,
"logits/chosen": 0.6853426694869995,
"logits/rejected": 0.7730221748352051,
"logps/chosen": -847.8319702148438,
"logps/rejected": -2002.734130859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -18.896442413330078,
"rewards/margins": 51.54301071166992,
"rewards/rejected": -70.439453125,
"step": 113
},
{
"epoch": 1.845528455284553,
"grad_norm": 4.680402525991667e-06,
"learning_rate": 3.1140877735439387e-06,
"logits/chosen": 0.8352583050727844,
"logits/rejected": 0.7815011143684387,
"logps/chosen": -1006.5256958007812,
"logps/rejected": -1871.0528564453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -23.025442123413086,
"rewards/margins": 47.73127746582031,
"rewards/rejected": -70.75672149658203,
"step": 114
},
{
"epoch": 1.8617886178861789,
"grad_norm": 4.835527761315461e-06,
"learning_rate": 2.4632120348272003e-06,
"logits/chosen": 0.6664273142814636,
"logits/rejected": 0.7628079056739807,
"logps/chosen": -1057.7972412109375,
"logps/rejected": -1896.2288818359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -26.96924591064453,
"rewards/margins": 47.0149040222168,
"rewards/rejected": -73.9841537475586,
"step": 115
},
{
"epoch": 1.8780487804878048,
"grad_norm": 1.7554378928252845e-06,
"learning_rate": 1.88772101753929e-06,
"logits/chosen": 1.4583988189697266,
"logits/rejected": 1.4834201335906982,
"logps/chosen": -1100.9306640625,
"logps/rejected": -1776.69091796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -19.52985954284668,
"rewards/margins": 46.82954788208008,
"rewards/rejected": -66.35940551757812,
"step": 116
},
{
"epoch": 1.8943089430894309,
"grad_norm": 0.0001541744713904336,
"learning_rate": 1.3880595100613792e-06,
"logits/chosen": 1.328132152557373,
"logits/rejected": 1.6395397186279297,
"logps/chosen": -1433.81689453125,
"logps/rejected": -1625.1180419921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -22.608409881591797,
"rewards/margins": 31.696552276611328,
"rewards/rejected": -54.304962158203125,
"step": 117
},
{
"epoch": 1.910569105691057,
"grad_norm": 3.519949677865952e-05,
"learning_rate": 9.64613693283123e-07,
"logits/chosen": 1.856284737586975,
"logits/rejected": 1.8918788433074951,
"logps/chosen": -1302.91796875,
"logps/rejected": -1380.99365234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -15.29294204711914,
"rewards/margins": 32.75577926635742,
"rewards/rejected": -48.0487174987793,
"step": 118
},
{
"epoch": 1.9268292682926829,
"grad_norm": 8.586041076341644e-05,
"learning_rate": 6.177108421292266e-07,
"logits/chosen": 1.2806370258331299,
"logits/rejected": 1.3649016618728638,
"logps/chosen": -988.1577758789062,
"logps/rejected": -1595.25244140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -16.122652053833008,
"rewards/margins": 36.193511962890625,
"rewards/rejected": -52.316162109375,
"step": 119
},
{
"epoch": 1.943089430894309,
"grad_norm": 0.008627010509371758,
"learning_rate": 3.4761907261356976e-07,
"logits/chosen": 1.951653003692627,
"logits/rejected": 1.9814622402191162,
"logps/chosen": -1180.52294921875,
"logps/rejected": -1512.510986328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -16.302892684936523,
"rewards/margins": 42.75213623046875,
"rewards/rejected": -59.05502700805664,
"step": 120
},
{
"epoch": 1.959349593495935,
"grad_norm": 1.4577848617136624e-07,
"learning_rate": 1.545471346164007e-07,
"logits/chosen": 1.3570653200149536,
"logits/rejected": 1.1423208713531494,
"logps/chosen": -1353.2474365234375,
"logps/rejected": -1461.6622314453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -22.633544921875,
"rewards/margins": 28.00894546508789,
"rewards/rejected": -50.642486572265625,
"step": 121
},
{
"epoch": 1.975609756097561,
"grad_norm": 2.505672682673321e-07,
"learning_rate": 3.8644250544594975e-08,
"logits/chosen": 0.8167323470115662,
"logits/rejected": 0.649781346321106,
"logps/chosen": -991.8995971679688,
"logps/rejected": -1850.18994140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -21.644643783569336,
"rewards/margins": 54.82267761230469,
"rewards/rejected": -76.46732330322266,
"step": 122
},
{
"epoch": 1.9918699186991868,
"grad_norm": 0.0001769052614690736,
"learning_rate": 0.0,
"logits/chosen": 1.7628881931304932,
"logits/rejected": 1.8846670389175415,
"logps/chosen": -1067.9901123046875,
"logps/rejected": -1213.6796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.579381942749023,
"rewards/margins": 32.53736114501953,
"rewards/rejected": -40.11674118041992,
"step": 123
}
],
"logging_steps": 1,
"max_steps": 123,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 62,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}