|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9918699186991868, |
|
"eval_steps": 500, |
|
"global_step": 123, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016260162601626018, |
|
"grad_norm": 19.880552291870117, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": 0.20684528350830078, |
|
"logits/rejected": 0.4346590042114258, |
|
"logps/chosen": -777.121826171875, |
|
"logps/rejected": -997.1637573242188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.032520325203252036, |
|
"grad_norm": 20.27885627746582, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": 0.12451896071434021, |
|
"logits/rejected": 0.3398062586784363, |
|
"logps/chosen": -841.6675415039062, |
|
"logps/rejected": -988.1629638671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.04878048780487805, |
|
"grad_norm": 390.8882141113281, |
|
"learning_rate": 6e-05, |
|
"logits/chosen": 0.14335429668426514, |
|
"logits/rejected": 0.32437634468078613, |
|
"logps/chosen": -876.8231811523438, |
|
"logps/rejected": -1356.0509033203125, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.12680970132350922, |
|
"rewards/margins": -0.06611938774585724, |
|
"rewards/rejected": -0.06069030612707138, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.06504065040650407, |
|
"grad_norm": 21.47028923034668, |
|
"learning_rate": 8e-05, |
|
"logits/chosen": 0.7833376526832581, |
|
"logits/rejected": 1.1811182498931885, |
|
"logps/chosen": -1178.9454345703125, |
|
"logps/rejected": -974.9606323242188, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.11406403034925461, |
|
"rewards/margins": -0.005326844751834869, |
|
"rewards/rejected": -0.10873718559741974, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.08130081300813008, |
|
"grad_norm": 40.24486541748047, |
|
"learning_rate": 0.0001, |
|
"logits/chosen": -0.44922593235969543, |
|
"logits/rejected": -0.6411373019218445, |
|
"logps/chosen": -559.5548706054688, |
|
"logps/rejected": -1254.8680419921875, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34520798921585083, |
|
"rewards/margins": 0.4895774722099304, |
|
"rewards/rejected": -0.834785521030426, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0975609756097561, |
|
"grad_norm": 16.58538818359375, |
|
"learning_rate": 0.00012, |
|
"logits/chosen": 0.9809624552726746, |
|
"logits/rejected": 1.187626838684082, |
|
"logps/chosen": -757.462158203125, |
|
"logps/rejected": -1020.3145141601562, |
|
"loss": 0.4292, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2485191375017166, |
|
"rewards/margins": 0.7915412783622742, |
|
"rewards/rejected": -1.0400605201721191, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.11382113821138211, |
|
"grad_norm": 18.358051300048828, |
|
"learning_rate": 0.00014, |
|
"logits/chosen": 1.6894466876983643, |
|
"logits/rejected": 1.6828027963638306, |
|
"logps/chosen": -1125.97412109375, |
|
"logps/rejected": -877.0285034179688, |
|
"loss": 0.3812, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9222716689109802, |
|
"rewards/margins": 0.32721251249313354, |
|
"rewards/rejected": -1.2494843006134033, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.13008130081300814, |
|
"grad_norm": 163.26919555664062, |
|
"learning_rate": 0.00016, |
|
"logits/chosen": -0.45762500166893005, |
|
"logits/rejected": -0.5206366777420044, |
|
"logps/chosen": -705.5869750976562, |
|
"logps/rejected": -1347.400390625, |
|
"loss": 0.288, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.067340850830078, |
|
"rewards/margins": 3.900920867919922, |
|
"rewards/rejected": -6.968262195587158, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.14634146341463414, |
|
"grad_norm": 5.863889217376709, |
|
"learning_rate": 0.00018, |
|
"logits/chosen": 0.2462751269340515, |
|
"logits/rejected": 0.21955497562885284, |
|
"logps/chosen": -619.6600341796875, |
|
"logps/rejected": -1208.003662109375, |
|
"loss": 0.0717, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.7182769775390625, |
|
"rewards/margins": 8.603934288024902, |
|
"rewards/rejected": -11.322211265563965, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.16260162601626016, |
|
"grad_norm": 0.6885181665420532, |
|
"learning_rate": 0.0002, |
|
"logits/chosen": 1.1071248054504395, |
|
"logits/rejected": 1.1347391605377197, |
|
"logps/chosen": -877.805419921875, |
|
"logps/rejected": -1244.745849609375, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.3332839012146, |
|
"rewards/margins": 10.358970642089844, |
|
"rewards/rejected": -15.692255020141602, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.17886178861788618, |
|
"grad_norm": 2.558082103729248, |
|
"learning_rate": 0.00019996135574945544, |
|
"logits/chosen": 0.24951541423797607, |
|
"logits/rejected": 0.2528836727142334, |
|
"logps/chosen": -740.1439208984375, |
|
"logps/rejected": -1265.59814453125, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.989352226257324, |
|
"rewards/margins": 19.463153839111328, |
|
"rewards/rejected": -27.45250701904297, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.1951219512195122, |
|
"grad_norm": 0.0005222362815402448, |
|
"learning_rate": 0.0001998454528653836, |
|
"logits/chosen": 0.6122381687164307, |
|
"logits/rejected": 0.8588502407073975, |
|
"logps/chosen": -879.779296875, |
|
"logps/rejected": -1585.720947265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -18.228717803955078, |
|
"rewards/margins": 32.099365234375, |
|
"rewards/rejected": -50.32808303833008, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.21138211382113822, |
|
"grad_norm": 3.927712168660946e-05, |
|
"learning_rate": 0.00019965238092738643, |
|
"logits/chosen": 1.1087465286254883, |
|
"logits/rejected": 1.5179497003555298, |
|
"logps/chosen": -1257.50830078125, |
|
"logps/rejected": -1163.919677734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.7935791015625, |
|
"rewards/margins": 20.931385040283203, |
|
"rewards/rejected": -36.72496032714844, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.22764227642276422, |
|
"grad_norm": 0.21046003699302673, |
|
"learning_rate": 0.0001993822891578708, |
|
"logits/chosen": 0.23910227417945862, |
|
"logits/rejected": 0.31048309803009033, |
|
"logps/chosen": -1491.3905029296875, |
|
"logps/rejected": -2108.9990234375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -56.71916198730469, |
|
"rewards/margins": 42.71849822998047, |
|
"rewards/rejected": -99.43765258789062, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 591.9841918945312, |
|
"learning_rate": 0.0001990353863067169, |
|
"logits/chosen": 0.5623903870582581, |
|
"logits/rejected": 0.6063950061798096, |
|
"logps/chosen": -1970.40576171875, |
|
"logps/rejected": -2018.9765625, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -86.55944061279297, |
|
"rewards/margins": 29.65001106262207, |
|
"rewards/rejected": -116.2094497680664, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2601626016260163, |
|
"grad_norm": 90.19036865234375, |
|
"learning_rate": 0.00019861194048993863, |
|
"logits/chosen": 0.6143627166748047, |
|
"logits/rejected": 0.7420700788497925, |
|
"logps/chosen": -1821.3201904296875, |
|
"logps/rejected": -1930.827880859375, |
|
"loss": 1.0906, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -76.42454528808594, |
|
"rewards/margins": 28.595970153808594, |
|
"rewards/rejected": -105.02052307128906, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.2764227642276423, |
|
"grad_norm": 0.0009420510032214224, |
|
"learning_rate": 0.0001981122789824607, |
|
"logits/chosen": 0.20949414372444153, |
|
"logits/rejected": 0.1935410499572754, |
|
"logps/chosen": -1610.02783203125, |
|
"logps/rejected": -2431.318359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -65.77059936523438, |
|
"rewards/margins": 73.17414855957031, |
|
"rewards/rejected": -138.94476318359375, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.2926829268292683, |
|
"grad_norm": 132.33953857421875, |
|
"learning_rate": 0.00019753678796517282, |
|
"logits/chosen": 0.728495717048645, |
|
"logits/rejected": 1.0449868440628052, |
|
"logps/chosen": -1515.9527587890625, |
|
"logps/rejected": -1517.2254638671875, |
|
"loss": 2.6435, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -61.27394104003906, |
|
"rewards/margins": 20.481342315673828, |
|
"rewards/rejected": -81.75528717041016, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.3089430894308943, |
|
"grad_norm": 0.00032979066600091755, |
|
"learning_rate": 0.00019688591222645607, |
|
"logits/chosen": 0.8106945753097534, |
|
"logits/rejected": 0.6099438071250916, |
|
"logps/chosen": -1138.11767578125, |
|
"logps/rejected": -1558.903076171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -46.01788330078125, |
|
"rewards/margins": 41.312171936035156, |
|
"rewards/rejected": -87.33006286621094, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.3252032520325203, |
|
"grad_norm": 0.22872093319892883, |
|
"learning_rate": 0.0001961601548184129, |
|
"logits/chosen": -0.05689544230699539, |
|
"logits/rejected": 0.0633389949798584, |
|
"logps/chosen": -1466.4468994140625, |
|
"logps/rejected": -2267.798828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -76.84449005126953, |
|
"rewards/margins": 48.28419494628906, |
|
"rewards/rejected": -125.12869262695312, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.34146341463414637, |
|
"grad_norm": 1.10204017162323, |
|
"learning_rate": 0.00019536007666806556, |
|
"logits/chosen": 0.5605583786964417, |
|
"logits/rejected": 0.45388907194137573, |
|
"logps/chosen": -1369.92529296875, |
|
"logps/rejected": -1706.2607421875, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -33.74466323852539, |
|
"rewards/margins": 45.32139587402344, |
|
"rewards/rejected": -79.06605529785156, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.35772357723577236, |
|
"grad_norm": 0.7084241509437561, |
|
"learning_rate": 0.0001944862961438239, |
|
"logits/chosen": 0.7291379570960999, |
|
"logits/rejected": 0.9067746996879578, |
|
"logps/chosen": -998.4527587890625, |
|
"logps/rejected": -1456.096923828125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -19.574996948242188, |
|
"rewards/margins": 45.93708038330078, |
|
"rewards/rejected": -65.51207733154297, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.37398373983739835, |
|
"grad_norm": 3.134854793548584, |
|
"learning_rate": 0.00019353948857755803, |
|
"logits/chosen": 0.9795281887054443, |
|
"logits/rejected": 0.8698853850364685, |
|
"logps/chosen": -1127.320068359375, |
|
"logps/rejected": -1399.870849609375, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -28.826623916625977, |
|
"rewards/margins": 29.93848419189453, |
|
"rewards/rejected": -58.765106201171875, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.3902439024390244, |
|
"grad_norm": 2.085594654083252, |
|
"learning_rate": 0.00019252038574264405, |
|
"logits/chosen": 0.17023050785064697, |
|
"logits/rejected": -0.1173945814371109, |
|
"logps/chosen": -1615.32568359375, |
|
"logps/rejected": -2291.47509765625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -82.27009582519531, |
|
"rewards/margins": 44.62742614746094, |
|
"rewards/rejected": -126.89752197265625, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"grad_norm": 7.152135367505252e-05, |
|
"learning_rate": 0.00019142977528838762, |
|
"logits/chosen": 0.6659821271896362, |
|
"logits/rejected": 0.6975608468055725, |
|
"logps/chosen": -1023.6649169921875, |
|
"logps/rejected": -1710.140380859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -33.36669921875, |
|
"rewards/margins": 49.14038848876953, |
|
"rewards/rejected": -82.50708770751953, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.42276422764227645, |
|
"grad_norm": 2.22769040192361e-06, |
|
"learning_rate": 0.00019026850013126157, |
|
"logits/chosen": -0.624580442905426, |
|
"logits/rejected": -0.42581236362457275, |
|
"logps/chosen": -1117.0599365234375, |
|
"logps/rejected": -2134.2626953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -57.8393669128418, |
|
"rewards/margins": 44.58246994018555, |
|
"rewards/rejected": -102.42182922363281, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.43902439024390244, |
|
"grad_norm": 0.7476986050605774, |
|
"learning_rate": 0.00018903745780342839, |
|
"logits/chosen": 0.17943906784057617, |
|
"logits/rejected": 0.21112221479415894, |
|
"logps/chosen": -1208.960205078125, |
|
"logps/rejected": -1999.635009765625, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -55.38972473144531, |
|
"rewards/margins": 40.17228317260742, |
|
"rewards/rejected": -95.56201171875, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.45528455284552843, |
|
"grad_norm": 0.6162808537483215, |
|
"learning_rate": 0.00018773759975905098, |
|
"logits/chosen": 0.15270072221755981, |
|
"logits/rejected": 0.32134106755256653, |
|
"logps/chosen": -1206.7701416015625, |
|
"logps/rejected": -2007.0269775390625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -38.11735916137695, |
|
"rewards/margins": 50.446754455566406, |
|
"rewards/rejected": -88.5641098022461, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.4715447154471545, |
|
"grad_norm": 8.754213354222884e-07, |
|
"learning_rate": 0.0001863699306389282, |
|
"logits/chosen": 0.8678311109542847, |
|
"logits/rejected": 0.8028951287269592, |
|
"logps/chosen": -1161.56591796875, |
|
"logps/rejected": -1967.0069580078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.882237434387207, |
|
"rewards/margins": 65.84603881835938, |
|
"rewards/rejected": -81.72827911376953, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 0.0023462281096726656, |
|
"learning_rate": 0.00018493550749402278, |
|
"logits/chosen": 1.54906165599823, |
|
"logits/rejected": 1.6790410280227661, |
|
"logps/chosen": -951.4666748046875, |
|
"logps/rejected": -1339.60107421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.993054389953613, |
|
"rewards/margins": 40.59773635864258, |
|
"rewards/rejected": -47.590789794921875, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5040650406504065, |
|
"grad_norm": 0.00014203626778908074, |
|
"learning_rate": 0.00018343543896848273, |
|
"logits/chosen": 1.832588791847229, |
|
"logits/rejected": 1.6241607666015625, |
|
"logps/chosen": -1032.7232666015625, |
|
"logps/rejected": -1197.1595458984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.2398042678833, |
|
"rewards/margins": 28.274524688720703, |
|
"rewards/rejected": -42.51432800292969, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.5203252032520326, |
|
"grad_norm": 2.814833402633667, |
|
"learning_rate": 0.00018187088444278674, |
|
"logits/chosen": 2.1444239616394043, |
|
"logits/rejected": 1.8101916313171387, |
|
"logps/chosen": -874.6080322265625, |
|
"logps/rejected": -1012.015625, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.471307754516602, |
|
"rewards/margins": 20.194053649902344, |
|
"rewards/rejected": -33.66536331176758, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.5365853658536586, |
|
"grad_norm": 0.06849005818367004, |
|
"learning_rate": 0.00018024305313767646, |
|
"logits/chosen": 1.9995535612106323, |
|
"logits/rejected": 1.8331811428070068, |
|
"logps/chosen": -1230.6785888671875, |
|
"logps/rejected": -1346.717041015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.62438678741455, |
|
"rewards/margins": 31.655826568603516, |
|
"rewards/rejected": -42.280216217041016, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.5528455284552846, |
|
"grad_norm": 0.01905296929180622, |
|
"learning_rate": 0.00017855320317956784, |
|
"logits/chosen": 1.1833341121673584, |
|
"logits/rejected": 1.240072250366211, |
|
"logps/chosen": -841.6439208984375, |
|
"logps/rejected": -1193.967041015625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.020572662353516, |
|
"rewards/margins": 28.115928649902344, |
|
"rewards/rejected": -43.136505126953125, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.5691056910569106, |
|
"grad_norm": 1.866630009317305e-05, |
|
"learning_rate": 0.0001768026406281642, |
|
"logits/chosen": 1.0859436988830566, |
|
"logits/rejected": 1.226615309715271, |
|
"logps/chosen": -1046.376708984375, |
|
"logps/rejected": -1418.09228515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.104580879211426, |
|
"rewards/margins": 34.29302978515625, |
|
"rewards/rejected": -47.397613525390625, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5853658536585366, |
|
"grad_norm": 0.0032898751087486744, |
|
"learning_rate": 0.00017499271846702213, |
|
"logits/chosen": -0.23074638843536377, |
|
"logits/rejected": -0.09211879968643188, |
|
"logps/chosen": -1246.923095703125, |
|
"logps/rejected": -2060.51123046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -44.84193801879883, |
|
"rewards/margins": 45.95753479003906, |
|
"rewards/rejected": -90.79946899414062, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.6016260162601627, |
|
"grad_norm": 0.008372440002858639, |
|
"learning_rate": 0.00017312483555785086, |
|
"logits/chosen": 0.5074482560157776, |
|
"logits/rejected": 0.48830437660217285, |
|
"logps/chosen": -920.7339477539062, |
|
"logps/rejected": -1666.024658203125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -18.29103660583496, |
|
"rewards/margins": 32.98884582519531, |
|
"rewards/rejected": -51.27988052368164, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.6178861788617886, |
|
"grad_norm": 0.0008834120817482471, |
|
"learning_rate": 0.00017120043555935298, |
|
"logits/chosen": 1.3600270748138428, |
|
"logits/rejected": 1.2087562084197998, |
|
"logps/chosen": -1251.687744140625, |
|
"logps/rejected": -1775.605224609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -19.525299072265625, |
|
"rewards/margins": 45.839603424072266, |
|
"rewards/rejected": -65.36489868164062, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.6341463414634146, |
|
"grad_norm": 9.272828901885077e-05, |
|
"learning_rate": 0.00016922100581144228, |
|
"logits/chosen": 1.4009983539581299, |
|
"logits/rejected": 1.2046518325805664, |
|
"logps/chosen": -1155.6650390625, |
|
"logps/rejected": -1281.83740234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.521747589111328, |
|
"rewards/margins": 24.7418155670166, |
|
"rewards/rejected": -41.2635612487793, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.6504065040650406, |
|
"grad_norm": 0.0009182749781757593, |
|
"learning_rate": 0.00016718807618570106, |
|
"logits/chosen": 1.3781325817108154, |
|
"logits/rejected": 1.565840244293213, |
|
"logps/chosen": -1133.72216796875, |
|
"logps/rejected": -1346.7265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.05687427520752, |
|
"rewards/margins": 18.654136657714844, |
|
"rewards/rejected": -27.711009979248047, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.004382506478577852, |
|
"learning_rate": 0.00016510321790296525, |
|
"logits/chosen": 1.1266183853149414, |
|
"logits/rejected": 1.2493317127227783, |
|
"logps/chosen": -926.239501953125, |
|
"logps/rejected": -1293.30322265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.177988052368164, |
|
"rewards/margins": 22.40888786315918, |
|
"rewards/rejected": -33.586875915527344, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.6829268292682927, |
|
"grad_norm": 0.15565475821495056, |
|
"learning_rate": 0.00016296804231895142, |
|
"logits/chosen": 1.099910020828247, |
|
"logits/rejected": 0.820236086845398, |
|
"logps/chosen": -626.5668334960938, |
|
"logps/rejected": -1386.260498046875, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.778373718261719, |
|
"rewards/margins": 27.383846282958984, |
|
"rewards/rejected": -38.16221618652344, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.6991869918699187, |
|
"grad_norm": 3.971878322772682e-05, |
|
"learning_rate": 0.00016078419967886402, |
|
"logits/chosen": 1.4016125202178955, |
|
"logits/rejected": 1.5134223699569702, |
|
"logps/chosen": -1066.9713134765625, |
|
"logps/rejected": -1517.39208984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.4629487991333, |
|
"rewards/margins": 27.75263214111328, |
|
"rewards/rejected": -39.215576171875, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.7154471544715447, |
|
"grad_norm": 0.004684010986238718, |
|
"learning_rate": 0.00015855337784194577, |
|
"logits/chosen": 1.989326000213623, |
|
"logits/rejected": 2.3816940784454346, |
|
"logps/chosen": -956.5921630859375, |
|
"logps/rejected": -1014.5316162109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.150079727172852, |
|
"rewards/margins": 12.83597183227539, |
|
"rewards/rejected": -18.986051559448242, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 0.03292777016758919, |
|
"learning_rate": 0.00015627730097695638, |
|
"logits/chosen": 2.072270631790161, |
|
"logits/rejected": 2.0922999382019043, |
|
"logps/chosen": -1218.990478515625, |
|
"logps/rejected": -1251.8980712890625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.599820137023926, |
|
"rewards/margins": 19.980201721191406, |
|
"rewards/rejected": -27.580020904541016, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.7479674796747967, |
|
"grad_norm": 0.06399545818567276, |
|
"learning_rate": 0.00015395772822958845, |
|
"logits/chosen": 1.245821475982666, |
|
"logits/rejected": 1.3717162609100342, |
|
"logps/chosen": -960.6263427734375, |
|
"logps/rejected": -1502.2239990234375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.884254455566406, |
|
"rewards/margins": 28.055803298950195, |
|
"rewards/rejected": -36.94005584716797, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.7642276422764228, |
|
"grad_norm": 0.022615160793066025, |
|
"learning_rate": 0.0001515964523628501, |
|
"logits/chosen": 1.4772993326187134, |
|
"logits/rejected": 1.3233076333999634, |
|
"logps/chosen": -900.41552734375, |
|
"logps/rejected": -1422.0224609375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.169479370117188, |
|
"rewards/margins": 29.0593204498291, |
|
"rewards/rejected": -37.228797912597656, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.7804878048780488, |
|
"grad_norm": 0.7834580540657043, |
|
"learning_rate": 0.00014919529837146528, |
|
"logits/chosen": 2.019958019256592, |
|
"logits/rejected": 2.0058090686798096, |
|
"logps/chosen": -908.94970703125, |
|
"logps/rejected": -1153.9830322265625, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.564983367919922, |
|
"rewards/margins": 15.311219215393066, |
|
"rewards/rejected": -25.87619972229004, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.7967479674796748, |
|
"grad_norm": 0.0006066004862077534, |
|
"learning_rate": 0.0001467561220713628, |
|
"logits/chosen": 1.297697901725769, |
|
"logits/rejected": 1.5303912162780762, |
|
"logps/chosen": -1167.181640625, |
|
"logps/rejected": -1485.501953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.699865341186523, |
|
"rewards/margins": 47.49958801269531, |
|
"rewards/rejected": -59.19945526123047, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"grad_norm": 0.03268749639391899, |
|
"learning_rate": 0.00014428080866534396, |
|
"logits/chosen": 0.707965612411499, |
|
"logits/rejected": 0.7305536866188049, |
|
"logps/chosen": -1051.2691650390625, |
|
"logps/rejected": -1463.647705078125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.360027313232422, |
|
"rewards/margins": 24.690279006958008, |
|
"rewards/rejected": -39.05030822753906, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8292682926829268, |
|
"grad_norm": 0.06594517827033997, |
|
"learning_rate": 0.00014177127128603745, |
|
"logits/chosen": 1.219120740890503, |
|
"logits/rejected": 1.2810195684432983, |
|
"logps/chosen": -1020.8298950195312, |
|
"logps/rejected": -1290.2015380859375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.565038681030273, |
|
"rewards/margins": 20.74908447265625, |
|
"rewards/rejected": -33.314125061035156, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.8455284552845529, |
|
"grad_norm": 0.008960689418017864, |
|
"learning_rate": 0.0001392294495172681, |
|
"logits/chosen": 0.49424344301223755, |
|
"logits/rejected": 0.4817698895931244, |
|
"logps/chosen": -988.3806762695312, |
|
"logps/rejected": -1388.4130859375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.987248420715332, |
|
"rewards/margins": 38.28583908081055, |
|
"rewards/rejected": -53.27308654785156, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.8617886178861789, |
|
"grad_norm": 4.988933142158203e-07, |
|
"learning_rate": 0.0001366573078949813, |
|
"logits/chosen": -0.09240919351577759, |
|
"logits/rejected": -0.1942935436964035, |
|
"logps/chosen": -863.5594482421875, |
|
"logps/rejected": -1951.684814453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -21.636280059814453, |
|
"rewards/margins": 39.47431182861328, |
|
"rewards/rejected": -61.110591888427734, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.8780487804878049, |
|
"grad_norm": 0.36996814608573914, |
|
"learning_rate": 0.00013405683438888282, |
|
"logits/chosen": 1.8010693788528442, |
|
"logits/rejected": 1.9799494743347168, |
|
"logps/chosen": -1090.9835205078125, |
|
"logps/rejected": -1244.3988037109375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.118224143981934, |
|
"rewards/margins": 23.42540740966797, |
|
"rewards/rejected": -33.54362869262695, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.8943089430894309, |
|
"grad_norm": 0.0004369132802821696, |
|
"learning_rate": 0.00013143003886596669, |
|
"logits/chosen": 1.255205750465393, |
|
"logits/rejected": 1.1578245162963867, |
|
"logps/chosen": -1015.79541015625, |
|
"logps/rejected": -1361.6103515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -18.066598892211914, |
|
"rewards/margins": 27.31325340270996, |
|
"rewards/rejected": -45.379852294921875, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.9105691056910569, |
|
"grad_norm": 3.5815644423564663e-06, |
|
"learning_rate": 0.00012877895153711935, |
|
"logits/chosen": 0.5448588132858276, |
|
"logits/rejected": 0.6314257383346558, |
|
"logps/chosen": -1082.805908203125, |
|
"logps/rejected": -1538.261962890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -23.810945510864258, |
|
"rewards/margins": 29.520732879638672, |
|
"rewards/rejected": -53.3316764831543, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.926829268292683, |
|
"grad_norm": 58.86332702636719, |
|
"learning_rate": 0.00012610562138799978, |
|
"logits/chosen": 1.9793856143951416, |
|
"logits/rejected": 2.0082552433013916, |
|
"logps/chosen": -1352.8492431640625, |
|
"logps/rejected": -1265.2257080078125, |
|
"loss": 0.3774, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -20.378952026367188, |
|
"rewards/margins": 17.73773193359375, |
|
"rewards/rejected": -38.1166877746582, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.943089430894309, |
|
"grad_norm": 5.57162458392213e-08, |
|
"learning_rate": 0.0001234121145954094, |
|
"logits/chosen": 0.7738958597183228, |
|
"logits/rejected": 0.6971035599708557, |
|
"logps/chosen": -927.3837280273438, |
|
"logps/rejected": -1710.65771484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.810049057006836, |
|
"rewards/margins": 38.65287780761719, |
|
"rewards/rejected": -56.462928771972656, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.959349593495935, |
|
"grad_norm": 0.10466321557760239, |
|
"learning_rate": 0.00012070051293037492, |
|
"logits/chosen": 1.3470133543014526, |
|
"logits/rejected": 1.3975563049316406, |
|
"logps/chosen": -1097.9437255859375, |
|
"logps/rejected": -1693.154541015625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -20.652606964111328, |
|
"rewards/margins": 36.89767074584961, |
|
"rewards/rejected": -57.55027770996094, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 2.4582501282566227e-05, |
|
"learning_rate": 0.00011797291214917881, |
|
"logits/chosen": 1.379901647567749, |
|
"logits/rejected": 1.2993323802947998, |
|
"logps/chosen": -1204.1943359375, |
|
"logps/rejected": -1411.241455078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -19.423160552978516, |
|
"rewards/margins": 26.866172790527344, |
|
"rewards/rejected": -46.28933334350586, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.991869918699187, |
|
"grad_norm": 7.934165478218347e-05, |
|
"learning_rate": 0.0001152314203735805, |
|
"logits/chosen": 1.951298713684082, |
|
"logits/rejected": 2.0110878944396973, |
|
"logps/chosen": -1275.750732421875, |
|
"logps/rejected": -1257.931640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.708940505981445, |
|
"rewards/margins": 21.205249786376953, |
|
"rewards/rejected": -37.914188385009766, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.9418702141015274e-08, |
|
"learning_rate": 0.00011247815646148087, |
|
"logits/chosen": 1.219478964805603, |
|
"logits/rejected": 1.4597835540771484, |
|
"logps/chosen": -1298.3076171875, |
|
"logps/rejected": -1700.546142578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -26.570446014404297, |
|
"rewards/margins": 39.88042449951172, |
|
"rewards/rejected": -66.45086669921875, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.016260162601626, |
|
"grad_norm": 0.0003046558704227209, |
|
"learning_rate": 0.0001097152483692886, |
|
"logits/chosen": 1.216448187828064, |
|
"logits/rejected": 1.2576086521148682, |
|
"logps/chosen": -1297.49267578125, |
|
"logps/rejected": -1655.1431884765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -27.540584564208984, |
|
"rewards/margins": 25.584327697753906, |
|
"rewards/rejected": -53.12491226196289, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.032520325203252, |
|
"grad_norm": 5.492000604290226e-11, |
|
"learning_rate": 0.00010694483150725458, |
|
"logits/chosen": 0.5165296196937561, |
|
"logits/rejected": 0.5458570122718811, |
|
"logps/chosen": -1003.1471557617188, |
|
"logps/rejected": -1591.346435546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.605949401855469, |
|
"rewards/margins": 46.321319580078125, |
|
"rewards/rejected": -57.92727279663086, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.048780487804878, |
|
"grad_norm": 0.0003143485519103706, |
|
"learning_rate": 0.00010416904708904548, |
|
"logits/chosen": 0.6694925427436829, |
|
"logits/rejected": 0.6114668846130371, |
|
"logps/chosen": -812.6236572265625, |
|
"logps/rejected": -1500.825439453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.084518432617188, |
|
"rewards/margins": 35.370384216308594, |
|
"rewards/rejected": -52.45490264892578, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.065040650406504, |
|
"grad_norm": 5.148892228135082e-07, |
|
"learning_rate": 0.00010139004047683151, |
|
"logits/chosen": 1.3868217468261719, |
|
"logits/rejected": 1.2723997831344604, |
|
"logps/chosen": -1227.2484130859375, |
|
"logps/rejected": -1608.285400390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -24.8009033203125, |
|
"rewards/margins": 34.73870086669922, |
|
"rewards/rejected": -59.53960418701172, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.08130081300813, |
|
"grad_norm": 0.005973002407699823, |
|
"learning_rate": 9.860995952316851e-05, |
|
"logits/chosen": 0.5520488023757935, |
|
"logits/rejected": 1.013694405555725, |
|
"logps/chosen": -918.3431396484375, |
|
"logps/rejected": -1930.933349609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.301834106445312, |
|
"rewards/margins": 54.176063537597656, |
|
"rewards/rejected": -71.4779052734375, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.0975609756097562, |
|
"grad_norm": 0.0016096890904009342, |
|
"learning_rate": 9.583095291095453e-05, |
|
"logits/chosen": 1.927367925643921, |
|
"logits/rejected": 2.1797337532043457, |
|
"logps/chosen": -1027.62255859375, |
|
"logps/rejected": -1242.6591796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.128509521484375, |
|
"rewards/margins": 44.30337905883789, |
|
"rewards/rejected": -54.431888580322266, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.113821138211382, |
|
"grad_norm": 0.00028535688761621714, |
|
"learning_rate": 9.305516849274541e-05, |
|
"logits/chosen": 0.9750661849975586, |
|
"logits/rejected": 1.2060834169387817, |
|
"logps/chosen": -1015.9608154296875, |
|
"logps/rejected": -1445.724609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.628022193908691, |
|
"rewards/margins": 35.57917785644531, |
|
"rewards/rejected": -49.20719909667969, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.1300813008130082, |
|
"grad_norm": 0.5866624712944031, |
|
"learning_rate": 9.028475163071141e-05, |
|
"logits/chosen": 1.4004566669464111, |
|
"logits/rejected": 1.3820116519927979, |
|
"logps/chosen": -1156.070556640625, |
|
"logps/rejected": -1605.488525390625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -29.29137420654297, |
|
"rewards/margins": 34.68971633911133, |
|
"rewards/rejected": -63.9810905456543, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.146341463414634, |
|
"grad_norm": 0.002478301292285323, |
|
"learning_rate": 8.752184353851916e-05, |
|
"logits/chosen": 0.6324145197868347, |
|
"logits/rejected": 0.6125429272651672, |
|
"logps/chosen": -836.22900390625, |
|
"logps/rejected": -1863.617919921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -19.06183433532715, |
|
"rewards/margins": 52.36142349243164, |
|
"rewards/rejected": -71.42325592041016, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.1626016260162602, |
|
"grad_norm": 1.2947886034453404e-06, |
|
"learning_rate": 8.47685796264195e-05, |
|
"logits/chosen": 1.245481014251709, |
|
"logits/rejected": 1.2732493877410889, |
|
"logps/chosen": -1120.00146484375, |
|
"logps/rejected": -1680.321533203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -20.079360961914062, |
|
"rewards/margins": 38.847572326660156, |
|
"rewards/rejected": -58.92693328857422, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.1788617886178863, |
|
"grad_norm": 7.430622645188123e-05, |
|
"learning_rate": 8.202708785082121e-05, |
|
"logits/chosen": 1.3398401737213135, |
|
"logits/rejected": 1.310295820236206, |
|
"logps/chosen": -979.2159423828125, |
|
"logps/rejected": -1660.695068359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.719205856323242, |
|
"rewards/margins": 44.77515411376953, |
|
"rewards/rejected": -62.494354248046875, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.1951219512195121, |
|
"grad_norm": 0.008477458730340004, |
|
"learning_rate": 7.929948706962508e-05, |
|
"logits/chosen": 1.2300162315368652, |
|
"logits/rejected": 1.4617760181427002, |
|
"logps/chosen": -1189.85791015625, |
|
"logps/rejected": -1378.9652099609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.7158842086792, |
|
"rewards/margins": 37.057861328125, |
|
"rewards/rejected": -51.77375030517578, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.2113821138211383, |
|
"grad_norm": 2.7032048819819465e-05, |
|
"learning_rate": 7.658788540459062e-05, |
|
"logits/chosen": 0.43838104605674744, |
|
"logits/rejected": 0.5289822220802307, |
|
"logps/chosen": -988.083251953125, |
|
"logps/rejected": -1331.2569580078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.296829223632812, |
|
"rewards/margins": 34.85190963745117, |
|
"rewards/rejected": -52.14873504638672, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.2276422764227641, |
|
"grad_norm": 4.829147570717396e-08, |
|
"learning_rate": 7.389437861200024e-05, |
|
"logits/chosen": 1.997933030128479, |
|
"logits/rejected": 1.9013891220092773, |
|
"logps/chosen": -1068.2757568359375, |
|
"logps/rejected": -1249.0604248046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.518118858337402, |
|
"rewards/margins": 28.58959197998047, |
|
"rewards/rejected": -43.10770797729492, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.2439024390243902, |
|
"grad_norm": 2.3297241913411426e-10, |
|
"learning_rate": 7.122104846288064e-05, |
|
"logits/chosen": 1.2531983852386475, |
|
"logits/rejected": 1.4057786464691162, |
|
"logps/chosen": -1080.928466796875, |
|
"logps/rejected": -1503.05615234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.961380958557129, |
|
"rewards/margins": 36.710487365722656, |
|
"rewards/rejected": -51.67186737060547, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.2601626016260163, |
|
"grad_norm": 3.4512660931795835e-05, |
|
"learning_rate": 6.85699611340333e-05, |
|
"logits/chosen": 1.8900461196899414, |
|
"logits/rejected": 2.0945119857788086, |
|
"logps/chosen": -1128.474365234375, |
|
"logps/rejected": -1140.455810546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.547296524047852, |
|
"rewards/margins": 22.667064666748047, |
|
"rewards/rejected": -35.214359283447266, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.2764227642276422, |
|
"grad_norm": 9.897094059851952e-06, |
|
"learning_rate": 6.594316561111724e-05, |
|
"logits/chosen": 1.3735342025756836, |
|
"logits/rejected": 1.4095773696899414, |
|
"logps/chosen": -899.8128662109375, |
|
"logps/rejected": -1251.731689453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.026573181152344, |
|
"rewards/margins": 29.826189041137695, |
|
"rewards/rejected": -46.85276412963867, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.2926829268292683, |
|
"grad_norm": 1.6814607079140842e-05, |
|
"learning_rate": 6.334269210501875e-05, |
|
"logits/chosen": 0.5582981705665588, |
|
"logits/rejected": 0.6065884232521057, |
|
"logps/chosen": -1002.4566650390625, |
|
"logps/rejected": -1512.957275390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -22.382816314697266, |
|
"rewards/margins": 31.659029006958008, |
|
"rewards/rejected": -54.041847229003906, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.3089430894308944, |
|
"grad_norm": 2.0822379156015813e-05, |
|
"learning_rate": 6.0770550482731924e-05, |
|
"logits/chosen": 0.5204108357429504, |
|
"logits/rejected": 0.6756694912910461, |
|
"logps/chosen": -1329.38134765625, |
|
"logps/rejected": -1816.52392578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -36.05492401123047, |
|
"rewards/margins": 34.550933837890625, |
|
"rewards/rejected": -70.6058578491211, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.3252032520325203, |
|
"grad_norm": 3.052237573797356e-08, |
|
"learning_rate": 5.8228728713962543e-05, |
|
"logits/chosen": 0.6427198648452759, |
|
"logits/rejected": 0.7359005212783813, |
|
"logps/chosen": -989.2234497070312, |
|
"logps/rejected": -2282.662841796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -18.713542938232422, |
|
"rewards/margins": 77.4079360961914, |
|
"rewards/rejected": -96.1214828491211, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.3414634146341464, |
|
"grad_norm": 0.0013960793148726225, |
|
"learning_rate": 5.571919133465605e-05, |
|
"logits/chosen": 2.0142054557800293, |
|
"logits/rejected": 1.9838088750839233, |
|
"logps/chosen": -1325.515380859375, |
|
"logps/rejected": -1202.38134765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -18.17080307006836, |
|
"rewards/margins": 22.907329559326172, |
|
"rewards/rejected": -41.07813262939453, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.3577235772357723, |
|
"grad_norm": 7.671826460864395e-05, |
|
"learning_rate": 5.324387792863719e-05, |
|
"logits/chosen": 1.3578662872314453, |
|
"logits/rejected": 2.439218044281006, |
|
"logps/chosen": -757.6051635742188, |
|
"logps/rejected": -1135.0416259765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.389976739883423, |
|
"rewards/margins": 42.346309661865234, |
|
"rewards/rejected": -38.95633316040039, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.3739837398373984, |
|
"grad_norm": 3.062094037886709e-06, |
|
"learning_rate": 5.080470162853472e-05, |
|
"logits/chosen": 1.2051855325698853, |
|
"logits/rejected": 1.2651633024215698, |
|
"logps/chosen": -1020.686767578125, |
|
"logps/rejected": -1463.1270751953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.808335304260254, |
|
"rewards/margins": 38.411285400390625, |
|
"rewards/rejected": -49.21961975097656, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.3902439024390243, |
|
"grad_norm": 0.00018378288950771093, |
|
"learning_rate": 4.840354763714991e-05, |
|
"logits/chosen": 0.03289281576871872, |
|
"logits/rejected": 0.014516504481434822, |
|
"logps/chosen": -995.1809692382812, |
|
"logps/rejected": -2124.506591796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -32.061710357666016, |
|
"rewards/margins": 57.61822509765625, |
|
"rewards/rejected": -89.67993927001953, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.4065040650406504, |
|
"grad_norm": 5.109325866214931e-05, |
|
"learning_rate": 4.604227177041156e-05, |
|
"logits/chosen": 1.2230056524276733, |
|
"logits/rejected": 1.476953387260437, |
|
"logps/chosen": -1030.1702880859375, |
|
"logps/rejected": -1326.158935546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.08495044708252, |
|
"rewards/margins": 34.212921142578125, |
|
"rewards/rejected": -47.29787063598633, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.4227642276422765, |
|
"grad_norm": 1.226226800099539e-07, |
|
"learning_rate": 4.372269902304363e-05, |
|
"logits/chosen": 2.002579689025879, |
|
"logits/rejected": 2.0382652282714844, |
|
"logps/chosen": -1250.2037353515625, |
|
"logps/rejected": -1071.18896484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.541341781616211, |
|
"rewards/margins": 32.357688903808594, |
|
"rewards/rejected": -43.89903259277344, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.4390243902439024, |
|
"grad_norm": 6.719565863022581e-05, |
|
"learning_rate": 4.144662215805426e-05, |
|
"logits/chosen": 2.3775994777679443, |
|
"logits/rejected": 2.751979351043701, |
|
"logps/chosen": -828.1460571289062, |
|
"logps/rejected": -906.63037109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.038515090942383, |
|
"rewards/margins": 18.016881942749023, |
|
"rewards/rejected": -23.055395126342773, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.4552845528455285, |
|
"grad_norm": 0.003350652754306793, |
|
"learning_rate": 3.921580032113602e-05, |
|
"logits/chosen": 2.568944215774536, |
|
"logits/rejected": 2.653423547744751, |
|
"logps/chosen": -1348.401123046875, |
|
"logps/rejected": -1087.044921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.072247505187988, |
|
"rewards/margins": 23.256484985351562, |
|
"rewards/rejected": -31.328731536865234, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.4715447154471546, |
|
"grad_norm": 1.6966988596323063e-06, |
|
"learning_rate": 3.7031957681048604e-05, |
|
"logits/chosen": 0.7617810964584351, |
|
"logits/rejected": 0.810763418674469, |
|
"logps/chosen": -818.6165161132812, |
|
"logps/rejected": -1948.71728515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.259980201721191, |
|
"rewards/margins": 87.85292053222656, |
|
"rewards/rejected": -95.1128921508789, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.4878048780487805, |
|
"grad_norm": 1.3153041322766512e-07, |
|
"learning_rate": 3.489678209703475e-05, |
|
"logits/chosen": 0.7253928780555725, |
|
"logits/rejected": 0.7696207761764526, |
|
"logps/chosen": -1109.42919921875, |
|
"logps/rejected": -1995.980712890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -18.064022064208984, |
|
"rewards/margins": 62.025482177734375, |
|
"rewards/rejected": -80.08950805664062, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.5040650406504064, |
|
"grad_norm": 7.262394319695886e-06, |
|
"learning_rate": 3.281192381429894e-05, |
|
"logits/chosen": 1.3864871263504028, |
|
"logits/rejected": 1.5070679187774658, |
|
"logps/chosen": -1201.9698486328125, |
|
"logps/rejected": -1620.9224853515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.929353713989258, |
|
"rewards/margins": 49.26674270629883, |
|
"rewards/rejected": -66.19609069824219, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.5203252032520327, |
|
"grad_norm": 6.851015768916113e-06, |
|
"learning_rate": 3.077899418855772e-05, |
|
"logits/chosen": 0.7263829112052917, |
|
"logits/rejected": 0.6369051337242126, |
|
"logps/chosen": -747.6914672851562, |
|
"logps/rejected": -1705.2852783203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.3454008102417, |
|
"rewards/margins": 49.285179138183594, |
|
"rewards/rejected": -64.63057708740234, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.5365853658536586, |
|
"grad_norm": 0.0002986456092912704, |
|
"learning_rate": 2.879956444064703e-05, |
|
"logits/chosen": 1.4310306310653687, |
|
"logits/rejected": 1.2261309623718262, |
|
"logps/chosen": -936.9393310546875, |
|
"logps/rejected": -1461.7275390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.54560661315918, |
|
"rewards/margins": 38.0745735168457, |
|
"rewards/rejected": -51.62017822265625, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.5528455284552845, |
|
"grad_norm": 5.264350306788401e-07, |
|
"learning_rate": 2.6875164442149147e-05, |
|
"logits/chosen": 0.5105292797088623, |
|
"logits/rejected": 0.7118083834648132, |
|
"logps/chosen": -936.799560546875, |
|
"logps/rejected": -1879.8419189453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.81096649169922, |
|
"rewards/margins": 43.707740783691406, |
|
"rewards/rejected": -60.518707275390625, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.5691056910569106, |
|
"grad_norm": 0.00016159842198248953, |
|
"learning_rate": 2.500728153297788e-05, |
|
"logits/chosen": 1.8368278741836548, |
|
"logits/rejected": 2.204590082168579, |
|
"logps/chosen": -1461.580078125, |
|
"logps/rejected": -1380.7667236328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.631231307983398, |
|
"rewards/margins": 26.685359954833984, |
|
"rewards/rejected": -40.316593170166016, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.5853658536585367, |
|
"grad_norm": 0.00013451933045871556, |
|
"learning_rate": 2.3197359371835802e-05, |
|
"logits/chosen": 1.1100133657455444, |
|
"logits/rejected": 1.2370729446411133, |
|
"logps/chosen": -948.371826171875, |
|
"logps/rejected": -1276.979248046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.95567512512207, |
|
"rewards/margins": 37.89854431152344, |
|
"rewards/rejected": -47.854225158691406, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.6016260162601625, |
|
"grad_norm": 0.00024462357396259904, |
|
"learning_rate": 2.1446796820432167e-05, |
|
"logits/chosen": 1.7180746793746948, |
|
"logits/rejected": 2.153879404067993, |
|
"logps/chosen": -1276.5830078125, |
|
"logps/rejected": -1113.281494140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.072443008422852, |
|
"rewards/margins": 17.009380340576172, |
|
"rewards/rejected": -31.081825256347656, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.6178861788617886, |
|
"grad_norm": 1.6178487882712034e-08, |
|
"learning_rate": 1.9756946862323535e-05, |
|
"logits/chosen": 1.3304284811019897, |
|
"logits/rejected": 1.1570796966552734, |
|
"logps/chosen": -1224.40380859375, |
|
"logps/rejected": -1765.047119140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.283369064331055, |
|
"rewards/margins": 56.30316925048828, |
|
"rewards/rejected": -72.58653259277344, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6341463414634148, |
|
"grad_norm": 1.8081759378674178e-07, |
|
"learning_rate": 1.8129115557213262e-05, |
|
"logits/chosen": 0.5725196599960327, |
|
"logits/rejected": 0.7406933903694153, |
|
"logps/chosen": -808.1942138671875, |
|
"logps/rejected": -1623.4114990234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.64067840576172, |
|
"rewards/margins": 40.391014099121094, |
|
"rewards/rejected": -58.03169250488281, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.6504065040650406, |
|
"grad_norm": 0.00023044626868795604, |
|
"learning_rate": 1.656456103151728e-05, |
|
"logits/chosen": 2.142577886581421, |
|
"logits/rejected": 2.108786106109619, |
|
"logps/chosen": -951.4678955078125, |
|
"logps/rejected": -1318.56201171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.911703109741211, |
|
"rewards/margins": 40.60116958618164, |
|
"rewards/rejected": -47.512874603271484, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 2.5419683424843242e-06, |
|
"learning_rate": 1.5064492505977234e-05, |
|
"logits/chosen": 1.2146611213684082, |
|
"logits/rejected": 1.1194839477539062, |
|
"logps/chosen": -994.2359619140625, |
|
"logps/rejected": -1273.3843994140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.964194297790527, |
|
"rewards/margins": 37.999244689941406, |
|
"rewards/rejected": -47.963443756103516, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.6829268292682928, |
|
"grad_norm": 2.680222932482934e-09, |
|
"learning_rate": 1.363006936107183e-05, |
|
"logits/chosen": 1.9312256574630737, |
|
"logits/rejected": 1.8441157341003418, |
|
"logps/chosen": -984.7633666992188, |
|
"logps/rejected": -1123.7462158203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.190778732299805, |
|
"rewards/margins": 35.19913864135742, |
|
"rewards/rejected": -42.389915466308594, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.6991869918699187, |
|
"grad_norm": 1.2424061424098909e-05, |
|
"learning_rate": 1.2262400240949023e-05, |
|
"logits/chosen": 1.6461536884307861, |
|
"logits/rejected": 1.8136305809020996, |
|
"logps/chosen": -904.748291015625, |
|
"logps/rejected": -1393.095947265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.034971237182617, |
|
"rewards/margins": 42.80604553222656, |
|
"rewards/rejected": -47.84101867675781, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.7154471544715446, |
|
"grad_norm": 4.1589805732655805e-07, |
|
"learning_rate": 1.0962542196571634e-05, |
|
"logits/chosen": 1.3145643472671509, |
|
"logits/rejected": 1.1997283697128296, |
|
"logps/chosen": -939.1678466796875, |
|
"logps/rejected": -1638.798583984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.597799301147461, |
|
"rewards/margins": 44.598976135253906, |
|
"rewards/rejected": -59.19677734375, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.7317073170731707, |
|
"grad_norm": 6.540443564517773e-08, |
|
"learning_rate": 9.731499868738447e-06, |
|
"logits/chosen": 2.1823389530181885, |
|
"logits/rejected": 2.301424264907837, |
|
"logps/chosen": -1150.3404541015625, |
|
"logps/rejected": -1366.84814453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.673786163330078, |
|
"rewards/margins": 34.13035202026367, |
|
"rewards/rejected": -46.804134368896484, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.7479674796747968, |
|
"grad_norm": 4.622437700163573e-05, |
|
"learning_rate": 8.570224711612385e-06, |
|
"logits/chosen": 0.4944400489330292, |
|
"logits/rejected": 0.5377110242843628, |
|
"logps/chosen": -945.9273681640625, |
|
"logps/rejected": -1679.0079345703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.38947296142578, |
|
"rewards/margins": 47.88871383666992, |
|
"rewards/rejected": -65.27819061279297, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.7642276422764227, |
|
"grad_norm": 3.809813506450155e-06, |
|
"learning_rate": 7.479614257355971e-06, |
|
"logits/chosen": 1.2999298572540283, |
|
"logits/rejected": 1.300133228302002, |
|
"logps/chosen": -1008.9362182617188, |
|
"logps/rejected": -1288.076416015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.351741790771484, |
|
"rewards/margins": 42.22937774658203, |
|
"rewards/rejected": -51.581119537353516, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.7804878048780488, |
|
"grad_norm": 0.007235921919345856, |
|
"learning_rate": 6.460511422441984e-06, |
|
"logits/chosen": 1.9115304946899414, |
|
"logits/rejected": 2.1205523014068604, |
|
"logps/chosen": -1132.468017578125, |
|
"logps/rejected": -1027.97802734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.733047485351562, |
|
"rewards/margins": 16.740474700927734, |
|
"rewards/rejected": -30.47352409362793, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.796747967479675, |
|
"grad_norm": 1.4731797364220256e-06, |
|
"learning_rate": 5.5137038561761115e-06, |
|
"logits/chosen": 0.6670889854431152, |
|
"logits/rejected": 0.6521254181861877, |
|
"logps/chosen": -742.6629638671875, |
|
"logps/rejected": -1944.6416015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.560412406921387, |
|
"rewards/margins": 63.10647964477539, |
|
"rewards/rejected": -77.6668930053711, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.8130081300813008, |
|
"grad_norm": 5.7062050473177806e-05, |
|
"learning_rate": 4.639923331934471e-06, |
|
"logits/chosen": 0.9131884574890137, |
|
"logits/rejected": 1.1928483247756958, |
|
"logps/chosen": -1271.8701171875, |
|
"logps/rejected": -1448.082763671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.25135040283203, |
|
"rewards/margins": 34.5776252746582, |
|
"rewards/rejected": -50.82897186279297, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.8292682926829267, |
|
"grad_norm": 2.0286324797780253e-05, |
|
"learning_rate": 3.839845181587098e-06, |
|
"logits/chosen": 0.6853426694869995, |
|
"logits/rejected": 0.7730221748352051, |
|
"logps/chosen": -847.8319702148438, |
|
"logps/rejected": -2002.734130859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -18.896442413330078, |
|
"rewards/margins": 51.54301071166992, |
|
"rewards/rejected": -70.439453125, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.845528455284553, |
|
"grad_norm": 4.680402525991667e-06, |
|
"learning_rate": 3.1140877735439387e-06, |
|
"logits/chosen": 0.8352583050727844, |
|
"logits/rejected": 0.7815011143684387, |
|
"logps/chosen": -1006.5256958007812, |
|
"logps/rejected": -1871.0528564453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -23.025442123413086, |
|
"rewards/margins": 47.73127746582031, |
|
"rewards/rejected": -70.75672149658203, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.8617886178861789, |
|
"grad_norm": 4.835527761315461e-06, |
|
"learning_rate": 2.4632120348272003e-06, |
|
"logits/chosen": 0.6664273142814636, |
|
"logits/rejected": 0.7628079056739807, |
|
"logps/chosen": -1057.7972412109375, |
|
"logps/rejected": -1896.2288818359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -26.96924591064453, |
|
"rewards/margins": 47.0149040222168, |
|
"rewards/rejected": -73.9841537475586, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.8780487804878048, |
|
"grad_norm": 1.7554378928252845e-06, |
|
"learning_rate": 1.88772101753929e-06, |
|
"logits/chosen": 1.4583988189697266, |
|
"logits/rejected": 1.4834201335906982, |
|
"logps/chosen": -1100.9306640625, |
|
"logps/rejected": -1776.69091796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -19.52985954284668, |
|
"rewards/margins": 46.82954788208008, |
|
"rewards/rejected": -66.35940551757812, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.8943089430894309, |
|
"grad_norm": 0.0001541744713904336, |
|
"learning_rate": 1.3880595100613792e-06, |
|
"logits/chosen": 1.328132152557373, |
|
"logits/rejected": 1.6395397186279297, |
|
"logps/chosen": -1433.81689453125, |
|
"logps/rejected": -1625.1180419921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -22.608409881591797, |
|
"rewards/margins": 31.696552276611328, |
|
"rewards/rejected": -54.304962158203125, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.910569105691057, |
|
"grad_norm": 3.519949677865952e-05, |
|
"learning_rate": 9.64613693283123e-07, |
|
"logits/chosen": 1.856284737586975, |
|
"logits/rejected": 1.8918788433074951, |
|
"logps/chosen": -1302.91796875, |
|
"logps/rejected": -1380.99365234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.29294204711914, |
|
"rewards/margins": 32.75577926635742, |
|
"rewards/rejected": -48.0487174987793, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.9268292682926829, |
|
"grad_norm": 8.586041076341644e-05, |
|
"learning_rate": 6.177108421292266e-07, |
|
"logits/chosen": 1.2806370258331299, |
|
"logits/rejected": 1.3649016618728638, |
|
"logps/chosen": -988.1577758789062, |
|
"logps/rejected": -1595.25244140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.122652053833008, |
|
"rewards/margins": 36.193511962890625, |
|
"rewards/rejected": -52.316162109375, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.943089430894309, |
|
"grad_norm": 0.008627010509371758, |
|
"learning_rate": 3.4761907261356976e-07, |
|
"logits/chosen": 1.951653003692627, |
|
"logits/rejected": 1.9814622402191162, |
|
"logps/chosen": -1180.52294921875, |
|
"logps/rejected": -1512.510986328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.302892684936523, |
|
"rewards/margins": 42.75213623046875, |
|
"rewards/rejected": -59.05502700805664, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.959349593495935, |
|
"grad_norm": 1.4577848617136624e-07, |
|
"learning_rate": 1.545471346164007e-07, |
|
"logits/chosen": 1.3570653200149536, |
|
"logits/rejected": 1.1423208713531494, |
|
"logps/chosen": -1353.2474365234375, |
|
"logps/rejected": -1461.6622314453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -22.633544921875, |
|
"rewards/margins": 28.00894546508789, |
|
"rewards/rejected": -50.642486572265625, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.975609756097561, |
|
"grad_norm": 2.505672682673321e-07, |
|
"learning_rate": 3.8644250544594975e-08, |
|
"logits/chosen": 0.8167323470115662, |
|
"logits/rejected": 0.649781346321106, |
|
"logps/chosen": -991.8995971679688, |
|
"logps/rejected": -1850.18994140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -21.644643783569336, |
|
"rewards/margins": 54.82267761230469, |
|
"rewards/rejected": -76.46732330322266, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.9918699186991868, |
|
"grad_norm": 0.0001769052614690736, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 1.7628881931304932, |
|
"logits/rejected": 1.8846670389175415, |
|
"logps/chosen": -1067.9901123046875, |
|
"logps/rejected": -1213.6796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.579381942749023, |
|
"rewards/margins": 32.53736114501953, |
|
"rewards/rejected": -40.11674118041992, |
|
"step": 123 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 123, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 62, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |