|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 4168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002399232245681382, |
|
"grad_norm": 20.90535270812656, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -0.48379573225975037, |
|
"logits/rejected": -0.48017197847366333, |
|
"logps/chosen": -250.1331329345703, |
|
"logps/rejected": -232.6839141845703, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023992322456813818, |
|
"grad_norm": 20.316799458165775, |
|
"learning_rate": 1.199040767386091e-08, |
|
"logits/chosen": -0.4963577091693878, |
|
"logits/rejected": -0.5276286005973816, |
|
"logps/chosen": -441.6046142578125, |
|
"logps/rejected": -363.4785461425781, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.0010866652010008693, |
|
"rewards/margins": 0.0004253386869095266, |
|
"rewards/rejected": 0.0006613265140913427, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 20.614630611685648, |
|
"learning_rate": 2.398081534772182e-08, |
|
"logits/chosen": -0.5616664290428162, |
|
"logits/rejected": -0.5348426103591919, |
|
"logps/chosen": -311.93389892578125, |
|
"logps/rejected": -278.0029602050781, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.00013055796443950385, |
|
"rewards/margins": 0.0002189161314163357, |
|
"rewards/rejected": -8.835792687023059e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007197696737044146, |
|
"grad_norm": 19.284588490453608, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -0.5190974473953247, |
|
"logits/rejected": -0.5706892013549805, |
|
"logps/chosen": -319.748779296875, |
|
"logps/rejected": -331.7994689941406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0010061769280582666, |
|
"rewards/margins": 0.0013537806225940585, |
|
"rewards/rejected": -0.0003476037527434528, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 19.442113506121437, |
|
"learning_rate": 4.796163069544364e-08, |
|
"logits/chosen": -0.5577880144119263, |
|
"logits/rejected": -0.5859715938568115, |
|
"logps/chosen": -338.12628173828125, |
|
"logps/rejected": -314.81982421875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00019955830066464841, |
|
"rewards/margins": -5.653758853441104e-05, |
|
"rewards/rejected": 0.000256095954682678, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01199616122840691, |
|
"grad_norm": 20.77025303650937, |
|
"learning_rate": 5.995203836930455e-08, |
|
"logits/chosen": -0.5782157182693481, |
|
"logits/rejected": -0.5549123287200928, |
|
"logps/chosen": -335.87646484375, |
|
"logps/rejected": -289.2035217285156, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.0005626773927360773, |
|
"rewards/margins": -0.00042233389103785157, |
|
"rewards/rejected": 0.0009850109927356243, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 19.993953212894812, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": -0.5433920621871948, |
|
"logits/rejected": -0.49929919838905334, |
|
"logps/chosen": -355.02740478515625, |
|
"logps/rejected": -338.33148193359375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0006808604812249541, |
|
"rewards/margins": -0.0011384403333067894, |
|
"rewards/rejected": 0.0004575795610435307, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016794625719769675, |
|
"grad_norm": 17.979585480540507, |
|
"learning_rate": 8.393285371702638e-08, |
|
"logits/chosen": -0.49723702669143677, |
|
"logits/rejected": -0.48305654525756836, |
|
"logps/chosen": -353.8045654296875, |
|
"logps/rejected": -327.27716064453125, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.002497387584298849, |
|
"rewards/margins": 0.0025346879847347736, |
|
"rewards/rejected": -3.7300120311556384e-05, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 25.26789980160209, |
|
"learning_rate": 9.592326139088728e-08, |
|
"logits/chosen": -0.5563893914222717, |
|
"logits/rejected": -0.4884260594844818, |
|
"logps/chosen": -261.504638671875, |
|
"logps/rejected": -315.64349365234375, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.002656942466273904, |
|
"rewards/margins": 0.0009416007669642568, |
|
"rewards/rejected": 0.0017153415828943253, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021593090211132437, |
|
"grad_norm": 18.57391434910598, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": -0.5545334815979004, |
|
"logits/rejected": -0.5614916086196899, |
|
"logps/chosen": -396.33416748046875, |
|
"logps/rejected": -342.1172180175781, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.004076135344803333, |
|
"rewards/margins": 0.0013626832515001297, |
|
"rewards/rejected": 0.002713452558964491, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 20.078943593256316, |
|
"learning_rate": 1.199040767386091e-07, |
|
"logits/chosen": -0.5230361819267273, |
|
"logits/rejected": -0.4858153760433197, |
|
"logps/chosen": -326.3655090332031, |
|
"logps/rejected": -351.19390869140625, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0061579798348248005, |
|
"rewards/margins": 0.001345540746115148, |
|
"rewards/rejected": 0.004812438972294331, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026391554702495202, |
|
"grad_norm": 20.875381676857184, |
|
"learning_rate": 1.3189448441247004e-07, |
|
"logits/chosen": -0.5459330677986145, |
|
"logits/rejected": -0.5579243898391724, |
|
"logps/chosen": -287.3791198730469, |
|
"logps/rejected": -290.72393798828125, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.009638044983148575, |
|
"rewards/margins": 0.00030891623464412987, |
|
"rewards/rejected": 0.009329128079116344, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 20.107786625562355, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": -0.5211232900619507, |
|
"logits/rejected": -0.5448856353759766, |
|
"logps/chosen": -363.11431884765625, |
|
"logps/rejected": -348.9471435546875, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.014917564578354359, |
|
"rewards/margins": 0.003776032943278551, |
|
"rewards/rejected": 0.01114153116941452, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.031190019193857964, |
|
"grad_norm": 17.419578673944976, |
|
"learning_rate": 1.5587529976019183e-07, |
|
"logits/chosen": -0.5541412830352783, |
|
"logits/rejected": -0.5439847707748413, |
|
"logps/chosen": -273.02838134765625, |
|
"logps/rejected": -365.01483154296875, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.019415050745010376, |
|
"rewards/margins": 0.00881609134376049, |
|
"rewards/rejected": 0.01059896033257246, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 20.19183366811833, |
|
"learning_rate": 1.6786570743405277e-07, |
|
"logits/chosen": -0.45227426290512085, |
|
"logits/rejected": -0.45624417066574097, |
|
"logps/chosen": -366.0704040527344, |
|
"logps/rejected": -355.80474853515625, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.015087930485606194, |
|
"rewards/margins": 0.005366227589547634, |
|
"rewards/rejected": 0.009721704758703709, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03598848368522073, |
|
"grad_norm": 19.066102175382554, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": -0.5239461064338684, |
|
"logits/rejected": -0.5222934484481812, |
|
"logps/chosen": -282.2486267089844, |
|
"logps/rejected": -280.42718505859375, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.020530493929982185, |
|
"rewards/margins": 0.0036348134744912386, |
|
"rewards/rejected": 0.016895681619644165, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 19.027643825440478, |
|
"learning_rate": 1.9184652278177456e-07, |
|
"logits/chosen": -0.46417126059532166, |
|
"logits/rejected": -0.47142887115478516, |
|
"logps/chosen": -372.260009765625, |
|
"logps/rejected": -299.72418212890625, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.024696629494428635, |
|
"rewards/margins": 0.01829499378800392, |
|
"rewards/rejected": 0.006401637103408575, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040786948176583494, |
|
"grad_norm": 20.05176445155851, |
|
"learning_rate": 2.038369304556355e-07, |
|
"logits/chosen": -0.4728211760520935, |
|
"logits/rejected": -0.4653477072715759, |
|
"logps/chosen": -410.3612365722656, |
|
"logps/rejected": -395.3166198730469, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0359230674803257, |
|
"rewards/margins": 0.016820725053548813, |
|
"rewards/rejected": 0.019102338701486588, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 19.717298290033078, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": -0.5537582039833069, |
|
"logits/rejected": -0.5516412854194641, |
|
"logps/chosen": -294.61224365234375, |
|
"logps/rejected": -295.9138488769531, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.02052612230181694, |
|
"rewards/margins": 0.01923990622162819, |
|
"rewards/rejected": 0.0012862167786806822, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04558541266794626, |
|
"grad_norm": 22.451123090776118, |
|
"learning_rate": 2.278177458033573e-07, |
|
"logits/chosen": -0.4669855535030365, |
|
"logits/rejected": -0.46975016593933105, |
|
"logps/chosen": -386.79052734375, |
|
"logps/rejected": -322.21063232421875, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0220388974994421, |
|
"rewards/margins": 0.006877691484987736, |
|
"rewards/rejected": 0.015161206014454365, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 18.561601949682256, |
|
"learning_rate": 2.398081534772182e-07, |
|
"logits/chosen": -0.5395389199256897, |
|
"logits/rejected": -0.4788607060909271, |
|
"logps/chosen": -370.59832763671875, |
|
"logps/rejected": -354.6778869628906, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.026123318821191788, |
|
"rewards/margins": 0.02886904776096344, |
|
"rewards/rejected": -0.002745730336755514, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05038387715930902, |
|
"grad_norm": 22.094980613810247, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": -0.5375515818595886, |
|
"logits/rejected": -0.547138512134552, |
|
"logps/chosen": -304.6062927246094, |
|
"logps/rejected": -330.10687255859375, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.03125152364373207, |
|
"rewards/margins": 0.03389766812324524, |
|
"rewards/rejected": -0.002646142616868019, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 18.489557098025607, |
|
"learning_rate": 2.637889688249401e-07, |
|
"logits/chosen": -0.5321250557899475, |
|
"logits/rejected": -0.5411959886550903, |
|
"logps/chosen": -391.7878723144531, |
|
"logps/rejected": -377.56280517578125, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.012276771478354931, |
|
"rewards/margins": -0.0032779511529952288, |
|
"rewards/rejected": 0.01555472332984209, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05518234165067178, |
|
"grad_norm": 22.525575101088698, |
|
"learning_rate": 2.7577937649880093e-07, |
|
"logits/chosen": -0.5341587066650391, |
|
"logits/rejected": -0.5006336569786072, |
|
"logps/chosen": -303.1698913574219, |
|
"logps/rejected": -340.33331298828125, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.005457176826894283, |
|
"rewards/margins": 0.024495940655469894, |
|
"rewards/rejected": -0.019038762897253036, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 20.562033234462188, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": -0.5429738759994507, |
|
"logits/rejected": -0.5385856032371521, |
|
"logps/chosen": -357.30609130859375, |
|
"logps/rejected": -311.60260009765625, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.030288681387901306, |
|
"rewards/margins": 0.048918746411800385, |
|
"rewards/rejected": -0.01863006316125393, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05998080614203455, |
|
"grad_norm": 22.776979550503004, |
|
"learning_rate": 2.997601918465228e-07, |
|
"logits/chosen": -0.5102118253707886, |
|
"logits/rejected": -0.5135980844497681, |
|
"logps/chosen": -294.0608825683594, |
|
"logps/rejected": -275.83673095703125, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.005173470359295607, |
|
"rewards/margins": 0.06198770925402641, |
|
"rewards/rejected": -0.05681424215435982, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 21.890329640256528, |
|
"learning_rate": 3.1175059952038366e-07, |
|
"logits/chosen": -0.5791837573051453, |
|
"logits/rejected": -0.5334831476211548, |
|
"logps/chosen": -353.4739074707031, |
|
"logps/rejected": -343.4547119140625, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.02404799312353134, |
|
"rewards/margins": 0.04426788166165352, |
|
"rewards/rejected": -0.06831587105989456, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0647792706333973, |
|
"grad_norm": 20.523920744785585, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": -0.49191370606422424, |
|
"logits/rejected": -0.5529422163963318, |
|
"logps/chosen": -347.00494384765625, |
|
"logps/rejected": -282.3544921875, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03170743212103844, |
|
"rewards/margins": 0.039711810648441315, |
|
"rewards/rejected": -0.07141923159360886, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 20.886438274884952, |
|
"learning_rate": 3.3573141486810554e-07, |
|
"logits/chosen": -0.5886783599853516, |
|
"logits/rejected": -0.5640865564346313, |
|
"logps/chosen": -364.08575439453125, |
|
"logps/rejected": -354.1321105957031, |
|
"loss": 0.6504, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.020602982491254807, |
|
"rewards/margins": 0.0775846317410469, |
|
"rewards/rejected": -0.09818761050701141, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06957773512476008, |
|
"grad_norm": 19.345700277409666, |
|
"learning_rate": 3.477218225419664e-07, |
|
"logits/chosen": -0.5530000329017639, |
|
"logits/rejected": -0.5117976665496826, |
|
"logps/chosen": -350.86199951171875, |
|
"logps/rejected": -327.6963806152344, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.05797697231173515, |
|
"rewards/margins": 0.062293171882629395, |
|
"rewards/rejected": -0.12027014791965485, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 22.275769826792928, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": -0.6038728952407837, |
|
"logits/rejected": -0.6336754560470581, |
|
"logps/chosen": -332.75714111328125, |
|
"logps/rejected": -356.898193359375, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.10048200935125351, |
|
"rewards/margins": 0.09853404760360718, |
|
"rewards/rejected": -0.1990160346031189, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07437619961612284, |
|
"grad_norm": 22.363116052564926, |
|
"learning_rate": 3.7170263788968827e-07, |
|
"logits/chosen": -0.5675481557846069, |
|
"logits/rejected": -0.6176060438156128, |
|
"logps/chosen": -353.2454833984375, |
|
"logps/rejected": -325.49066162109375, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0909217894077301, |
|
"rewards/margins": 0.13223211467266083, |
|
"rewards/rejected": -0.22315391898155212, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 24.95798686492851, |
|
"learning_rate": 3.836930455635491e-07, |
|
"logits/chosen": -0.6077001094818115, |
|
"logits/rejected": -0.609139621257782, |
|
"logps/chosen": -343.24127197265625, |
|
"logps/rejected": -309.5650634765625, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.055519819259643555, |
|
"rewards/margins": 0.10148320347070694, |
|
"rewards/rejected": -0.1570030152797699, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07917466410748561, |
|
"grad_norm": 21.14907440323966, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": -0.5618354082107544, |
|
"logits/rejected": -0.5163384079933167, |
|
"logps/chosen": -333.4284973144531, |
|
"logps/rejected": -383.4358825683594, |
|
"loss": 0.6351, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13757416605949402, |
|
"rewards/margins": 0.1750974953174591, |
|
"rewards/rejected": -0.3126716911792755, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 23.505161956514563, |
|
"learning_rate": 4.07673860911271e-07, |
|
"logits/chosen": -0.5577572584152222, |
|
"logits/rejected": -0.5682773590087891, |
|
"logps/chosen": -311.04046630859375, |
|
"logps/rejected": -350.06011962890625, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10792098939418793, |
|
"rewards/margins": 0.2111283242702484, |
|
"rewards/rejected": -0.31904932856559753, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08397312859884837, |
|
"grad_norm": 24.141694779806222, |
|
"learning_rate": 4.1966426858513185e-07, |
|
"logits/chosen": -0.6674095392227173, |
|
"logits/rejected": -0.6525458097457886, |
|
"logps/chosen": -385.8694152832031, |
|
"logps/rejected": -387.1976013183594, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.24417324364185333, |
|
"rewards/margins": 0.12837204337120056, |
|
"rewards/rejected": -0.3725453317165375, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 25.804069948612213, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": -0.5833398699760437, |
|
"logits/rejected": -0.6397580504417419, |
|
"logps/chosen": -350.5534973144531, |
|
"logps/rejected": -299.1941833496094, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2430131882429123, |
|
"rewards/margins": 0.10048626363277435, |
|
"rewards/rejected": -0.34349945187568665, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08877159309021113, |
|
"grad_norm": 31.03793475076566, |
|
"learning_rate": 4.436450839328537e-07, |
|
"logits/chosen": -0.5922696590423584, |
|
"logits/rejected": -0.5713749527931213, |
|
"logps/chosen": -338.27667236328125, |
|
"logps/rejected": -366.77166748046875, |
|
"loss": 0.6267, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.27600157260894775, |
|
"rewards/margins": 0.22378632426261902, |
|
"rewards/rejected": -0.49978795647621155, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 21.29384938198475, |
|
"learning_rate": 4.556354916067146e-07, |
|
"logits/chosen": -0.6022308468818665, |
|
"logits/rejected": -0.5682617425918579, |
|
"logps/chosen": -323.4892272949219, |
|
"logps/rejected": -347.95111083984375, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2343587428331375, |
|
"rewards/margins": 0.22317072749137878, |
|
"rewards/rejected": -0.4575294554233551, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0935700575815739, |
|
"grad_norm": 23.74603674013515, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": -0.5804970860481262, |
|
"logits/rejected": -0.5728699564933777, |
|
"logps/chosen": -381.6591796875, |
|
"logps/rejected": -358.8669128417969, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3958897590637207, |
|
"rewards/margins": 0.13457268476486206, |
|
"rewards/rejected": -0.5304625034332275, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 22.488076430906265, |
|
"learning_rate": 4.796163069544364e-07, |
|
"logits/chosen": -0.6037659049034119, |
|
"logits/rejected": -0.6473450660705566, |
|
"logps/chosen": -350.5100402832031, |
|
"logps/rejected": -356.1009826660156, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3837326467037201, |
|
"rewards/margins": 0.2828107476234436, |
|
"rewards/rejected": -0.6665433645248413, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09836852207293666, |
|
"grad_norm": 28.205747441162394, |
|
"learning_rate": 4.916067146282974e-07, |
|
"logits/chosen": -0.6226581335067749, |
|
"logits/rejected": -0.606611430644989, |
|
"logps/chosen": -347.90966796875, |
|
"logps/rejected": -401.1394958496094, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3836399018764496, |
|
"rewards/margins": 0.22059115767478943, |
|
"rewards/rejected": -0.6042311191558838, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 24.57218142171684, |
|
"learning_rate": 4.999992108529978e-07, |
|
"logits/chosen": -0.5291169881820679, |
|
"logits/rejected": -0.5468065142631531, |
|
"logps/chosen": -444.72589111328125, |
|
"logps/rejected": -439.670654296875, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.48520898818969727, |
|
"rewards/margins": 0.3270217478275299, |
|
"rewards/rejected": -0.8122307062149048, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10316698656429943, |
|
"grad_norm": 34.823244275804946, |
|
"learning_rate": 4.999851817115532e-07, |
|
"logits/chosen": -0.6540865302085876, |
|
"logits/rejected": -0.5904898047447205, |
|
"logps/chosen": -351.2285461425781, |
|
"logps/rejected": -386.56890869140625, |
|
"loss": 0.6093, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4589855670928955, |
|
"rewards/margins": 0.3846796751022339, |
|
"rewards/rejected": -0.8436653017997742, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 33.199263240349794, |
|
"learning_rate": 4.999536171027889e-07, |
|
"logits/chosen": -0.5496717691421509, |
|
"logits/rejected": -0.5985559225082397, |
|
"logps/chosen": -409.6986389160156, |
|
"logps/rejected": -411.371826171875, |
|
"loss": 0.604, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5133577585220337, |
|
"rewards/margins": 0.20064587891101837, |
|
"rewards/rejected": -0.7140035629272461, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10796545105566219, |
|
"grad_norm": 28.95788929645283, |
|
"learning_rate": 4.999045192408369e-07, |
|
"logits/chosen": -0.5078392028808594, |
|
"logits/rejected": -0.4781821370124817, |
|
"logps/chosen": -352.42578125, |
|
"logps/rejected": -345.4123840332031, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5539526343345642, |
|
"rewards/margins": 0.1369965374469757, |
|
"rewards/rejected": -0.6909492611885071, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 24.918675200058328, |
|
"learning_rate": 4.998378915697171e-07, |
|
"logits/chosen": -0.5960583090782166, |
|
"logits/rejected": -0.5872009992599487, |
|
"logps/chosen": -367.5823669433594, |
|
"logps/rejected": -395.4332580566406, |
|
"loss": 0.583, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3061702251434326, |
|
"rewards/margins": 0.42768678069114685, |
|
"rewards/rejected": -0.7338569164276123, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11276391554702495, |
|
"grad_norm": 24.386517807951574, |
|
"learning_rate": 4.997537387630958e-07, |
|
"logits/chosen": -0.5429798364639282, |
|
"logits/rejected": -0.5464817881584167, |
|
"logps/chosen": -310.02203369140625, |
|
"logps/rejected": -340.80865478515625, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4528660178184509, |
|
"rewards/margins": 0.3107239603996277, |
|
"rewards/rejected": -0.7635899782180786, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 27.255184142896073, |
|
"learning_rate": 4.996520667239582e-07, |
|
"logits/chosen": -0.6526015996932983, |
|
"logits/rejected": -0.6507179737091064, |
|
"logps/chosen": -353.07098388671875, |
|
"logps/rejected": -445.12237548828125, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6045628786087036, |
|
"rewards/margins": 0.3956468403339386, |
|
"rewards/rejected": -1.0002095699310303, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11756238003838772, |
|
"grad_norm": 32.84348746795199, |
|
"learning_rate": 4.995328825841939e-07, |
|
"logits/chosen": -0.4966016709804535, |
|
"logits/rejected": -0.49989452958106995, |
|
"logps/chosen": -317.2383117675781, |
|
"logps/rejected": -374.27508544921875, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4817740321159363, |
|
"rewards/margins": 0.5593485236167908, |
|
"rewards/rejected": -1.041122555732727, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 29.672640234170935, |
|
"learning_rate": 4.993961947040967e-07, |
|
"logits/chosen": -0.525520920753479, |
|
"logits/rejected": -0.5563070178031921, |
|
"logps/chosen": -427.6673889160156, |
|
"logps/rejected": -412.65008544921875, |
|
"loss": 0.5935, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7312201261520386, |
|
"rewards/margins": 0.3187289237976074, |
|
"rewards/rejected": -1.0499489307403564, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12236084452975048, |
|
"grad_norm": 27.84738833817779, |
|
"learning_rate": 4.992420126717784e-07, |
|
"logits/chosen": -0.5528146028518677, |
|
"logits/rejected": -0.5479222536087036, |
|
"logps/chosen": -356.24041748046875, |
|
"logps/rejected": -422.6918029785156, |
|
"loss": 0.5781, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.4140992760658264, |
|
"rewards/margins": 0.6510533094406128, |
|
"rewards/rejected": -1.065152645111084, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 32.48041619734842, |
|
"learning_rate": 4.990703473024958e-07, |
|
"logits/chosen": -0.45184358954429626, |
|
"logits/rejected": -0.48187708854675293, |
|
"logps/chosen": -417.84405517578125, |
|
"logps/rejected": -444.81353759765625, |
|
"loss": 0.5991, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7138451933860779, |
|
"rewards/margins": 0.37000906467437744, |
|
"rewards/rejected": -1.083854079246521, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12715930902111325, |
|
"grad_norm": 28.128177801840295, |
|
"learning_rate": 4.98881210637893e-07, |
|
"logits/chosen": -0.42285671830177307, |
|
"logits/rejected": -0.4019806385040283, |
|
"logps/chosen": -320.9397277832031, |
|
"logps/rejected": -411.93255615234375, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4967781603336334, |
|
"rewards/margins": 0.4927561283111572, |
|
"rewards/rejected": -0.9895342588424683, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 21.381239649867126, |
|
"learning_rate": 4.986746159451553e-07, |
|
"logits/chosen": -0.29445725679397583, |
|
"logits/rejected": -0.2827056646347046, |
|
"logps/chosen": -360.28509521484375, |
|
"logps/rejected": -394.31768798828125, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4901258945465088, |
|
"rewards/margins": 0.35657569766044617, |
|
"rewards/rejected": -0.8467016220092773, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.131957773512476, |
|
"grad_norm": 23.053578304971253, |
|
"learning_rate": 4.984505777160795e-07, |
|
"logits/chosen": -0.2335212230682373, |
|
"logits/rejected": -0.2651960253715515, |
|
"logps/chosen": -433.4956970214844, |
|
"logps/rejected": -464.2955017089844, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6037947535514832, |
|
"rewards/margins": 0.3152288496494293, |
|
"rewards/rejected": -0.9190236330032349, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 28.905417781337384, |
|
"learning_rate": 4.982091116660574e-07, |
|
"logits/chosen": -0.321607768535614, |
|
"logits/rejected": -0.3338220715522766, |
|
"logps/chosen": -305.93658447265625, |
|
"logps/rejected": -300.59124755859375, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5145214796066284, |
|
"rewards/margins": 0.22794541716575623, |
|
"rewards/rejected": -0.7424668669700623, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13675623800383876, |
|
"grad_norm": 32.795386120218325, |
|
"learning_rate": 4.979502347329732e-07, |
|
"logits/chosen": -0.23663392663002014, |
|
"logits/rejected": -0.24166357517242432, |
|
"logps/chosen": -423.2027282714844, |
|
"logps/rejected": -491.15106201171875, |
|
"loss": 0.5998, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6477493643760681, |
|
"rewards/margins": 0.4360308051109314, |
|
"rewards/rejected": -1.08378005027771, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 37.84072378443296, |
|
"learning_rate": 4.976739650760151e-07, |
|
"logits/chosen": -0.29570311307907104, |
|
"logits/rejected": -0.3070180118083954, |
|
"logps/chosen": -375.17962646484375, |
|
"logps/rejected": -388.2039489746094, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4810148775577545, |
|
"rewards/margins": 0.3502056300640106, |
|
"rewards/rejected": -0.8312205076217651, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14155470249520152, |
|
"grad_norm": 45.677768580981564, |
|
"learning_rate": 4.97380322074402e-07, |
|
"logits/chosen": -0.2370149791240692, |
|
"logits/rejected": -0.25640061497688293, |
|
"logps/chosen": -349.70941162109375, |
|
"logps/rejected": -374.61456298828125, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6930117607116699, |
|
"rewards/margins": 0.2920604646205902, |
|
"rewards/rejected": -0.985072135925293, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 31.065347761695264, |
|
"learning_rate": 4.970693263260237e-07, |
|
"logits/chosen": -0.26885563135147095, |
|
"logits/rejected": -0.3041172921657562, |
|
"logps/chosen": -403.6191101074219, |
|
"logps/rejected": -410.84967041015625, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5729845762252808, |
|
"rewards/margins": 0.46888118982315063, |
|
"rewards/rejected": -1.0418657064437866, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1463531669865643, |
|
"grad_norm": 29.080698158567, |
|
"learning_rate": 4.967409996459966e-07, |
|
"logits/chosen": -0.2872675359249115, |
|
"logits/rejected": -0.3306855261325836, |
|
"logps/chosen": -405.076904296875, |
|
"logps/rejected": -423.62664794921875, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.49526625871658325, |
|
"rewards/margins": 0.3949028551578522, |
|
"rewards/rejected": -0.8901691436767578, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 27.549771571534542, |
|
"learning_rate": 4.963953650651326e-07, |
|
"logits/chosen": -0.15485969185829163, |
|
"logits/rejected": -0.16681411862373352, |
|
"logps/chosen": -478.8113708496094, |
|
"logps/rejected": -422.03955078125, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.63862544298172, |
|
"rewards/margins": 0.40379634499549866, |
|
"rewards/rejected": -1.042421817779541, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15115163147792707, |
|
"grad_norm": 28.772933296866565, |
|
"learning_rate": 4.960324468283248e-07, |
|
"logits/chosen": -0.20728620886802673, |
|
"logits/rejected": -0.2060108482837677, |
|
"logps/chosen": -367.0924377441406, |
|
"logps/rejected": -390.70458984375, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8087286949157715, |
|
"rewards/margins": 0.3034602999687195, |
|
"rewards/rejected": -1.1121888160705566, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 29.609222546231578, |
|
"learning_rate": 4.956522703928451e-07, |
|
"logits/chosen": -0.06690754741430283, |
|
"logits/rejected": -0.06723584234714508, |
|
"logps/chosen": -370.3538818359375, |
|
"logps/rejected": -409.23065185546875, |
|
"loss": 0.5409, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.788346529006958, |
|
"rewards/margins": 0.4131564199924469, |
|
"rewards/rejected": -1.2015029191970825, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15595009596928983, |
|
"grad_norm": 38.41016264507651, |
|
"learning_rate": 4.952548624265606e-07, |
|
"logits/chosen": -0.03009071573615074, |
|
"logits/rejected": 0.02059212513267994, |
|
"logps/chosen": -436.8095703125, |
|
"logps/rejected": -453.0166931152344, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8183758854866028, |
|
"rewards/margins": 0.35901501774787903, |
|
"rewards/rejected": -1.1773908138275146, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 25.869613582575887, |
|
"learning_rate": 4.948402508060607e-07, |
|
"logits/chosen": -0.0018309459555894136, |
|
"logits/rejected": -0.01893061026930809, |
|
"logps/chosen": -356.6624755859375, |
|
"logps/rejected": -409.0708923339844, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6256797313690186, |
|
"rewards/margins": 0.601173460483551, |
|
"rewards/rejected": -1.2268530130386353, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16074856046065258, |
|
"grad_norm": 35.22680312796026, |
|
"learning_rate": 4.944084646147038e-07, |
|
"logits/chosen": 0.0020178346894681454, |
|
"logits/rejected": 0.031680598855018616, |
|
"logps/chosen": -452.8055114746094, |
|
"logps/rejected": -465.51678466796875, |
|
"loss": 0.5999, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6981381177902222, |
|
"rewards/margins": 0.3360704779624939, |
|
"rewards/rejected": -1.0342086553573608, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 28.0016914634874, |
|
"learning_rate": 4.939595341405754e-07, |
|
"logits/chosen": -0.039152443408966064, |
|
"logits/rejected": -0.05885768681764603, |
|
"logps/chosen": -401.278564453125, |
|
"logps/rejected": -409.3609924316406, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7071236968040466, |
|
"rewards/margins": 0.3430066704750061, |
|
"rewards/rejected": -1.0501302480697632, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16554702495201534, |
|
"grad_norm": 30.023172826044828, |
|
"learning_rate": 4.93493490874365e-07, |
|
"logits/chosen": -0.00025105997337959707, |
|
"logits/rejected": 0.005772613920271397, |
|
"logps/chosen": -390.638427734375, |
|
"logps/rejected": -424.7112731933594, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7401353716850281, |
|
"rewards/margins": 0.30253323912620544, |
|
"rewards/rejected": -1.0426685810089111, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 41.393690265481474, |
|
"learning_rate": 4.93010367507156e-07, |
|
"logits/chosen": -0.051719047129154205, |
|
"logits/rejected": -0.06900392472743988, |
|
"logps/chosen": -346.08837890625, |
|
"logps/rejected": -374.042724609375, |
|
"loss": 0.5537, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7158280611038208, |
|
"rewards/margins": 0.5490631461143494, |
|
"rewards/rejected": -1.264891266822815, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17034548944337813, |
|
"grad_norm": 33.6357655925115, |
|
"learning_rate": 4.925101979281332e-07, |
|
"logits/chosen": 0.02222558856010437, |
|
"logits/rejected": 0.006278800778090954, |
|
"logps/chosen": -424.63726806640625, |
|
"logps/rejected": -441.04644775390625, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6551335453987122, |
|
"rewards/margins": 0.6523554921150208, |
|
"rewards/rejected": -1.3074891567230225, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 32.69679455555672, |
|
"learning_rate": 4.919930172222054e-07, |
|
"logits/chosen": -0.12917150557041168, |
|
"logits/rejected": -0.12720082700252533, |
|
"logps/chosen": -402.8379821777344, |
|
"logps/rejected": -441.12677001953125, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7797117829322815, |
|
"rewards/margins": 0.4280470311641693, |
|
"rewards/rejected": -1.2077586650848389, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1751439539347409, |
|
"grad_norm": 38.43936411357028, |
|
"learning_rate": 4.914588616675445e-07, |
|
"logits/chosen": -0.17864573001861572, |
|
"logits/rejected": -0.20761199295520782, |
|
"logps/chosen": -344.26312255859375, |
|
"logps/rejected": -408.573486328125, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.555050253868103, |
|
"rewards/margins": 0.5430852174758911, |
|
"rewards/rejected": -1.098135232925415, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 36.631671098915504, |
|
"learning_rate": 4.909077687330404e-07, |
|
"logits/chosen": -0.11447083950042725, |
|
"logits/rejected": -0.09544442594051361, |
|
"logps/chosen": -418.90838623046875, |
|
"logps/rejected": -417.09844970703125, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7083614468574524, |
|
"rewards/margins": 0.3665878176689148, |
|
"rewards/rejected": -1.0749492645263672, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17994241842610365, |
|
"grad_norm": 32.57669985590322, |
|
"learning_rate": 4.903397770756729e-07, |
|
"logits/chosen": -0.06074325367808342, |
|
"logits/rejected": -0.08299403637647629, |
|
"logps/chosen": -401.88800048828125, |
|
"logps/rejected": -449.99169921875, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6976863145828247, |
|
"rewards/margins": 0.6443861722946167, |
|
"rewards/rejected": -1.342072606086731, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 27.495851946761935, |
|
"learning_rate": 4.897549265378004e-07, |
|
"logits/chosen": -0.18077705800533295, |
|
"logits/rejected": -0.15703561902046204, |
|
"logps/chosen": -486.8914489746094, |
|
"logps/rejected": -522.5025024414062, |
|
"loss": 0.559, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.925071120262146, |
|
"rewards/margins": 0.4355601668357849, |
|
"rewards/rejected": -1.3606312274932861, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1847408829174664, |
|
"grad_norm": 32.88428335628656, |
|
"learning_rate": 4.891532581443643e-07, |
|
"logits/chosen": -0.10509393364191055, |
|
"logits/rejected": -0.13191482424736023, |
|
"logps/chosen": -433.39697265625, |
|
"logps/rejected": -510.9073791503906, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.7138081789016724, |
|
"rewards/margins": 0.8751919865608215, |
|
"rewards/rejected": -1.5890003442764282, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 34.828556425360944, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": -0.02448561228811741, |
|
"logits/rejected": -0.08334103226661682, |
|
"logps/chosen": -395.90985107421875, |
|
"logps/rejected": -471.1104431152344, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.878553569316864, |
|
"rewards/margins": 0.5636481046676636, |
|
"rewards/rejected": -1.4422016143798828, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18953934740882916, |
|
"grad_norm": 34.102875954970415, |
|
"learning_rate": 4.878996377861367e-07, |
|
"logits/chosen": -0.04264168441295624, |
|
"logits/rejected": -0.09717553108930588, |
|
"logps/chosen": -374.21063232421875, |
|
"logps/rejected": -424.62701416015625, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0093395709991455, |
|
"rewards/margins": 0.43130987882614136, |
|
"rewards/rejected": -1.4406496286392212, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 34.09209485411543, |
|
"learning_rate": 4.872477737578327e-07, |
|
"logits/chosen": -0.023031553253531456, |
|
"logits/rejected": -0.03800968453288078, |
|
"logps/chosen": -431.33782958984375, |
|
"logps/rejected": -534.5132446289062, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8465608358383179, |
|
"rewards/margins": 1.0085922479629517, |
|
"rewards/rejected": -1.8551530838012695, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19433781190019195, |
|
"grad_norm": 44.78458025907374, |
|
"learning_rate": 4.865792677407718e-07, |
|
"logits/chosen": -0.09794610738754272, |
|
"logits/rejected": -0.08297122269868851, |
|
"logps/chosen": -404.9684143066406, |
|
"logps/rejected": -423.43896484375, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8548682928085327, |
|
"rewards/margins": 0.37679168581962585, |
|
"rewards/rejected": -1.2316598892211914, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 37.30397170950818, |
|
"learning_rate": 4.858941666279955e-07, |
|
"logits/chosen": -0.20108501613140106, |
|
"logits/rejected": -0.15580318868160248, |
|
"logps/chosen": -440.3353576660156, |
|
"logps/rejected": -437.89337158203125, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7743014693260193, |
|
"rewards/margins": 0.3654334843158722, |
|
"rewards/rejected": -1.1397349834442139, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1991362763915547, |
|
"grad_norm": 37.97880335267858, |
|
"learning_rate": 4.851925184766247e-07, |
|
"logits/chosen": -0.07934032380580902, |
|
"logits/rejected": -0.06675902754068375, |
|
"logps/chosen": -400.7498779296875, |
|
"logps/rejected": -435.03387451171875, |
|
"loss": 0.5744, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8046972155570984, |
|
"rewards/margins": 0.6158983707427979, |
|
"rewards/rejected": -1.4205955266952515, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 34.877131137485314, |
|
"learning_rate": 4.844743725044897e-07, |
|
"logits/chosen": -0.1209510788321495, |
|
"logits/rejected": -0.12060485780239105, |
|
"logps/chosen": -390.33575439453125, |
|
"logps/rejected": -407.412841796875, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7886186838150024, |
|
"rewards/margins": 0.5530378222465515, |
|
"rewards/rejected": -1.3416564464569092, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.20393474088291746, |
|
"grad_norm": 40.19277289158246, |
|
"learning_rate": 4.837397790866774e-07, |
|
"logits/chosen": -0.07084405422210693, |
|
"logits/rejected": -0.10281334072351456, |
|
"logps/chosen": -429.7625427246094, |
|
"logps/rejected": -490.245361328125, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7108091115951538, |
|
"rewards/margins": 0.9831811785697937, |
|
"rewards/rejected": -1.6939903497695923, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 35.43380411461513, |
|
"learning_rate": 4.829887897519974e-07, |
|
"logits/chosen": 0.014303353615105152, |
|
"logits/rejected": -0.007743634283542633, |
|
"logps/chosen": -381.1875, |
|
"logps/rejected": -453.6239318847656, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8310438990592957, |
|
"rewards/margins": 0.5285369157791138, |
|
"rewards/rejected": -1.3595808744430542, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.20873320537428022, |
|
"grad_norm": 30.773099092132018, |
|
"learning_rate": 4.82221457179368e-07, |
|
"logits/chosen": 0.005006339401006699, |
|
"logits/rejected": -0.01996953971683979, |
|
"logps/chosen": -400.3504638671875, |
|
"logps/rejected": -444.50653076171875, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6430622339248657, |
|
"rewards/margins": 0.6689059138298035, |
|
"rewards/rejected": -1.3119680881500244, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 30.941839797295746, |
|
"learning_rate": 4.814378351941206e-07, |
|
"logits/chosen": -0.03190199285745621, |
|
"logits/rejected": -0.032009296119213104, |
|
"logps/chosen": -378.9139099121094, |
|
"logps/rejected": -410.24896240234375, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6023445129394531, |
|
"rewards/margins": 0.4468112885951996, |
|
"rewards/rejected": -1.049155831336975, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21353166986564298, |
|
"grad_norm": 30.2018268544055, |
|
"learning_rate": 4.806379787642241e-07, |
|
"logits/chosen": 0.03415294736623764, |
|
"logits/rejected": -0.008319585584104061, |
|
"logps/chosen": -373.29327392578125, |
|
"logps/rejected": -426.83856201171875, |
|
"loss": 0.6009, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6300404071807861, |
|
"rewards/margins": 0.5418477058410645, |
|
"rewards/rejected": -1.171887993812561, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 30.69767076541483, |
|
"learning_rate": 4.798219439964293e-07, |
|
"logits/chosen": -0.022300051525235176, |
|
"logits/rejected": -0.07942859828472137, |
|
"logps/chosen": -382.47088623046875, |
|
"logps/rejected": -428.5862731933594, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.79583740234375, |
|
"rewards/margins": 0.3617878556251526, |
|
"rewards/rejected": -1.1576253175735474, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21833013435700577, |
|
"grad_norm": 64.36072526993395, |
|
"learning_rate": 4.78989788132333e-07, |
|
"logits/chosen": -0.07167644053697586, |
|
"logits/rejected": -0.07725416123867035, |
|
"logps/chosen": -351.6461181640625, |
|
"logps/rejected": -432.82916259765625, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7295014262199402, |
|
"rewards/margins": 0.7431732416152954, |
|
"rewards/rejected": -1.4726746082305908, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 31.37167746375858, |
|
"learning_rate": 4.781415695443631e-07, |
|
"logits/chosen": 0.07153941690921783, |
|
"logits/rejected": 0.1024637222290039, |
|
"logps/chosen": -490.06402587890625, |
|
"logps/rejected": -516.02294921875, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3680822849273682, |
|
"rewards/margins": 0.20954158902168274, |
|
"rewards/rejected": -1.5776238441467285, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22312859884836853, |
|
"grad_norm": 29.004390037425598, |
|
"learning_rate": 4.772773477316836e-07, |
|
"logits/chosen": 0.03397312015295029, |
|
"logits/rejected": 0.03711385652422905, |
|
"logps/chosen": -467.2877502441406, |
|
"logps/rejected": -509.09716796875, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1240530014038086, |
|
"rewards/margins": 0.45423418283462524, |
|
"rewards/rejected": -1.578287124633789, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 41.30297632121209, |
|
"learning_rate": 4.7639718331602117e-07, |
|
"logits/chosen": 0.08684961497783661, |
|
"logits/rejected": 0.05986959859728813, |
|
"logps/chosen": -420.59814453125, |
|
"logps/rejected": -490.409912109375, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9049364924430847, |
|
"rewards/margins": 0.7652468681335449, |
|
"rewards/rejected": -1.6701834201812744, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22792706333973128, |
|
"grad_norm": 39.96596996172262, |
|
"learning_rate": 4.7550113803741275e-07, |
|
"logits/chosen": 0.13893774151802063, |
|
"logits/rejected": 0.16821300983428955, |
|
"logps/chosen": -432.7577209472656, |
|
"logps/rejected": -411.2371520996094, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9445673823356628, |
|
"rewards/margins": 0.5061396360397339, |
|
"rewards/rejected": -1.450706958770752, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 35.26144835245691, |
|
"learning_rate": 4.7458927474987454e-07, |
|
"logits/chosen": 0.13862411677837372, |
|
"logits/rejected": 0.17463508248329163, |
|
"logps/chosen": -470.4085998535156, |
|
"logps/rejected": -434.1971740722656, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8276304006576538, |
|
"rewards/margins": 0.33768096566200256, |
|
"rewards/rejected": -1.1653112173080444, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23272552783109404, |
|
"grad_norm": 34.457370464422794, |
|
"learning_rate": 4.7366165741699347e-07, |
|
"logits/chosen": 0.06780462712049484, |
|
"logits/rejected": 0.033076416701078415, |
|
"logps/chosen": -474.2489318847656, |
|
"logps/rejected": -491.00421142578125, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8541940450668335, |
|
"rewards/margins": 0.41960257291793823, |
|
"rewards/rejected": -1.2737966775894165, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 50.081994664008306, |
|
"learning_rate": 4.727183511074401e-07, |
|
"logits/chosen": 0.12627606093883514, |
|
"logits/rejected": 0.1392831802368164, |
|
"logps/chosen": -422.95947265625, |
|
"logps/rejected": -465.1092834472656, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8909593820571899, |
|
"rewards/margins": 0.47495001554489136, |
|
"rewards/rejected": -1.3659093379974365, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2375239923224568, |
|
"grad_norm": 33.56933991120958, |
|
"learning_rate": 4.717594219904043e-07, |
|
"logits/chosen": 0.11548285186290741, |
|
"logits/rejected": 0.17751248180866241, |
|
"logps/chosen": -428.980224609375, |
|
"logps/rejected": -429.6700134277344, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9127674102783203, |
|
"rewards/margins": 0.5175878405570984, |
|
"rewards/rejected": -1.4303553104400635, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 38.76253931692222, |
|
"learning_rate": 4.7078493733095393e-07, |
|
"logits/chosen": 0.07841446250677109, |
|
"logits/rejected": 0.07714001089334488, |
|
"logps/chosen": -396.0744323730469, |
|
"logps/rejected": -459.9576721191406, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7617571353912354, |
|
"rewards/margins": 0.5821165442466736, |
|
"rewards/rejected": -1.3438737392425537, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2423224568138196, |
|
"grad_norm": 40.40221370647514, |
|
"learning_rate": 4.6979496548531614e-07, |
|
"logits/chosen": 0.282027930021286, |
|
"logits/rejected": 0.2300875186920166, |
|
"logps/chosen": -417.5662536621094, |
|
"logps/rejected": -517.0253295898438, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0011545419692993, |
|
"rewards/margins": 0.4213111400604248, |
|
"rewards/rejected": -1.4224655628204346, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 38.63551061711667, |
|
"learning_rate": 4.6878957589608293e-07, |
|
"logits/chosen": 0.15491922199726105, |
|
"logits/rejected": 0.10176967084407806, |
|
"logps/chosen": -423.72412109375, |
|
"logps/rejected": -521.2840576171875, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8717803955078125, |
|
"rewards/margins": 0.6004728078842163, |
|
"rewards/rejected": -1.4722532033920288, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24712092130518235, |
|
"grad_norm": 33.04785124844753, |
|
"learning_rate": 4.6776883908733956e-07, |
|
"logits/chosen": 0.3141445815563202, |
|
"logits/rejected": 0.40079420804977417, |
|
"logps/chosen": -444.20037841796875, |
|
"logps/rejected": -440.73992919921875, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9564323425292969, |
|
"rewards/margins": 0.6285899877548218, |
|
"rewards/rejected": -1.5850223302841187, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 56.89197225086117, |
|
"learning_rate": 4.667328266597178e-07, |
|
"logits/chosen": 0.32467955350875854, |
|
"logits/rejected": 0.3737574815750122, |
|
"logps/chosen": -425.14764404296875, |
|
"logps/rejected": -474.8291015625, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9110026359558105, |
|
"rewards/margins": 0.6014169454574585, |
|
"rewards/rejected": -1.5124194622039795, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2519193857965451, |
|
"grad_norm": 42.632075100473685, |
|
"learning_rate": 4.6568161128537354e-07, |
|
"logits/chosen": 0.23409466445446014, |
|
"logits/rejected": 0.4846338629722595, |
|
"logps/chosen": -437.87469482421875, |
|
"logps/rejected": -416.75433349609375, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -1.0662238597869873, |
|
"rewards/margins": 0.3336094319820404, |
|
"rewards/rejected": -1.3998332023620605, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 46.72130069794758, |
|
"learning_rate": 4.6461526670288877e-07, |
|
"logits/chosen": 0.4986523687839508, |
|
"logits/rejected": 0.5356402397155762, |
|
"logps/chosen": -453.13543701171875, |
|
"logps/rejected": -487.8929748535156, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0444475412368774, |
|
"rewards/margins": 0.7141000032424927, |
|
"rewards/rejected": -1.7585475444793701, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2567178502879079, |
|
"grad_norm": 30.901707992623376, |
|
"learning_rate": 4.635338677120994e-07, |
|
"logits/chosen": 0.6319410800933838, |
|
"logits/rejected": 0.5878476500511169, |
|
"logps/chosen": -435.103271484375, |
|
"logps/rejected": -522.3030395507812, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.001800775527954, |
|
"rewards/margins": 0.7891290187835693, |
|
"rewards/rejected": -1.7909300327301025, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 35.62993699091359, |
|
"learning_rate": 4.6243749016884835e-07, |
|
"logits/chosen": 0.6645074486732483, |
|
"logits/rejected": 0.6307970285415649, |
|
"logps/chosen": -460.1568298339844, |
|
"logps/rejected": -596.9638671875, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2296950817108154, |
|
"rewards/margins": 0.8476268649101257, |
|
"rewards/rejected": -2.077322006225586, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2615163147792706, |
|
"grad_norm": 55.48184293718509, |
|
"learning_rate": 4.613262109796645e-07, |
|
"logits/chosen": 0.5279312133789062, |
|
"logits/rejected": 0.44912824034690857, |
|
"logps/chosen": -445.187744140625, |
|
"logps/rejected": -569.0379028320312, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0215142965316772, |
|
"rewards/margins": 0.903215765953064, |
|
"rewards/rejected": -1.9247299432754517, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 35.82390141377677, |
|
"learning_rate": 4.602001080963678e-07, |
|
"logits/chosen": 0.5199450254440308, |
|
"logits/rejected": 0.580736517906189, |
|
"logps/chosen": -457.9462890625, |
|
"logps/rejected": -484.3863830566406, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.047241449356079, |
|
"rewards/margins": 0.6471258997917175, |
|
"rewards/rejected": -1.6943671703338623, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2663147792706334, |
|
"grad_norm": 51.55318372805118, |
|
"learning_rate": 4.590592605106017e-07, |
|
"logits/chosen": 0.34312915802001953, |
|
"logits/rejected": 0.3462589979171753, |
|
"logps/chosen": -462.97137451171875, |
|
"logps/rejected": -475.85235595703125, |
|
"loss": 0.5757, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8622667193412781, |
|
"rewards/margins": 0.5276774168014526, |
|
"rewards/rejected": -1.389944076538086, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 46.55805600175398, |
|
"learning_rate": 4.5790374824829165e-07, |
|
"logits/chosen": 0.5497294068336487, |
|
"logits/rejected": 0.5141938924789429, |
|
"logps/chosen": -329.0898132324219, |
|
"logps/rejected": -395.189208984375, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8122035264968872, |
|
"rewards/margins": 0.555601179599762, |
|
"rewards/rejected": -1.367804765701294, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27111324376199614, |
|
"grad_norm": 41.30209061097155, |
|
"learning_rate": 4.5673365236403216e-07, |
|
"logits/chosen": 0.5173945426940918, |
|
"logits/rejected": 0.538547158241272, |
|
"logps/chosen": -337.64508056640625, |
|
"logps/rejected": -434.7604064941406, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6650754809379578, |
|
"rewards/margins": 0.7895157337188721, |
|
"rewards/rejected": -1.454591155052185, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 35.39722075486902, |
|
"learning_rate": 4.5554905493540075e-07, |
|
"logits/chosen": 0.7431238293647766, |
|
"logits/rejected": 0.720431923866272, |
|
"logps/chosen": -369.39825439453125, |
|
"logps/rejected": -469.0665588378906, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8106037974357605, |
|
"rewards/margins": 0.9515643119812012, |
|
"rewards/rejected": -1.762168288230896, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2759117082533589, |
|
"grad_norm": 80.31107636026294, |
|
"learning_rate": 4.5435003905720074e-07, |
|
"logits/chosen": 0.6994370222091675, |
|
"logits/rejected": 0.7717106938362122, |
|
"logps/chosen": -467.26922607421875, |
|
"logps/rejected": -501.1815490722656, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1343969106674194, |
|
"rewards/margins": 0.7365877628326416, |
|
"rewards/rejected": -1.870984673500061, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 50.544256487524144, |
|
"learning_rate": 4.531366888356324e-07, |
|
"logits/chosen": 0.604827880859375, |
|
"logits/rejected": 0.5405411720275879, |
|
"logps/chosen": -349.20196533203125, |
|
"logps/rejected": -467.71160888671875, |
|
"loss": 0.5206, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9413111805915833, |
|
"rewards/margins": 0.8840651512145996, |
|
"rewards/rejected": -1.8253761529922485, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2807101727447217, |
|
"grad_norm": 48.679810649088054, |
|
"learning_rate": 4.519090893823931e-07, |
|
"logits/chosen": 0.7196705341339111, |
|
"logits/rejected": 0.7503910660743713, |
|
"logps/chosen": -434.93377685546875, |
|
"logps/rejected": -479.38836669921875, |
|
"loss": 0.5375, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1369459629058838, |
|
"rewards/margins": 0.6235243678092957, |
|
"rewards/rejected": -1.7604703903198242, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 42.39683927792113, |
|
"learning_rate": 4.5066732680870734e-07, |
|
"logits/chosen": 0.7495613694190979, |
|
"logits/rejected": 0.7793896794319153, |
|
"logps/chosen": -413.90557861328125, |
|
"logps/rejected": -447.45452880859375, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0040034055709839, |
|
"rewards/margins": 0.8347917795181274, |
|
"rewards/rejected": -1.8387953042984009, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28550863723608444, |
|
"grad_norm": 53.13082069754931, |
|
"learning_rate": 4.494114882192862e-07, |
|
"logits/chosen": 0.4293566644191742, |
|
"logits/rejected": 0.44527220726013184, |
|
"logps/chosen": -425.08538818359375, |
|
"logps/rejected": -490.40765380859375, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9455874562263489, |
|
"rewards/margins": 1.0670359134674072, |
|
"rewards/rejected": -2.0126233100891113, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 49.87269165648676, |
|
"learning_rate": 4.4814166170621735e-07, |
|
"logits/chosen": 0.6792656183242798, |
|
"logits/rejected": 0.6856303811073303, |
|
"logps/chosen": -430.4750061035156, |
|
"logps/rejected": -503.75634765625, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.135371446609497, |
|
"rewards/margins": 1.0104650259017944, |
|
"rewards/rejected": -2.145836591720581, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2903071017274472, |
|
"grad_norm": 37.72444617932776, |
|
"learning_rate": 4.468579363427858e-07, |
|
"logits/chosen": 0.41752809286117554, |
|
"logits/rejected": 0.4529293477535248, |
|
"logps/chosen": -450.9541931152344, |
|
"logps/rejected": -478.5503845214844, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3859989643096924, |
|
"rewards/margins": 0.5862727165222168, |
|
"rewards/rejected": -1.9722716808319092, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 49.47973747014418, |
|
"learning_rate": 4.4556040217722555e-07, |
|
"logits/chosen": 0.6199735403060913, |
|
"logits/rejected": 0.5173524618148804, |
|
"logps/chosen": -390.5383605957031, |
|
"logps/rejected": -529.1012573242188, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9236103892326355, |
|
"rewards/margins": 0.9195195436477661, |
|
"rewards/rejected": -1.8431298732757568, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29510556621880996, |
|
"grad_norm": 43.77387295728714, |
|
"learning_rate": 4.442491502264033e-07, |
|
"logits/chosen": 0.5372000932693481, |
|
"logits/rejected": 0.5111404061317444, |
|
"logps/chosen": -398.32928466796875, |
|
"logps/rejected": -427.90142822265625, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1015335321426392, |
|
"rewards/margins": 0.36569902300834656, |
|
"rewards/rejected": -1.467232584953308, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 35.2179506302823, |
|
"learning_rate": 4.429242724694338e-07, |
|
"logits/chosen": 0.596865177154541, |
|
"logits/rejected": 0.5551019906997681, |
|
"logps/chosen": -403.04803466796875, |
|
"logps/rejected": -482.10455322265625, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8973898887634277, |
|
"rewards/margins": 0.7202876806259155, |
|
"rewards/rejected": -1.6176776885986328, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29990403071017274, |
|
"grad_norm": 35.54165989722752, |
|
"learning_rate": 4.4158586184122817e-07, |
|
"logits/chosen": 0.6986425518989563, |
|
"logits/rejected": 0.7786028385162354, |
|
"logps/chosen": -455.0581970214844, |
|
"logps/rejected": -487.45220947265625, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9782212376594543, |
|
"rewards/margins": 0.773512065410614, |
|
"rewards/rejected": -1.751733422279358, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 34.976845816469115, |
|
"learning_rate": 4.4023401222597443e-07, |
|
"logits/chosen": 0.5812339782714844, |
|
"logits/rejected": 0.6533055305480957, |
|
"logps/chosen": -456.7413635253906, |
|
"logps/rejected": -492.81500244140625, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0948355197906494, |
|
"rewards/margins": 0.6430230140686035, |
|
"rewards/rejected": -1.737858533859253, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.30470249520153553, |
|
"grad_norm": 52.15846550296518, |
|
"learning_rate": 4.3886881845055235e-07, |
|
"logits/chosen": 0.6851636171340942, |
|
"logits/rejected": 0.7039676904678345, |
|
"logps/chosen": -395.2878723144531, |
|
"logps/rejected": -475.46319580078125, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8985649347305298, |
|
"rewards/margins": 0.9367402195930481, |
|
"rewards/rejected": -1.8353052139282227, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 35.79256631055672, |
|
"learning_rate": 4.374903762778814e-07, |
|
"logits/chosen": 0.6985992193222046, |
|
"logits/rejected": 0.6866432428359985, |
|
"logps/chosen": -429.9559020996094, |
|
"logps/rejected": -467.31231689453125, |
|
"loss": 0.5222, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0913515090942383, |
|
"rewards/margins": 0.6997131109237671, |
|
"rewards/rejected": -1.7910646200180054, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.30950095969289826, |
|
"grad_norm": 68.77406798145645, |
|
"learning_rate": 4.3609878240020356e-07, |
|
"logits/chosen": 0.45225849747657776, |
|
"logits/rejected": 0.5497337579727173, |
|
"logps/chosen": -510.07659912109375, |
|
"logps/rejected": -510.8426208496094, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.366571307182312, |
|
"rewards/margins": 0.6978545784950256, |
|
"rewards/rejected": -2.0644257068634033, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 36.68902407720006, |
|
"learning_rate": 4.346941344323005e-07, |
|
"logits/chosen": 0.585986316204071, |
|
"logits/rejected": 0.6672986745834351, |
|
"logps/chosen": -437.39324951171875, |
|
"logps/rejected": -430.4087829589844, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.3243210315704346, |
|
"rewards/margins": 0.4856715798377991, |
|
"rewards/rejected": -1.8099925518035889, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31429942418426104, |
|
"grad_norm": 38.77370809872286, |
|
"learning_rate": 4.332765309046467e-07, |
|
"logits/chosen": 0.7318406105041504, |
|
"logits/rejected": 0.7771567106246948, |
|
"logps/chosen": -450.19427490234375, |
|
"logps/rejected": -471.3865661621094, |
|
"loss": 0.5554, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.116821527481079, |
|
"rewards/margins": 0.6649090051651001, |
|
"rewards/rejected": -1.7817304134368896, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 45.82746891169888, |
|
"learning_rate": 4.3184607125649754e-07, |
|
"logits/chosen": 0.49596285820007324, |
|
"logits/rejected": 0.5003286600112915, |
|
"logps/chosen": -430.02996826171875, |
|
"logps/rejected": -527.7648315429688, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8098013997077942, |
|
"rewards/margins": 0.9345352053642273, |
|
"rewards/rejected": -1.744336485862732, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3190978886756238, |
|
"grad_norm": 37.74246685501154, |
|
"learning_rate": 4.304028558289141e-07, |
|
"logits/chosen": 0.38717252016067505, |
|
"logits/rejected": 0.39220350980758667, |
|
"logps/chosen": -416.00494384765625, |
|
"logps/rejected": -460.89739990234375, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7099177241325378, |
|
"rewards/margins": 0.6884833574295044, |
|
"rewards/rejected": -1.3984010219573975, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 34.11722436437858, |
|
"learning_rate": 4.28946985857725e-07, |
|
"logits/chosen": 0.5080984234809875, |
|
"logits/rejected": 0.4866611063480377, |
|
"logps/chosen": -444.2494201660156, |
|
"logps/rejected": -542.6705932617188, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9977186918258667, |
|
"rewards/margins": 1.2409141063690186, |
|
"rewards/rejected": -2.2386326789855957, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32389635316698656, |
|
"grad_norm": 38.18058435916063, |
|
"learning_rate": 4.2747856346642445e-07, |
|
"logits/chosen": 0.4064346253871918, |
|
"logits/rejected": 0.4254288077354431, |
|
"logps/chosen": -389.40472412109375, |
|
"logps/rejected": -465.85906982421875, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9736809730529785, |
|
"rewards/margins": 0.8628204464912415, |
|
"rewards/rejected": -1.8365013599395752, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 45.560969124424204, |
|
"learning_rate": 4.2599769165900933e-07, |
|
"logits/chosen": 0.4976237714290619, |
|
"logits/rejected": 0.4918050765991211, |
|
"logps/chosen": -478.25140380859375, |
|
"logps/rejected": -495.969482421875, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5931600332260132, |
|
"rewards/margins": 0.48865580558776855, |
|
"rewards/rejected": -2.0818159580230713, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32869481765834935, |
|
"grad_norm": 35.94258808540943, |
|
"learning_rate": 4.245044743127535e-07, |
|
"logits/chosen": 0.5548725128173828, |
|
"logits/rejected": 0.46006709337234497, |
|
"logps/chosen": -428.947021484375, |
|
"logps/rejected": -524.8760986328125, |
|
"loss": 0.5207, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1485120058059692, |
|
"rewards/margins": 0.7874538898468018, |
|
"rewards/rejected": -1.9359657764434814, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 42.265392991866655, |
|
"learning_rate": 4.229990161709214e-07, |
|
"logits/chosen": 0.547171950340271, |
|
"logits/rejected": 0.4217755198478699, |
|
"logps/chosen": -401.33447265625, |
|
"logps/rejected": -532.9482421875, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8861383199691772, |
|
"rewards/margins": 1.117545485496521, |
|
"rewards/rejected": -2.0036838054656982, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3334932821497121, |
|
"grad_norm": 32.39940263140558, |
|
"learning_rate": 4.214814228354204e-07, |
|
"logits/chosen": 0.4310382008552551, |
|
"logits/rejected": 0.47493353486061096, |
|
"logps/chosen": -467.65216064453125, |
|
"logps/rejected": -549.2056884765625, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1547861099243164, |
|
"rewards/margins": 1.1499736309051514, |
|
"rewards/rejected": -2.304759979248047, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 37.76328528326702, |
|
"learning_rate": 4.1995180075939375e-07, |
|
"logits/chosen": 0.6290279626846313, |
|
"logits/rejected": 0.5864508748054504, |
|
"logps/chosen": -488.428466796875, |
|
"logps/rejected": -547.3450927734375, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2981139421463013, |
|
"rewards/margins": 0.8224126100540161, |
|
"rewards/rejected": -2.1205263137817383, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33829174664107486, |
|
"grad_norm": 41.27479960235454, |
|
"learning_rate": 4.1841025723975297e-07, |
|
"logits/chosen": 0.42811208963394165, |
|
"logits/rejected": 0.41358089447021484, |
|
"logps/chosen": -445.8192443847656, |
|
"logps/rejected": -505.38653564453125, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9554083943367004, |
|
"rewards/margins": 0.7210197448730469, |
|
"rewards/rejected": -1.676428198814392, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 37.87482486935292, |
|
"learning_rate": 4.168569004096516e-07, |
|
"logits/chosen": 0.4879208207130432, |
|
"logits/rejected": 0.37299996614456177, |
|
"logps/chosen": -421.3837890625, |
|
"logps/rejected": -540.9444580078125, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2045072317123413, |
|
"rewards/margins": 1.0760588645935059, |
|
"rewards/rejected": -2.2805662155151367, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3430902111324376, |
|
"grad_norm": 34.143233451160405, |
|
"learning_rate": 4.152918392308997e-07, |
|
"logits/chosen": 0.4631095826625824, |
|
"logits/rejected": 0.44977670907974243, |
|
"logps/chosen": -420.1924743652344, |
|
"logps/rejected": -457.46673583984375, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1693400144577026, |
|
"rewards/margins": 0.540154218673706, |
|
"rewards/rejected": -1.7094943523406982, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 79.72399784718598, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": 0.30308836698532104, |
|
"logits/rejected": 0.19191868603229523, |
|
"logps/chosen": -421.8958435058594, |
|
"logps/rejected": -534.3394775390625, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1978578567504883, |
|
"rewards/margins": 0.8276159167289734, |
|
"rewards/rejected": -2.0254738330841064, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3478886756238004, |
|
"grad_norm": 53.81472585528722, |
|
"learning_rate": 4.121270437720526e-07, |
|
"logits/chosen": 0.2503531575202942, |
|
"logits/rejected": 0.20632532238960266, |
|
"logps/chosen": -388.9275817871094, |
|
"logps/rejected": -504.99627685546875, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.158739447593689, |
|
"rewards/margins": 0.6908172965049744, |
|
"rewards/rejected": -1.8495569229125977, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 45.6784642712931, |
|
"learning_rate": 4.105275314897852e-07, |
|
"logits/chosen": 0.48888054490089417, |
|
"logits/rejected": 0.3766574263572693, |
|
"logps/chosen": -397.5686340332031, |
|
"logps/rejected": -535.5113525390625, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.021269679069519, |
|
"rewards/margins": 1.1115610599517822, |
|
"rewards/rejected": -2.1328306198120117, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35268714011516317, |
|
"grad_norm": 42.50127277305204, |
|
"learning_rate": 4.089167588389508e-07, |
|
"logits/chosen": 0.35595473647117615, |
|
"logits/rejected": 0.4420366883277893, |
|
"logps/chosen": -525.4200439453125, |
|
"logps/rejected": -575.6399536132812, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.174477219581604, |
|
"rewards/margins": 0.9557849168777466, |
|
"rewards/rejected": -2.1302618980407715, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 66.9134896066362, |
|
"learning_rate": 4.072948388088515e-07, |
|
"logits/chosen": 0.4660380482673645, |
|
"logits/rejected": 0.48526984453201294, |
|
"logps/chosen": -472.48773193359375, |
|
"logps/rejected": -540.60546875, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.309525966644287, |
|
"rewards/margins": 0.729021430015564, |
|
"rewards/rejected": -2.0385475158691406, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3574856046065259, |
|
"grad_norm": 48.14455914875948, |
|
"learning_rate": 4.056618851707334e-07, |
|
"logits/chosen": 0.3936781585216522, |
|
"logits/rejected": 0.37658897042274475, |
|
"logps/chosen": -417.9375915527344, |
|
"logps/rejected": -505.556396484375, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8807679414749146, |
|
"rewards/margins": 0.8251503109931946, |
|
"rewards/rejected": -1.7059180736541748, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 44.71962261776299, |
|
"learning_rate": 4.0401801246980675e-07, |
|
"logits/chosen": 0.2104732245206833, |
|
"logits/rejected": 0.22102966904640198, |
|
"logps/chosen": -413.0994567871094, |
|
"logps/rejected": -452.8529357910156, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.222390055656433, |
|
"rewards/margins": 0.6135789155960083, |
|
"rewards/rejected": -1.8359689712524414, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3622840690978887, |
|
"grad_norm": 38.59038142711945, |
|
"learning_rate": 4.0236333601721043e-07, |
|
"logits/chosen": 0.36115556955337524, |
|
"logits/rejected": 0.27192938327789307, |
|
"logps/chosen": -518.89306640625, |
|
"logps/rejected": -567.1900024414062, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.524860143661499, |
|
"rewards/margins": 0.5048703551292419, |
|
"rewards/rejected": -2.0297303199768066, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 48.99560916590031, |
|
"learning_rate": 4.0069797188192364e-07, |
|
"logits/chosen": 0.2493390589952469, |
|
"logits/rejected": 0.25582900643348694, |
|
"logps/chosen": -457.5439453125, |
|
"logps/rejected": -511.53466796875, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0995330810546875, |
|
"rewards/margins": 0.8029910326004028, |
|
"rewards/rejected": -1.9025242328643799, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3670825335892514, |
|
"grad_norm": 43.28635521609486, |
|
"learning_rate": 3.9902203688262417e-07, |
|
"logits/chosen": 0.24590995907783508, |
|
"logits/rejected": 0.2573690414428711, |
|
"logps/chosen": -447.503173828125, |
|
"logps/rejected": -495.9049377441406, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1626179218292236, |
|
"rewards/margins": 0.7147835493087769, |
|
"rewards/rejected": -1.87740159034729, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 75.4063331165295, |
|
"learning_rate": 3.9733564857949365e-07, |
|
"logits/chosen": 0.36004549264907837, |
|
"logits/rejected": 0.39339983463287354, |
|
"logps/chosen": -538.8134765625, |
|
"logps/rejected": -569.4513549804688, |
|
"loss": 0.5428, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.430328369140625, |
|
"rewards/margins": 0.773267388343811, |
|
"rewards/rejected": -2.2035956382751465, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3718809980806142, |
|
"grad_norm": 47.00943225874421, |
|
"learning_rate": 3.9563892526597177e-07, |
|
"logits/chosen": 0.38262271881103516, |
|
"logits/rejected": 0.3127327561378479, |
|
"logps/chosen": -405.52008056640625, |
|
"logps/rejected": -523.7188720703125, |
|
"loss": 0.5275, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2701631784439087, |
|
"rewards/margins": 0.673926591873169, |
|
"rewards/rejected": -1.944089651107788, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 44.77491303021576, |
|
"learning_rate": 3.9393198596045795e-07, |
|
"logits/chosen": 0.2474546879529953, |
|
"logits/rejected": 0.1317511945962906, |
|
"logps/chosen": -421.62994384765625, |
|
"logps/rejected": -519.5099487304688, |
|
"loss": 0.5383, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2208540439605713, |
|
"rewards/margins": 0.7671472430229187, |
|
"rewards/rejected": -1.9880012273788452, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.376679462571977, |
|
"grad_norm": 37.95179606415185, |
|
"learning_rate": 3.922149503979628e-07, |
|
"logits/chosen": 0.2700248658657074, |
|
"logits/rejected": 0.21610090136528015, |
|
"logps/chosen": -471.33056640625, |
|
"logps/rejected": -593.8278198242188, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2792729139328003, |
|
"rewards/margins": 1.1995410919189453, |
|
"rewards/rejected": -2.4788146018981934, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 55.896865397911, |
|
"learning_rate": 3.904879390217095e-07, |
|
"logits/chosen": 0.12995900213718414, |
|
"logits/rejected": 0.12265945971012115, |
|
"logps/chosen": -443.599365234375, |
|
"logps/rejected": -492.68450927734375, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2007102966308594, |
|
"rewards/margins": 0.6766700744628906, |
|
"rewards/rejected": -1.87738037109375, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3814779270633397, |
|
"grad_norm": 49.93484321544338, |
|
"learning_rate": 3.8875107297468463e-07, |
|
"logits/chosen": 0.20564258098602295, |
|
"logits/rejected": 0.0780414491891861, |
|
"logps/chosen": -411.8665466308594, |
|
"logps/rejected": -573.74951171875, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.0018932819366455, |
|
"rewards/margins": 1.2348394393920898, |
|
"rewards/rejected": -2.2367329597473145, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 38.069521505621516, |
|
"learning_rate": 3.87004474091141e-07, |
|
"logits/chosen": 0.3447803258895874, |
|
"logits/rejected": 0.3082936704158783, |
|
"logps/chosen": -405.9560852050781, |
|
"logps/rejected": -489.4607849121094, |
|
"loss": 0.541, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0926564931869507, |
|
"rewards/margins": 0.7233616709709167, |
|
"rewards/rejected": -1.8160178661346436, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3862763915547025, |
|
"grad_norm": 44.24824114407542, |
|
"learning_rate": 3.8524826488805114e-07, |
|
"logits/chosen": 0.3052324950695038, |
|
"logits/rejected": 0.3181813657283783, |
|
"logps/chosen": -473.97796630859375, |
|
"logps/rejected": -500.7769470214844, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2699439525604248, |
|
"rewards/margins": 0.7647022008895874, |
|
"rewards/rejected": -2.0346462726593018, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 47.309718786937964, |
|
"learning_rate": 3.834825685565133e-07, |
|
"logits/chosen": 0.33559301495552063, |
|
"logits/rejected": 0.3656995892524719, |
|
"logps/chosen": -414.19256591796875, |
|
"logps/rejected": -421.0203552246094, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.065473198890686, |
|
"rewards/margins": 0.5177011489868164, |
|
"rewards/rejected": -1.5831743478775024, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39107485604606523, |
|
"grad_norm": 42.86172629937328, |
|
"learning_rate": 3.8170750895311007e-07, |
|
"logits/chosen": 0.1855572611093521, |
|
"logits/rejected": 0.17679139971733093, |
|
"logps/chosen": -452.2789001464844, |
|
"logps/rejected": -500.349609375, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9900191426277161, |
|
"rewards/margins": 0.7418977618217468, |
|
"rewards/rejected": -1.7319167852401733, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 45.001223140761674, |
|
"learning_rate": 3.7992321059122045e-07, |
|
"logits/chosen": 0.2781444787979126, |
|
"logits/rejected": 0.30307430028915405, |
|
"logps/chosen": -414.05523681640625, |
|
"logps/rejected": -462.14239501953125, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1657673120498657, |
|
"rewards/margins": 0.6909239888191223, |
|
"rewards/rejected": -1.8566913604736328, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.395873320537428, |
|
"grad_norm": 60.544325020503095, |
|
"learning_rate": 3.7812979863228576e-07, |
|
"logits/chosen": 0.2274487465620041, |
|
"logits/rejected": 0.16551566123962402, |
|
"logps/chosen": -405.3382873535156, |
|
"logps/rejected": -493.6697692871094, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2897754907608032, |
|
"rewards/margins": 0.8161094784736633, |
|
"rewards/rejected": -2.1058847904205322, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 50.449927443360075, |
|
"learning_rate": 3.763273988770296e-07, |
|
"logits/chosen": 0.40345683693885803, |
|
"logits/rejected": 0.39551275968551636, |
|
"logps/chosen": -453.79803466796875, |
|
"logps/rejected": -535.8180541992188, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3234025239944458, |
|
"rewards/margins": 0.8552868962287903, |
|
"rewards/rejected": -2.178689479827881, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4006717850287908, |
|
"grad_norm": 45.22606638463477, |
|
"learning_rate": 3.7451613775663405e-07, |
|
"logits/chosen": 0.2254648655653, |
|
"logits/rejected": 0.15715382993221283, |
|
"logps/chosen": -444.4361267089844, |
|
"logps/rejected": -565.7696533203125, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3264541625976562, |
|
"rewards/margins": 1.2607003450393677, |
|
"rewards/rejected": -2.5871543884277344, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 56.89213037695923, |
|
"learning_rate": 3.726961423238706e-07, |
|
"logits/chosen": 0.2933524250984192, |
|
"logits/rejected": 0.212088942527771, |
|
"logps/chosen": -426.76080322265625, |
|
"logps/rejected": -546.6845703125, |
|
"loss": 0.5149, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2648175954818726, |
|
"rewards/margins": 1.0558243989944458, |
|
"rewards/rejected": -2.3206419944763184, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.40547024952015354, |
|
"grad_norm": 48.93178210300578, |
|
"learning_rate": 3.708675402441882e-07, |
|
"logits/chosen": 0.2865277826786041, |
|
"logits/rejected": 0.37102895975112915, |
|
"logps/chosen": -484.88519287109375, |
|
"logps/rejected": -502.8384704589844, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2438859939575195, |
|
"rewards/margins": 0.6120424270629883, |
|
"rewards/rejected": -1.855928659439087, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 41.4789900308926, |
|
"learning_rate": 3.6903045978675775e-07, |
|
"logits/chosen": 0.3034370541572571, |
|
"logits/rejected": 0.2625337243080139, |
|
"logps/chosen": -386.1392517089844, |
|
"logps/rejected": -470.08135986328125, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9028989672660828, |
|
"rewards/margins": 1.0875600576400757, |
|
"rewards/rejected": -1.9904590845108032, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4102687140115163, |
|
"grad_norm": 35.84427094735192, |
|
"learning_rate": 3.6718502981547474e-07, |
|
"logits/chosen": 0.385175883769989, |
|
"logits/rejected": 0.2869270443916321, |
|
"logps/chosen": -436.753662109375, |
|
"logps/rejected": -548.1320190429688, |
|
"loss": 0.512, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1051702499389648, |
|
"rewards/margins": 0.6421515345573425, |
|
"rewards/rejected": -1.7473220825195312, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 36.76356281345392, |
|
"learning_rate": 3.6533137977991986e-07, |
|
"logits/chosen": 0.2681284248828888, |
|
"logits/rejected": 0.27597135305404663, |
|
"logps/chosen": -444.15826416015625, |
|
"logps/rejected": -524.8231201171875, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9779054522514343, |
|
"rewards/margins": 0.62675940990448, |
|
"rewards/rejected": -1.6046650409698486, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41506717850287905, |
|
"grad_norm": 42.054568712185926, |
|
"learning_rate": 3.6346963970627865e-07, |
|
"logits/chosen": 0.3877958655357361, |
|
"logits/rejected": 0.2975226044654846, |
|
"logps/chosen": -420.9158630371094, |
|
"logps/rejected": -515.4686889648438, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0269657373428345, |
|
"rewards/margins": 0.7597817182540894, |
|
"rewards/rejected": -1.7867473363876343, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 44.183218072360475, |
|
"learning_rate": 3.615999401882207e-07, |
|
"logits/chosen": 0.5101007223129272, |
|
"logits/rejected": 0.44053035974502563, |
|
"logps/chosen": -388.8902282714844, |
|
"logps/rejected": -512.8978881835938, |
|
"loss": 0.5121, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2427517175674438, |
|
"rewards/margins": 0.933813214302063, |
|
"rewards/rejected": -2.1765646934509277, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41986564299424184, |
|
"grad_norm": 38.84095884357132, |
|
"learning_rate": 3.597224123777389e-07, |
|
"logits/chosen": 0.4116114675998688, |
|
"logits/rejected": 0.3661612570285797, |
|
"logps/chosen": -430.21990966796875, |
|
"logps/rejected": -544.9900512695312, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1606342792510986, |
|
"rewards/margins": 1.0220921039581299, |
|
"rewards/rejected": -2.1827263832092285, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 48.48200071110331, |
|
"learning_rate": 3.5783718797595e-07, |
|
"logits/chosen": 0.3250165581703186, |
|
"logits/rejected": 0.41192755103111267, |
|
"logps/chosen": -487.70404052734375, |
|
"logps/rejected": -506.49530029296875, |
|
"loss": 0.5418, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.321012020111084, |
|
"rewards/margins": 0.7061235308647156, |
|
"rewards/rejected": -2.0271353721618652, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4246641074856046, |
|
"grad_norm": 41.976087972205285, |
|
"learning_rate": 3.559443992238558e-07, |
|
"logits/chosen": 0.38490504026412964, |
|
"logits/rejected": 0.3506616950035095, |
|
"logps/chosen": -414.7301330566406, |
|
"logps/rejected": -553.3479614257812, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9950034022331238, |
|
"rewards/margins": 1.1345813274383545, |
|
"rewards/rejected": -2.129584550857544, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 44.81649257476256, |
|
"learning_rate": 3.540441788930673e-07, |
|
"logits/chosen": 0.3962218165397644, |
|
"logits/rejected": 0.325061172246933, |
|
"logps/chosen": -467.65155029296875, |
|
"logps/rejected": -539.2551879882812, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1214022636413574, |
|
"rewards/margins": 1.1308571100234985, |
|
"rewards/rejected": -2.2522594928741455, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42946257197696736, |
|
"grad_norm": 45.2856769850179, |
|
"learning_rate": 3.5213666027649123e-07, |
|
"logits/chosen": 0.33266204595565796, |
|
"logits/rejected": 0.3824441134929657, |
|
"logps/chosen": -480.2119140625, |
|
"logps/rejected": -476.08984375, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3972989320755005, |
|
"rewards/margins": 0.541749119758606, |
|
"rewards/rejected": -1.9390478134155273, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 54.06455040727181, |
|
"learning_rate": 3.5022197717898017e-07, |
|
"logits/chosen": 0.19602210819721222, |
|
"logits/rejected": 0.23719044029712677, |
|
"logps/chosen": -394.2027587890625, |
|
"logps/rejected": -459.33221435546875, |
|
"loss": 0.4769, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.173718810081482, |
|
"rewards/margins": 0.8884655833244324, |
|
"rewards/rejected": -2.0621845722198486, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43426103646833014, |
|
"grad_norm": 36.13993495552892, |
|
"learning_rate": 3.4830026390794633e-07, |
|
"logits/chosen": 0.16905806958675385, |
|
"logits/rejected": 0.14926643669605255, |
|
"logps/chosen": -505.55126953125, |
|
"logps/rejected": -551.2828979492188, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3583290576934814, |
|
"rewards/margins": 1.004029631614685, |
|
"rewards/rejected": -2.362358570098877, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 32.25356254003183, |
|
"learning_rate": 3.4637165526394104e-07, |
|
"logits/chosen": 0.23928511142730713, |
|
"logits/rejected": 0.22237971425056458, |
|
"logps/chosen": -415.0269470214844, |
|
"logps/rejected": -494.46405029296875, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.063435435295105, |
|
"rewards/margins": 0.7565540671348572, |
|
"rewards/rejected": -1.819989562034607, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43905950095969287, |
|
"grad_norm": 34.4419746511506, |
|
"learning_rate": 3.4443628653119814e-07, |
|
"logits/chosen": 0.27581119537353516, |
|
"logits/rejected": 0.24289298057556152, |
|
"logps/chosen": -466.73529052734375, |
|
"logps/rejected": -645.5926513671875, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2824543714523315, |
|
"rewards/margins": 1.4018195867538452, |
|
"rewards/rejected": -2.684274196624756, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 41.075704870340594, |
|
"learning_rate": 3.424942934681453e-07, |
|
"logits/chosen": 0.27590471506118774, |
|
"logits/rejected": 0.33334219455718994, |
|
"logps/chosen": -408.6986083984375, |
|
"logps/rejected": -506.6996154785156, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9812175035476685, |
|
"rewards/margins": 1.1212607622146606, |
|
"rewards/rejected": -2.10247802734375, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44385796545105566, |
|
"grad_norm": 51.35010815764105, |
|
"learning_rate": 3.405458122978804e-07, |
|
"logits/chosen": 0.28459858894348145, |
|
"logits/rejected": 0.24139773845672607, |
|
"logps/chosen": -467.11932373046875, |
|
"logps/rejected": -536.8555908203125, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.151214838027954, |
|
"rewards/margins": 0.9230279922485352, |
|
"rewards/rejected": -2.0742428302764893, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 58.94555175884757, |
|
"learning_rate": 3.3859097969861633e-07, |
|
"logits/chosen": 0.3147757649421692, |
|
"logits/rejected": 0.296464741230011, |
|
"logps/chosen": -475.61175537109375, |
|
"logps/rejected": -521.8074951171875, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.389574646949768, |
|
"rewards/margins": 0.8023090362548828, |
|
"rewards/rejected": -2.1918835639953613, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44865642994241844, |
|
"grad_norm": 43.58295874945141, |
|
"learning_rate": 3.366299327940936e-07, |
|
"logits/chosen": 0.2593730092048645, |
|
"logits/rejected": 0.1364545077085495, |
|
"logps/chosen": -485.0771484375, |
|
"logps/rejected": -578.6021728515625, |
|
"loss": 0.512, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2985343933105469, |
|
"rewards/margins": 0.7179661989212036, |
|
"rewards/rejected": -2.01650071144104, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 33.90433109567072, |
|
"learning_rate": 3.3466280914396117e-07, |
|
"logits/chosen": 0.17524075508117676, |
|
"logits/rejected": 0.12327942997217178, |
|
"logps/chosen": -436.56536865234375, |
|
"logps/rejected": -551.0841064453125, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3039577007293701, |
|
"rewards/margins": 0.9412840604782104, |
|
"rewards/rejected": -2.24524188041687, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4534548944337812, |
|
"grad_norm": 48.57573160920276, |
|
"learning_rate": 3.326897467341281e-07, |
|
"logits/chosen": 0.10545514523983002, |
|
"logits/rejected": 0.10264859348535538, |
|
"logps/chosen": -394.76776123046875, |
|
"logps/rejected": -494.9923400878906, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1733514070510864, |
|
"rewards/margins": 0.8517268896102905, |
|
"rewards/rejected": -2.025078296661377, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 52.20592211080183, |
|
"learning_rate": 3.3071088396708335e-07, |
|
"logits/chosen": 0.16945740580558777, |
|
"logits/rejected": 0.12676987051963806, |
|
"logps/chosen": -370.2272033691406, |
|
"logps/rejected": -489.0442810058594, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9613567590713501, |
|
"rewards/margins": 1.0801159143447876, |
|
"rewards/rejected": -2.0414726734161377, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45825335892514396, |
|
"grad_norm": 39.11646762477483, |
|
"learning_rate": 3.2872635965218824e-07, |
|
"logits/chosen": 0.36154884099960327, |
|
"logits/rejected": 0.3266182541847229, |
|
"logps/chosen": -472.8251037597656, |
|
"logps/rejected": -570.266845703125, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5062012672424316, |
|
"rewards/margins": 0.7707003355026245, |
|
"rewards/rejected": -2.2769012451171875, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 33.57125004563553, |
|
"learning_rate": 3.2673631299593905e-07, |
|
"logits/chosen": 0.20721454918384552, |
|
"logits/rejected": 0.24143996834754944, |
|
"logps/chosen": -474.7398376464844, |
|
"logps/rejected": -535.4746704101562, |
|
"loss": 0.5204, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3117005825042725, |
|
"rewards/margins": 0.8077449798583984, |
|
"rewards/rejected": -2.119445562362671, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4630518234165067, |
|
"grad_norm": 49.103327518032735, |
|
"learning_rate": 3.247408835922024e-07, |
|
"logits/chosen": 0.3439037799835205, |
|
"logits/rejected": 0.2654734253883362, |
|
"logps/chosen": -527.8143310546875, |
|
"logps/rejected": -611.921875, |
|
"loss": 0.506, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5940402746200562, |
|
"rewards/margins": 0.856569766998291, |
|
"rewards/rejected": -2.4506099224090576, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 49.876953655813104, |
|
"learning_rate": 3.2274021141242306e-07, |
|
"logits/chosen": 0.43298500776290894, |
|
"logits/rejected": 0.4217461049556732, |
|
"logps/chosen": -458.80535888671875, |
|
"logps/rejected": -546.6775512695312, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2974848747253418, |
|
"rewards/margins": 0.8723615407943726, |
|
"rewards/rejected": -2.169846534729004, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4678502879078695, |
|
"grad_norm": 59.65377964792493, |
|
"learning_rate": 3.2073443679580613e-07, |
|
"logits/chosen": 0.2417244166135788, |
|
"logits/rejected": 0.23499338328838348, |
|
"logps/chosen": -469.7822265625, |
|
"logps/rejected": -545.030517578125, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3010753393173218, |
|
"rewards/margins": 0.6901552081108093, |
|
"rewards/rejected": -1.9912303686141968, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 48.010287716369675, |
|
"learning_rate": 3.1872370043947194e-07, |
|
"logits/chosen": 0.44626665115356445, |
|
"logits/rejected": 0.40652981400489807, |
|
"logps/chosen": -418.04608154296875, |
|
"logps/rejected": -536.6441650390625, |
|
"loss": 0.464, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.8487616777420044, |
|
"rewards/margins": 1.3238131999969482, |
|
"rewards/rejected": -2.1725752353668213, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47264875239923226, |
|
"grad_norm": 46.983751656355615, |
|
"learning_rate": 3.167081433885874e-07, |
|
"logits/chosen": 0.4636153280735016, |
|
"logits/rejected": 0.4149314761161804, |
|
"logps/chosen": -560.605712890625, |
|
"logps/rejected": -700.2041015625, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5527485609054565, |
|
"rewards/margins": 0.9842365384101868, |
|
"rewards/rejected": -2.536984920501709, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 46.832783839770435, |
|
"learning_rate": 3.14687907026472e-07, |
|
"logits/chosen": 0.34756892919540405, |
|
"logits/rejected": 0.3631365895271301, |
|
"logps/chosen": -452.46533203125, |
|
"logps/rejected": -579.3941650390625, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4736191034317017, |
|
"rewards/margins": 1.1666433811187744, |
|
"rewards/rejected": -2.6402623653411865, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.477447216890595, |
|
"grad_norm": 51.65835958499199, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": 0.28329282999038696, |
|
"logits/rejected": 0.23793701827526093, |
|
"logps/chosen": -574.2579956054688, |
|
"logps/rejected": -644.44677734375, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.125730037689209, |
|
"rewards/margins": 0.6223888397216797, |
|
"rewards/rejected": -2.7481188774108887, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 56.94846979096699, |
|
"learning_rate": 3.1063396353306097e-07, |
|
"logits/chosen": 0.370736300945282, |
|
"logits/rejected": 0.43973660469055176, |
|
"logps/chosen": -495.07171630859375, |
|
"logps/rejected": -519.3072509765625, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2859165668487549, |
|
"rewards/margins": 0.9244076609611511, |
|
"rewards/rejected": -2.2103240489959717, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"eval_logits/chosen": 0.5136142373085022, |
|
"eval_logits/rejected": 0.4682252109050751, |
|
"eval_logps/chosen": -468.3976135253906, |
|
"eval_logps/rejected": -586.2582397460938, |
|
"eval_loss": 0.49979615211486816, |
|
"eval_rewards/accuracies": 0.7803571224212646, |
|
"eval_rewards/chosen": -1.4972540140151978, |
|
"eval_rewards/margins": 1.1174662113189697, |
|
"eval_rewards/rejected": -2.614720106124878, |
|
"eval_runtime": 185.1772, |
|
"eval_samples_per_second": 24.09, |
|
"eval_steps_per_second": 0.378, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4822456813819578, |
|
"grad_norm": 61.64399490626884, |
|
"learning_rate": 3.0860054076979535e-07, |
|
"logits/chosen": 0.34754273295402527, |
|
"logits/rejected": 0.3305366635322571, |
|
"logps/chosen": -490.9940490722656, |
|
"logps/rejected": -570.1460571289062, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5707345008850098, |
|
"rewards/margins": 1.0668280124664307, |
|
"rewards/rejected": -2.6375622749328613, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 54.18063166333372, |
|
"learning_rate": 3.065630074114115e-07, |
|
"logits/chosen": 0.3459337651729584, |
|
"logits/rejected": 0.36747267842292786, |
|
"logps/chosen": -486.1033630371094, |
|
"logps/rejected": -554.5687866210938, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3654518127441406, |
|
"rewards/margins": 1.115387201309204, |
|
"rewards/rejected": -2.4808387756347656, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4870441458733205, |
|
"grad_norm": 48.37481533662561, |
|
"learning_rate": 3.0452150638277947e-07, |
|
"logits/chosen": 0.3800879120826721, |
|
"logits/rejected": 0.3224307894706726, |
|
"logps/chosen": -418.9652404785156, |
|
"logps/rejected": -507.47747802734375, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2180391550064087, |
|
"rewards/margins": 0.8009985685348511, |
|
"rewards/rejected": -2.0190374851226807, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 35.82205388348395, |
|
"learning_rate": 3.024761808870856e-07, |
|
"logits/chosen": 0.43575650453567505, |
|
"logits/rejected": 0.3246951997280121, |
|
"logps/chosen": -394.49700927734375, |
|
"logps/rejected": -528.8324584960938, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9007269740104675, |
|
"rewards/margins": 1.4299715757369995, |
|
"rewards/rejected": -2.3306984901428223, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4918426103646833, |
|
"grad_norm": 69.46282819499118, |
|
"learning_rate": 3.004271743957875e-07, |
|
"logits/chosen": 0.1757555603981018, |
|
"logits/rejected": 0.11282005161046982, |
|
"logps/chosen": -492.5611877441406, |
|
"logps/rejected": -602.2737426757812, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.534406304359436, |
|
"rewards/margins": 0.9544004201889038, |
|
"rewards/rejected": -2.48880672454834, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 45.79139932334549, |
|
"learning_rate": 2.983746306385499e-07, |
|
"logits/chosen": 0.26720863580703735, |
|
"logits/rejected": 0.22653250396251678, |
|
"logps/chosen": -450.50970458984375, |
|
"logps/rejected": -577.3936767578125, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3142211437225342, |
|
"rewards/margins": 1.129241704940796, |
|
"rewards/rejected": -2.44346284866333, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4966410748560461, |
|
"grad_norm": 43.321422686785745, |
|
"learning_rate": 2.963186935931628e-07, |
|
"logits/chosen": 0.3077266812324524, |
|
"logits/rejected": 0.2476225346326828, |
|
"logps/chosen": -480.34619140625, |
|
"logps/rejected": -587.4752197265625, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.302478551864624, |
|
"rewards/margins": 1.1229597330093384, |
|
"rewards/rejected": -2.425438404083252, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 45.25903246638121, |
|
"learning_rate": 2.9425950747544176e-07, |
|
"logits/chosen": 0.2362133264541626, |
|
"logits/rejected": 0.20862069725990295, |
|
"logps/chosen": -528.6657104492188, |
|
"logps/rejected": -640.0977172851562, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.531398057937622, |
|
"rewards/margins": 1.434666633605957, |
|
"rewards/rejected": -2.966064929962158, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5014395393474088, |
|
"grad_norm": 46.57096517661485, |
|
"learning_rate": 2.921972167291119e-07, |
|
"logits/chosen": 0.1148526519536972, |
|
"logits/rejected": 0.0883648619055748, |
|
"logps/chosen": -483.4339904785156, |
|
"logps/rejected": -601.8978271484375, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3184032440185547, |
|
"rewards/margins": 0.981913685798645, |
|
"rewards/rejected": -2.3003170490264893, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 39.63447443095947, |
|
"learning_rate": 2.9013196601567567e-07, |
|
"logits/chosen": 0.08627250045537949, |
|
"logits/rejected": 0.10158304125070572, |
|
"logps/chosen": -421.615478515625, |
|
"logps/rejected": -528.127685546875, |
|
"loss": 0.5409, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1398550271987915, |
|
"rewards/margins": 0.95441073179245, |
|
"rewards/rejected": -2.0942656993865967, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5062380038387716, |
|
"grad_norm": 36.05433569174663, |
|
"learning_rate": 2.8806390020426555e-07, |
|
"logits/chosen": 0.0710478127002716, |
|
"logits/rejected": 0.05162844806909561, |
|
"logps/chosen": -453.2339782714844, |
|
"logps/rejected": -557.0374145507812, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1150130033493042, |
|
"rewards/margins": 1.007678747177124, |
|
"rewards/rejected": -2.1226916313171387, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 50.26433068268168, |
|
"learning_rate": 2.8599316436148187e-07, |
|
"logits/chosen": 0.24139384925365448, |
|
"logits/rejected": 0.21716871857643127, |
|
"logps/chosen": -447.4005432128906, |
|
"logps/rejected": -504.754638671875, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.421812653541565, |
|
"rewards/margins": 0.5791618227958679, |
|
"rewards/rejected": -2.000974655151367, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5110364683301344, |
|
"grad_norm": 48.79279161854594, |
|
"learning_rate": 2.8391990374121723e-07, |
|
"logits/chosen": 0.14107191562652588, |
|
"logits/rejected": 0.05996360257267952, |
|
"logps/chosen": -447.6856384277344, |
|
"logps/rejected": -572.6692504882812, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3457109928131104, |
|
"rewards/margins": 1.006974458694458, |
|
"rewards/rejected": -2.3526854515075684, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 49.554326824350056, |
|
"learning_rate": 2.818442637744669e-07, |
|
"logits/chosen": 0.14974358677864075, |
|
"logits/rejected": 0.07151228934526443, |
|
"logps/chosen": -468.7398986816406, |
|
"logps/rejected": -561.0519409179688, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4705628156661987, |
|
"rewards/margins": 0.9486227035522461, |
|
"rewards/rejected": -2.4191856384277344, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5158349328214972, |
|
"grad_norm": 49.03414708222374, |
|
"learning_rate": 2.797663900591284e-07, |
|
"logits/chosen": 0.12192866951227188, |
|
"logits/rejected": 0.1623045951128006, |
|
"logps/chosen": -484.0595703125, |
|
"logps/rejected": -536.2966918945312, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.466080904006958, |
|
"rewards/margins": 0.9258912205696106, |
|
"rewards/rejected": -2.3919718265533447, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 47.623947511820035, |
|
"learning_rate": 2.776864283497874e-07, |
|
"logits/chosen": 0.2551673352718353, |
|
"logits/rejected": 0.25919514894485474, |
|
"logps/chosen": -450.1902770996094, |
|
"logps/rejected": -603.4628295898438, |
|
"loss": 0.5047, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.427159309387207, |
|
"rewards/margins": 1.6337556838989258, |
|
"rewards/rejected": -3.0609147548675537, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5206333973128598, |
|
"grad_norm": 39.32544622434657, |
|
"learning_rate": 2.756045245474943e-07, |
|
"logits/chosen": 0.1113271713256836, |
|
"logits/rejected": 0.06980106979608536, |
|
"logps/chosen": -477.89288330078125, |
|
"logps/rejected": -590.3850708007812, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3194444179534912, |
|
"rewards/margins": 0.8708696365356445, |
|
"rewards/rejected": -2.1903140544891357, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 41.32084808239206, |
|
"learning_rate": 2.7352082468952977e-07, |
|
"logits/chosen": 0.18554797768592834, |
|
"logits/rejected": 0.10465570539236069, |
|
"logps/chosen": -464.09027099609375, |
|
"logps/rejected": -614.1561279296875, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5660805702209473, |
|
"rewards/margins": 1.2777998447418213, |
|
"rewards/rejected": -2.8438801765441895, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5254318618042226, |
|
"grad_norm": 67.92788558845768, |
|
"learning_rate": 2.7143547493916e-07, |
|
"logits/chosen": 0.18377096951007843, |
|
"logits/rejected": 0.10271792113780975, |
|
"logps/chosen": -409.5820007324219, |
|
"logps/rejected": -570.1399536132812, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.100378394126892, |
|
"rewards/margins": 1.5573487281799316, |
|
"rewards/rejected": -2.657727003097534, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 50.707037903665324, |
|
"learning_rate": 2.693486215753853e-07, |
|
"logits/chosen": 0.12866708636283875, |
|
"logits/rejected": 0.06411238014698029, |
|
"logps/chosen": -418.26715087890625, |
|
"logps/rejected": -512.0023193359375, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2301840782165527, |
|
"rewards/margins": 1.0781285762786865, |
|
"rewards/rejected": -2.30831241607666, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5302303262955854, |
|
"grad_norm": 46.86231101360818, |
|
"learning_rate": 2.6726041098267805e-07, |
|
"logits/chosen": -0.031896281987428665, |
|
"logits/rejected": -0.030716899782419205, |
|
"logps/chosen": -487.5838928222656, |
|
"logps/rejected": -485.93646240234375, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3748475313186646, |
|
"rewards/margins": 0.40763726830482483, |
|
"rewards/rejected": -1.7824846506118774, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 72.55957637434223, |
|
"learning_rate": 2.6517098964071507e-07, |
|
"logits/chosen": 0.23304399847984314, |
|
"logits/rejected": 0.22425612807273865, |
|
"logps/chosen": -444.8831481933594, |
|
"logps/rejected": -506.5235900878906, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1518399715423584, |
|
"rewards/margins": 0.4722941517829895, |
|
"rewards/rejected": -1.6241340637207031, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5350287907869482, |
|
"grad_norm": 43.727526427782365, |
|
"learning_rate": 2.630805041141023e-07, |
|
"logits/chosen": 0.2689264118671417, |
|
"logits/rejected": 0.2254217565059662, |
|
"logps/chosen": -403.8577880859375, |
|
"logps/rejected": -512.3160400390625, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.058393120765686, |
|
"rewards/margins": 0.9750925302505493, |
|
"rewards/rejected": -2.0334856510162354, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 49.11098211804801, |
|
"learning_rate": 2.609891010420941e-07, |
|
"logits/chosen": 0.18164226412773132, |
|
"logits/rejected": 0.17124636471271515, |
|
"logps/chosen": -454.2110290527344, |
|
"logps/rejected": -558.4046630859375, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1852877140045166, |
|
"rewards/margins": 1.1636625528335571, |
|
"rewards/rejected": -2.3489503860473633, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.539827255278311, |
|
"grad_norm": 43.26353709722887, |
|
"learning_rate": 2.5889692712830674e-07, |
|
"logits/chosen": 0.052560679614543915, |
|
"logits/rejected": 0.03842206671833992, |
|
"logps/chosen": -396.25408935546875, |
|
"logps/rejected": -478.73236083984375, |
|
"loss": 0.4734, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9645735621452332, |
|
"rewards/margins": 0.9311714172363281, |
|
"rewards/rejected": -1.895745038986206, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 47.10267589353339, |
|
"learning_rate": 2.5680412913042843e-07, |
|
"logits/chosen": 0.23019644618034363, |
|
"logits/rejected": 0.179383784532547, |
|
"logps/chosen": -421.8323669433594, |
|
"logps/rejected": -528.0892333984375, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2181730270385742, |
|
"rewards/margins": 1.0768169164657593, |
|
"rewards/rejected": -2.294990062713623, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5446257197696737, |
|
"grad_norm": 49.07465366967735, |
|
"learning_rate": 2.5471085384992404e-07, |
|
"logits/chosen": 0.21075716614723206, |
|
"logits/rejected": 0.0905676931142807, |
|
"logps/chosen": -402.1150207519531, |
|
"logps/rejected": -584.5428466796875, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.061937928199768, |
|
"rewards/margins": 1.6330102682113647, |
|
"rewards/rejected": -2.694948196411133, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 47.06580983617911, |
|
"learning_rate": 2.526172481217381e-07, |
|
"logits/chosen": 0.28002408146858215, |
|
"logits/rejected": 0.19437995553016663, |
|
"logps/chosen": -421.2408142089844, |
|
"logps/rejected": -556.0794067382812, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5300331115722656, |
|
"rewards/margins": 1.192158579826355, |
|
"rewards/rejected": -2.722191572189331, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5494241842610365, |
|
"grad_norm": 42.697690556320396, |
|
"learning_rate": 2.5052345880399456e-07, |
|
"logits/chosen": 0.336375892162323, |
|
"logits/rejected": 0.33653944730758667, |
|
"logps/chosen": -417.27496337890625, |
|
"logps/rejected": -494.6957092285156, |
|
"loss": 0.4616, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.37350594997406, |
|
"rewards/margins": 0.7126041650772095, |
|
"rewards/rejected": -2.0861101150512695, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 44.24690759792965, |
|
"learning_rate": 2.4842963276769555e-07, |
|
"logits/chosen": 0.46479305624961853, |
|
"logits/rejected": 0.34474366903305054, |
|
"logps/chosen": -428.14227294921875, |
|
"logps/rejected": -594.80224609375, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.475941777229309, |
|
"rewards/margins": 1.2353615760803223, |
|
"rewards/rejected": -2.711303472518921, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5542226487523992, |
|
"grad_norm": 42.732671934213585, |
|
"learning_rate": 2.463359168864189e-07, |
|
"logits/chosen": 0.25363442301750183, |
|
"logits/rejected": 0.3057165741920471, |
|
"logps/chosen": -501.9913635253906, |
|
"logps/rejected": -549.3098754882812, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.290345311164856, |
|
"rewards/margins": 1.010578989982605, |
|
"rewards/rejected": -2.300924301147461, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 56.4377037562831, |
|
"learning_rate": 2.4424245802601555e-07, |
|
"logits/chosen": 0.2584269642829895, |
|
"logits/rejected": 0.18541845679283142, |
|
"logps/chosen": -429.4263610839844, |
|
"logps/rejected": -564.8827514648438, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1343291997909546, |
|
"rewards/margins": 0.8212429285049438, |
|
"rewards/rejected": -1.9555721282958984, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.559021113243762, |
|
"grad_norm": 43.562067174648554, |
|
"learning_rate": 2.421494030343072e-07, |
|
"logits/chosen": 0.3927503228187561, |
|
"logits/rejected": 0.4579402506351471, |
|
"logps/chosen": -454.2933044433594, |
|
"logps/rejected": -463.19879150390625, |
|
"loss": 0.5602, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2649152278900146, |
|
"rewards/margins": 0.6680216193199158, |
|
"rewards/rejected": -1.9329369068145752, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 58.65475476508653, |
|
"learning_rate": 2.400568987307861e-07, |
|
"logits/chosen": 0.4964686334133148, |
|
"logits/rejected": 0.5107001662254333, |
|
"logps/chosen": -432.31341552734375, |
|
"logps/rejected": -462.37957763671875, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.243574857711792, |
|
"rewards/margins": 0.5827276110649109, |
|
"rewards/rejected": -1.8263022899627686, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5638195777351248, |
|
"grad_norm": 58.315209990127244, |
|
"learning_rate": 2.379650918963156e-07, |
|
"logits/chosen": 0.3746911585330963, |
|
"logits/rejected": 0.3054753541946411, |
|
"logps/chosen": -421.2218322753906, |
|
"logps/rejected": -541.8524169921875, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4736093282699585, |
|
"rewards/margins": 1.0763620138168335, |
|
"rewards/rejected": -2.549971342086792, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 48.959910400597586, |
|
"learning_rate": 2.3587412926283438e-07, |
|
"logits/chosen": 0.35963717103004456, |
|
"logits/rejected": 0.28781235218048096, |
|
"logps/chosen": -480.2315368652344, |
|
"logps/rejected": -566.6637573242188, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1883009672164917, |
|
"rewards/margins": 1.270986795425415, |
|
"rewards/rejected": -2.459287643432617, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5686180422264875, |
|
"grad_norm": 30.962931166603095, |
|
"learning_rate": 2.337841575030642e-07, |
|
"logits/chosen": 0.35713425278663635, |
|
"logits/rejected": 0.30424803495407104, |
|
"logps/chosen": -492.8209533691406, |
|
"logps/rejected": -574.771240234375, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.396402359008789, |
|
"rewards/margins": 0.7614862322807312, |
|
"rewards/rejected": -2.157888889312744, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 42.85766086532651, |
|
"learning_rate": 2.316953232202206e-07, |
|
"logits/chosen": 0.550395131111145, |
|
"logits/rejected": 0.6783905029296875, |
|
"logps/chosen": -430.48162841796875, |
|
"logps/rejected": -421.599853515625, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2630993127822876, |
|
"rewards/margins": 0.6726707220077515, |
|
"rewards/rejected": -1.9357702732086182, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5734165067178503, |
|
"grad_norm": 38.41377863495817, |
|
"learning_rate": 2.2960777293772958e-07, |
|
"logits/chosen": 0.5615749359130859, |
|
"logits/rejected": 0.6018954515457153, |
|
"logps/chosen": -397.6216125488281, |
|
"logps/rejected": -480.299560546875, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1725983619689941, |
|
"rewards/margins": 0.9747906923294067, |
|
"rewards/rejected": -2.1473889350891113, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 40.03422172905619, |
|
"learning_rate": 2.2752165308894974e-07, |
|
"logits/chosen": 0.46104907989501953, |
|
"logits/rejected": 0.44198736548423767, |
|
"logps/chosen": -378.5218505859375, |
|
"logps/rejected": -456.65576171875, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1959477663040161, |
|
"rewards/margins": 0.9340691566467285, |
|
"rewards/rejected": -2.130016803741455, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5782149712092131, |
|
"grad_norm": 54.11084171812038, |
|
"learning_rate": 2.254371100069005e-07, |
|
"logits/chosen": 0.457451730966568, |
|
"logits/rejected": 0.320446252822876, |
|
"logps/chosen": -431.6908264160156, |
|
"logps/rejected": -538.86865234375, |
|
"loss": 0.4874, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1469987630844116, |
|
"rewards/margins": 0.8605014085769653, |
|
"rewards/rejected": -2.007500171661377, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 54.79494157401916, |
|
"learning_rate": 2.2335428991399725e-07, |
|
"logits/chosen": 0.47143587470054626, |
|
"logits/rejected": 0.4143534302711487, |
|
"logps/chosen": -398.853271484375, |
|
"logps/rejected": -594.490966796875, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.449357509613037, |
|
"rewards/margins": 1.8377138376235962, |
|
"rewards/rejected": -3.287071704864502, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5830134357005758, |
|
"grad_norm": 47.37355935293041, |
|
"learning_rate": 2.2127333891179458e-07, |
|
"logits/chosen": 0.4510342478752136, |
|
"logits/rejected": 0.36793094873428345, |
|
"logps/chosen": -419.24163818359375, |
|
"logps/rejected": -571.1874389648438, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3948280811309814, |
|
"rewards/margins": 1.2814536094665527, |
|
"rewards/rejected": -2.676281690597534, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 65.89730578952388, |
|
"learning_rate": 2.1919440297073782e-07, |
|
"logits/chosen": 0.3510410785675049, |
|
"logits/rejected": 0.3182118535041809, |
|
"logps/chosen": -415.6324157714844, |
|
"logps/rejected": -527.152099609375, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.5300906896591187, |
|
"rewards/margins": 1.0138437747955322, |
|
"rewards/rejected": -2.5439343452453613, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5878119001919386, |
|
"grad_norm": 42.30705937238165, |
|
"learning_rate": 2.1711762791992368e-07, |
|
"logits/chosen": 0.43873363733291626, |
|
"logits/rejected": 0.46004414558410645, |
|
"logps/chosen": -474.68341064453125, |
|
"logps/rejected": -534.9579467773438, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1993557214736938, |
|
"rewards/margins": 0.8993379473686218, |
|
"rewards/rejected": -2.098693370819092, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 45.61922027456477, |
|
"learning_rate": 2.1504315943687114e-07, |
|
"logits/chosen": 0.18166793882846832, |
|
"logits/rejected": 0.07724637538194656, |
|
"logps/chosen": -408.96893310546875, |
|
"logps/rejected": -581.2828979492188, |
|
"loss": 0.4625, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0965474843978882, |
|
"rewards/margins": 1.2784209251403809, |
|
"rewards/rejected": -2.3749685287475586, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5926103646833013, |
|
"grad_norm": 53.517726559327514, |
|
"learning_rate": 2.1297114303730248e-07, |
|
"logits/chosen": 0.3896231949329376, |
|
"logits/rejected": 0.2409631460905075, |
|
"logps/chosen": -423.19287109375, |
|
"logps/rejected": -586.328857421875, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.327804446220398, |
|
"rewards/margins": 1.076370120048523, |
|
"rewards/rejected": -2.404174327850342, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 41.01802265556329, |
|
"learning_rate": 2.1090172406493616e-07, |
|
"logits/chosen": 0.3331597149372101, |
|
"logits/rejected": 0.2225189208984375, |
|
"logps/chosen": -397.5386657714844, |
|
"logps/rejected": -519.840087890625, |
|
"loss": 0.4603, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.131462812423706, |
|
"rewards/margins": 0.950838565826416, |
|
"rewards/rejected": -2.082301378250122, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5974088291746641, |
|
"grad_norm": 60.21604361600221, |
|
"learning_rate": 2.0883504768129146e-07, |
|
"logits/chosen": 0.30570241808891296, |
|
"logits/rejected": 0.24165184795856476, |
|
"logps/chosen": -461.2522888183594, |
|
"logps/rejected": -565.5457763671875, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1941462755203247, |
|
"rewards/margins": 1.1086372137069702, |
|
"rewards/rejected": -2.302783489227295, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 45.67541969535949, |
|
"learning_rate": 2.0677125885550571e-07, |
|
"logits/chosen": 0.4085448384284973, |
|
"logits/rejected": 0.48327702283859253, |
|
"logps/chosen": -436.59857177734375, |
|
"logps/rejected": -471.85498046875, |
|
"loss": 0.4864, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.352922797203064, |
|
"rewards/margins": 0.7829147577285767, |
|
"rewards/rejected": -2.1358375549316406, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6022072936660269, |
|
"grad_norm": 60.523710599155514, |
|
"learning_rate": 2.0471050235416587e-07, |
|
"logits/chosen": 0.14623039960861206, |
|
"logits/rejected": 0.19062075018882751, |
|
"logps/chosen": -451.1435546875, |
|
"logps/rejected": -491.1160583496094, |
|
"loss": 0.4579, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3129417896270752, |
|
"rewards/margins": 0.9290571212768555, |
|
"rewards/rejected": -2.2419991493225098, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 52.23271499985374, |
|
"learning_rate": 2.026529227311532e-07, |
|
"logits/chosen": 0.29617246985435486, |
|
"logits/rejected": 0.2822147011756897, |
|
"logps/chosen": -423.54315185546875, |
|
"logps/rejected": -501.5276794433594, |
|
"loss": 0.5351, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.437732458114624, |
|
"rewards/margins": 0.6575521230697632, |
|
"rewards/rejected": -2.0952847003936768, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6070057581573897, |
|
"grad_norm": 44.954316731149845, |
|
"learning_rate": 2.005986643175036e-07, |
|
"logits/chosen": 0.3328186571598053, |
|
"logits/rejected": 0.2537630498409271, |
|
"logps/chosen": -454.51580810546875, |
|
"logps/rejected": -574.1419677734375, |
|
"loss": 0.4529, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1176398992538452, |
|
"rewards/margins": 1.3029248714447021, |
|
"rewards/rejected": -2.420564889907837, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 63.16908223607974, |
|
"learning_rate": 1.9854787121128328e-07, |
|
"logits/chosen": 0.31036069989204407, |
|
"logits/rejected": 0.34982046484947205, |
|
"logps/chosen": -397.23980712890625, |
|
"logps/rejected": -403.78509521484375, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.217882752418518, |
|
"rewards/margins": 0.5209786891937256, |
|
"rewards/rejected": -1.7388614416122437, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6118042226487524, |
|
"grad_norm": 54.60861450055549, |
|
"learning_rate": 1.9650068726748106e-07, |
|
"logits/chosen": 0.3659752309322357, |
|
"logits/rejected": 0.35895493626594543, |
|
"logps/chosen": -461.1573181152344, |
|
"logps/rejected": -573.6448364257812, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.474023699760437, |
|
"rewards/margins": 1.084517240524292, |
|
"rewards/rejected": -2.5585405826568604, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 60.46600684768552, |
|
"learning_rate": 1.9445725608791718e-07, |
|
"logits/chosen": 0.34406715631484985, |
|
"logits/rejected": 0.28216245770454407, |
|
"logps/chosen": -460.77978515625, |
|
"logps/rejected": -619.3160400390625, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3329025506973267, |
|
"rewards/margins": 1.6671111583709717, |
|
"rewards/rejected": -3.000014066696167, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6166026871401151, |
|
"grad_norm": 47.40884309447939, |
|
"learning_rate": 1.924177210111705e-07, |
|
"logits/chosen": 0.29457220435142517, |
|
"logits/rejected": 0.29915186762809753, |
|
"logps/chosen": -407.5345153808594, |
|
"logps/rejected": -541.1593017578125, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2408500909805298, |
|
"rewards/margins": 1.2775036096572876, |
|
"rewards/rejected": -2.5183534622192383, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 45.99798362644753, |
|
"learning_rate": 1.9038222510252364e-07, |
|
"logits/chosen": 0.25425729155540466, |
|
"logits/rejected": 0.24261541664600372, |
|
"logps/chosen": -444.73992919921875, |
|
"logps/rejected": -504.9520568847656, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1787656545639038, |
|
"rewards/margins": 0.8464619517326355, |
|
"rewards/rejected": -2.0252277851104736, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6214011516314779, |
|
"grad_norm": 52.02289887758591, |
|
"learning_rate": 1.883509111439277e-07, |
|
"logits/chosen": 0.3976004123687744, |
|
"logits/rejected": 0.28759509325027466, |
|
"logps/chosen": -438.90008544921875, |
|
"logps/rejected": -641.5413818359375, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4814860820770264, |
|
"rewards/margins": 1.3605000972747803, |
|
"rewards/rejected": -2.8419861793518066, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 32.303683781858304, |
|
"learning_rate": 1.8632392162398665e-07, |
|
"logits/chosen": 0.23672600090503693, |
|
"logits/rejected": 0.15976786613464355, |
|
"logps/chosen": -484.119873046875, |
|
"logps/rejected": -645.5545654296875, |
|
"loss": 0.4698, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1118555068969727, |
|
"rewards/margins": 1.7390865087509155, |
|
"rewards/rejected": -2.8509418964385986, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6261996161228407, |
|
"grad_norm": 45.29250569251351, |
|
"learning_rate": 1.84301398727962e-07, |
|
"logits/chosen": 0.4794914722442627, |
|
"logits/rejected": 0.37679189443588257, |
|
"logps/chosen": -368.31109619140625, |
|
"logps/rejected": -579.0586547851562, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0693645477294922, |
|
"rewards/margins": 1.7801597118377686, |
|
"rewards/rejected": -2.8495242595672607, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 62.710557092048646, |
|
"learning_rate": 1.8228348432779966e-07, |
|
"logits/chosen": 0.2735206186771393, |
|
"logits/rejected": 0.24361078441143036, |
|
"logps/chosen": -426.20111083984375, |
|
"logps/rejected": -496.5086364746094, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3294744491577148, |
|
"rewards/margins": 0.8788881301879883, |
|
"rewards/rejected": -2.208362579345703, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6309980806142035, |
|
"grad_norm": 73.52463716987671, |
|
"learning_rate": 1.8027031997217773e-07, |
|
"logits/chosen": 0.3862006962299347, |
|
"logits/rejected": 0.27332574129104614, |
|
"logps/chosen": -411.3087463378906, |
|
"logps/rejected": -538.5333251953125, |
|
"loss": 0.4613, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4152649641036987, |
|
"rewards/margins": 1.0575337409973145, |
|
"rewards/rejected": -2.4727988243103027, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 47.969317887923054, |
|
"learning_rate": 1.7826204687657758e-07, |
|
"logits/chosen": 0.2865044176578522, |
|
"logits/rejected": 0.33233708143234253, |
|
"logps/chosen": -468.67108154296875, |
|
"logps/rejected": -502.1822814941406, |
|
"loss": 0.4738, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1685740947723389, |
|
"rewards/margins": 0.8533760905265808, |
|
"rewards/rejected": -2.0219502449035645, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6357965451055663, |
|
"grad_norm": 46.75296720560617, |
|
"learning_rate": 1.762588059133781e-07, |
|
"logits/chosen": 0.3442167043685913, |
|
"logits/rejected": 0.4161573350429535, |
|
"logps/chosen": -470.08807373046875, |
|
"logps/rejected": -546.142578125, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.227698564529419, |
|
"rewards/margins": 1.140878677368164, |
|
"rewards/rejected": -2.368577003479004, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 50.401385190215464, |
|
"learning_rate": 1.7426073760197406e-07, |
|
"logits/chosen": 0.10545764863491058, |
|
"logits/rejected": 0.012745514512062073, |
|
"logps/chosen": -432.08685302734375, |
|
"logps/rejected": -608.319091796875, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.235762119293213, |
|
"rewards/margins": 1.4522688388824463, |
|
"rewards/rejected": -2.688030958175659, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6405950095969289, |
|
"grad_norm": 45.71956495654583, |
|
"learning_rate": 1.7226798209891935e-07, |
|
"logits/chosen": 0.20430830121040344, |
|
"logits/rejected": 0.2912927269935608, |
|
"logps/chosen": -453.98944091796875, |
|
"logps/rejected": -510.1253967285156, |
|
"loss": 0.4619, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4355871677398682, |
|
"rewards/margins": 1.2053136825561523, |
|
"rewards/rejected": -2.6409008502960205, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 45.15170026766255, |
|
"learning_rate": 1.7028067918809535e-07, |
|
"logits/chosen": 0.3014266788959503, |
|
"logits/rejected": 0.22763225436210632, |
|
"logps/chosen": -384.4528503417969, |
|
"logps/rejected": -595.7291870117188, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.171205997467041, |
|
"rewards/margins": 1.6346750259399414, |
|
"rewards/rejected": -2.8058810234069824, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6453934740882917, |
|
"grad_norm": 64.83143640863342, |
|
"learning_rate": 1.6829896827090584e-07, |
|
"logits/chosen": 0.21800704300403595, |
|
"logits/rejected": 0.220147043466568, |
|
"logps/chosen": -443.7188415527344, |
|
"logps/rejected": -480.36907958984375, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3957051038742065, |
|
"rewards/margins": 0.6850441694259644, |
|
"rewards/rejected": -2.080749273300171, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 37.274772223125495, |
|
"learning_rate": 1.6632298835649844e-07, |
|
"logits/chosen": 0.3046364486217499, |
|
"logits/rejected": 0.18461188673973083, |
|
"logps/chosen": -469.7455139160156, |
|
"logps/rejected": -623.6341552734375, |
|
"loss": 0.477, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3073039054870605, |
|
"rewards/margins": 1.1427420377731323, |
|
"rewards/rejected": -2.4500460624694824, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6501919385796545, |
|
"grad_norm": 91.33316289592031, |
|
"learning_rate": 1.6435287805201364e-07, |
|
"logits/chosen": 0.46277904510498047, |
|
"logits/rejected": 0.40250563621520996, |
|
"logps/chosen": -462.3423767089844, |
|
"logps/rejected": -543.9591064453125, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.518293023109436, |
|
"rewards/margins": 0.8617793321609497, |
|
"rewards/rejected": -2.3800723552703857, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 44.87565160003366, |
|
"learning_rate": 1.6238877555286207e-07, |
|
"logits/chosen": 0.35751184821128845, |
|
"logits/rejected": 0.29743391275405884, |
|
"logps/chosen": -469.69561767578125, |
|
"logps/rejected": -606.2034301757812, |
|
"loss": 0.4479, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2520592212677002, |
|
"rewards/margins": 1.2662980556488037, |
|
"rewards/rejected": -2.518357276916504, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6549904030710173, |
|
"grad_norm": 44.32240198316999, |
|
"learning_rate": 1.60430818633031e-07, |
|
"logits/chosen": 0.16691644489765167, |
|
"logits/rejected": 0.14331945776939392, |
|
"logps/chosen": -449.48876953125, |
|
"logps/rejected": -561.8910522460938, |
|
"loss": 0.4539, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.131084680557251, |
|
"rewards/margins": 1.254732370376587, |
|
"rewards/rejected": -2.385816812515259, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 39.01641038215159, |
|
"learning_rate": 1.5847914463541939e-07, |
|
"logits/chosen": 0.3676902651786804, |
|
"logits/rejected": 0.34273606538772583, |
|
"logps/chosen": -374.8681640625, |
|
"logps/rejected": -478.32330322265625, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1049184799194336, |
|
"rewards/margins": 0.8594606518745422, |
|
"rewards/rejected": -1.964379072189331, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6597888675623801, |
|
"grad_norm": 35.89167519955917, |
|
"learning_rate": 1.5653389046220427e-07, |
|
"logits/chosen": 0.3571329414844513, |
|
"logits/rejected": 0.27262359857559204, |
|
"logps/chosen": -399.9897155761719, |
|
"logps/rejected": -535.5849609375, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1511871814727783, |
|
"rewards/margins": 1.0949671268463135, |
|
"rewards/rejected": -2.246154308319092, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 74.88730166916955, |
|
"learning_rate": 1.545951925652375e-07, |
|
"logits/chosen": 0.3250289559364319, |
|
"logits/rejected": 0.39422863721847534, |
|
"logps/chosen": -507.34735107421875, |
|
"logps/rejected": -564.6778564453125, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2604314088821411, |
|
"rewards/margins": 1.2383835315704346, |
|
"rewards/rejected": -2.4988150596618652, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6645873320537428, |
|
"grad_norm": 43.31844151941509, |
|
"learning_rate": 1.5266318693647423e-07, |
|
"logits/chosen": 0.38096925616264343, |
|
"logits/rejected": 0.4018251299858093, |
|
"logps/chosen": -460.6954040527344, |
|
"logps/rejected": -567.9718017578125, |
|
"loss": 0.4693, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2558958530426025, |
|
"rewards/margins": 1.0993343591690063, |
|
"rewards/rejected": -2.3552298545837402, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 72.85191786475721, |
|
"learning_rate": 1.5073800909843353e-07, |
|
"logits/chosen": 0.25220975279808044, |
|
"logits/rejected": 0.3646177649497986, |
|
"logps/chosen": -450.23193359375, |
|
"logps/rejected": -503.99127197265625, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2164779901504517, |
|
"rewards/margins": 1.0934855937957764, |
|
"rewards/rejected": -2.3099634647369385, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6693857965451055, |
|
"grad_norm": 63.40322968247712, |
|
"learning_rate": 1.488197940946922e-07, |
|
"logits/chosen": 0.23376190662384033, |
|
"logits/rejected": 0.22447574138641357, |
|
"logps/chosen": -456.8228454589844, |
|
"logps/rejected": -523.5567626953125, |
|
"loss": 0.4689, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1302400827407837, |
|
"rewards/margins": 1.2681411504745483, |
|
"rewards/rejected": -2.398381233215332, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 66.69199487516, |
|
"learning_rate": 1.4690867648041167e-07, |
|
"logits/chosen": 0.16230645775794983, |
|
"logits/rejected": 0.1882302314043045, |
|
"logps/chosen": -434.5381774902344, |
|
"logps/rejected": -552.4396362304688, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1278281211853027, |
|
"rewards/margins": 1.5061180591583252, |
|
"rewards/rejected": -2.633945941925049, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6741842610364683, |
|
"grad_norm": 46.91530215907862, |
|
"learning_rate": 1.4500479031289987e-07, |
|
"logits/chosen": 0.15237310528755188, |
|
"logits/rejected": 0.1518753319978714, |
|
"logps/chosen": -466.0179138183594, |
|
"logps/rejected": -572.3275146484375, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2266777753829956, |
|
"rewards/margins": 1.2364604473114014, |
|
"rewards/rejected": -2.4631385803222656, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 55.81983630093274, |
|
"learning_rate": 1.4310826914220747e-07, |
|
"logits/chosen": 0.17195823788642883, |
|
"logits/rejected": 0.16844519972801208, |
|
"logps/chosen": -536.1735229492188, |
|
"logps/rejected": -609.2791748046875, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5774872303009033, |
|
"rewards/margins": 0.9089029431343079, |
|
"rewards/rejected": -2.4863903522491455, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6789827255278311, |
|
"grad_norm": 53.80796549341863, |
|
"learning_rate": 1.412192460017597e-07, |
|
"logits/chosen": 0.1955575793981552, |
|
"logits/rejected": 0.12785163521766663, |
|
"logps/chosen": -444.7312927246094, |
|
"logps/rejected": -568.8245849609375, |
|
"loss": 0.5024, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.395450472831726, |
|
"rewards/margins": 1.22873055934906, |
|
"rewards/rejected": -2.624181032180786, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 44.72827252256254, |
|
"learning_rate": 1.3933785339902504e-07, |
|
"logits/chosen": 0.27861329913139343, |
|
"logits/rejected": 0.13766932487487793, |
|
"logps/chosen": -376.69805908203125, |
|
"logps/rejected": -530.3146362304688, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1893621683120728, |
|
"rewards/margins": 1.1048251390457153, |
|
"rewards/rejected": -2.294187307357788, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6837811900191939, |
|
"grad_norm": 38.90141505727746, |
|
"learning_rate": 1.374642233062197e-07, |
|
"logits/chosen": 0.1925538331270218, |
|
"logits/rejected": 0.17995335161685944, |
|
"logps/chosen": -486.0704650878906, |
|
"logps/rejected": -545.6419067382812, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2210705280303955, |
|
"rewards/margins": 1.031243085861206, |
|
"rewards/rejected": -2.2523136138916016, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 38.934538649501114, |
|
"learning_rate": 1.355984871510511e-07, |
|
"logits/chosen": 0.24185729026794434, |
|
"logits/rejected": 0.16981028020381927, |
|
"logps/chosen": -488.8545837402344, |
|
"logps/rejected": -586.666015625, |
|
"loss": 0.4586, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2682818174362183, |
|
"rewards/margins": 0.9824774861335754, |
|
"rewards/rejected": -2.2507593631744385, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6885796545105566, |
|
"grad_norm": 45.29750933331666, |
|
"learning_rate": 1.3374077580749783e-07, |
|
"logits/chosen": 0.29279276728630066, |
|
"logits/rejected": 0.1869848519563675, |
|
"logps/chosen": -351.5196838378906, |
|
"logps/rejected": -475.97308349609375, |
|
"loss": 0.5016, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.038088321685791, |
|
"rewards/margins": 1.0932366847991943, |
|
"rewards/rejected": -2.1313250064849854, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 48.530711516673115, |
|
"learning_rate": 1.3189121958663024e-07, |
|
"logits/chosen": 0.1910950392484665, |
|
"logits/rejected": 0.2789291739463806, |
|
"logps/chosen": -532.8553466796875, |
|
"logps/rejected": -549.0731201171875, |
|
"loss": 0.502, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6616315841674805, |
|
"rewards/margins": 0.5708137154579163, |
|
"rewards/rejected": -2.232445240020752, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6933781190019194, |
|
"grad_norm": 49.658508332103274, |
|
"learning_rate": 1.3004994822746895e-07, |
|
"logits/chosen": 0.08187554031610489, |
|
"logits/rejected": 0.053650178015232086, |
|
"logps/chosen": -428.33087158203125, |
|
"logps/rejected": -530.8117065429688, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.238884687423706, |
|
"rewards/margins": 0.9637983441352844, |
|
"rewards/rejected": -2.2026829719543457, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 49.423150125943955, |
|
"learning_rate": 1.2821709088788434e-07, |
|
"logits/chosen": 0.2585221827030182, |
|
"logits/rejected": 0.17918451130390167, |
|
"logps/chosen": -400.4766845703125, |
|
"logps/rejected": -514.9324951171875, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2878179550170898, |
|
"rewards/margins": 1.140520691871643, |
|
"rewards/rejected": -2.4283385276794434, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6981765834932822, |
|
"grad_norm": 59.81963849634542, |
|
"learning_rate": 1.2639277613553736e-07, |
|
"logits/chosen": 0.39327603578567505, |
|
"logits/rejected": 0.3338584899902344, |
|
"logps/chosen": -380.1231994628906, |
|
"logps/rejected": -466.969482421875, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2669470310211182, |
|
"rewards/margins": 0.8520339727401733, |
|
"rewards/rejected": -2.11898136138916, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 47.91120083091996, |
|
"learning_rate": 1.2457713193885975e-07, |
|
"logits/chosen": 0.23712964355945587, |
|
"logits/rejected": 0.10274624824523926, |
|
"logps/chosen": -363.88116455078125, |
|
"logps/rejected": -501.58477783203125, |
|
"loss": 0.4744, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3259981870651245, |
|
"rewards/margins": 1.0507750511169434, |
|
"rewards/rejected": -2.3767733573913574, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7029750479846449, |
|
"grad_norm": 56.163566511516365, |
|
"learning_rate": 1.2277028565807838e-07, |
|
"logits/chosen": 0.2799941599369049, |
|
"logits/rejected": 0.2706086039543152, |
|
"logps/chosen": -432.5113220214844, |
|
"logps/rejected": -512.887451171875, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1611391305923462, |
|
"rewards/margins": 0.9016637802124023, |
|
"rewards/rejected": -2.062802791595459, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 62.43277712323061, |
|
"learning_rate": 1.209723640362815e-07, |
|
"logits/chosen": 0.16554930806159973, |
|
"logits/rejected": 0.1359563171863556, |
|
"logps/chosen": -462.38568115234375, |
|
"logps/rejected": -574.197998046875, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3521636724472046, |
|
"rewards/margins": 1.263946294784546, |
|
"rewards/rejected": -2.616110324859619, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7077735124760077, |
|
"grad_norm": 33.88826120125574, |
|
"learning_rate": 1.191834931905277e-07, |
|
"logits/chosen": 0.20565947890281677, |
|
"logits/rejected": 0.13917942345142365, |
|
"logps/chosen": -520.4049072265625, |
|
"logps/rejected": -632.361328125, |
|
"loss": 0.4599, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5277377367019653, |
|
"rewards/margins": 1.1449778079986572, |
|
"rewards/rejected": -2.672715425491333, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 45.009587506259074, |
|
"learning_rate": 1.1740379860299988e-07, |
|
"logits/chosen": 0.2947765588760376, |
|
"logits/rejected": 0.23910513520240784, |
|
"logps/chosen": -472.980712890625, |
|
"logps/rejected": -579.03125, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3068562746047974, |
|
"rewards/margins": 0.9038209915161133, |
|
"rewards/rejected": -2.210677146911621, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7125719769673704, |
|
"grad_norm": 47.44511342924861, |
|
"learning_rate": 1.1563340511220254e-07, |
|
"logits/chosen": 0.2019500434398651, |
|
"logits/rejected": 0.2147335559129715, |
|
"logps/chosen": -510.0350646972656, |
|
"logps/rejected": -596.2499389648438, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.336971402168274, |
|
"rewards/margins": 1.1666083335876465, |
|
"rewards/rejected": -2.503579616546631, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 42.57941151152834, |
|
"learning_rate": 1.1387243690420556e-07, |
|
"logits/chosen": 0.23384490609169006, |
|
"logits/rejected": 0.20733702182769775, |
|
"logps/chosen": -481.803955078125, |
|
"logps/rejected": -632.8770751953125, |
|
"loss": 0.4655, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2207635641098022, |
|
"rewards/margins": 1.6195507049560547, |
|
"rewards/rejected": -2.8403146266937256, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7173704414587332, |
|
"grad_norm": 64.95455682456064, |
|
"learning_rate": 1.1212101750393235e-07, |
|
"logits/chosen": 0.3023291528224945, |
|
"logits/rejected": 0.30834710597991943, |
|
"logps/chosen": -450.0244140625, |
|
"logps/rejected": -551.0206298828125, |
|
"loss": 0.4357, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.409967303276062, |
|
"rewards/margins": 1.2566007375717163, |
|
"rewards/rejected": -2.666567802429199, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 46.170115289110555, |
|
"learning_rate": 1.1037926976649562e-07, |
|
"logits/chosen": 0.22152157127857208, |
|
"logits/rejected": 0.16806095838546753, |
|
"logps/chosen": -476.97320556640625, |
|
"logps/rejected": -616.3040161132812, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4931066036224365, |
|
"rewards/margins": 1.1700246334075928, |
|
"rewards/rejected": -2.6631312370300293, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.722168905950096, |
|
"grad_norm": 57.89732258915135, |
|
"learning_rate": 1.0864731586857936e-07, |
|
"logits/chosen": 0.3043791949748993, |
|
"logits/rejected": 0.36210864782333374, |
|
"logps/chosen": -495.12164306640625, |
|
"logps/rejected": -574.9592895507812, |
|
"loss": 0.46, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4336802959442139, |
|
"rewards/margins": 1.215421199798584, |
|
"rewards/rejected": -2.649101495742798, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 49.111012867250984, |
|
"learning_rate": 1.0692527729986839e-07, |
|
"logits/chosen": 0.11315940320491791, |
|
"logits/rejected": 0.11848314106464386, |
|
"logps/chosen": -460.1648864746094, |
|
"logps/rejected": -543.1566162109375, |
|
"loss": 0.4285, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2527508735656738, |
|
"rewards/margins": 1.067068338394165, |
|
"rewards/rejected": -2.3198189735412598, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7269673704414588, |
|
"grad_norm": 57.35606077595889, |
|
"learning_rate": 1.0521327485452692e-07, |
|
"logits/chosen": 0.347392201423645, |
|
"logits/rejected": 0.3210673928260803, |
|
"logps/chosen": -450.1835021972656, |
|
"logps/rejected": -524.6959228515625, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2702230215072632, |
|
"rewards/margins": 1.0865848064422607, |
|
"rewards/rejected": -2.3568077087402344, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 56.3165468310005, |
|
"learning_rate": 1.0351142862272468e-07, |
|
"logits/chosen": 0.209666445851326, |
|
"logits/rejected": 0.20282307267189026, |
|
"logps/chosen": -423.931396484375, |
|
"logps/rejected": -572.3831176757812, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4169180393218994, |
|
"rewards/margins": 1.6322886943817139, |
|
"rewards/rejected": -3.049206256866455, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7317658349328215, |
|
"grad_norm": 47.57484794011745, |
|
"learning_rate": 1.0181985798221343e-07, |
|
"logits/chosen": 0.3013080060482025, |
|
"logits/rejected": 0.2218068540096283, |
|
"logps/chosen": -470.23480224609375, |
|
"logps/rejected": -589.8034057617188, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3861111402511597, |
|
"rewards/margins": 1.0853662490844727, |
|
"rewards/rejected": -2.4714770317077637, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 48.06894623911944, |
|
"learning_rate": 1.0013868158995329e-07, |
|
"logits/chosen": 0.3860154449939728, |
|
"logits/rejected": 0.3630084991455078, |
|
"logps/chosen": -442.2177734375, |
|
"logps/rejected": -527.9283447265625, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2381409406661987, |
|
"rewards/margins": 1.22637939453125, |
|
"rewards/rejected": -2.4645204544067383, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7365642994241842, |
|
"grad_norm": 51.169486765513234, |
|
"learning_rate": 9.84680173737887e-08, |
|
"logits/chosen": 0.2769750952720642, |
|
"logits/rejected": 0.2646028995513916, |
|
"logps/chosen": -475.0011291503906, |
|
"logps/rejected": -548.9890747070312, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4719974994659424, |
|
"rewards/margins": 1.0918984413146973, |
|
"rewards/rejected": -2.5638959407806396, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 58.59714970661162, |
|
"learning_rate": 9.680798252417713e-08, |
|
"logits/chosen": 0.2717548906803131, |
|
"logits/rejected": 0.2305576503276825, |
|
"logps/chosen": -379.20928955078125, |
|
"logps/rejected": -515.2022094726562, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.101180076599121, |
|
"rewards/margins": 0.944907009601593, |
|
"rewards/rejected": -2.0460872650146484, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.741362763915547, |
|
"grad_norm": 61.65719550385752, |
|
"learning_rate": 9.515869348596808e-08, |
|
"logits/chosen": 0.11913663148880005, |
|
"logits/rejected": 0.09342759847640991, |
|
"logps/chosen": -497.87109375, |
|
"logps/rejected": -598.5771484375, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4787302017211914, |
|
"rewards/margins": 1.3738664388656616, |
|
"rewards/rejected": -2.8525967597961426, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 41.218790972775324, |
|
"learning_rate": 9.352026595023493e-08, |
|
"logits/chosen": 0.10429096221923828, |
|
"logits/rejected": 0.1541799008846283, |
|
"logps/chosen": -517.5309448242188, |
|
"logps/rejected": -543.2216796875, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4588356018066406, |
|
"rewards/margins": 0.6233514547348022, |
|
"rewards/rejected": -2.0821871757507324, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7461612284069098, |
|
"grad_norm": 64.90615052640716, |
|
"learning_rate": 9.189281484616004e-08, |
|
"logits/chosen": 0.22654108703136444, |
|
"logits/rejected": 0.1651889979839325, |
|
"logps/chosen": -402.1455383300781, |
|
"logps/rejected": -556.6002807617188, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.490733027458191, |
|
"rewards/margins": 0.9633838534355164, |
|
"rewards/rejected": -2.4541170597076416, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 55.8925921234848, |
|
"learning_rate": 9.027645433297249e-08, |
|
"logits/chosen": 0.11542461812496185, |
|
"logits/rejected": 0.17937800288200378, |
|
"logps/chosen": -566.5679321289062, |
|
"logps/rejected": -637.2242431640625, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.827355146408081, |
|
"rewards/margins": 1.0894877910614014, |
|
"rewards/rejected": -2.9168429374694824, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7509596928982726, |
|
"grad_norm": 54.00400306277147, |
|
"learning_rate": 8.867129779194066e-08, |
|
"logits/chosen": 0.16981378197669983, |
|
"logits/rejected": 0.16173888742923737, |
|
"logps/chosen": -371.0014953613281, |
|
"logps/rejected": -522.0635986328125, |
|
"loss": 0.4864, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9543946981430054, |
|
"rewards/margins": 1.564900517463684, |
|
"rewards/rejected": -2.5192952156066895, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 54.599194100775584, |
|
"learning_rate": 8.707745781841866e-08, |
|
"logits/chosen": 0.14470471441745758, |
|
"logits/rejected": 0.1468985676765442, |
|
"logps/chosen": -400.3184509277344, |
|
"logps/rejected": -513.9260864257812, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.221286416053772, |
|
"rewards/margins": 1.1234190464019775, |
|
"rewards/rejected": -2.344705581665039, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7557581573896354, |
|
"grad_norm": 34.75601738944086, |
|
"learning_rate": 8.549504621394831e-08, |
|
"logits/chosen": 0.15695925056934357, |
|
"logits/rejected": 0.14711011946201324, |
|
"logps/chosen": -413.1109313964844, |
|
"logps/rejected": -539.1842041015625, |
|
"loss": 0.427, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.069771409034729, |
|
"rewards/margins": 1.3148638010025024, |
|
"rewards/rejected": -2.3846354484558105, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 53.105874033112755, |
|
"learning_rate": 8.392417397841703e-08, |
|
"logits/chosen": 0.26591944694519043, |
|
"logits/rejected": 0.26246827840805054, |
|
"logps/chosen": -447.65557861328125, |
|
"logps/rejected": -544.1351318359375, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2605499029159546, |
|
"rewards/margins": 0.8398078083992004, |
|
"rewards/rejected": -2.1003577709198, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.760556621880998, |
|
"grad_norm": 47.78743297051488, |
|
"learning_rate": 8.236495130227083e-08, |
|
"logits/chosen": 0.24016205966472626, |
|
"logits/rejected": 0.31962883472442627, |
|
"logps/chosen": -456.42724609375, |
|
"logps/rejected": -570.5203247070312, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.1726109981536865, |
|
"rewards/margins": 1.4965537786483765, |
|
"rewards/rejected": -2.6691648960113525, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 47.980852069933555, |
|
"learning_rate": 8.081748755878612e-08, |
|
"logits/chosen": 0.2495994120836258, |
|
"logits/rejected": 0.2759885787963867, |
|
"logps/chosen": -469.076171875, |
|
"logps/rejected": -524.9566040039062, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3896160125732422, |
|
"rewards/margins": 0.9030616879463196, |
|
"rewards/rejected": -2.292677879333496, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7653550863723608, |
|
"grad_norm": 44.55472031419905, |
|
"learning_rate": 7.928189129639632e-08, |
|
"logits/chosen": 0.2707396149635315, |
|
"logits/rejected": 0.21236738562583923, |
|
"logps/chosen": -433.556884765625, |
|
"logps/rejected": -539.517578125, |
|
"loss": 0.4522, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2524731159210205, |
|
"rewards/margins": 0.9954684972763062, |
|
"rewards/rejected": -2.247941493988037, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 77.95104725852434, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": 0.18351641297340393, |
|
"logits/rejected": 0.18833932280540466, |
|
"logps/chosen": -446.3948669433594, |
|
"logps/rejected": -545.6039428710938, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3583651781082153, |
|
"rewards/margins": 0.7764785289764404, |
|
"rewards/rejected": -2.1348438262939453, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7701535508637236, |
|
"grad_norm": 60.91165565345474, |
|
"learning_rate": 7.624673123879682e-08, |
|
"logits/chosen": 0.03742004930973053, |
|
"logits/rejected": 0.07750044018030167, |
|
"logps/chosen": -426.2344665527344, |
|
"logps/rejected": -510.4020080566406, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3010095357894897, |
|
"rewards/margins": 0.9792767763137817, |
|
"rewards/rejected": -2.2802863121032715, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 43.5674600823645, |
|
"learning_rate": 7.474738034800663e-08, |
|
"logits/chosen": 0.13723036646842957, |
|
"logits/rejected": 0.04767593368887901, |
|
"logps/chosen": -369.3411560058594, |
|
"logps/rejected": -486.69439697265625, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0865916013717651, |
|
"rewards/margins": 1.3701813220977783, |
|
"rewards/rejected": -2.456772804260254, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7749520153550864, |
|
"grad_norm": 65.92599613926842, |
|
"learning_rate": 7.326032273221606e-08, |
|
"logits/chosen": 0.23154711723327637, |
|
"logits/rejected": 0.1886422336101532, |
|
"logps/chosen": -478.3605041503906, |
|
"logps/rejected": -570.8961791992188, |
|
"loss": 0.4831, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2519872188568115, |
|
"rewards/margins": 1.1383593082427979, |
|
"rewards/rejected": -2.3903465270996094, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 45.83026398609644, |
|
"learning_rate": 7.178566270260872e-08, |
|
"logits/chosen": 0.31105470657348633, |
|
"logits/rejected": 0.22554393112659454, |
|
"logps/chosen": -447.1808166503906, |
|
"logps/rejected": -576.032958984375, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2397021055221558, |
|
"rewards/margins": 0.951197624206543, |
|
"rewards/rejected": -2.190899610519409, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7797504798464492, |
|
"grad_norm": 50.507648401741996, |
|
"learning_rate": 7.032350370072709e-08, |
|
"logits/chosen": 0.19485214352607727, |
|
"logits/rejected": 0.18930187821388245, |
|
"logps/chosen": -456.4967346191406, |
|
"logps/rejected": -569.6973266601562, |
|
"loss": 0.4481, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2981529235839844, |
|
"rewards/margins": 1.2217051982879639, |
|
"rewards/rejected": -2.5198581218719482, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 40.68087545077646, |
|
"learning_rate": 6.887394829121596e-08, |
|
"logits/chosen": 0.2527236044406891, |
|
"logits/rejected": 0.20923948287963867, |
|
"logps/chosen": -455.47454833984375, |
|
"logps/rejected": -632.2030029296875, |
|
"loss": 0.4543, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2574024200439453, |
|
"rewards/margins": 1.9225542545318604, |
|
"rewards/rejected": -3.1799566745758057, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7845489443378119, |
|
"grad_norm": 37.54734198368332, |
|
"learning_rate": 6.743709815462833e-08, |
|
"logits/chosen": 0.10011599957942963, |
|
"logits/rejected": 0.1131478101015091, |
|
"logps/chosen": -462.3744201660156, |
|
"logps/rejected": -519.977783203125, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3748726844787598, |
|
"rewards/margins": 0.9147384762763977, |
|
"rewards/rejected": -2.2896108627319336, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 51.364098272276145, |
|
"learning_rate": 6.601305408029287e-08, |
|
"logits/chosen": 0.41624197363853455, |
|
"logits/rejected": 0.4190692901611328, |
|
"logps/chosen": -458.64141845703125, |
|
"logps/rejected": -567.7257080078125, |
|
"loss": 0.4664, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5802090167999268, |
|
"rewards/margins": 1.1085751056671143, |
|
"rewards/rejected": -2.688784122467041, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7893474088291746, |
|
"grad_norm": 48.68347118403701, |
|
"learning_rate": 6.460191595924366e-08, |
|
"logits/chosen": 0.23670163750648499, |
|
"logits/rejected": 0.21305176615715027, |
|
"logps/chosen": -472.20654296875, |
|
"logps/rejected": -575.2575073242188, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4002991914749146, |
|
"rewards/margins": 1.0437225103378296, |
|
"rewards/rejected": -2.444021701812744, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 56.29885219772071, |
|
"learning_rate": 6.320378277721342e-08, |
|
"logits/chosen": 0.3236589729785919, |
|
"logits/rejected": 0.2942892014980316, |
|
"logps/chosen": -485.74609375, |
|
"logps/rejected": -548.2264404296875, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.748716950416565, |
|
"rewards/margins": 0.7379652261734009, |
|
"rewards/rejected": -2.4866819381713867, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7941458733205374, |
|
"grad_norm": 47.30513911873481, |
|
"learning_rate": 6.181875260769032e-08, |
|
"logits/chosen": 0.21434447169303894, |
|
"logits/rejected": 0.29501864314079285, |
|
"logps/chosen": -473.3141174316406, |
|
"logps/rejected": -513.0935668945312, |
|
"loss": 0.4824, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.042864441871643, |
|
"rewards/margins": 1.146907925605774, |
|
"rewards/rejected": -2.189772129058838, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 43.836702306292864, |
|
"learning_rate": 6.044692260503797e-08, |
|
"logits/chosen": 0.2978779673576355, |
|
"logits/rejected": 0.2920413911342621, |
|
"logps/chosen": -517.2478637695312, |
|
"logps/rejected": -626.5977172851562, |
|
"loss": 0.4366, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4270732402801514, |
|
"rewards/margins": 1.378542184829712, |
|
"rewards/rejected": -2.805615186691284, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7989443378119002, |
|
"grad_norm": 49.783712311366116, |
|
"learning_rate": 5.9088388997680984e-08, |
|
"logits/chosen": 0.15503938496112823, |
|
"logits/rejected": 0.19135913252830505, |
|
"logps/chosen": -540.2918090820312, |
|
"logps/rejected": -589.90185546875, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3969385623931885, |
|
"rewards/margins": 1.2311842441558838, |
|
"rewards/rejected": -2.6281230449676514, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 52.57420620553349, |
|
"learning_rate": 5.774324708135439e-08, |
|
"logits/chosen": 0.2751420736312866, |
|
"logits/rejected": 0.28755050897598267, |
|
"logps/chosen": -397.3004150390625, |
|
"logps/rejected": -484.384521484375, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2007606029510498, |
|
"rewards/margins": 1.0477242469787598, |
|
"rewards/rejected": -2.2484848499298096, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.803742802303263, |
|
"grad_norm": 40.1972577695682, |
|
"learning_rate": 5.641159121241953e-08, |
|
"logits/chosen": 0.32921257615089417, |
|
"logits/rejected": 0.24844393134117126, |
|
"logps/chosen": -387.3114013671875, |
|
"logps/rejected": -536.5883178710938, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1569197177886963, |
|
"rewards/margins": 1.0728175640106201, |
|
"rewards/rejected": -2.2297370433807373, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 41.69598167340838, |
|
"learning_rate": 5.5093514801245106e-08, |
|
"logits/chosen": 0.3076106905937195, |
|
"logits/rejected": 0.2400285303592682, |
|
"logps/chosen": -443.32000732421875, |
|
"logps/rejected": -577.48388671875, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3214082717895508, |
|
"rewards/margins": 0.9961814880371094, |
|
"rewards/rejected": -2.317589521408081, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8085412667946257, |
|
"grad_norm": 38.422027437084395, |
|
"learning_rate": 5.378911030565453e-08, |
|
"logits/chosen": 0.3213488757610321, |
|
"logits/rejected": 0.26428383588790894, |
|
"logps/chosen": -506.6258239746094, |
|
"logps/rejected": -639.1539916992188, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5233229398727417, |
|
"rewards/margins": 1.1094070672988892, |
|
"rewards/rejected": -2.6327297687530518, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 44.33236145563771, |
|
"learning_rate": 5.249846922444101e-08, |
|
"logits/chosen": 0.3445442318916321, |
|
"logits/rejected": 0.2675052285194397, |
|
"logps/chosen": -402.5491638183594, |
|
"logps/rejected": -543.9547729492188, |
|
"loss": 0.462, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3842418193817139, |
|
"rewards/margins": 1.5147311687469482, |
|
"rewards/rejected": -2.898972988128662, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8133397312859885, |
|
"grad_norm": 58.63982281658398, |
|
"learning_rate": 5.122168209094865e-08, |
|
"logits/chosen": 0.38930395245552063, |
|
"logits/rejected": 0.36614999175071716, |
|
"logps/chosen": -429.3528747558594, |
|
"logps/rejected": -487.43408203125, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5928863286972046, |
|
"rewards/margins": 0.6625052094459534, |
|
"rewards/rejected": -2.2553915977478027, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 41.613843164350314, |
|
"learning_rate": 4.995883846672222e-08, |
|
"logits/chosen": 0.14363157749176025, |
|
"logits/rejected": 0.2796134054660797, |
|
"logps/chosen": -592.6302490234375, |
|
"logps/rejected": -587.6798706054688, |
|
"loss": 0.4708, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5286242961883545, |
|
"rewards/margins": 0.759235143661499, |
|
"rewards/rejected": -2.2878596782684326, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8181381957773513, |
|
"grad_norm": 49.604272632089646, |
|
"learning_rate": 4.871002693522486e-08, |
|
"logits/chosen": 0.2720317244529724, |
|
"logits/rejected": 0.25077277421951294, |
|
"logps/chosen": -463.8601989746094, |
|
"logps/rejected": -517.4568481445312, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3305190801620483, |
|
"rewards/margins": 0.8297308683395386, |
|
"rewards/rejected": -2.160250186920166, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 40.03296260410171, |
|
"learning_rate": 4.7475335095623956e-08, |
|
"logits/chosen": 0.34070852398872375, |
|
"logits/rejected": 0.2651143968105316, |
|
"logps/chosen": -466.72686767578125, |
|
"logps/rejected": -563.7396240234375, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4763226509094238, |
|
"rewards/margins": 1.2001755237579346, |
|
"rewards/rejected": -2.6764981746673584, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.822936660268714, |
|
"grad_norm": 80.18666057349425, |
|
"learning_rate": 4.6254849556646714e-08, |
|
"logits/chosen": 0.22728531062602997, |
|
"logits/rejected": 0.229964017868042, |
|
"logps/chosen": -496.54852294921875, |
|
"logps/rejected": -587.7725830078125, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4686188697814941, |
|
"rewards/margins": 1.2523690462112427, |
|
"rewards/rejected": -2.7209877967834473, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 52.794660060456266, |
|
"learning_rate": 4.504865593050483e-08, |
|
"logits/chosen": 0.27111780643463135, |
|
"logits/rejected": 0.2475912868976593, |
|
"logps/chosen": -477.52685546875, |
|
"logps/rejected": -583.2651977539062, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5320662260055542, |
|
"rewards/margins": 0.9184083938598633, |
|
"rewards/rejected": -2.450474500656128, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8277351247600768, |
|
"grad_norm": 63.34169787369902, |
|
"learning_rate": 4.385683882688895e-08, |
|
"logits/chosen": 0.15275821089744568, |
|
"logits/rejected": 0.20857541263103485, |
|
"logps/chosen": -512.3521728515625, |
|
"logps/rejected": -510.0169982910156, |
|
"loss": 0.5622, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6563478708267212, |
|
"rewards/margins": 0.5664867162704468, |
|
"rewards/rejected": -2.222834587097168, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 59.152646437947276, |
|
"learning_rate": 4.2679481847033985e-08, |
|
"logits/chosen": 0.3345550298690796, |
|
"logits/rejected": 0.3184022009372711, |
|
"logps/chosen": -458.943115234375, |
|
"logps/rejected": -580.5496826171875, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.367032527923584, |
|
"rewards/margins": 1.1215214729309082, |
|
"rewards/rejected": -2.488554000854492, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8325335892514395, |
|
"grad_norm": 41.522970046635024, |
|
"learning_rate": 4.151666757785435e-08, |
|
"logits/chosen": 0.25053077936172485, |
|
"logits/rejected": 0.21285638213157654, |
|
"logps/chosen": -415.4532165527344, |
|
"logps/rejected": -565.3435668945312, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0187116861343384, |
|
"rewards/margins": 1.5517404079437256, |
|
"rewards/rejected": -2.5704522132873535, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 50.23472101986963, |
|
"learning_rate": 4.036847758615136e-08, |
|
"logits/chosen": 0.23763033747673035, |
|
"logits/rejected": 0.23918600380420685, |
|
"logps/chosen": -477.56292724609375, |
|
"logps/rejected": -576.02490234375, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8143908977508545, |
|
"rewards/margins": 0.8722183108329773, |
|
"rewards/rejected": -2.6866097450256348, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8373320537428023, |
|
"grad_norm": 45.74910365878837, |
|
"learning_rate": 3.923499241289113e-08, |
|
"logits/chosen": 0.160926952958107, |
|
"logits/rejected": 0.19261090457439423, |
|
"logps/chosen": -533.9952392578125, |
|
"logps/rejected": -552.327880859375, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6078903675079346, |
|
"rewards/margins": 0.8734768033027649, |
|
"rewards/rejected": -2.4813671112060547, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 47.87346283993082, |
|
"learning_rate": 3.811629156755541e-08, |
|
"logits/chosen": 0.1999920904636383, |
|
"logits/rejected": 0.14960861206054688, |
|
"logps/chosen": -488.1973571777344, |
|
"logps/rejected": -596.3568115234375, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2728978395462036, |
|
"rewards/margins": 1.2190895080566406, |
|
"rewards/rejected": -2.4919872283935547, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8421305182341651, |
|
"grad_norm": 41.03513231238894, |
|
"learning_rate": 3.701245352256391e-08, |
|
"logits/chosen": 0.2294701635837555, |
|
"logits/rejected": 0.25733810663223267, |
|
"logps/chosen": -478.146484375, |
|
"logps/rejected": -508.96856689453125, |
|
"loss": 0.4831, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1556288003921509, |
|
"rewards/margins": 0.6834120750427246, |
|
"rewards/rejected": -1.839040756225586, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 40.5674011892533, |
|
"learning_rate": 3.592355570776984e-08, |
|
"logits/chosen": 0.1878044307231903, |
|
"logits/rejected": 0.14977982640266418, |
|
"logps/chosen": -398.69970703125, |
|
"logps/rejected": -515.438232421875, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.043157935142517, |
|
"rewards/margins": 1.0835729837417603, |
|
"rewards/rejected": -2.1267309188842773, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8469289827255279, |
|
"grad_norm": 42.604163064101506, |
|
"learning_rate": 3.484967450502904e-08, |
|
"logits/chosen": 0.3040066361427307, |
|
"logits/rejected": 0.23765726387500763, |
|
"logps/chosen": -383.44561767578125, |
|
"logps/rejected": -547.45703125, |
|
"loss": 0.4804, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1220704317092896, |
|
"rewards/margins": 1.1863175630569458, |
|
"rewards/rejected": -2.3083879947662354, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 59.806153925908724, |
|
"learning_rate": 3.3790885242841296e-08, |
|
"logits/chosen": 0.13462401926517487, |
|
"logits/rejected": 0.1024751216173172, |
|
"logps/chosen": -459.56915283203125, |
|
"logps/rejected": -603.9002685546875, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4029033184051514, |
|
"rewards/margins": 1.477888584136963, |
|
"rewards/rejected": -2.8807921409606934, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8517274472168906, |
|
"grad_norm": 58.73779989635176, |
|
"learning_rate": 3.274726219106677e-08, |
|
"logits/chosen": 0.09248481690883636, |
|
"logits/rejected": 0.07832972705364227, |
|
"logps/chosen": -512.8543701171875, |
|
"logps/rejected": -601.7901611328125, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3987281322479248, |
|
"rewards/margins": 1.0073614120483398, |
|
"rewards/rejected": -2.4060897827148438, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 47.36729249212975, |
|
"learning_rate": 3.171887855571642e-08, |
|
"logits/chosen": 0.23542580008506775, |
|
"logits/rejected": 0.21016255021095276, |
|
"logps/chosen": -400.10943603515625, |
|
"logps/rejected": -472.996337890625, |
|
"loss": 0.4859, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2242614030838013, |
|
"rewards/margins": 0.7899783253669739, |
|
"rewards/rejected": -2.014239549636841, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8565259117082533, |
|
"grad_norm": 51.17436258863895, |
|
"learning_rate": 3.070580647381643e-08, |
|
"logits/chosen": 0.2268662452697754, |
|
"logits/rejected": 0.17909319698810577, |
|
"logps/chosen": -437.57421875, |
|
"logps/rejected": -550.803466796875, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3558123111724854, |
|
"rewards/margins": 1.194657802581787, |
|
"rewards/rejected": -2.5504701137542725, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 48.13040334135157, |
|
"learning_rate": 2.9708117008348576e-08, |
|
"logits/chosen": 0.31328874826431274, |
|
"logits/rejected": 0.3502875864505768, |
|
"logps/chosen": -517.9609985351562, |
|
"logps/rejected": -542.474365234375, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4639475345611572, |
|
"rewards/margins": 0.7750081419944763, |
|
"rewards/rejected": -2.2389559745788574, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8613243761996161, |
|
"grad_norm": 53.220249607806664, |
|
"learning_rate": 2.8725880143264992e-08, |
|
"logits/chosen": 0.21370474994182587, |
|
"logits/rejected": 0.17975714802742004, |
|
"logps/chosen": -469.7068786621094, |
|
"logps/rejected": -589.1580200195312, |
|
"loss": 0.5243, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5573484897613525, |
|
"rewards/margins": 0.7886762022972107, |
|
"rewards/rejected": -2.346024513244629, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 67.90288894206734, |
|
"learning_rate": 2.775916477857948e-08, |
|
"logits/chosen": 0.25214099884033203, |
|
"logits/rejected": 0.19312720000743866, |
|
"logps/chosen": -414.92059326171875, |
|
"logps/rejected": -506.6708984375, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.330328345298767, |
|
"rewards/margins": 0.9338465929031372, |
|
"rewards/rejected": -2.2641749382019043, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8661228406909789, |
|
"grad_norm": 59.11695302836589, |
|
"learning_rate": 2.680803872553408e-08, |
|
"logits/chosen": 0.2528062164783478, |
|
"logits/rejected": 0.17121002078056335, |
|
"logps/chosen": -428.0210876464844, |
|
"logps/rejected": -563.393310546875, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2556774616241455, |
|
"rewards/margins": 1.5917272567749023, |
|
"rewards/rejected": -2.8474044799804688, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 59.52967993062111, |
|
"learning_rate": 2.5872568701842706e-08, |
|
"logits/chosen": 0.32945194840431213, |
|
"logits/rejected": 0.2652639150619507, |
|
"logps/chosen": -392.31329345703125, |
|
"logps/rejected": -495.3326721191406, |
|
"loss": 0.539, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3325417041778564, |
|
"rewards/margins": 0.8470155000686646, |
|
"rewards/rejected": -2.1795573234558105, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8709213051823417, |
|
"grad_norm": 53.81677624528546, |
|
"learning_rate": 2.495282032701096e-08, |
|
"logits/chosen": 0.15500156581401825, |
|
"logits/rejected": 0.2495473325252533, |
|
"logps/chosen": -334.62774658203125, |
|
"logps/rejected": -434.7308044433594, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1157186031341553, |
|
"rewards/margins": 1.1813395023345947, |
|
"rewards/rejected": -2.29705810546875, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 70.3049018186209, |
|
"learning_rate": 2.4048858117733133e-08, |
|
"logits/chosen": 0.16910839080810547, |
|
"logits/rejected": 0.169979065656662, |
|
"logps/chosen": -436.8203125, |
|
"logps/rejected": -540.2689819335938, |
|
"loss": 0.4643, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1331539154052734, |
|
"rewards/margins": 1.6183888912200928, |
|
"rewards/rejected": -2.751542568206787, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8757197696737045, |
|
"grad_norm": 49.87789467243074, |
|
"learning_rate": 2.3160745483366938e-08, |
|
"logits/chosen": 0.23682577908039093, |
|
"logits/rejected": 0.1723048985004425, |
|
"logps/chosen": -431.7490234375, |
|
"logps/rejected": -562.2601318359375, |
|
"loss": 0.4639, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3692805767059326, |
|
"rewards/margins": 1.0158613920211792, |
|
"rewards/rejected": -2.3851418495178223, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 47.658615941206975, |
|
"learning_rate": 2.2288544721485197e-08, |
|
"logits/chosen": 0.14381949603557587, |
|
"logits/rejected": 0.03533410280942917, |
|
"logps/chosen": -387.8703918457031, |
|
"logps/rejected": -524.4107666015625, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0450143814086914, |
|
"rewards/margins": 1.2733433246612549, |
|
"rewards/rejected": -2.3183577060699463, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8805182341650671, |
|
"grad_norm": 45.82488725036134, |
|
"learning_rate": 2.1432317013506117e-08, |
|
"logits/chosen": 0.10933347791433334, |
|
"logits/rejected": 0.12345802783966064, |
|
"logps/chosen": -458.113037109375, |
|
"logps/rejected": -490.5302734375, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4664695262908936, |
|
"rewards/margins": 0.8134799003601074, |
|
"rewards/rejected": -2.27994966506958, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 53.31879445002021, |
|
"learning_rate": 2.0592122420401704e-08, |
|
"logits/chosen": 0.22227077186107635, |
|
"logits/rejected": 0.24705934524536133, |
|
"logps/chosen": -430.19537353515625, |
|
"logps/rejected": -503.33050537109375, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5509039163589478, |
|
"rewards/margins": 0.648668646812439, |
|
"rewards/rejected": -2.199572801589966, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8853166986564299, |
|
"grad_norm": 42.381048234129516, |
|
"learning_rate": 1.976801987848459e-08, |
|
"logits/chosen": 0.2069139927625656, |
|
"logits/rejected": 0.16672655940055847, |
|
"logps/chosen": -472.5472106933594, |
|
"logps/rejected": -602.9320068359375, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4228280782699585, |
|
"rewards/margins": 1.2275440692901611, |
|
"rewards/rejected": -2.65037202835083, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 53.40883833426912, |
|
"learning_rate": 1.8960067195273987e-08, |
|
"logits/chosen": 0.22911398112773895, |
|
"logits/rejected": 0.21664564311504364, |
|
"logps/chosen": -400.25030517578125, |
|
"logps/rejected": -505.628173828125, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1031681299209595, |
|
"rewards/margins": 1.1834853887557983, |
|
"rewards/rejected": -2.286653757095337, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8901151631477927, |
|
"grad_norm": 41.251861300500764, |
|
"learning_rate": 1.816832104544072e-08, |
|
"logits/chosen": 0.30456072092056274, |
|
"logits/rejected": 0.29536372423171997, |
|
"logps/chosen": -486.6631774902344, |
|
"logps/rejected": -542.3421630859375, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6421973705291748, |
|
"rewards/margins": 0.8107506632804871, |
|
"rewards/rejected": -2.4529478549957275, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 39.96769073144664, |
|
"learning_rate": 1.7392836966831553e-08, |
|
"logits/chosen": 0.20969875156879425, |
|
"logits/rejected": 0.1758739948272705, |
|
"logps/chosen": -437.6871643066406, |
|
"logps/rejected": -546.804443359375, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2321717739105225, |
|
"rewards/margins": 1.4391019344329834, |
|
"rewards/rejected": -2.671273708343506, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8949136276391555, |
|
"grad_norm": 49.70645527143697, |
|
"learning_rate": 1.663366935657373e-08, |
|
"logits/chosen": 0.2884444296360016, |
|
"logits/rejected": 0.3402741551399231, |
|
"logps/chosen": -414.3851623535156, |
|
"logps/rejected": -516.3787841796875, |
|
"loss": 0.5216, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3819457292556763, |
|
"rewards/margins": 0.948479950428009, |
|
"rewards/rejected": -2.33042573928833, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 77.94886524477812, |
|
"learning_rate": 1.5890871467258898e-08, |
|
"logits/chosen": 0.19290375709533691, |
|
"logits/rejected": 0.21824567019939423, |
|
"logps/chosen": -533.5081787109375, |
|
"logps/rejected": -581.3971557617188, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4534004926681519, |
|
"rewards/margins": 0.8920512199401855, |
|
"rewards/rejected": -2.345451831817627, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8997120921305183, |
|
"grad_norm": 41.01203397728858, |
|
"learning_rate": 1.5164495403207967e-08, |
|
"logits/chosen": 0.1695217341184616, |
|
"logits/rejected": 0.035564176738262177, |
|
"logps/chosen": -487.5433654785156, |
|
"logps/rejected": -645.5303344726562, |
|
"loss": 0.4676, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4484670162200928, |
|
"rewards/margins": 1.3183784484863281, |
|
"rewards/rejected": -2.766845464706421, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 40.21247020861889, |
|
"learning_rate": 1.4454592116815962e-08, |
|
"logits/chosen": 0.2717417776584625, |
|
"logits/rejected": 0.2026948183774948, |
|
"logps/chosen": -468.3108825683594, |
|
"logps/rejected": -587.5950927734375, |
|
"loss": 0.4599, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3242508172988892, |
|
"rewards/margins": 1.0534656047821045, |
|
"rewards/rejected": -2.3777167797088623, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.904510556621881, |
|
"grad_norm": 36.5293014274636, |
|
"learning_rate": 1.3761211404977934e-08, |
|
"logits/chosen": 0.21695688366889954, |
|
"logits/rejected": 0.18997912108898163, |
|
"logps/chosen": -414.2664489746094, |
|
"logps/rejected": -547.6370849609375, |
|
"loss": 0.4415, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.205294132232666, |
|
"rewards/margins": 1.4340205192565918, |
|
"rewards/rejected": -2.639314651489258, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 57.20162040882379, |
|
"learning_rate": 1.3084401905596177e-08, |
|
"logits/chosen": 0.12880149483680725, |
|
"logits/rejected": 0.14128455519676208, |
|
"logps/chosen": -481.64605712890625, |
|
"logps/rejected": -535.501708984375, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1130110025405884, |
|
"rewards/margins": 1.1385244131088257, |
|
"rewards/rejected": -2.251535654067993, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9093090211132437, |
|
"grad_norm": 45.23521207048333, |
|
"learning_rate": 1.2424211094168053e-08, |
|
"logits/chosen": 0.3405439257621765, |
|
"logits/rejected": 0.3810498118400574, |
|
"logps/chosen": -528.0676879882812, |
|
"logps/rejected": -599.46044921875, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4664819240570068, |
|
"rewards/margins": 0.8699405789375305, |
|
"rewards/rejected": -2.3364224433898926, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 42.750810945395436, |
|
"learning_rate": 1.1780685280456143e-08, |
|
"logits/chosen": 0.22092266380786896, |
|
"logits/rejected": 0.1667570322751999, |
|
"logps/chosen": -535.6447143554688, |
|
"logps/rejected": -663.0042724609375, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7906440496444702, |
|
"rewards/margins": 1.2009087800979614, |
|
"rewards/rejected": -2.9915525913238525, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9141074856046065, |
|
"grad_norm": 45.02882150214674, |
|
"learning_rate": 1.1153869605239564e-08, |
|
"logits/chosen": 0.3357655704021454, |
|
"logits/rejected": 0.39680781960487366, |
|
"logps/chosen": -468.96661376953125, |
|
"logps/rejected": -499.510009765625, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1827433109283447, |
|
"rewards/margins": 0.8699227571487427, |
|
"rewards/rejected": -2.052665948867798, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 53.835458478805826, |
|
"learning_rate": 1.0543808037147606e-08, |
|
"logits/chosen": 0.19844678044319153, |
|
"logits/rejected": 0.09387796372175217, |
|
"logps/chosen": -430.8998107910156, |
|
"logps/rejected": -596.3431396484375, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.182877779006958, |
|
"rewards/margins": 1.630902886390686, |
|
"rewards/rejected": -2.8137805461883545, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9189059500959693, |
|
"grad_norm": 45.731621829576106, |
|
"learning_rate": 9.95054336957557e-09, |
|
"logits/chosen": 0.20105035603046417, |
|
"logits/rejected": 0.12556061148643494, |
|
"logps/chosen": -441.2509765625, |
|
"logps/rejected": -534.109375, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1896703243255615, |
|
"rewards/margins": 0.8572039604187012, |
|
"rewards/rejected": -2.046874523162842, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 59.067862218302, |
|
"learning_rate": 9.37411721768286e-09, |
|
"logits/chosen": 0.39653897285461426, |
|
"logits/rejected": 0.27279889583587646, |
|
"logps/chosen": -486.5269470214844, |
|
"logps/rejected": -648.8412475585938, |
|
"loss": 0.46, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6371396780014038, |
|
"rewards/margins": 1.1990723609924316, |
|
"rewards/rejected": -2.836211919784546, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9237044145873321, |
|
"grad_norm": 47.43074874048961, |
|
"learning_rate": 8.81457001547392e-09, |
|
"logits/chosen": 0.2673342823982239, |
|
"logits/rejected": 0.2015964239835739, |
|
"logps/chosen": -492.92254638671875, |
|
"logps/rejected": -605.4844970703125, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6292178630828857, |
|
"rewards/margins": 1.0606807470321655, |
|
"rewards/rejected": -2.689898729324341, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 38.22467054106717, |
|
"learning_rate": 8.271941012961942e-09, |
|
"logits/chosen": 0.35539960861206055, |
|
"logits/rejected": 0.2722089886665344, |
|
"logps/chosen": -419.780517578125, |
|
"logps/rejected": -596.482666015625, |
|
"loss": 0.4606, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4746735095977783, |
|
"rewards/margins": 1.2210102081298828, |
|
"rewards/rejected": -2.695683717727661, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9285028790786948, |
|
"grad_norm": 50.37493253511501, |
|
"learning_rate": 7.746268273415568e-09, |
|
"logits/chosen": 0.3808482885360718, |
|
"logits/rejected": 0.2647871673107147, |
|
"logps/chosen": -485.0458984375, |
|
"logps/rejected": -578.0924072265625, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5579640865325928, |
|
"rewards/margins": 0.5145239233970642, |
|
"rewards/rejected": -2.0724880695343018, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 48.99088949948664, |
|
"learning_rate": 7.237588670689076e-09, |
|
"logits/chosen": 0.08190400898456573, |
|
"logits/rejected": 0.12344332039356232, |
|
"logps/chosen": -428.4112243652344, |
|
"logps/rejected": -517.3125610351562, |
|
"loss": 0.4641, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.215342402458191, |
|
"rewards/margins": 1.1562786102294922, |
|
"rewards/rejected": -2.3716208934783936, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9333013435700576, |
|
"grad_norm": 44.444359990708264, |
|
"learning_rate": 6.745937886635606e-09, |
|
"logits/chosen": 0.22676298022270203, |
|
"logits/rejected": 0.14976339042186737, |
|
"logps/chosen": -487.2351989746094, |
|
"logps/rejected": -613.9521484375, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.407539963722229, |
|
"rewards/margins": 1.2937225103378296, |
|
"rewards/rejected": -2.7012624740600586, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 40.06142700499872, |
|
"learning_rate": 6.271350408604409e-09, |
|
"logits/chosen": 0.2837770879268646, |
|
"logits/rejected": 0.2296113520860672, |
|
"logps/chosen": -382.27227783203125, |
|
"logps/rejected": -569.7482299804688, |
|
"loss": 0.4645, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.0186858177185059, |
|
"rewards/margins": 1.615103006362915, |
|
"rewards/rejected": -2.633788585662842, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9380998080614203, |
|
"grad_norm": 73.29538152762231, |
|
"learning_rate": 5.813859527021487e-09, |
|
"logits/chosen": 0.35343560576438904, |
|
"logits/rejected": 0.2977786660194397, |
|
"logps/chosen": -445.3648376464844, |
|
"logps/rejected": -555.0151977539062, |
|
"loss": 0.4829, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2847732305526733, |
|
"rewards/margins": 1.3373976945877075, |
|
"rewards/rejected": -2.62217116355896, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 55.32984913756992, |
|
"learning_rate": 5.373497333054616e-09, |
|
"logits/chosen": 0.2757224440574646, |
|
"logits/rejected": 0.27316632866859436, |
|
"logps/chosen": -503.87371826171875, |
|
"logps/rejected": -564.5245361328125, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4553347826004028, |
|
"rewards/margins": 0.799089252948761, |
|
"rewards/rejected": -2.2544240951538086, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9428982725527831, |
|
"grad_norm": 45.13547537051501, |
|
"learning_rate": 4.950294716362213e-09, |
|
"logits/chosen": 0.2402069866657257, |
|
"logits/rejected": 0.2745649814605713, |
|
"logps/chosen": -531.6744995117188, |
|
"logps/rejected": -642.5697021484375, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.5781127214431763, |
|
"rewards/margins": 1.1812816858291626, |
|
"rewards/rejected": -2.7593941688537598, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 41.45524037338652, |
|
"learning_rate": 4.544281362926422e-09, |
|
"logits/chosen": 0.1885417103767395, |
|
"logits/rejected": 0.1404399871826172, |
|
"logps/chosen": -493.8916931152344, |
|
"logps/rejected": -607.7820434570312, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2237544059753418, |
|
"rewards/margins": 1.2443654537200928, |
|
"rewards/rejected": -2.4681198596954346, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9476967370441459, |
|
"grad_norm": 41.32543731890712, |
|
"learning_rate": 4.15548575297095e-09, |
|
"logits/chosen": 0.13838523626327515, |
|
"logits/rejected": 0.12014584243297577, |
|
"logps/chosen": -423.9912109375, |
|
"logps/rejected": -555.3060302734375, |
|
"loss": 0.4492, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3068205118179321, |
|
"rewards/margins": 1.4025375843048096, |
|
"rewards/rejected": -2.709357976913452, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 38.663387459727744, |
|
"learning_rate": 3.7839351589631366e-09, |
|
"logits/chosen": 0.20229902863502502, |
|
"logits/rejected": 0.06122536584734917, |
|
"logps/chosen": -423.82379150390625, |
|
"logps/rejected": -579.1092529296875, |
|
"loss": 0.4703, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3485114574432373, |
|
"rewards/margins": 0.9370707273483276, |
|
"rewards/rejected": -2.2855823040008545, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9524952015355086, |
|
"grad_norm": 55.20292972374471, |
|
"learning_rate": 3.4296556437010405e-09, |
|
"logits/chosen": 0.20910441875457764, |
|
"logits/rejected": 0.18343612551689148, |
|
"logps/chosen": -397.52239990234375, |
|
"logps/rejected": -482.45513916015625, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3418649435043335, |
|
"rewards/margins": 0.8949347734451294, |
|
"rewards/rejected": -2.236799716949463, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 52.42663168427878, |
|
"learning_rate": 3.092672058485124e-09, |
|
"logits/chosen": 0.2784040868282318, |
|
"logits/rejected": 0.22552700340747833, |
|
"logps/chosen": -437.07122802734375, |
|
"logps/rejected": -581.2984008789062, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.39797043800354, |
|
"rewards/margins": 1.417875051498413, |
|
"rewards/rejected": -2.815845489501953, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9572936660268714, |
|
"grad_norm": 51.730419941201816, |
|
"learning_rate": 2.7730080413750356e-09, |
|
"logits/chosen": 0.3203295171260834, |
|
"logits/rejected": 0.33414626121520996, |
|
"logps/chosen": -470.68963623046875, |
|
"logps/rejected": -590.4835815429688, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3279026746749878, |
|
"rewards/margins": 1.245241641998291, |
|
"rewards/rejected": -2.5731444358825684, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 52.110269896703294, |
|
"learning_rate": 2.4706860155316033e-09, |
|
"logits/chosen": 0.2101161777973175, |
|
"logits/rejected": 0.23008927702903748, |
|
"logps/chosen": -545.7728271484375, |
|
"logps/rejected": -637.5755615234375, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4393521547317505, |
|
"rewards/margins": 0.8712779879570007, |
|
"rewards/rejected": -2.3106300830841064, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"eval_logits/chosen": 0.38506969809532166, |
|
"eval_logits/rejected": 0.3408171534538269, |
|
"eval_logps/chosen": -459.0677185058594, |
|
"eval_logps/rejected": -584.910400390625, |
|
"eval_loss": 0.4760858714580536, |
|
"eval_rewards/accuracies": 0.7982142567634583, |
|
"eval_rewards/chosen": -1.4039554595947266, |
|
"eval_rewards/margins": 1.1972852945327759, |
|
"eval_rewards/rejected": -2.601240873336792, |
|
"eval_runtime": 172.2382, |
|
"eval_samples_per_second": 25.9, |
|
"eval_steps_per_second": 0.406, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9620921305182342, |
|
"grad_norm": 48.027804731217394, |
|
"learning_rate": 2.185727187643843e-09, |
|
"logits/chosen": 0.17230884730815887, |
|
"logits/rejected": 0.11436843872070312, |
|
"logps/chosen": -407.50506591796875, |
|
"logps/rejected": -561.4027709960938, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3693194389343262, |
|
"rewards/margins": 1.4761823415756226, |
|
"rewards/rejected": -2.8455021381378174, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 57.935705040777876, |
|
"learning_rate": 1.9181515464413434e-09, |
|
"logits/chosen": 0.15140806138515472, |
|
"logits/rejected": 0.09838312864303589, |
|
"logps/chosen": -575.939208984375, |
|
"logps/rejected": -703.3623657226562, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2815978527069092, |
|
"rewards/margins": 1.3098185062408447, |
|
"rewards/rejected": -2.591416597366333, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.966890595009597, |
|
"grad_norm": 40.72962697033489, |
|
"learning_rate": 1.6679778612923302e-09, |
|
"logits/chosen": 0.21621087193489075, |
|
"logits/rejected": 0.2728949785232544, |
|
"logps/chosen": -515.5426635742188, |
|
"logps/rejected": -592.4903564453125, |
|
"loss": 0.4629, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5008609294891357, |
|
"rewards/margins": 0.7948905229568481, |
|
"rewards/rejected": -2.2957513332366943, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 54.67990587779175, |
|
"learning_rate": 1.43522368088686e-09, |
|
"logits/chosen": 0.29817652702331543, |
|
"logits/rejected": 0.22439947724342346, |
|
"logps/chosen": -469.2783203125, |
|
"logps/rejected": -633.0770263671875, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5123900175094604, |
|
"rewards/margins": 1.635197401046753, |
|
"rewards/rejected": -3.147587299346924, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9716890595009597, |
|
"grad_norm": 70.3894278582445, |
|
"learning_rate": 1.2199053320059993e-09, |
|
"logits/chosen": 0.3103833794593811, |
|
"logits/rejected": 0.2175188809633255, |
|
"logps/chosen": -478.85443115234375, |
|
"logps/rejected": -599.4166259765625, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4971873760223389, |
|
"rewards/margins": 1.086474061012268, |
|
"rewards/rejected": -2.5836615562438965, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 45.89117778001179, |
|
"learning_rate": 1.0220379183764338e-09, |
|
"logits/chosen": 0.1872117817401886, |
|
"logits/rejected": 0.14962831139564514, |
|
"logps/chosen": -379.21612548828125, |
|
"logps/rejected": -526.4472045898438, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1573994159698486, |
|
"rewards/margins": 1.4220101833343506, |
|
"rewards/rejected": -2.5794098377227783, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9764875239923224, |
|
"grad_norm": 42.58928630808853, |
|
"learning_rate": 8.416353196111503e-10, |
|
"logits/chosen": 0.4299827218055725, |
|
"logits/rejected": 0.3653213679790497, |
|
"logps/chosen": -455.606689453125, |
|
"logps/rejected": -537.0999755859375, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.646712064743042, |
|
"rewards/margins": 0.9391171336174011, |
|
"rewards/rejected": -2.585829257965088, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 53.46584271337103, |
|
"learning_rate": 6.787101902356873e-10, |
|
"logits/chosen": 0.3689078986644745, |
|
"logits/rejected": 0.34390968084335327, |
|
"logps/chosen": -460.3603515625, |
|
"logps/rejected": -581.0008544921875, |
|
"loss": 0.461, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3799049854278564, |
|
"rewards/margins": 0.9097992181777954, |
|
"rewards/rejected": -2.2897043228149414, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9812859884836852, |
|
"grad_norm": 61.26328575901746, |
|
"learning_rate": 5.332739588005953e-10, |
|
"logits/chosen": 0.1865283101797104, |
|
"logits/rejected": 0.08630210161209106, |
|
"logps/chosen": -390.46929931640625, |
|
"logps/rejected": -543.6526489257812, |
|
"loss": 0.4761, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2067457437515259, |
|
"rewards/margins": 1.2890859842300415, |
|
"rewards/rejected": -2.4958317279815674, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 49.7010925580212, |
|
"learning_rate": 4.053368270797164e-10, |
|
"logits/chosen": 0.34013232588768005, |
|
"logits/rejected": 0.23841337859630585, |
|
"logps/chosen": -435.8818359375, |
|
"logps/rejected": -555.6300659179688, |
|
"loss": 0.452, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4993178844451904, |
|
"rewards/margins": 1.1681034564971924, |
|
"rewards/rejected": -2.6674208641052246, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.986084452975048, |
|
"grad_norm": 39.60831889767418, |
|
"learning_rate": 2.949077693545354e-10, |
|
"logits/chosen": 0.3429808020591736, |
|
"logits/rejected": 0.28340935707092285, |
|
"logps/chosen": -493.12799072265625, |
|
"logps/rejected": -603.4589233398438, |
|
"loss": 0.5203, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5291836261749268, |
|
"rewards/margins": 0.7797685861587524, |
|
"rewards/rejected": -2.3089520931243896, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 48.86243638343189, |
|
"learning_rate": 2.0199453178471047e-10, |
|
"logits/chosen": 0.2578023374080658, |
|
"logits/rejected": 0.28469234704971313, |
|
"logps/chosen": -521.9082641601562, |
|
"logps/rejected": -584.6770629882812, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.4713417291641235, |
|
"rewards/margins": 1.0106664896011353, |
|
"rewards/rejected": -2.482008457183838, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9908829174664108, |
|
"grad_norm": 40.74851597282627, |
|
"learning_rate": 1.266036318647301e-10, |
|
"logits/chosen": 0.24952539801597595, |
|
"logits/rejected": 0.21447113156318665, |
|
"logps/chosen": -515.4271240234375, |
|
"logps/rejected": -623.4884033203125, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3878891468048096, |
|
"rewards/margins": 1.3714239597320557, |
|
"rewards/rejected": -2.7593131065368652, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 59.7391917924102, |
|
"learning_rate": 6.874035796672339e-11, |
|
"logits/chosen": 0.20685334503650665, |
|
"logits/rejected": 0.19621731340885162, |
|
"logps/chosen": -468.5065002441406, |
|
"logps/rejected": -590.8399047851562, |
|
"loss": 0.511, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1164958477020264, |
|
"rewards/margins": 1.485654592514038, |
|
"rewards/rejected": -2.6021504402160645, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9956813819577736, |
|
"grad_norm": 64.53279604006218, |
|
"learning_rate": 2.8408768969423458e-11, |
|
"logits/chosen": 0.16596756875514984, |
|
"logits/rejected": 0.11380906403064728, |
|
"logps/chosen": -485.75006103515625, |
|
"logps/rejected": -593.44287109375, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3494365215301514, |
|
"rewards/margins": 1.0383247137069702, |
|
"rewards/rejected": -2.387761354446411, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 64.97328454417662, |
|
"learning_rate": 5.611693973617271e-12, |
|
"logits/chosen": 0.3674852252006531, |
|
"logits/rejected": 0.3302612006664276, |
|
"logps/chosen": -414.0726623535156, |
|
"logps/rejected": -535.21337890625, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3726593255996704, |
|
"rewards/margins": 0.9844037890434265, |
|
"rewards/rejected": -2.357063055038452, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5273771832863338, |
|
"train_runtime": 14157.4064, |
|
"train_samples_per_second": 9.42, |
|
"train_steps_per_second": 0.294 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|