|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994111874386653, |
|
"eval_steps": 100, |
|
"global_step": 1273, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007850834151128558, |
|
"grad_norm": 8.785704612731934, |
|
"learning_rate": 3.90625e-08, |
|
"logits/chosen": -2.957148313522339, |
|
"logits/rejected": -2.900550365447998, |
|
"logps/chosen": -466.9051818847656, |
|
"logps/rejected": -502.35345458984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007850834151128557, |
|
"grad_norm": 6.684790134429932, |
|
"learning_rate": 3.90625e-07, |
|
"logits/chosen": -3.0609865188598633, |
|
"logits/rejected": -2.9977359771728516, |
|
"logps/chosen": -308.3629455566406, |
|
"logps/rejected": -261.8404235839844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": 0.0009382184944115579, |
|
"rewards/margins": 5.389652869780548e-05, |
|
"rewards/rejected": 0.0008843218092806637, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015701668302257114, |
|
"grad_norm": 6.8678717613220215, |
|
"learning_rate": 7.8125e-07, |
|
"logits/chosen": -3.1096813678741455, |
|
"logits/rejected": -3.0798025131225586, |
|
"logps/chosen": -292.4770202636719, |
|
"logps/rejected": -254.6656494140625, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.007172191981226206, |
|
"rewards/margins": 0.001467574737034738, |
|
"rewards/rejected": 0.005704617593437433, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.023552502453385672, |
|
"grad_norm": 6.647519588470459, |
|
"learning_rate": 1.1718750000000001e-06, |
|
"logits/chosen": -3.1090664863586426, |
|
"logits/rejected": -3.084791660308838, |
|
"logps/chosen": -265.77301025390625, |
|
"logps/rejected": -266.1317138671875, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.01251781266182661, |
|
"rewards/margins": 0.0033119157887995243, |
|
"rewards/rejected": 0.009205898270010948, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03140333660451423, |
|
"grad_norm": 6.744068145751953, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": -3.081329107284546, |
|
"logits/rejected": -3.1170654296875, |
|
"logps/chosen": -297.75823974609375, |
|
"logps/rejected": -270.17462158203125, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.011317016556859016, |
|
"rewards/margins": 0.013026026077568531, |
|
"rewards/rejected": -0.00170900858938694, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03925417075564279, |
|
"grad_norm": 7.407871246337891, |
|
"learning_rate": 1.953125e-06, |
|
"logits/chosen": -3.1149449348449707, |
|
"logits/rejected": -3.066861629486084, |
|
"logps/chosen": -306.6522216796875, |
|
"logps/rejected": -255.38491821289062, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.015373636968433857, |
|
"rewards/margins": 0.022752460092306137, |
|
"rewards/rejected": -0.00737882312387228, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.047105004906771344, |
|
"grad_norm": 7.203430652618408, |
|
"learning_rate": 2.3437500000000002e-06, |
|
"logits/chosen": -3.008836269378662, |
|
"logits/rejected": -3.026230573654175, |
|
"logps/chosen": -269.2247009277344, |
|
"logps/rejected": -276.4228820800781, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.006092413794249296, |
|
"rewards/margins": 0.021052923053503036, |
|
"rewards/rejected": -0.014960509724915028, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0549558390578999, |
|
"grad_norm": 6.94841194152832, |
|
"learning_rate": 2.7343750000000004e-06, |
|
"logits/chosen": -3.0492148399353027, |
|
"logits/rejected": -3.0904951095581055, |
|
"logps/chosen": -267.11553955078125, |
|
"logps/rejected": -257.773681640625, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.009770817123353481, |
|
"rewards/margins": 0.04689077287912369, |
|
"rewards/rejected": -0.056661587208509445, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06280667320902845, |
|
"grad_norm": 9.025800704956055, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -3.1011645793914795, |
|
"logits/rejected": -3.1380457878112793, |
|
"logps/chosen": -308.1461486816406, |
|
"logps/rejected": -312.40869140625, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.09069164097309113, |
|
"rewards/margins": 0.052069295197725296, |
|
"rewards/rejected": -0.14276091754436493, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07065750736015702, |
|
"grad_norm": 10.733589172363281, |
|
"learning_rate": 3.5156250000000003e-06, |
|
"logits/chosen": -3.0480704307556152, |
|
"logits/rejected": -3.068376064300537, |
|
"logps/chosen": -309.16766357421875, |
|
"logps/rejected": -306.99627685546875, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.12756133079528809, |
|
"rewards/margins": 0.07411627471446991, |
|
"rewards/rejected": -0.20167763531208038, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07850834151128558, |
|
"grad_norm": 13.016451835632324, |
|
"learning_rate": 3.90625e-06, |
|
"logits/chosen": -2.9545440673828125, |
|
"logits/rejected": -2.923600435256958, |
|
"logps/chosen": -302.78509521484375, |
|
"logps/rejected": -304.83795166015625, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.2593812942504883, |
|
"rewards/margins": 0.14604052901268005, |
|
"rewards/rejected": -0.40542179346084595, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07850834151128558, |
|
"eval_logits/chosen": -3.014045238494873, |
|
"eval_logits/rejected": -3.0357654094696045, |
|
"eval_logps/chosen": -329.1207580566406, |
|
"eval_logps/rejected": -306.6942443847656, |
|
"eval_loss": 0.6424023509025574, |
|
"eval_rewards/accuracies": 0.6547619104385376, |
|
"eval_rewards/chosen": -0.40733060240745544, |
|
"eval_rewards/margins": 0.12344833463430405, |
|
"eval_rewards/rejected": -0.5307790040969849, |
|
"eval_runtime": 174.6916, |
|
"eval_samples_per_second": 11.449, |
|
"eval_steps_per_second": 0.481, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08635917566241413, |
|
"grad_norm": 14.229337692260742, |
|
"learning_rate": 4.296875e-06, |
|
"logits/chosen": -2.9414217472076416, |
|
"logits/rejected": -2.9711925983428955, |
|
"logps/chosen": -339.30914306640625, |
|
"logps/rejected": -324.44390869140625, |
|
"loss": 0.6339, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3848220109939575, |
|
"rewards/margins": 0.16449818015098572, |
|
"rewards/rejected": -0.5493202209472656, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09421000981354269, |
|
"grad_norm": 20.014785766601562, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -2.9944517612457275, |
|
"logits/rejected": -3.045173168182373, |
|
"logps/chosen": -391.199462890625, |
|
"logps/rejected": -376.3497009277344, |
|
"loss": 0.6211, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5974650979042053, |
|
"rewards/margins": 0.2744296193122864, |
|
"rewards/rejected": -0.8718946576118469, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.10206084396467124, |
|
"grad_norm": 12.38216781616211, |
|
"learning_rate": 4.999962359300416e-06, |
|
"logits/chosen": -2.9552016258239746, |
|
"logits/rejected": -2.9254870414733887, |
|
"logps/chosen": -402.8971252441406, |
|
"logps/rejected": -404.6396789550781, |
|
"loss": 0.6189, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8715218305587769, |
|
"rewards/margins": 0.27080851793289185, |
|
"rewards/rejected": -1.1423304080963135, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1099116781157998, |
|
"grad_norm": 16.306636810302734, |
|
"learning_rate": 4.998645053824218e-06, |
|
"logits/chosen": -2.803802967071533, |
|
"logits/rejected": -2.8079888820648193, |
|
"logps/chosen": -379.4205017089844, |
|
"logps/rejected": -362.0006103515625, |
|
"loss": 0.6495, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.0367389917373657, |
|
"rewards/margins": 0.17243310809135437, |
|
"rewards/rejected": -1.2091721296310425, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11776251226692837, |
|
"grad_norm": 16.321983337402344, |
|
"learning_rate": 4.9954468466732145e-06, |
|
"logits/chosen": -2.8862144947052, |
|
"logits/rejected": -2.9243063926696777, |
|
"logps/chosen": -417.48272705078125, |
|
"logps/rejected": -420.72381591796875, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8957064747810364, |
|
"rewards/margins": 0.33516108989715576, |
|
"rewards/rejected": -1.2308675050735474, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1256133464180569, |
|
"grad_norm": 21.833602905273438, |
|
"learning_rate": 4.990370145357496e-06, |
|
"logits/chosen": -2.880340099334717, |
|
"logits/rejected": -2.8787879943847656, |
|
"logps/chosen": -374.5000305175781, |
|
"logps/rejected": -366.66619873046875, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9459589719772339, |
|
"rewards/margins": 0.245649054646492, |
|
"rewards/rejected": -1.1916080713272095, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13346418056918546, |
|
"grad_norm": 22.123382568359375, |
|
"learning_rate": 4.983418771458684e-06, |
|
"logits/chosen": -2.9111855030059814, |
|
"logits/rejected": -2.8439784049987793, |
|
"logps/chosen": -384.2833251953125, |
|
"logps/rejected": -396.34326171875, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.9746086001396179, |
|
"rewards/margins": 0.32514628767967224, |
|
"rewards/rejected": -1.2997548580169678, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14131501472031405, |
|
"grad_norm": 18.860288619995117, |
|
"learning_rate": 4.97459795775315e-06, |
|
"logits/chosen": -2.846890449523926, |
|
"logits/rejected": -2.8465495109558105, |
|
"logps/chosen": -371.8786315917969, |
|
"logps/rejected": -400.20501708984375, |
|
"loss": 0.5886, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8063033819198608, |
|
"rewards/margins": 0.39446142315864563, |
|
"rewards/rejected": -1.200764775276184, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1491658488714426, |
|
"grad_norm": 17.285179138183594, |
|
"learning_rate": 4.963914344272961e-06, |
|
"logits/chosen": -2.9533636569976807, |
|
"logits/rejected": -2.9740447998046875, |
|
"logps/chosen": -379.0119934082031, |
|
"logps/rejected": -406.78936767578125, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7768992781639099, |
|
"rewards/margins": 0.33595213294029236, |
|
"rewards/rejected": -1.1128513813018799, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15701668302257116, |
|
"grad_norm": 23.66827964782715, |
|
"learning_rate": 4.951375973307458e-06, |
|
"logits/chosen": -2.9775123596191406, |
|
"logits/rejected": -2.977674961090088, |
|
"logps/chosen": -389.47088623046875, |
|
"logps/rejected": -386.0644226074219, |
|
"loss": 0.5977, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8474963903427124, |
|
"rewards/margins": 0.37333354353904724, |
|
"rewards/rejected": -1.220829963684082, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15701668302257116, |
|
"eval_logits/chosen": -3.014695405960083, |
|
"eval_logits/rejected": -3.025944232940674, |
|
"eval_logps/chosen": -394.195068359375, |
|
"eval_logps/rejected": -395.111328125, |
|
"eval_loss": 0.5976593494415283, |
|
"eval_rewards/accuracies": 0.6666666865348816, |
|
"eval_rewards/chosen": -1.0580739974975586, |
|
"eval_rewards/margins": 0.35687559843063354, |
|
"eval_rewards/rejected": -1.414949655532837, |
|
"eval_runtime": 171.2039, |
|
"eval_samples_per_second": 11.682, |
|
"eval_steps_per_second": 0.491, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1648675171736997, |
|
"grad_norm": 21.703943252563477, |
|
"learning_rate": 4.93699228334928e-06, |
|
"logits/chosen": -3.014017343521118, |
|
"logits/rejected": -2.9310977458953857, |
|
"logps/chosen": -398.8971862792969, |
|
"logps/rejected": -423.201416015625, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1008360385894775, |
|
"rewards/margins": 0.518481433391571, |
|
"rewards/rejected": -1.6193174123764038, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.17271835132482827, |
|
"grad_norm": 30.472820281982422, |
|
"learning_rate": 4.920774101989362e-06, |
|
"logits/chosen": -2.922285318374634, |
|
"logits/rejected": -2.8559417724609375, |
|
"logps/chosen": -394.4663391113281, |
|
"logps/rejected": -436.795166015625, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.094743013381958, |
|
"rewards/margins": 0.4840098023414612, |
|
"rewards/rejected": -1.578752875328064, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18056918547595682, |
|
"grad_norm": 19.1884708404541, |
|
"learning_rate": 4.902733637766261e-06, |
|
"logits/chosen": -2.8735547065734863, |
|
"logits/rejected": -2.8807244300842285, |
|
"logps/chosen": -358.8312072753906, |
|
"logps/rejected": -393.9620056152344, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8047897219657898, |
|
"rewards/margins": 0.49235886335372925, |
|
"rewards/rejected": -1.297148585319519, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.18842001962708538, |
|
"grad_norm": 25.822147369384766, |
|
"learning_rate": 4.882884470975954e-06, |
|
"logits/chosen": -2.733098030090332, |
|
"logits/rejected": -2.768909454345703, |
|
"logps/chosen": -396.49188232421875, |
|
"logps/rejected": -438.16455078125, |
|
"loss": 0.5768, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.028875708580017, |
|
"rewards/margins": 0.5807405710220337, |
|
"rewards/rejected": -1.6096162796020508, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19627085377821393, |
|
"grad_norm": 18.91808319091797, |
|
"learning_rate": 4.861241543449015e-06, |
|
"logits/chosen": -2.723087787628174, |
|
"logits/rejected": -2.6532533168792725, |
|
"logps/chosen": -397.3339538574219, |
|
"logps/rejected": -422.185302734375, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.094036340713501, |
|
"rewards/margins": 0.49098238348960876, |
|
"rewards/rejected": -1.5850186347961426, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2041216879293425, |
|
"grad_norm": 32.23611068725586, |
|
"learning_rate": 4.8378211473028755e-06, |
|
"logits/chosen": -2.828057289123535, |
|
"logits/rejected": -2.838313579559326, |
|
"logps/chosen": -390.9112243652344, |
|
"logps/rejected": -397.8682556152344, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7329429984092712, |
|
"rewards/margins": 0.33645009994506836, |
|
"rewards/rejected": -1.0693930387496948, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.21197252208047104, |
|
"grad_norm": 25.560338973999023, |
|
"learning_rate": 4.812640912677624e-06, |
|
"logits/chosen": -2.9140567779541016, |
|
"logits/rejected": -2.930488109588623, |
|
"logps/chosen": -346.61273193359375, |
|
"logps/rejected": -370.59771728515625, |
|
"loss": 0.5867, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6457637548446655, |
|
"rewards/margins": 0.3336387276649475, |
|
"rewards/rejected": -0.9794024229049683, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2198233562315996, |
|
"grad_norm": 21.532350540161133, |
|
"learning_rate": 4.785719794464596e-06, |
|
"logits/chosen": -2.7777903079986572, |
|
"logits/rejected": -2.7826027870178223, |
|
"logps/chosen": -360.8690490722656, |
|
"logps/rejected": -386.78753662109375, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.9706557393074036, |
|
"rewards/margins": 0.3798271715641022, |
|
"rewards/rejected": -1.3504829406738281, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22767419038272815, |
|
"grad_norm": 25.418230056762695, |
|
"learning_rate": 4.757078058037722e-06, |
|
"logits/chosen": -2.886289119720459, |
|
"logits/rejected": -2.813042402267456, |
|
"logps/chosen": -386.9649353027344, |
|
"logps/rejected": -436.1798400878906, |
|
"loss": 0.5789, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1510156393051147, |
|
"rewards/margins": 0.4267166256904602, |
|
"rewards/rejected": -1.5777322053909302, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.23552502453385674, |
|
"grad_norm": 26.089282989501953, |
|
"learning_rate": 4.72673726399839e-06, |
|
"logits/chosen": -2.6797690391540527, |
|
"logits/rejected": -2.7410836219787598, |
|
"logps/chosen": -347.84405517578125, |
|
"logps/rejected": -429.4021911621094, |
|
"loss": 0.5583, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0482864379882812, |
|
"rewards/margins": 0.5523291826248169, |
|
"rewards/rejected": -1.6006155014038086, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23552502453385674, |
|
"eval_logits/chosen": -2.723576307296753, |
|
"eval_logits/rejected": -2.7299251556396484, |
|
"eval_logps/chosen": -384.55767822265625, |
|
"eval_logps/rejected": -390.4145812988281, |
|
"eval_loss": 0.5714064836502075, |
|
"eval_rewards/accuracies": 0.6711309552192688, |
|
"eval_rewards/chosen": -0.9617000818252563, |
|
"eval_rewards/margins": 0.40628206729888916, |
|
"eval_rewards/rejected": -1.3679821491241455, |
|
"eval_runtime": 171.2042, |
|
"eval_samples_per_second": 11.682, |
|
"eval_steps_per_second": 0.491, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2433758586849853, |
|
"grad_norm": 18.173839569091797, |
|
"learning_rate": 4.694720251945298e-06, |
|
"logits/chosen": -2.7685041427612305, |
|
"logits/rejected": -2.686394691467285, |
|
"logps/chosen": -381.9715881347656, |
|
"logps/rejected": -402.4884338378906, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9211880564689636, |
|
"rewards/margins": 0.3484678566455841, |
|
"rewards/rejected": -1.2696558237075806, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2512266928361138, |
|
"grad_norm": 20.20842933654785, |
|
"learning_rate": 4.661051123281528e-06, |
|
"logits/chosen": -2.535449504852295, |
|
"logits/rejected": -2.4344544410705566, |
|
"logps/chosen": -394.1264953613281, |
|
"logps/rejected": -438.5677185058594, |
|
"loss": 0.542, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9613161087036133, |
|
"rewards/margins": 0.5449072122573853, |
|
"rewards/rejected": -1.5062233209609985, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2590775269872424, |
|
"grad_norm": 19.297094345092773, |
|
"learning_rate": 4.6257552230717536e-06, |
|
"logits/chosen": -2.40204119682312, |
|
"logits/rejected": -2.392609119415283, |
|
"logps/chosen": -459.760498046875, |
|
"logps/rejected": -468.84698486328125, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.178815245628357, |
|
"rewards/margins": 0.6503817439079285, |
|
"rewards/rejected": -1.8291969299316406, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.26692836113837093, |
|
"grad_norm": 36.15755081176758, |
|
"learning_rate": 4.588859120963282e-06, |
|
"logits/chosen": -2.267246723175049, |
|
"logits/rejected": -2.1048290729522705, |
|
"logps/chosen": -382.4331359863281, |
|
"logps/rejected": -419.305908203125, |
|
"loss": 0.5572, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2648835182189941, |
|
"rewards/margins": 0.6192021369934082, |
|
"rewards/rejected": -1.8840856552124023, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2747791952894995, |
|
"grad_norm": 15.106271743774414, |
|
"learning_rate": 4.5503905911852435e-06, |
|
"logits/chosen": -2.3543121814727783, |
|
"logits/rejected": -2.33532452583313, |
|
"logps/chosen": -404.1642761230469, |
|
"logps/rejected": -431.0093688964844, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0297491550445557, |
|
"rewards/margins": 0.6717931032180786, |
|
"rewards/rejected": -1.7015421390533447, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2826300294406281, |
|
"grad_norm": 20.0123348236084, |
|
"learning_rate": 4.510378591641036e-06, |
|
"logits/chosen": -2.2860474586486816, |
|
"logits/rejected": -2.3591558933258057, |
|
"logps/chosen": -394.79827880859375, |
|
"logps/rejected": -417.1219177246094, |
|
"loss": 0.5561, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9084060788154602, |
|
"rewards/margins": 0.6691475510597229, |
|
"rewards/rejected": -1.5775535106658936, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2904808635917566, |
|
"grad_norm": 19.609752655029297, |
|
"learning_rate": 4.468853242109712e-06, |
|
"logits/chosen": -2.3907597064971924, |
|
"logits/rejected": -2.378951072692871, |
|
"logps/chosen": -362.96331787109375, |
|
"logps/rejected": -399.92401123046875, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.9224265217781067, |
|
"rewards/margins": 0.5320521593093872, |
|
"rewards/rejected": -1.4544788599014282, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2983316977428852, |
|
"grad_norm": 29.80910873413086, |
|
"learning_rate": 4.42584580157276e-06, |
|
"logits/chosen": -2.2916672229766846, |
|
"logits/rejected": -2.059715986251831, |
|
"logps/chosen": -365.691162109375, |
|
"logps/rejected": -418.39642333984375, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0353277921676636, |
|
"rewards/margins": 0.6206272840499878, |
|
"rewards/rejected": -1.6559550762176514, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.30618253189401373, |
|
"grad_norm": 28.393800735473633, |
|
"learning_rate": 4.381388644683317e-06, |
|
"logits/chosen": -2.1753897666931152, |
|
"logits/rejected": -2.1332502365112305, |
|
"logps/chosen": -400.159423828125, |
|
"logps/rejected": -432.15777587890625, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2140204906463623, |
|
"rewards/margins": 0.7265356779098511, |
|
"rewards/rejected": -1.9405561685562134, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3140333660451423, |
|
"grad_norm": 32.2076301574707, |
|
"learning_rate": 4.33551523739555e-06, |
|
"logits/chosen": -2.03031849861145, |
|
"logits/rejected": -2.0334537029266357, |
|
"logps/chosen": -369.33056640625, |
|
"logps/rejected": -458.2646484375, |
|
"loss": 0.5727, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2683178186416626, |
|
"rewards/margins": 0.6755903363227844, |
|
"rewards/rejected": -1.9439083337783813, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3140333660451423, |
|
"eval_logits/chosen": -2.2418928146362305, |
|
"eval_logits/rejected": -2.2240025997161865, |
|
"eval_logps/chosen": -389.8349609375, |
|
"eval_logps/rejected": -412.5816955566406, |
|
"eval_loss": 0.5430436730384827, |
|
"eval_rewards/accuracies": 0.6875, |
|
"eval_rewards/chosen": -1.0144727230072021, |
|
"eval_rewards/margins": 0.5751808285713196, |
|
"eval_rewards/rejected": -1.5896533727645874, |
|
"eval_runtime": 172.1182, |
|
"eval_samples_per_second": 11.62, |
|
"eval_steps_per_second": 0.488, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.32188420019627084, |
|
"grad_norm": 22.76254653930664, |
|
"learning_rate": 4.288260111772535e-06, |
|
"logits/chosen": -2.275933027267456, |
|
"logits/rejected": -2.148829221725464, |
|
"logps/chosen": -390.9195861816406, |
|
"logps/rejected": -428.26226806640625, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9567239880561829, |
|
"rewards/margins": 0.5892980694770813, |
|
"rewards/rejected": -1.5460221767425537, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3297350343473994, |
|
"grad_norm": 23.928640365600586, |
|
"learning_rate": 4.239658839991594e-06, |
|
"logits/chosen": -2.107412099838257, |
|
"logits/rejected": -2.1913232803344727, |
|
"logps/chosen": -405.66265869140625, |
|
"logps/rejected": -424.18377685546875, |
|
"loss": 0.5677, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.139583706855774, |
|
"rewards/margins": 0.5120341777801514, |
|
"rewards/rejected": -1.6516176462173462, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.33758586849852795, |
|
"grad_norm": 21.068220138549805, |
|
"learning_rate": 4.189748007566686e-06, |
|
"logits/chosen": -2.05175518989563, |
|
"logits/rejected": -1.9536798000335693, |
|
"logps/chosen": -372.6251525878906, |
|
"logps/rejected": -439.80810546875, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.315185785293579, |
|
"rewards/margins": 0.6874850988388062, |
|
"rewards/rejected": -2.0026707649230957, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.34543670264965654, |
|
"grad_norm": 30.113636016845703, |
|
"learning_rate": 4.138565185807972e-06, |
|
"logits/chosen": -2.102708339691162, |
|
"logits/rejected": -2.0921308994293213, |
|
"logps/chosen": -431.109375, |
|
"logps/rejected": -471.14532470703125, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4245600700378418, |
|
"rewards/margins": 0.6852970719337463, |
|
"rewards/rejected": -2.1098570823669434, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.35328753680078506, |
|
"grad_norm": 43.14057540893555, |
|
"learning_rate": 4.086148903539311e-06, |
|
"logits/chosen": -1.9374672174453735, |
|
"logits/rejected": -1.9135332107543945, |
|
"logps/chosen": -499.2344665527344, |
|
"logps/rejected": -536.7950439453125, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.975155234336853, |
|
"rewards/margins": 0.659866988658905, |
|
"rewards/rejected": -2.6350224018096924, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.36113837095191365, |
|
"grad_norm": 38.40256881713867, |
|
"learning_rate": 4.032538618094972e-06, |
|
"logits/chosen": -2.0139780044555664, |
|
"logits/rejected": -1.9372785091400146, |
|
"logps/chosen": -458.24664306640625, |
|
"logps/rejected": -535.4005126953125, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7541725635528564, |
|
"rewards/margins": 0.8262192010879517, |
|
"rewards/rejected": -2.5803914070129395, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3689892051030422, |
|
"grad_norm": 19.52273941040039, |
|
"learning_rate": 3.977774685617386e-06, |
|
"logits/chosen": -2.1808319091796875, |
|
"logits/rejected": -2.155151844024658, |
|
"logps/chosen": -449.31927490234375, |
|
"logps/rejected": -500.30242919921875, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5000309944152832, |
|
"rewards/margins": 0.7721298336982727, |
|
"rewards/rejected": -2.272160768508911, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.37684003925417076, |
|
"grad_norm": 40.90033721923828, |
|
"learning_rate": 3.92189833067831e-06, |
|
"logits/chosen": -1.9630296230316162, |
|
"logits/rejected": -1.909574270248413, |
|
"logps/chosen": -471.18243408203125, |
|
"logps/rejected": -558.3318481445312, |
|
"loss": 0.5098, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8825572729110718, |
|
"rewards/margins": 0.9354137182235718, |
|
"rewards/rejected": -2.8179707527160645, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.38469087340529934, |
|
"grad_norm": 20.045015335083008, |
|
"learning_rate": 3.864951615246261e-06, |
|
"logits/chosen": -1.8974872827529907, |
|
"logits/rejected": -1.8500369787216187, |
|
"logps/chosen": -516.6534423828125, |
|
"logps/rejected": -588.5135498046875, |
|
"loss": 0.5681, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.3989312648773193, |
|
"rewards/margins": 0.9191252589225769, |
|
"rewards/rejected": -3.3180572986602783, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.39254170755642787, |
|
"grad_norm": 24.87650489807129, |
|
"learning_rate": 3.806977407023581e-06, |
|
"logits/chosen": -2.218294143676758, |
|
"logits/rejected": -2.087562084197998, |
|
"logps/chosen": -463.06121826171875, |
|
"logps/rejected": -502.892333984375, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.609442949295044, |
|
"rewards/margins": 0.832965075969696, |
|
"rewards/rejected": -2.4424080848693848, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.39254170755642787, |
|
"eval_logits/chosen": -2.3772380352020264, |
|
"eval_logits/rejected": -2.3567545413970947, |
|
"eval_logps/chosen": -410.2373046875, |
|
"eval_logps/rejected": -440.0260925292969, |
|
"eval_loss": 0.5367991328239441, |
|
"eval_rewards/accuracies": 0.6815476417541504, |
|
"eval_rewards/chosen": -1.2184962034225464, |
|
"eval_rewards/margins": 0.6456010937690735, |
|
"eval_rewards/rejected": -1.864097237586975, |
|
"eval_runtime": 164.1, |
|
"eval_samples_per_second": 12.188, |
|
"eval_steps_per_second": 0.512, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.40039254170755645, |
|
"grad_norm": 17.09919548034668, |
|
"learning_rate": 3.7480193471769815e-06, |
|
"logits/chosen": -2.3634283542633057, |
|
"logits/rejected": -2.379462242126465, |
|
"logps/chosen": -422.3035583496094, |
|
"logps/rejected": -452.18951416015625, |
|
"loss": 0.559, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2336976528167725, |
|
"rewards/margins": 0.5405682325363159, |
|
"rewards/rejected": -1.7742656469345093, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.408243375858685, |
|
"grad_norm": 19.045442581176758, |
|
"learning_rate": 3.6881218174858354e-06, |
|
"logits/chosen": -2.298239231109619, |
|
"logits/rejected": -2.1397132873535156, |
|
"logps/chosen": -401.8265075683594, |
|
"logps/rejected": -456.30535888671875, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.240301251411438, |
|
"rewards/margins": 0.7864383459091187, |
|
"rewards/rejected": -2.0267395973205566, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.41609421000981356, |
|
"grad_norm": 26.22776985168457, |
|
"learning_rate": 3.627329906932964e-06, |
|
"logits/chosen": -2.407930374145508, |
|
"logits/rejected": -2.3968963623046875, |
|
"logps/chosen": -411.4175720214844, |
|
"logps/rejected": -485.57379150390625, |
|
"loss": 0.5329, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2162271738052368, |
|
"rewards/margins": 0.7498941421508789, |
|
"rewards/rejected": -1.9661214351654053, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4239450441609421, |
|
"grad_norm": 33.6424674987793, |
|
"learning_rate": 3.5656893777630686e-06, |
|
"logits/chosen": -2.208657741546631, |
|
"logits/rejected": -2.1544740200042725, |
|
"logps/chosen": -431.5694274902344, |
|
"logps/rejected": -502.0116271972656, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4970638751983643, |
|
"rewards/margins": 0.8648282885551453, |
|
"rewards/rejected": -2.361891984939575, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.43179587831207067, |
|
"grad_norm": 32.2934684753418, |
|
"learning_rate": 3.503246631034345e-06, |
|
"logits/chosen": -2.119847297668457, |
|
"logits/rejected": -2.133668899536133, |
|
"logps/chosen": -413.0341796875, |
|
"logps/rejected": -459.9708557128906, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.7010523080825806, |
|
"rewards/margins": 0.699297308921814, |
|
"rewards/rejected": -2.4003493785858154, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4396467124631992, |
|
"grad_norm": 20.624055862426758, |
|
"learning_rate": 3.440048671689219e-06, |
|
"logits/chosen": -2.2201478481292725, |
|
"logits/rejected": -2.28852915763855, |
|
"logps/chosen": -394.3067321777344, |
|
"logps/rejected": -428.1966857910156, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.156294822692871, |
|
"rewards/margins": 0.6579602360725403, |
|
"rewards/rejected": -1.8142551183700562, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4474975466143278, |
|
"grad_norm": 20.51217269897461, |
|
"learning_rate": 3.3761430731705056e-06, |
|
"logits/chosen": -2.342036485671997, |
|
"logits/rejected": -2.3035025596618652, |
|
"logps/chosen": -397.56768798828125, |
|
"logps/rejected": -449.5596618652344, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1822260618209839, |
|
"rewards/margins": 0.667534589767456, |
|
"rewards/rejected": -1.84976065158844, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4553483807654563, |
|
"grad_norm": 23.517745971679688, |
|
"learning_rate": 3.311577941609604e-06, |
|
"logits/chosen": -2.2895524501800537, |
|
"logits/rejected": -2.30122447013855, |
|
"logps/chosen": -426.5897521972656, |
|
"logps/rejected": -487.896484375, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1464052200317383, |
|
"rewards/margins": 0.7059827446937561, |
|
"rewards/rejected": -1.8523880243301392, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4631992149165849, |
|
"grad_norm": 28.418771743774414, |
|
"learning_rate": 3.2464018796137157e-06, |
|
"logits/chosen": -2.184406042098999, |
|
"logits/rejected": -2.1148581504821777, |
|
"logps/chosen": -412.546875, |
|
"logps/rejected": -503.6065368652344, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.3471360206604004, |
|
"rewards/margins": 0.9848724603652954, |
|
"rewards/rejected": -2.3320083618164062, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.47105004906771347, |
|
"grad_norm": 30.563884735107422, |
|
"learning_rate": 3.1806639496793245e-06, |
|
"logits/chosen": -2.0617759227752686, |
|
"logits/rejected": -1.9668960571289062, |
|
"logps/chosen": -447.58984375, |
|
"logps/rejected": -517.9015502929688, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5449590682983398, |
|
"rewards/margins": 0.9296010732650757, |
|
"rewards/rejected": -2.474560260772705, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.47105004906771347, |
|
"eval_logits/chosen": -2.120598793029785, |
|
"eval_logits/rejected": -2.074557065963745, |
|
"eval_logps/chosen": -434.86480712890625, |
|
"eval_logps/rejected": -479.731201171875, |
|
"eval_loss": 0.5332732200622559, |
|
"eval_rewards/accuracies": 0.6875, |
|
"eval_rewards/chosen": -1.4647715091705322, |
|
"eval_rewards/margins": 0.7963771820068359, |
|
"eval_rewards/rejected": -2.261148691177368, |
|
"eval_runtime": 168.5149, |
|
"eval_samples_per_second": 11.868, |
|
"eval_steps_per_second": 0.498, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.478900883218842, |
|
"grad_norm": 26.9317626953125, |
|
"learning_rate": 3.114413637259484e-06, |
|
"logits/chosen": -2.065842628479004, |
|
"logits/rejected": -1.9007959365844727, |
|
"logps/chosen": -437.0047912597656, |
|
"logps/rejected": -493.7703552246094, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6189963817596436, |
|
"rewards/margins": 0.897415816783905, |
|
"rewards/rejected": -2.5164122581481934, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4867517173699706, |
|
"grad_norm": 30.88678741455078, |
|
"learning_rate": 3.0477008135127247e-06, |
|
"logits/chosen": -2.133183002471924, |
|
"logits/rejected": -2.0338778495788574, |
|
"logps/chosen": -457.9064025878906, |
|
"logps/rejected": -531.5266723632812, |
|
"loss": 0.5087, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.429966688156128, |
|
"rewards/margins": 0.9474767446517944, |
|
"rewards/rejected": -2.377443552017212, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4946025515210991, |
|
"grad_norm": 34.801639556884766, |
|
"learning_rate": 2.980575697761603e-06, |
|
"logits/chosen": -2.0099399089813232, |
|
"logits/rejected": -1.8623266220092773, |
|
"logps/chosen": -441.07757568359375, |
|
"logps/rejected": -508.0874938964844, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.6641613245010376, |
|
"rewards/margins": 1.0001566410064697, |
|
"rewards/rejected": -2.664318084716797, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5024533856722276, |
|
"grad_norm": 30.205976486206055, |
|
"learning_rate": 2.9130888196891755e-06, |
|
"logits/chosen": -2.0108351707458496, |
|
"logits/rejected": -1.890523910522461, |
|
"logps/chosen": -568.1267700195312, |
|
"logps/rejected": -614.1755981445312, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.448976993560791, |
|
"rewards/margins": 0.9926842451095581, |
|
"rewards/rejected": -3.4416611194610596, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5103042198233563, |
|
"grad_norm": 27.39600372314453, |
|
"learning_rate": 2.845290981301834e-06, |
|
"logits/chosen": -1.7695420980453491, |
|
"logits/rejected": -1.7348365783691406, |
|
"logps/chosen": -495.6388244628906, |
|
"logps/rejected": -598.6192016601562, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.325334072113037, |
|
"rewards/margins": 1.1762292385101318, |
|
"rewards/rejected": -3.501563310623169, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5181550539744848, |
|
"grad_norm": 28.21457862854004, |
|
"learning_rate": 2.7772332186871464e-06, |
|
"logits/chosen": -1.947697401046753, |
|
"logits/rejected": -1.838045358657837, |
|
"logps/chosen": -504.62628173828125, |
|
"logps/rejected": -588.11669921875, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0726189613342285, |
|
"rewards/margins": 0.9631049036979675, |
|
"rewards/rejected": -3.035723924636841, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5260058881256133, |
|
"grad_norm": 29.053319931030273, |
|
"learning_rate": 2.708966763595493e-06, |
|
"logits/chosen": -1.9613069295883179, |
|
"logits/rejected": -1.8020261526107788, |
|
"logps/chosen": -448.95977783203125, |
|
"logps/rejected": -519.1914672851562, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.724962830543518, |
|
"rewards/margins": 1.0086156129837036, |
|
"rewards/rejected": -2.7335782051086426, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5338567222767419, |
|
"grad_norm": 34.93812561035156, |
|
"learning_rate": 2.640543004874409e-06, |
|
"logits/chosen": -2.0338661670684814, |
|
"logits/rejected": -1.964261770248413, |
|
"logps/chosen": -492.92205810546875, |
|
"logps/rejected": -533.3572998046875, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7126522064208984, |
|
"rewards/margins": 0.9582921862602234, |
|
"rewards/rejected": -2.6709446907043457, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5417075564278705, |
|
"grad_norm": 23.211416244506836, |
|
"learning_rate": 2.572013449784671e-06, |
|
"logits/chosen": -1.9940426349639893, |
|
"logits/rejected": -1.881670594215393, |
|
"logps/chosen": -523.5638427734375, |
|
"logps/rejected": -588.251220703125, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.3426578044891357, |
|
"rewards/margins": 1.0034016370773315, |
|
"rewards/rejected": -3.3460593223571777, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.549558390578999, |
|
"grad_norm": 31.131181716918945, |
|
"learning_rate": 2.503429685227245e-06, |
|
"logits/chosen": -1.831365942955017, |
|
"logits/rejected": -1.7525676488876343, |
|
"logps/chosen": -545.9859619140625, |
|
"logps/rejected": -647.7813720703125, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.500175714492798, |
|
"rewards/margins": 1.1063227653503418, |
|
"rewards/rejected": -3.6064987182617188, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.549558390578999, |
|
"eval_logits/chosen": -2.004182815551758, |
|
"eval_logits/rejected": -1.9400309324264526, |
|
"eval_logps/chosen": -563.7943725585938, |
|
"eval_logps/rejected": -608.2110595703125, |
|
"eval_loss": 0.5244275331497192, |
|
"eval_rewards/accuracies": 0.7038690447807312, |
|
"eval_rewards/chosen": -2.754066228866577, |
|
"eval_rewards/margins": 0.7918809056282043, |
|
"eval_rewards/rejected": -3.5459470748901367, |
|
"eval_runtime": 179.3578, |
|
"eval_samples_per_second": 11.151, |
|
"eval_steps_per_second": 0.468, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5574092247301276, |
|
"grad_norm": 37.639991760253906, |
|
"learning_rate": 2.434843338910286e-06, |
|
"logits/chosen": -1.9917552471160889, |
|
"logits/rejected": -1.9698021411895752, |
|
"logps/chosen": -578.1214599609375, |
|
"logps/rejected": -614.1402587890625, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.914405584335327, |
|
"rewards/margins": 0.6746307015419006, |
|
"rewards/rejected": -3.589036464691162, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5652600588812562, |
|
"grad_norm": 25.04204750061035, |
|
"learning_rate": 2.3663060404854155e-06, |
|
"logits/chosen": -1.9311301708221436, |
|
"logits/rejected": -1.946319818496704, |
|
"logps/chosen": -533.4403076171875, |
|
"logps/rejected": -599.1284790039062, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5715718269348145, |
|
"rewards/margins": 0.9469249844551086, |
|
"rewards/rejected": -3.5184967517852783, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5731108930323847, |
|
"grad_norm": 30.593637466430664, |
|
"learning_rate": 2.2978693826825406e-06, |
|
"logits/chosen": -1.8591407537460327, |
|
"logits/rejected": -1.9342968463897705, |
|
"logps/chosen": -519.0078125, |
|
"logps/rejected": -567.246826171875, |
|
"loss": 0.5521, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.448857545852661, |
|
"rewards/margins": 0.8260825276374817, |
|
"rewards/rejected": -3.274940013885498, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5809617271835132, |
|
"grad_norm": 32.455841064453125, |
|
"learning_rate": 2.2295848824724612e-06, |
|
"logits/chosen": -2.01774263381958, |
|
"logits/rejected": -1.9122161865234375, |
|
"logps/chosen": -491.2975158691406, |
|
"logps/rejected": -555.1488037109375, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.1018691062927246, |
|
"rewards/margins": 0.892257571220398, |
|
"rewards/rejected": -2.994126796722412, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5888125613346418, |
|
"grad_norm": 19.341310501098633, |
|
"learning_rate": 2.1615039422865136e-06, |
|
"logits/chosen": -1.8771547079086304, |
|
"logits/rejected": -1.815799355506897, |
|
"logps/chosen": -499.349609375, |
|
"logps/rejected": -598.83935546875, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.9459705352783203, |
|
"rewards/margins": 1.2197866439819336, |
|
"rewards/rejected": -3.165757179260254, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5966633954857704, |
|
"grad_norm": 55.24733352661133, |
|
"learning_rate": 2.0936778113224253e-06, |
|
"logits/chosen": -1.9215799570083618, |
|
"logits/rejected": -1.8155832290649414, |
|
"logps/chosen": -542.361328125, |
|
"logps/rejected": -551.7185668945312, |
|
"loss": 0.5494, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0699851512908936, |
|
"rewards/margins": 0.8438289761543274, |
|
"rewards/rejected": -2.913814067840576, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6045142296368989, |
|
"grad_norm": 37.531490325927734, |
|
"learning_rate": 2.0261575469655304e-06, |
|
"logits/chosen": -1.9638067483901978, |
|
"logits/rejected": -1.8803679943084717, |
|
"logps/chosen": -466.53143310546875, |
|
"logps/rejected": -552.6204833984375, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.0402417182922363, |
|
"rewards/margins": 1.0711690187454224, |
|
"rewards/rejected": -3.1114110946655273, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6123650637880275, |
|
"grad_norm": 22.25844383239746, |
|
"learning_rate": 1.9589939763543693e-06, |
|
"logits/chosen": -1.8626676797866821, |
|
"logits/rejected": -1.8624019622802734, |
|
"logps/chosen": -464.10333251953125, |
|
"logps/rejected": -532.6005249023438, |
|
"loss": 0.5502, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.867743730545044, |
|
"rewards/margins": 0.8303905725479126, |
|
"rewards/rejected": -2.698134183883667, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.620215897939156, |
|
"grad_norm": 28.578536987304688, |
|
"learning_rate": 1.8922376581196107e-06, |
|
"logits/chosen": -2.015662670135498, |
|
"logits/rejected": -1.9723193645477295, |
|
"logps/chosen": -475.9444885253906, |
|
"logps/rejected": -536.0194091796875, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.8610671758651733, |
|
"rewards/margins": 0.9107308387756348, |
|
"rewards/rejected": -2.7717981338500977, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6280667320902846, |
|
"grad_norm": 21.485143661499023, |
|
"learning_rate": 1.8259388443250993e-06, |
|
"logits/chosen": -2.004772663116455, |
|
"logits/rejected": -1.8513364791870117, |
|
"logps/chosen": -469.4261779785156, |
|
"logps/rejected": -537.4896240234375, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.004453659057617, |
|
"rewards/margins": 0.9301109313964844, |
|
"rewards/rejected": -2.9345641136169434, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6280667320902846, |
|
"eval_logits/chosen": -1.9647265672683716, |
|
"eval_logits/rejected": -1.909649133682251, |
|
"eval_logps/chosen": -498.1996765136719, |
|
"eval_logps/rejected": -547.5287475585938, |
|
"eval_loss": 0.517790675163269, |
|
"eval_rewards/accuracies": 0.7008928656578064, |
|
"eval_rewards/chosen": -2.0981194972991943, |
|
"eval_rewards/margins": 0.8410041332244873, |
|
"eval_rewards/rejected": -2.9391238689422607, |
|
"eval_runtime": 177.4176, |
|
"eval_samples_per_second": 11.273, |
|
"eval_steps_per_second": 0.473, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6359175662414132, |
|
"grad_norm": 43.05495071411133, |
|
"learning_rate": 1.760147442639679e-06, |
|
"logits/chosen": -1.7117631435394287, |
|
"logits/rejected": -1.8081023693084717, |
|
"logps/chosen": -471.0235290527344, |
|
"logps/rejected": -578.5651245117188, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.020402193069458, |
|
"rewards/margins": 1.2799599170684814, |
|
"rewards/rejected": -3.3003621101379395, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6437684003925417, |
|
"grad_norm": 25.53011131286621, |
|
"learning_rate": 1.6949129787682628e-06, |
|
"logits/chosen": -1.8636391162872314, |
|
"logits/rejected": -1.7885582447052002, |
|
"logps/chosen": -535.2430419921875, |
|
"logps/rejected": -592.5420532226562, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.181544542312622, |
|
"rewards/margins": 1.0555063486099243, |
|
"rewards/rejected": -3.2370505332946777, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6516192345436702, |
|
"grad_norm": 32.84662628173828, |
|
"learning_rate": 1.6302845591704348e-06, |
|
"logits/chosen": -1.7528541088104248, |
|
"logits/rejected": -1.919858694076538, |
|
"logps/chosen": -471.3095703125, |
|
"logps/rejected": -554.0218505859375, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.969342589378357, |
|
"rewards/margins": 0.9811599850654602, |
|
"rewards/rejected": -2.950502872467041, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6594700686947988, |
|
"grad_norm": 37.13783264160156, |
|
"learning_rate": 1.5663108340946465e-06, |
|
"logits/chosen": -2.004257917404175, |
|
"logits/rejected": -1.7805702686309814, |
|
"logps/chosen": -476.3814392089844, |
|
"logps/rejected": -552.9000244140625, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8513377904891968, |
|
"rewards/margins": 0.9407541155815125, |
|
"rewards/rejected": -2.7920918464660645, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6673209028459274, |
|
"grad_norm": 66.06834411621094, |
|
"learning_rate": 1.5030399609558364e-06, |
|
"logits/chosen": -1.9352130889892578, |
|
"logits/rejected": -1.8171558380126953, |
|
"logps/chosen": -489.35052490234375, |
|
"logps/rejected": -586.8991088867188, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1446428298950195, |
|
"rewards/margins": 1.0632911920547485, |
|
"rewards/rejected": -3.2079339027404785, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6751717369970559, |
|
"grad_norm": 32.76154708862305, |
|
"learning_rate": 1.4405195680840357e-06, |
|
"logits/chosen": -1.8590924739837646, |
|
"logits/rejected": -1.8191407918930054, |
|
"logps/chosen": -515.1978759765625, |
|
"logps/rejected": -582.213623046875, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.157810926437378, |
|
"rewards/margins": 0.9910067319869995, |
|
"rewards/rejected": -3.148818016052246, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6830225711481845, |
|
"grad_norm": 32.92315673828125, |
|
"learning_rate": 1.378796718871252e-06, |
|
"logits/chosen": -1.9760971069335938, |
|
"logits/rejected": -1.8940002918243408, |
|
"logps/chosen": -500.63360595703125, |
|
"logps/rejected": -580.5349731445312, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.069706916809082, |
|
"rewards/margins": 1.104552984237671, |
|
"rewards/rejected": -3.174259662628174, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6908734052993131, |
|
"grad_norm": 27.977630615234375, |
|
"learning_rate": 1.3179178763436302e-06, |
|
"logits/chosen": -1.713399887084961, |
|
"logits/rejected": -1.5991706848144531, |
|
"logps/chosen": -485.16693115234375, |
|
"logps/rejected": -589.6981201171875, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.5337507724761963, |
|
"rewards/margins": 0.9967278242111206, |
|
"rewards/rejected": -3.5304782390594482, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6987242394504416, |
|
"grad_norm": 46.519187927246094, |
|
"learning_rate": 1.2579288681855364e-06, |
|
"logits/chosen": -1.8697153329849243, |
|
"logits/rejected": -1.7676079273223877, |
|
"logps/chosen": -555.0260620117188, |
|
"logps/rejected": -671.7311401367188, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.7753043174743652, |
|
"rewards/margins": 1.084364414215088, |
|
"rewards/rejected": -3.859668731689453, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7065750736015701, |
|
"grad_norm": 31.31684684753418, |
|
"learning_rate": 1.1988748522419163e-06, |
|
"logits/chosen": -1.9314721822738647, |
|
"logits/rejected": -1.8384710550308228, |
|
"logps/chosen": -595.1455078125, |
|
"logps/rejected": -668.7490234375, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.979123592376709, |
|
"rewards/margins": 0.9926818609237671, |
|
"rewards/rejected": -3.9718050956726074, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7065750736015701, |
|
"eval_logits/chosen": -1.8656275272369385, |
|
"eval_logits/rejected": -1.7931705713272095, |
|
"eval_logps/chosen": -577.5418701171875, |
|
"eval_logps/rejected": -637.5369873046875, |
|
"eval_loss": 0.5191683173179626, |
|
"eval_rewards/accuracies": 0.7008928656578064, |
|
"eval_rewards/chosen": -2.891542434692383, |
|
"eval_rewards/margins": 0.9476642608642578, |
|
"eval_rewards/rejected": -3.8392069339752197, |
|
"eval_runtime": 255.7835, |
|
"eval_samples_per_second": 7.819, |
|
"eval_steps_per_second": 0.328, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7144259077526988, |
|
"grad_norm": 60.76858139038086, |
|
"learning_rate": 1.1408002825248842e-06, |
|
"logits/chosen": -1.8335750102996826, |
|
"logits/rejected": -1.7328205108642578, |
|
"logps/chosen": -567.2271728515625, |
|
"logps/rejected": -646.0481567382812, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.787205219268799, |
|
"rewards/margins": 1.058345079421997, |
|
"rewards/rejected": -3.845550537109375, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7222767419038273, |
|
"grad_norm": 42.74496078491211, |
|
"learning_rate": 1.0837488757501369e-06, |
|
"logits/chosen": -1.7031282186508179, |
|
"logits/rejected": -1.6774184703826904, |
|
"logps/chosen": -532.3548583984375, |
|
"logps/rejected": -636.7594604492188, |
|
"loss": 0.4887, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.610095262527466, |
|
"rewards/margins": 1.1858711242675781, |
|
"rewards/rejected": -3.795966386795044, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7301275760549558, |
|
"grad_norm": 32.94953155517578, |
|
"learning_rate": 1.027763578428379e-06, |
|
"logits/chosen": -1.7176014184951782, |
|
"logits/rejected": -1.7608709335327148, |
|
"logps/chosen": -563.7265625, |
|
"logps/rejected": -646.8751220703125, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.821300983428955, |
|
"rewards/margins": 1.0197052955627441, |
|
"rewards/rejected": -3.8410065174102783, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7379784102060843, |
|
"grad_norm": 74.49922943115234, |
|
"learning_rate": 9.728865345365379e-07, |
|
"logits/chosen": -1.7150166034698486, |
|
"logits/rejected": -1.5209593772888184, |
|
"logps/chosen": -534.5591430664062, |
|
"logps/rejected": -621.5565185546875, |
|
"loss": 0.5418, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.711378812789917, |
|
"rewards/margins": 1.1061863899230957, |
|
"rewards/rejected": -3.8175652027130127, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.745829244357213, |
|
"grad_norm": 27.46148681640625, |
|
"learning_rate": 9.191590537930975e-07, |
|
"logits/chosen": -1.7130823135375977, |
|
"logits/rejected": -1.638779878616333, |
|
"logps/chosen": -529.4462280273438, |
|
"logps/rejected": -603.8697509765625, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.627434015274048, |
|
"rewards/margins": 1.0126179456710815, |
|
"rewards/rejected": -3.6400516033172607, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7536800785083415, |
|
"grad_norm": 21.87665367126465, |
|
"learning_rate": 8.666215805614373e-07, |
|
"logits/chosen": -1.7968714237213135, |
|
"logits/rejected": -1.8486363887786865, |
|
"logps/chosen": -504.91571044921875, |
|
"logps/rejected": -589.1393432617188, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.2802155017852783, |
|
"rewards/margins": 1.0134499073028564, |
|
"rewards/rejected": -3.2936654090881348, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.76153091265947, |
|
"grad_norm": 29.431264877319336, |
|
"learning_rate": 8.153136634045844e-07, |
|
"logits/chosen": -1.9010169506072998, |
|
"logits/rejected": -1.6634715795516968, |
|
"logps/chosen": -493.634765625, |
|
"logps/rejected": -557.65380859375, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.152984619140625, |
|
"rewards/margins": 1.0319383144378662, |
|
"rewards/rejected": -3.184922933578491, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7693817468105987, |
|
"grad_norm": 41.45183181762695, |
|
"learning_rate": 7.652739253142915e-07, |
|
"logits/chosen": -1.9328157901763916, |
|
"logits/rejected": -1.7516534328460693, |
|
"logps/chosen": -538.4470825195312, |
|
"logps/rejected": -577.069580078125, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.125819683074951, |
|
"rewards/margins": 1.0004959106445312, |
|
"rewards/rejected": -3.1263155937194824, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7772325809617272, |
|
"grad_norm": 21.71674346923828, |
|
"learning_rate": 7.165400346368648e-07, |
|
"logits/chosen": -1.9481573104858398, |
|
"logits/rejected": -1.8963590860366821, |
|
"logps/chosen": -547.48486328125, |
|
"logps/rejected": -585.6912231445312, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.2814033031463623, |
|
"rewards/margins": 0.8729672431945801, |
|
"rewards/rejected": -3.1543705463409424, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7850834151128557, |
|
"grad_norm": 60.18208312988281, |
|
"learning_rate": 6.691486767176092e-07, |
|
"logits/chosen": -1.7295516729354858, |
|
"logits/rejected": -1.773970365524292, |
|
"logps/chosen": -467.82049560546875, |
|
"logps/rejected": -562.5482177734375, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.1015613079071045, |
|
"rewards/margins": 0.9576795697212219, |
|
"rewards/rejected": -3.0592408180236816, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7850834151128557, |
|
"eval_logits/chosen": -1.991379737854004, |
|
"eval_logits/rejected": -1.937352180480957, |
|
"eval_logps/chosen": -498.16119384765625, |
|
"eval_logps/rejected": -545.6578979492188, |
|
"eval_loss": 0.5102471709251404, |
|
"eval_rewards/accuracies": 0.7008928656578064, |
|
"eval_rewards/chosen": -2.0977351665496826, |
|
"eval_rewards/margins": 0.8226803541183472, |
|
"eval_rewards/rejected": -2.9204154014587402, |
|
"eval_runtime": 248.8844, |
|
"eval_samples_per_second": 8.036, |
|
"eval_steps_per_second": 0.338, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7929342492639843, |
|
"grad_norm": 27.0752010345459, |
|
"learning_rate": 6.231355262852529e-07, |
|
"logits/chosen": -1.8228180408477783, |
|
"logits/rejected": -1.728371024131775, |
|
"logps/chosen": -497.906982421875, |
|
"logps/rejected": -574.6722412109375, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.1114232540130615, |
|
"rewards/margins": 1.0526831150054932, |
|
"rewards/rejected": -3.1641063690185547, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8007850834151129, |
|
"grad_norm": 40.453643798828125, |
|
"learning_rate": 5.785352205971275e-07, |
|
"logits/chosen": -1.8827228546142578, |
|
"logits/rejected": -1.8348219394683838, |
|
"logps/chosen": -479.0231018066406, |
|
"logps/rejected": -544.406982421875, |
|
"loss": 0.4717, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9858747720718384, |
|
"rewards/margins": 0.8169358372688293, |
|
"rewards/rejected": -2.8028104305267334, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8086359175662414, |
|
"grad_norm": 26.58576774597168, |
|
"learning_rate": 5.353813333653287e-07, |
|
"logits/chosen": -1.9306774139404297, |
|
"logits/rejected": -1.9138189554214478, |
|
"logps/chosen": -529.3744506835938, |
|
"logps/rejected": -577.8673095703125, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.1622471809387207, |
|
"rewards/margins": 0.9140118360519409, |
|
"rewards/rejected": -3.076258659362793, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.81648675171737, |
|
"grad_norm": 23.61007308959961, |
|
"learning_rate": 4.937063494834774e-07, |
|
"logits/chosen": -1.814344048500061, |
|
"logits/rejected": -1.6967451572418213, |
|
"logps/chosen": -507.7666015625, |
|
"logps/rejected": -598.0667724609375, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.145608425140381, |
|
"rewards/margins": 0.9707077741622925, |
|
"rewards/rejected": -3.116316080093384, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8243375858684985, |
|
"grad_norm": 28.008739471435547, |
|
"learning_rate": 4.5354164057310857e-07, |
|
"logits/chosen": -1.8821042776107788, |
|
"logits/rejected": -1.7559188604354858, |
|
"logps/chosen": -465.9667053222656, |
|
"logps/rejected": -576.3198852539062, |
|
"loss": 0.5257, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.1245594024658203, |
|
"rewards/margins": 1.131911039352417, |
|
"rewards/rejected": -3.256470203399658, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8321884200196271, |
|
"grad_norm": 23.431196212768555, |
|
"learning_rate": 4.1491744136810066e-07, |
|
"logits/chosen": -1.8241643905639648, |
|
"logits/rejected": -1.5898910760879517, |
|
"logps/chosen": -494.38006591796875, |
|
"logps/rejected": -608.50048828125, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.2372994422912598, |
|
"rewards/margins": 1.0501940250396729, |
|
"rewards/rejected": -3.2874934673309326, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8400392541707556, |
|
"grad_norm": 36.24497604370117, |
|
"learning_rate": 3.7786282695491313e-07, |
|
"logits/chosen": -1.7533372640609741, |
|
"logits/rejected": -1.780310034751892, |
|
"logps/chosen": -521.2637939453125, |
|
"logps/rejected": -594.5169067382812, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.1260104179382324, |
|
"rewards/margins": 1.0049241781234741, |
|
"rewards/rejected": -3.130934476852417, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8478900883218842, |
|
"grad_norm": 28.115896224975586, |
|
"learning_rate": 3.4240569088577564e-07, |
|
"logits/chosen": -1.9627529382705688, |
|
"logits/rejected": -1.9232120513916016, |
|
"logps/chosen": -521.6199951171875, |
|
"logps/rejected": -590.34619140625, |
|
"loss": 0.5159, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.1085124015808105, |
|
"rewards/margins": 1.0188482999801636, |
|
"rewards/rejected": -3.1273605823516846, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8557409224730128, |
|
"grad_norm": 25.046926498413086, |
|
"learning_rate": 3.0857272418129136e-07, |
|
"logits/chosen": -1.8483200073242188, |
|
"logits/rejected": -1.8257999420166016, |
|
"logps/chosen": -538.3873901367188, |
|
"logps/rejected": -620.02978515625, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.2884747982025146, |
|
"rewards/margins": 1.0311329364776611, |
|
"rewards/rejected": -3.3196074962615967, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8635917566241413, |
|
"grad_norm": 25.578903198242188, |
|
"learning_rate": 2.7638939523827956e-07, |
|
"logits/chosen": -1.771712064743042, |
|
"logits/rejected": -1.6592738628387451, |
|
"logps/chosen": -536.8753662109375, |
|
"logps/rejected": -635.494384765625, |
|
"loss": 0.5223, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.2466344833374023, |
|
"rewards/margins": 1.0864031314849854, |
|
"rewards/rejected": -3.3330376148223877, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8635917566241413, |
|
"eval_logits/chosen": -1.9598368406295776, |
|
"eval_logits/rejected": -1.8985047340393066, |
|
"eval_logps/chosen": -510.208984375, |
|
"eval_logps/rejected": -564.5363159179688, |
|
"eval_loss": 0.5109513401985168, |
|
"eval_rewards/accuracies": 0.6934523582458496, |
|
"eval_rewards/chosen": -2.218212604522705, |
|
"eval_rewards/margins": 0.8909867405891418, |
|
"eval_rewards/rejected": -3.1092000007629395, |
|
"eval_runtime": 178.9794, |
|
"eval_samples_per_second": 11.174, |
|
"eval_steps_per_second": 0.469, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8714425907752699, |
|
"grad_norm": 24.112642288208008, |
|
"learning_rate": 2.4587993065795983e-07, |
|
"logits/chosen": -1.8837692737579346, |
|
"logits/rejected": -1.7314777374267578, |
|
"logps/chosen": -480.4740295410156, |
|
"logps/rejected": -563.0213623046875, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.0834219455718994, |
|
"rewards/margins": 1.149505853652954, |
|
"rewards/rejected": -3.2329280376434326, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8792934249263984, |
|
"grad_norm": 24.728294372558594, |
|
"learning_rate": 2.170672970089291e-07, |
|
"logits/chosen": -1.8168354034423828, |
|
"logits/rejected": -1.7316901683807373, |
|
"logps/chosen": -536.4750366210938, |
|
"logps/rejected": -631.4368896484375, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.207212448120117, |
|
"rewards/margins": 1.1604888439178467, |
|
"rewards/rejected": -3.367701768875122, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.887144259077527, |
|
"grad_norm": 34.55753707885742, |
|
"learning_rate": 1.8997318353864673e-07, |
|
"logits/chosen": -1.887563943862915, |
|
"logits/rejected": -1.5958278179168701, |
|
"logps/chosen": -506.45294189453125, |
|
"logps/rejected": -567.0094604492188, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.201402187347412, |
|
"rewards/margins": 1.0240195989608765, |
|
"rewards/rejected": -3.225421905517578, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8949950932286556, |
|
"grad_norm": 37.284019470214844, |
|
"learning_rate": 1.6461798584644944e-07, |
|
"logits/chosen": -1.940473198890686, |
|
"logits/rejected": -1.8656337261199951, |
|
"logps/chosen": -518.56494140625, |
|
"logps/rejected": -582.9520874023438, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.122587203979492, |
|
"rewards/margins": 1.1023415327072144, |
|
"rewards/rejected": -3.224928617477417, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9028459273797841, |
|
"grad_norm": 48.473114013671875, |
|
"learning_rate": 1.4102079053038454e-07, |
|
"logits/chosen": -1.9566850662231445, |
|
"logits/rejected": -1.7725406885147095, |
|
"logps/chosen": -515.0001220703125, |
|
"logps/rejected": -587.2335205078125, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.1063010692596436, |
|
"rewards/margins": 1.121829628944397, |
|
"rewards/rejected": -3.22813081741333, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9106967615309126, |
|
"grad_norm": 23.98328399658203, |
|
"learning_rate": 1.1919936081941585e-07, |
|
"logits/chosen": -1.9583518505096436, |
|
"logits/rejected": -1.8895307779312134, |
|
"logps/chosen": -528.2996215820312, |
|
"logps/rejected": -599.0931396484375, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.383150577545166, |
|
"rewards/margins": 0.8790243268013, |
|
"rewards/rejected": -3.2621750831604004, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9185475956820413, |
|
"grad_norm": 30.729877471923828, |
|
"learning_rate": 9.917012320182245e-08, |
|
"logits/chosen": -1.8442468643188477, |
|
"logits/rejected": -1.7293345928192139, |
|
"logps/chosen": -530.6605224609375, |
|
"logps/rejected": -573.9486083984375, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.3617968559265137, |
|
"rewards/margins": 0.8581873774528503, |
|
"rewards/rejected": -3.2199840545654297, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9263984298331698, |
|
"grad_norm": 29.362680435180664, |
|
"learning_rate": 8.094815505985315e-08, |
|
"logits/chosen": -1.898097276687622, |
|
"logits/rejected": -1.7420837879180908, |
|
"logps/chosen": -498.27874755859375, |
|
"logps/rejected": -638.7017211914062, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2651729583740234, |
|
"rewards/margins": 1.1845793724060059, |
|
"rewards/rejected": -3.44975209236145, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.9342492639842983, |
|
"grad_norm": 36.354610443115234, |
|
"learning_rate": 6.454717331994542e-08, |
|
"logits/chosen": -1.9377390146255493, |
|
"logits/rejected": -1.8412069082260132, |
|
"logps/chosen": -528.7586059570312, |
|
"logps/rejected": -617.6361083984375, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.173046350479126, |
|
"rewards/margins": 1.1210204362869263, |
|
"rewards/rejected": -3.2940666675567627, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9421000981354269, |
|
"grad_norm": 37.27730178833008, |
|
"learning_rate": 4.9979524127052595e-08, |
|
"logits/chosen": -1.7879035472869873, |
|
"logits/rejected": -1.8019065856933594, |
|
"logps/chosen": -485.499755859375, |
|
"logps/rejected": -587.9569091796875, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.1999027729034424, |
|
"rewards/margins": 1.0023242235183716, |
|
"rewards/rejected": -3.2022266387939453, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9421000981354269, |
|
"eval_logits/chosen": -1.9679957628250122, |
|
"eval_logits/rejected": -1.9060754776000977, |
|
"eval_logps/chosen": -509.53515625, |
|
"eval_logps/rejected": -565.4013061523438, |
|
"eval_loss": 0.5110836029052734, |
|
"eval_rewards/accuracies": 0.699404776096344, |
|
"eval_rewards/chosen": -2.211474895477295, |
|
"eval_rewards/margins": 0.9063741564750671, |
|
"eval_rewards/rejected": -3.117849349975586, |
|
"eval_runtime": 303.5083, |
|
"eval_samples_per_second": 6.59, |
|
"eval_steps_per_second": 0.277, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9499509322865555, |
|
"grad_norm": 30.429931640625, |
|
"learning_rate": 3.725617355085476e-08, |
|
"logits/chosen": -1.7728469371795654, |
|
"logits/rejected": -1.6203314065933228, |
|
"logps/chosen": -476.9127502441406, |
|
"logps/rejected": -577.5582275390625, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.16386079788208, |
|
"rewards/margins": 1.1731908321380615, |
|
"rewards/rejected": -3.3370513916015625, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.957801766437684, |
|
"grad_norm": 42.811119079589844, |
|
"learning_rate": 2.63866993308437e-08, |
|
"logits/chosen": -1.765027642250061, |
|
"logits/rejected": -1.6837198734283447, |
|
"logps/chosen": -484.285400390625, |
|
"logps/rejected": -537.3614501953125, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.178112506866455, |
|
"rewards/margins": 0.8579233884811401, |
|
"rewards/rejected": -3.0360360145568848, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9656526005888125, |
|
"grad_norm": 28.079404830932617, |
|
"learning_rate": 1.737928366650099e-08, |
|
"logits/chosen": -1.9261119365692139, |
|
"logits/rejected": -1.853053092956543, |
|
"logps/chosen": -547.2498779296875, |
|
"logps/rejected": -600.8333129882812, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.244377851486206, |
|
"rewards/margins": 1.105455756187439, |
|
"rewards/rejected": -3.3498339653015137, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9735034347399412, |
|
"grad_norm": 29.11058807373047, |
|
"learning_rate": 1.0240707057995735e-08, |
|
"logits/chosen": -1.7693697214126587, |
|
"logits/rejected": -1.5242459774017334, |
|
"logps/chosen": -488.11724853515625, |
|
"logps/rejected": -578.2257690429688, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2136459350585938, |
|
"rewards/margins": 0.9566876292228699, |
|
"rewards/rejected": -3.1703333854675293, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9813542688910697, |
|
"grad_norm": 24.037424087524414, |
|
"learning_rate": 4.976343202034717e-09, |
|
"logits/chosen": -1.754732370376587, |
|
"logits/rejected": -1.6457884311676025, |
|
"logps/chosen": -478.7969665527344, |
|
"logps/rejected": -566.3361206054688, |
|
"loss": 0.4716, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.199491024017334, |
|
"rewards/margins": 0.9993401765823364, |
|
"rewards/rejected": -3.198831081390381, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9892051030421982, |
|
"grad_norm": 33.65019607543945, |
|
"learning_rate": 1.5901549467139953e-09, |
|
"logits/chosen": -1.9445594549179077, |
|
"logits/rejected": -1.8698110580444336, |
|
"logps/chosen": -522.216552734375, |
|
"logps/rejected": -589.295654296875, |
|
"loss": 0.5043, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.2183516025543213, |
|
"rewards/margins": 0.9545730352401733, |
|
"rewards/rejected": -3.172924757003784, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9970559371933267, |
|
"grad_norm": 39.74230194091797, |
|
"learning_rate": 8.469130840960127e-11, |
|
"logits/chosen": -1.7422492504119873, |
|
"logits/rejected": -1.6215105056762695, |
|
"logps/chosen": -489.52642822265625, |
|
"logps/rejected": -590.7807006835938, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.154064655303955, |
|
"rewards/margins": 1.0220654010772705, |
|
"rewards/rejected": -3.176130533218384, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9994111874386653, |
|
"step": 1273, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.0132, |
|
"train_samples_per_second": 4647380.664, |
|
"train_steps_per_second": 96772.918 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1273, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|