|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 1353, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022172949002217297, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.9512195121951218e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5637989044189453, |
|
"logits/rejected": -2.5734333992004395, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.8527, |
|
"nll_loss": 1.088592767715454, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04434589800443459, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.9024390243902435e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5235965251922607, |
|
"logits/rejected": -2.5694117546081543, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.6216, |
|
"nll_loss": 1.042040467262268, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06651884700665188, |
|
"grad_norm": 0.7492002248764038, |
|
"learning_rate": 5.853658536585366e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.439809560775757, |
|
"logits/rejected": -2.4339804649353027, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3856, |
|
"nll_loss": 1.040069818496704, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08869179600886919, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.804878048780487e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5076301097869873, |
|
"logits/rejected": -2.538536310195923, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5238, |
|
"nll_loss": 1.0452662706375122, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11086474501108648, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.999071182730533e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5563275814056396, |
|
"logits/rejected": -2.5710463523864746, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.7136, |
|
"nll_loss": 0.9837453961372375, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13303769401330376, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.995861010152277e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.422978162765503, |
|
"logits/rejected": -2.497622013092041, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4346, |
|
"nll_loss": 1.1125876903533936, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15521064301552107, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.990359855463492e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.4739160537719727, |
|
"logits/rejected": -2.5015368461608887, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.7054, |
|
"nll_loss": 1.0281422138214111, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17738359201773837, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.982570872689543e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5495190620422363, |
|
"logits/rejected": -2.5661299228668213, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.9447, |
|
"nll_loss": 1.0821787118911743, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19955654101995565, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.972498527556375e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.539638042449951, |
|
"logits/rejected": -2.565432071685791, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4908, |
|
"nll_loss": 1.0412414073944092, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22172949002217296, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.960148594930148e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5148587226867676, |
|
"logits/rejected": -2.531787395477295, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3802, |
|
"nll_loss": 1.0808457136154175, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.945528155506268e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.4936861991882324, |
|
"logits/rejected": -2.5372276306152344, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4052, |
|
"nll_loss": 1.0503720045089722, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2660753880266075, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.928645591749765e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5514473915100098, |
|
"logits/rejected": -2.5984888076782227, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.6079, |
|
"nll_loss": 1.0542423725128174, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.28824833702882485, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.909510583089285e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.513597011566162, |
|
"logits/rejected": -2.509930372238159, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4307, |
|
"nll_loss": 0.9876394271850586, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.31042128603104213, |
|
"grad_norm": 0.5560552477836609, |
|
"learning_rate": 7.888134100367517e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5325217247009277, |
|
"logits/rejected": -2.5615830421447754, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5637, |
|
"nll_loss": 1.090905785560608, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3325942350332594, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.864528399551163e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5349364280700684, |
|
"logits/rejected": -2.555640459060669, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2739, |
|
"nll_loss": 0.984653115272522, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35476718403547675, |
|
"grad_norm": 0.49410519003868103, |
|
"learning_rate": 7.83870701470413e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.579796075820923, |
|
"logits/rejected": -2.6130154132843018, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.6816, |
|
"nll_loss": 0.9771108627319336, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.376940133037694, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.810684750227926e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5355019569396973, |
|
"logits/rejected": -2.5664353370666504, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4045, |
|
"nll_loss": 0.9972349405288696, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3991130820399113, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.780477672373715e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.610077381134033, |
|
"logits/rejected": -2.6662240028381348, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3857, |
|
"nll_loss": 0.9627612829208374, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4212860310421286, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.748103100030918e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5606350898742676, |
|
"logits/rejected": -2.599088430404663, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.7501, |
|
"nll_loss": 0.9361652135848999, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4434589800443459, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.713579594797617e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5864617824554443, |
|
"logits/rejected": -2.6177988052368164, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5321, |
|
"nll_loss": 0.9839666485786438, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4656319290465632, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.676926950338484e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5560379028320312, |
|
"logits/rejected": -2.6088876724243164, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4582, |
|
"nll_loss": 0.9833539724349976, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.638166181036278e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6120612621307373, |
|
"logits/rejected": -2.648507595062256, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.7413, |
|
"nll_loss": 0.9990829229354858, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5099778270509978, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.597319509943522e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5437393188476562, |
|
"logits/rejected": -2.5923430919647217, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3477, |
|
"nll_loss": 0.9254695177078247, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.532150776053215, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.554410356041128e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5858166217803955, |
|
"logits/rejected": -2.645839214324951, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.8364, |
|
"nll_loss": 0.9360870122909546, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5543237250554324, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.509463320811409e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.603842258453369, |
|
"logits/rejected": -2.65669584274292, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.9066, |
|
"nll_loss": 0.8704744577407837, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5764966740576497, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.462504174133093e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5409696102142334, |
|
"logits/rejected": -2.58134388923645, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4614, |
|
"nll_loss": 0.8922187685966492, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5986696230598669, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.413559839506442e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6056294441223145, |
|
"logits/rejected": -2.59102725982666, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4101, |
|
"nll_loss": 0.9519271850585938, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6208425720620843, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.362658378616977e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.518075942993164, |
|
"logits/rejected": -2.5656614303588867, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.7219, |
|
"nll_loss": 0.9626830816268921, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6430155210643016, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.309828975246615e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.478224277496338, |
|
"logits/rejected": -2.4818527698516846, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4698, |
|
"nll_loss": 0.9482153654098511, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6651884700665188, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.255101918541482e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5367703437805176, |
|
"logits/rejected": -2.618227005004883, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3779, |
|
"nll_loss": 0.9249935150146484, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6873614190687362, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.198508585645966e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.498709201812744, |
|
"logits/rejected": -2.5306169986724854, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4247, |
|
"nll_loss": 0.9907130002975464, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7095343680709535, |
|
"grad_norm": 0.5340009331703186, |
|
"learning_rate": 7.140081423712985e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.576976776123047, |
|
"logits/rejected": -2.5837340354919434, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3265, |
|
"nll_loss": 0.9744553565979004, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.079853931300778e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5235087871551514, |
|
"logits/rejected": -2.537680149078369, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3516, |
|
"nll_loss": 0.9415532350540161, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.753880266075388, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.017860639166877e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.561636447906494, |
|
"logits/rejected": -2.6029891967773438, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.0444, |
|
"nll_loss": 0.87201327085495, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7760532150776053, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.95413709047029e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6087021827697754, |
|
"logits/rejected": -2.631678342819214, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.241, |
|
"nll_loss": 0.9579289555549622, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7982261640798226, |
|
"grad_norm": 0.6355442404747009, |
|
"learning_rate": 6.888719820393224e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5300655364990234, |
|
"logits/rejected": -2.538252592086792, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.1996, |
|
"nll_loss": 0.9421980977058411, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8203991130820399, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.821646335194051e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.4321510791778564, |
|
"logits/rejected": -2.488974094390869, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5151, |
|
"nll_loss": 0.9701131582260132, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8425720620842572, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.752955090703516e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5075130462646484, |
|
"logits/rejected": -2.576809883117676, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.6486, |
|
"nll_loss": 0.8745073080062866, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8647450110864745, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.682685470276513e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.609452247619629, |
|
"logits/rejected": -2.6642494201660156, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.6961, |
|
"nll_loss": 0.8854317665100098, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8869179600886918, |
|
"grad_norm": 0.49902427196502686, |
|
"learning_rate": 6.610877762212086e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5146374702453613, |
|
"logits/rejected": -2.529111385345459, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2123, |
|
"nll_loss": 0.9021833539009094, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.537573136654582e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5666518211364746, |
|
"logits/rejected": -2.594228744506836, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4251, |
|
"nll_loss": 0.9093640446662903, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9312638580931264, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.462813621989207e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.551781177520752, |
|
"logits/rejected": -2.5560076236724854, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3258, |
|
"nll_loss": 0.9460923075675964, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9534368070953437, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.386642080745528e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5597198009490967, |
|
"logits/rejected": -2.5898067951202393, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2546, |
|
"nll_loss": 0.9564792513847351, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 0.4311073124408722, |
|
"learning_rate": 6.30910218502272e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6047396659851074, |
|
"logits/rejected": -2.6136045455932617, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3434, |
|
"nll_loss": 0.96406489610672, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9977827050997783, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.230238391450653e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.598021984100342, |
|
"logits/rejected": -2.635017156600952, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5232, |
|
"nll_loss": 0.9541953802108765, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.0199556541019956, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.150095915701193e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.572929859161377, |
|
"logits/rejected": -2.6330859661102295, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4393, |
|
"nll_loss": 0.9072533845901489, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.042128603104213, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.06872070656429e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.659424066543579, |
|
"logits/rejected": -2.665045976638794, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2597, |
|
"nll_loss": 0.9222477674484253, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.06430155210643, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.986159419603766e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5539708137512207, |
|
"logits/rejected": -2.578723430633545, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5468, |
|
"nll_loss": 0.9003454446792603, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.0864745011086474, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.902459390407861e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5614101886749268, |
|
"logits/rejected": -2.61173677444458, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4411, |
|
"nll_loss": 0.9182124137878418, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.1086474501108647, |
|
"grad_norm": 0.7572781443595886, |
|
"learning_rate": 5.817668607449911e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.621619939804077, |
|
"logits/rejected": -2.688084363937378, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2006, |
|
"nll_loss": 0.9091912508010864, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.130820399113082, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.731835684574692e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.59326171875, |
|
"logits/rejected": -2.6192498207092285, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.392, |
|
"nll_loss": 0.8921745419502258, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.1529933481152994, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.645009833126218e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.570634365081787, |
|
"logits/rejected": -2.5929832458496094, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5643, |
|
"nll_loss": 0.9609503746032715, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.1751662971175167, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.557240833732967e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5438590049743652, |
|
"logits/rejected": -2.586803913116455, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4816, |
|
"nll_loss": 0.8677853345870972, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.1973392461197339, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.46857900776672e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5750033855438232, |
|
"logits/rejected": -2.618818521499634, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3517, |
|
"nll_loss": 0.8420497179031372, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.3790751884913605e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.576446771621704, |
|
"logits/rejected": -2.5843629837036133, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5026, |
|
"nll_loss": 0.8621179461479187, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.2416851441241685, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.288780691918196e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.569793939590454, |
|
"logits/rejected": -2.656283140182495, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3906, |
|
"nll_loss": 1.0533077716827393, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.2638580931263859, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.197747287384502e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.586745500564575, |
|
"logits/rejected": -2.6093835830688477, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5754, |
|
"nll_loss": 0.9478577375411987, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.2860310421286032, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.106027167872141e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5553150177001953, |
|
"logits/rejected": -2.610725164413452, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4337, |
|
"nll_loss": 0.8642798662185669, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.3082039911308203, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.013672920083319e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.579271078109741, |
|
"logits/rejected": -2.635814666748047, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2762, |
|
"nll_loss": 0.9140083193778992, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.3303769401330376, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.920737494290572e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5220165252685547, |
|
"logits/rejected": -2.551558256149292, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.6071, |
|
"nll_loss": 0.9066599011421204, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.352549889135255, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.827274173978333e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.560403347015381, |
|
"logits/rejected": -2.639423131942749, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2709, |
|
"nll_loss": 0.8913120031356812, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.3747228381374723, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.733336545293438e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.616939067840576, |
|
"logits/rejected": -2.650655746459961, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4557, |
|
"nll_loss": 0.9315293431282043, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.3968957871396896, |
|
"grad_norm": 0.6212561130523682, |
|
"learning_rate": 4.638978466322108e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.531559944152832, |
|
"logits/rejected": -2.5307424068450928, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.9584, |
|
"nll_loss": 0.9440839886665344, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.4190687361419068, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.5442540362110285e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.546278476715088, |
|
"logits/rejected": -2.5663809776306152, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4305, |
|
"nll_loss": 0.8402501940727234, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.441241685144124, |
|
"grad_norm": 0.784209132194519, |
|
"learning_rate": 4.449217564150197e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.547091484069824, |
|
"logits/rejected": -2.6290016174316406, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.1191, |
|
"nll_loss": 0.9092384576797485, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.4634146341463414, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.353923538235369e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.60884952545166, |
|
"logits/rejected": -2.640436887741089, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2808, |
|
"nll_loss": 0.872983455657959, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.4855875831485588, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.2584265942279114e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5404248237609863, |
|
"logits/rejected": -2.5652973651885986, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5307, |
|
"nll_loss": 0.8896579742431641, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.507760532150776, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.162781484230005e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.554001569747925, |
|
"logits/rejected": -2.5924289226531982, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5124, |
|
"nll_loss": 0.9706124067306519, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.5299334811529932, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.067043045293142e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6452507972717285, |
|
"logits/rejected": -2.6547276973724365, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3232, |
|
"nll_loss": 0.9081094861030579, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.5521064301552108, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.971266167977914e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5739452838897705, |
|
"logits/rejected": -2.6191649436950684, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.1834, |
|
"nll_loss": 0.8611727952957153, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.5742793791574279, |
|
"grad_norm": 0.7607054710388184, |
|
"learning_rate": 3.875505764883128e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5171356201171875, |
|
"logits/rejected": -2.5558667182922363, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3944, |
|
"nll_loss": 0.9226005673408508, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.5964523281596452, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.7798167391622746e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.4788334369659424, |
|
"logits/rejected": -2.556370735168457, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2499, |
|
"nll_loss": 0.8890296816825867, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.6186252771618626, |
|
"grad_norm": 0.5698145031929016, |
|
"learning_rate": 3.684253953045438e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.59308123588562, |
|
"logits/rejected": -2.5706467628479004, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3174, |
|
"nll_loss": 0.8769053220748901, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.6407982261640797, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.588872196384632e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.515533924102783, |
|
"logits/rejected": -2.539431571960449, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4826, |
|
"nll_loss": 0.9430069923400879, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.6629711751662972, |
|
"grad_norm": 1.2568813562393188, |
|
"learning_rate": 3.493726155240664e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6210389137268066, |
|
"logits/rejected": -2.6431758403778076, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2068, |
|
"nll_loss": 0.9099509119987488, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.6851441241685143, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.3988703805294946e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.572796106338501, |
|
"logits/rejected": -2.6425790786743164, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3102, |
|
"nll_loss": 0.9549806714057922, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.7073170731707317, |
|
"grad_norm": 0.7996054887771606, |
|
"learning_rate": 3.3043592567460748e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5201961994171143, |
|
"logits/rejected": -2.552128553390503, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5766, |
|
"nll_loss": 0.9187144041061401, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.729490022172949, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.2102469707836174e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5832736492156982, |
|
"logits/rejected": -2.6100733280181885, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.1691, |
|
"nll_loss": 0.8904333114624023, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.7516629711751663, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.1165874808661342e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.599666118621826, |
|
"logits/rejected": -2.62807297706604, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4813, |
|
"nll_loss": 0.8979522585868835, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.7738359201773837, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.0234344856121086e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6034371852874756, |
|
"logits/rejected": -2.6399123668670654, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2004, |
|
"nll_loss": 0.9016131162643433, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7960088691796008, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.9308413932469805e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5875086784362793, |
|
"logits/rejected": -2.5995872020721436, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2187, |
|
"nll_loss": 0.895309567451477, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.8388612909821512e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.545532703399658, |
|
"logits/rejected": -2.5655813217163086, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4789, |
|
"nll_loss": 0.8789796829223633, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.8403547671840355, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.7475469145780162e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.634237766265869, |
|
"logits/rejected": -2.6819849014282227, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 2.0414, |
|
"nll_loss": 0.8710411190986633, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.8625277161862528, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.6569506181085155e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.569899559020996, |
|
"logits/rejected": -2.627354145050049, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.1485, |
|
"nll_loss": 0.8969793319702148, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.8847006651884701, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.5671243439445098e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5224175453186035, |
|
"logits/rejected": -2.521833896636963, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5147, |
|
"nll_loss": 1.0038148164749146, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.9068736141906872, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.4781195929731997e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5353684425354004, |
|
"logits/rejected": -2.554072141647339, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2603, |
|
"nll_loss": 0.8757489323616028, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.9290465631929048, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.3899873950706803e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6149215698242188, |
|
"logits/rejected": -2.663322687149048, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3348, |
|
"nll_loss": 0.8890805244445801, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.951219512195122, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.3027782798445205e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5764241218566895, |
|
"logits/rejected": -2.5978610515594482, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.255, |
|
"nll_loss": 0.9303677678108215, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.9733924611973392, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.216542247663192e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5386908054351807, |
|
"logits/rejected": -2.5890002250671387, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.228, |
|
"nll_loss": 0.880374550819397, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.9955654101995566, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.1313287409889075e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6515865325927734, |
|
"logits/rejected": -2.698002338409424, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4501, |
|
"nll_loss": 0.8617309331893921, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.0177383592017737, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.0471866160303494e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.576596975326538, |
|
"logits/rejected": -2.634167194366455, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.658, |
|
"nll_loss": 0.9685592651367188, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.0399113082039912, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.9641641147314996e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.556303024291992, |
|
"logits/rejected": -2.6210145950317383, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.6006, |
|
"nll_loss": 0.9573662877082825, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.0620842572062084, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.88230883711267e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6028056144714355, |
|
"logits/rejected": -2.648607015609741, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3477, |
|
"nll_loss": 0.8656437993049622, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.084257206208426, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.8016677139795635e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5672965049743652, |
|
"logits/rejected": -2.5980045795440674, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.404, |
|
"nll_loss": 0.8821004033088684, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.106430155210643, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.7222869800160197e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.567884683609009, |
|
"logits/rejected": -2.6413073539733887, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5904, |
|
"nll_loss": 0.8698426485061646, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.12860310421286, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.6442121472758776e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5741963386535645, |
|
"logits/rejected": -2.6005654335021973, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.7035, |
|
"nll_loss": 0.8915897607803345, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.1507760532150777, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.5674879790891504e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5121145248413086, |
|
"logits/rejected": -2.5986428260803223, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.429, |
|
"nll_loss": 0.8993185758590698, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.172949002217295, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.4921584643974772e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5978806018829346, |
|
"logits/rejected": -2.6225571632385254, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.6976, |
|
"nll_loss": 0.8835185766220093, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.1951219512195124, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.4182667925335472e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5312979221343994, |
|
"logits/rejected": -2.619032382965088, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2105, |
|
"nll_loss": 0.8996240496635437, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.2172949002217295, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.3458553284589852e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.586155414581299, |
|
"logits/rejected": -2.641432523727417, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4627, |
|
"nll_loss": 0.8832103610038757, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.2394678492239466, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.2749655884748788e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.649963617324829, |
|
"logits/rejected": -2.721240758895874, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.9071, |
|
"nll_loss": 0.8600472211837769, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.261640798226164, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.205638216418864e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6075966358184814, |
|
"logits/rejected": -2.6658735275268555, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3184, |
|
"nll_loss": 0.9156390428543091, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.2838137472283813, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.1379129603624472e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5418643951416016, |
|
"logits/rejected": -2.5810935497283936, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3225, |
|
"nll_loss": 0.9440028071403503, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.305986696230599, |
|
"grad_norm": 0.6439170241355896, |
|
"learning_rate": 1.0718286498218834e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5944840908050537, |
|
"logits/rejected": -2.6368420124053955, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3297, |
|
"nll_loss": 0.8718999028205872, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.328159645232816, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.0074231734957184e-06, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5355989933013916, |
|
"logits/rejected": -2.5640969276428223, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.1733, |
|
"nll_loss": 0.8988415002822876, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.3503325942350335, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.447334575417189e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.605184555053711, |
|
"logits/rejected": -2.6584599018096924, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4157, |
|
"nll_loss": 0.8994711637496948, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.3725055432372506, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.837954444056825e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.521874189376831, |
|
"logits/rejected": -2.5487990379333496, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.504, |
|
"nll_loss": 0.8935952186584473, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.3946784922394677, |
|
"grad_norm": 0.6280531883239746, |
|
"learning_rate": 8.246440722142325e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.612034320831299, |
|
"logits/rejected": -2.640134811401367, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.1654, |
|
"nll_loss": 0.9181571006774902, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.4168514412416853, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.67313254743438e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6642110347747803, |
|
"logits/rejected": -2.6616063117980957, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3256, |
|
"nll_loss": 0.9149085283279419, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.118358619747322e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5517077445983887, |
|
"logits/rejected": -2.5954713821411133, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3177, |
|
"nll_loss": 0.9105826616287231, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.4611973392461195, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.582437012492725e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6221938133239746, |
|
"logits/rejected": -2.6449408531188965, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.387, |
|
"nll_loss": 0.891291618347168, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.483370288248337, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.065674990315623e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5601844787597656, |
|
"logits/rejected": -2.58024263381958, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2113, |
|
"nll_loss": 0.8098522424697876, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.505543237250554, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.568368832927742e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5909504890441895, |
|
"logits/rejected": -2.596946954727173, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3932, |
|
"nll_loss": 0.9140310287475586, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.5277161862527717, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.090803665238872e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5009472370147705, |
|
"logits/rejected": -2.5562808513641357, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2941, |
|
"nll_loss": 0.8312563896179199, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.549889135254989, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.633253293883679e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6316781044006348, |
|
"logits/rejected": -2.635262966156006, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4821, |
|
"nll_loss": 0.8716381192207336, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.5720620842572064, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.19598005023774e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6068694591522217, |
|
"logits/rejected": -2.650502920150757, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5451, |
|
"nll_loss": 0.9065351486206055, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.5942350332594235, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.7792346400128183e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.579982280731201, |
|
"logits/rejected": -2.643984317779541, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3209, |
|
"nll_loss": 0.8348051905632019, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.6164079822616406, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.3832559995175116e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5105443000793457, |
|
"logits/rejected": -2.57656192779541, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4288, |
|
"nll_loss": 0.997296154499054, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.638580931263858, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.0082711586658336e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5682618618011475, |
|
"logits/rejected": -2.6190028190612793, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3811, |
|
"nll_loss": 0.840679943561554, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.6607538802660753, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.654495110812136e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.534421920776367, |
|
"logits/rejected": -2.5627925395965576, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4835, |
|
"nll_loss": 0.9845026135444641, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.682926829268293, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.3221306894870962e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.597075939178467, |
|
"logits/rejected": -2.605375051498413, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.9092, |
|
"nll_loss": 0.8531631231307983, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.70509977827051, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.0113684521053663e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5460121631622314, |
|
"logits/rejected": -2.6030726432800293, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5883, |
|
"nll_loss": 0.8551692962646484, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.722386570711647e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.559178352355957, |
|
"logits/rejected": -2.601423740386963, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5836, |
|
"nll_loss": 0.8850408792495728, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.7494456762749446, |
|
"grad_norm": 0.6112498641014099, |
|
"learning_rate": 1.455350729827698e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.579529285430908, |
|
"logits/rejected": -2.5950798988342285, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3608, |
|
"nll_loss": 0.8255780339241028, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.7716186252771617, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.2104140314590194e-07, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.525857448577881, |
|
"logits/rejected": -2.587132215499878, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5238, |
|
"nll_loss": 0.9289102554321289, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.7937915742793793, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.877169073155167e-08, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.533351182937622, |
|
"logits/rejected": -2.584251880645752, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4373, |
|
"nll_loss": 0.9016596674919128, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.8159645232815964, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.873870382965364e-08, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5861144065856934, |
|
"logits/rejected": -2.612295150756836, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.5352, |
|
"nll_loss": 0.8941007852554321, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.8381374722838135, |
|
"grad_norm": 0.6927753686904907, |
|
"learning_rate": 6.095392812864863e-08, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6299588680267334, |
|
"logits/rejected": -2.654484272003174, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.1739, |
|
"nll_loss": 0.9128586053848267, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.860310421286031, |
|
"grad_norm": 0.6138539910316467, |
|
"learning_rate": 4.5427560330289824e-08, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.5274271965026855, |
|
"logits/rejected": -2.5881905555725098, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.0278, |
|
"nll_loss": 0.8638602495193481, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.882483370288248, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.216850230348145e-08, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6004536151885986, |
|
"logits/rejected": -2.639181613922119, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3678, |
|
"nll_loss": 0.9433631896972656, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.9046563192904657, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.1184355980488067e-08, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.6405842304229736, |
|
"logits/rejected": -2.632291316986084, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.7057, |
|
"nll_loss": 0.9545801281929016, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.926829268292683, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.2481418998456118e-08, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.552712917327881, |
|
"logits/rejected": -2.603447437286377, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.1937, |
|
"nll_loss": 0.885978102684021, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.9490022172949004, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.064681088730151e-09, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.49711275100708, |
|
"logits/rejected": -2.5226452350616455, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.2368, |
|
"nll_loss": 0.8924044370651245, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.9711751662971175, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.9378212160501285e-09, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.568042755126953, |
|
"logits/rejected": -2.608767032623291, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.4421, |
|
"nll_loss": 0.8176005482673645, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.9933481152993346, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.0320546925512985e-10, |
|
"log_odds_chosen": NaN, |
|
"log_odds_ratio": NaN, |
|
"logits/chosen": -2.606173276901245, |
|
"logits/rejected": -2.6094250679016113, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.9638, |
|
"nll_loss": 0.9152927398681641, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1353, |
|
"total_flos": 0.0, |
|
"train_loss": 1.4294498003771459, |
|
"train_runtime": 7808.9602, |
|
"train_samples_per_second": 2.77, |
|
"train_steps_per_second": 0.173 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1353, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|