Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +714 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:310e812d9543866b79cfab313912d0b3b6011b2f4dd22c86c5c406f1f13f95d9
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e20986d90a52cf7eab5a744d0f492d7aa3bce15a6a893249d1f4b1e16ac6635
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c790119159ec5fc472e21d1dbc991562981c63c311648efdc5376c161d6a6388
 size 150487412

 version https://git-lfs.github.com/spec/v1
+oid sha256:467a38620ab8ce66a889f880abbb934944beecab31973acd7bd36b688ecbd1ad
 size 150487412

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e50913eb64cd04291a3e2e998134198970701be790455a8a08ec0dd9533ad68e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c145dfe29395d681bc380e8fda9000cf9ed4b1b249c7090e1dd47932ddd4d746
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505f9225762b105f8ca5168f44d99b2f8467174f4ade85f1cc95f684fbd828e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8092848062515259,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.20128318027424832,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,714 @@
       "eval_samples_per_second": 31.995,
       "eval_steps_per_second": 8.001,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1436882307055616e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8062341809272766,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.2516039753428104,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 31.995,
       "eval_steps_per_second": 8.001,
       "step": 400
+    },
+    {
+      "epoch": 0.20178638822493394,
+      "grad_norm": 0.26666736602783203,
+      "learning_rate": 1.013396731136465e-05,
+      "loss": 0.5772,
+      "step": 401
+    },
+    {
+      "epoch": 0.20228959617561956,
+      "grad_norm": 0.3230019807815552,
+      "learning_rate": 9.937309365446973e-06,
+      "loss": 0.5288,
+      "step": 402
+    },
+    {
+      "epoch": 0.2027928041263052,
+      "grad_norm": 0.22893287241458893,
+      "learning_rate": 9.742367571857091e-06,
+      "loss": 0.5955,
+      "step": 403
+    },
+    {
+      "epoch": 0.20329601207699083,
+      "grad_norm": 0.233260378241539,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.5265,
+      "step": 404
+    },
+    {
+      "epoch": 0.20379922002767645,
+      "grad_norm": 0.2175649255514145,
+      "learning_rate": 9.357665770419244e-06,
+      "loss": 0.4912,
+      "step": 405
+    },
+    {
+      "epoch": 0.20430242797836207,
+      "grad_norm": 0.2440871000289917,
+      "learning_rate": 9.167922241916055e-06,
+      "loss": 0.5758,
+      "step": 406
+    },
+    {
+      "epoch": 0.20480563592904769,
+      "grad_norm": 0.2526509165763855,
+      "learning_rate": 8.97992782372432e-06,
+      "loss": 0.6023,
+      "step": 407
+    },
+    {
+      "epoch": 0.2053088438797333,
+      "grad_norm": 0.30109941959381104,
+      "learning_rate": 8.793690568899216e-06,
+      "loss": 0.7214,
+      "step": 408
+    },
+    {
+      "epoch": 0.20581205183041892,
+      "grad_norm": 0.30608582496643066,
+      "learning_rate": 8.609218455224893e-06,
+      "loss": 0.7214,
+      "step": 409
+    },
+    {
+      "epoch": 0.20631525978110454,
+      "grad_norm": 0.30892831087112427,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.7899,
+      "step": 410
+    },
+    {
+      "epoch": 0.20681846773179016,
+      "grad_norm": 0.3434561491012573,
+      "learning_rate": 8.245601184062852e-06,
+      "loss": 0.7088,
+      "step": 411
+    },
+    {
+      "epoch": 0.20732167568247578,
+      "grad_norm": 0.37294310331344604,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 0.7765,
+      "step": 412
+    },
+    {
+      "epoch": 0.2078248836331614,
+      "grad_norm": 0.32402169704437256,
+      "learning_rate": 7.889138314185678e-06,
+      "loss": 0.6546,
+      "step": 413
+    },
+    {
+      "epoch": 0.20832809158384702,
+      "grad_norm": 0.33943668007850647,
+      "learning_rate": 7.71360891480134e-06,
+      "loss": 0.7051,
+      "step": 414
+    },
+    {
+      "epoch": 0.20883129953453264,
+      "grad_norm": 0.35481706261634827,
+      "learning_rate": 7.539890923671062e-06,
+      "loss": 0.7839,
+      "step": 415
+    },
+    {
+      "epoch": 0.20933450748521826,
+      "grad_norm": 0.40533941984176636,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 0.7869,
+      "step": 416
+    },
+    {
+      "epoch": 0.20983771543590388,
+      "grad_norm": 0.34854623675346375,
+      "learning_rate": 7.197918854261432e-06,
+      "loss": 0.7238,
+      "step": 417
+    },
+    {
+      "epoch": 0.2103409233865895,
+      "grad_norm": 0.33167386054992676,
+      "learning_rate": 7.029679424927365e-06,
+      "loss": 0.7212,
+      "step": 418
+    },
+    {
+      "epoch": 0.21084413133727514,
+      "grad_norm": 0.41656023263931274,
+      "learning_rate": 6.863280701110408e-06,
+      "loss": 0.8973,
+      "step": 419
+    },
+    {
+      "epoch": 0.21134733928796076,
+      "grad_norm": 0.4043886959552765,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.8857,
+      "step": 420
+    },
+    {
+      "epoch": 0.21185054723864638,
+      "grad_norm": 0.3756745755672455,
+      "learning_rate": 6.536033802742813e-06,
+      "loss": 0.8296,
+      "step": 421
+    },
+    {
+      "epoch": 0.212353755189332,
+      "grad_norm": 0.38114434480667114,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 0.7459,
+      "step": 422
+    },
+    {
+      "epoch": 0.21285696314001762,
+      "grad_norm": 0.3923634886741638,
+      "learning_rate": 6.216234231230012e-06,
+      "loss": 0.8154,
+      "step": 423
+    },
+    {
+      "epoch": 0.21336017109070324,
+      "grad_norm": 0.3856075704097748,
+      "learning_rate": 6.059144366901736e-06,
+      "loss": 0.7665,
+      "step": 424
+    },
+    {
+      "epoch": 0.21386337904138886,
+      "grad_norm": 0.4270302355289459,
+      "learning_rate": 5.903936782582253e-06,
+      "loss": 0.7968,
+      "step": 425
+    },
+    {
+      "epoch": 0.21436658699207448,
+      "grad_norm": 0.4255678951740265,
+      "learning_rate": 5.750618126847912e-06,
+      "loss": 0.9026,
+      "step": 426
+    },
+    {
+      "epoch": 0.2148697949427601,
+      "grad_norm": 0.38176366686820984,
+      "learning_rate": 5.599194967359639e-06,
+      "loss": 0.6918,
+      "step": 427
+    },
+    {
+      "epoch": 0.21537300289344571,
+      "grad_norm": 0.4316434860229492,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 0.8101,
+      "step": 428
+    },
+    {
+      "epoch": 0.21587621084413133,
+      "grad_norm": 0.43751800060272217,
+      "learning_rate": 5.302061001503394e-06,
+      "loss": 0.859,
+      "step": 429
+    },
+    {
+      "epoch": 0.21637941879481695,
+      "grad_norm": 0.44405457377433777,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.7562,
+      "step": 430
+    },
+    {
+      "epoch": 0.21688262674550257,
+      "grad_norm": 0.5563176870346069,
+      "learning_rate": 5.012585797388936e-06,
+      "loss": 0.9373,
+      "step": 431
+    },
+    {
+      "epoch": 0.2173858346961882,
+      "grad_norm": 0.5112346410751343,
+      "learning_rate": 4.87073578250698e-06,
+      "loss": 0.914,
+      "step": 432
+    },
+    {
+      "epoch": 0.2178890426468738,
+      "grad_norm": 0.4817068576812744,
+      "learning_rate": 4.730818955102234e-06,
+      "loss": 0.7854,
+      "step": 433
+    },
+    {
+      "epoch": 0.21839225059755943,
+      "grad_norm": 0.5343864560127258,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 0.8649,
+      "step": 434
+    },
+    {
+      "epoch": 0.21889545854824508,
+      "grad_norm": 0.5791187882423401,
+      "learning_rate": 4.456808753941205e-06,
+      "loss": 0.8937,
+      "step": 435
+    },
+    {
+      "epoch": 0.2193986664989307,
+      "grad_norm": 0.5625150799751282,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.8115,
+      "step": 436
+    },
+    {
+      "epoch": 0.2199018744496163,
+      "grad_norm": 0.5949332118034363,
+      "learning_rate": 4.190602144143207e-06,
+      "loss": 0.8589,
+      "step": 437
+    },
+    {
+      "epoch": 0.22040508240030193,
+      "grad_norm": 0.6026896238327026,
+      "learning_rate": 4.06043949255509e-06,
+      "loss": 0.7862,
+      "step": 438
+    },
+    {
+      "epoch": 0.22090829035098755,
+      "grad_norm": 0.590242862701416,
+      "learning_rate": 3.932244738840379e-06,
+      "loss": 0.7858,
+      "step": 439
+    },
+    {
+      "epoch": 0.22141149830167317,
+      "grad_norm": 0.6550387144088745,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.7393,
+      "step": 440
+    },
+    {
+      "epoch": 0.2219147062523588,
+      "grad_norm": 0.6778830289840698,
+      "learning_rate": 3.681780806244095e-06,
+      "loss": 0.8697,
+      "step": 441
+    },
+    {
+      "epoch": 0.2224179142030444,
+      "grad_norm": 0.9251939058303833,
+      "learning_rate": 3.5595223564037884e-06,
+      "loss": 1.0204,
+      "step": 442
+    },
+    {
+      "epoch": 0.22292112215373003,
+      "grad_norm": 0.8432732820510864,
+      "learning_rate": 3.4392532620598216e-06,
+      "loss": 1.0461,
+      "step": 443
+    },
+    {
+      "epoch": 0.22342433010441565,
+      "grad_norm": 0.8827000260353088,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 1.0994,
+      "step": 444
+    },
+    {
+      "epoch": 0.22392753805510127,
+      "grad_norm": 1.0612576007843018,
+      "learning_rate": 3.2047036621337236e-06,
+      "loss": 1.0152,
+      "step": 445
+    },
+    {
+      "epoch": 0.22443074600578689,
+      "grad_norm": 0.8533521890640259,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 0.8498,
+      "step": 446
+    },
+    {
+      "epoch": 0.2249339539564725,
+      "grad_norm": 0.9289717078208923,
+      "learning_rate": 2.978172195332263e-06,
+      "loss": 1.1416,
+      "step": 447
+    },
+    {
+      "epoch": 0.22543716190715812,
+      "grad_norm": 1.071365237236023,
+      "learning_rate": 2.8679254453910785e-06,
+      "loss": 0.9285,
+      "step": 448
+    },
+    {
+      "epoch": 0.22594036985784374,
+      "grad_norm": 1.21872878074646,
+      "learning_rate": 2.759697676656098e-06,
+      "loss": 0.8855,
+      "step": 449
+    },
+    {
+      "epoch": 0.22644357780852936,
+      "grad_norm": 1.5362472534179688,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.8939,
+      "step": 450
+    },
+    {
+      "epoch": 0.226946785759215,
+      "grad_norm": 0.23164653778076172,
+      "learning_rate": 2.549317540589308e-06,
+      "loss": 0.4267,
+      "step": 451
+    },
+    {
+      "epoch": 0.22744999370990063,
+      "grad_norm": 0.24931369721889496,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.4757,
+      "step": 452
+    },
+    {
+      "epoch": 0.22795320166058625,
+      "grad_norm": 0.28549155592918396,
+      "learning_rate": 2.3470678346851518e-06,
+      "loss": 0.6825,
+      "step": 453
+    },
+    {
+      "epoch": 0.22845640961127187,
+      "grad_norm": 0.23570339381694794,
+      "learning_rate": 2.2490027771406687e-06,
+      "loss": 0.6598,
+      "step": 454
+    },
+    {
+      "epoch": 0.22895961756195748,
+      "grad_norm": 0.24213777482509613,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 0.5853,
+      "step": 455
+    },
+    {
+      "epoch": 0.2294628255126431,
+      "grad_norm": 0.2685341238975525,
+      "learning_rate": 2.0590132565903476e-06,
+      "loss": 0.5968,
+      "step": 456
+    },
+    {
+      "epoch": 0.22996603346332872,
+      "grad_norm": 0.26003000140190125,
+      "learning_rate": 1.9670969321032407e-06,
+      "loss": 0.5892,
+      "step": 457
+    },
+    {
+      "epoch": 0.23046924141401434,
+      "grad_norm": 0.2639204263687134,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 0.5992,
+      "step": 458
+    },
+    {
+      "epoch": 0.23097244936469996,
+      "grad_norm": 0.27651461958885193,
+      "learning_rate": 1.7894408414835362e-06,
+      "loss": 0.6325,
+      "step": 459
+    },
+    {
+      "epoch": 0.23147565731538558,
+      "grad_norm": 0.3118321895599365,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.6879,
+      "step": 460
+    },
+    {
+      "epoch": 0.2319788652660712,
+      "grad_norm": 0.280271977186203,
+      "learning_rate": 1.620045381987012e-06,
+      "loss": 0.6115,
+      "step": 461
+    },
+    {
+      "epoch": 0.23248207321675682,
+      "grad_norm": 0.33411526679992676,
+      "learning_rate": 1.5384545146622852e-06,
+      "loss": 0.801,
+      "step": 462
+    },
+    {
+      "epoch": 0.23298528116744244,
+      "grad_norm": 0.3432357907295227,
+      "learning_rate": 1.4589395786535953e-06,
+      "loss": 0.7408,
+      "step": 463
+    },
+    {
+      "epoch": 0.23348848911812806,
+      "grad_norm": 0.3068677484989166,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 0.7147,
+      "step": 464
+    },
+    {
+      "epoch": 0.23399169706881368,
+      "grad_norm": 0.3475719392299652,
+      "learning_rate": 1.3061510361333185e-06,
+      "loss": 0.8727,
+      "step": 465
+    },
+    {
+      "epoch": 0.2344949050194993,
+      "grad_norm": 0.3557244539260864,
+      "learning_rate": 1.232883974574367e-06,
+      "loss": 0.8553,
+      "step": 466
+    },
+    {
+      "epoch": 0.23499811297018494,
+      "grad_norm": 0.3593432307243347,
+      "learning_rate": 1.1617059339563807e-06,
+      "loss": 0.8505,
+      "step": 467
+    },
+    {
+      "epoch": 0.23550132092087056,
+      "grad_norm": 0.5012146830558777,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.8193,
+      "step": 468
+    },
+    {
+      "epoch": 0.23600452887155618,
+      "grad_norm": 0.38138529658317566,
+      "learning_rate": 1.0256290220474307e-06,
+      "loss": 0.8145,
+      "step": 469
+    },
+    {
+      "epoch": 0.2365077368222418,
+      "grad_norm": 0.3529946208000183,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.7576,
+      "step": 470
+    },
+    {
+      "epoch": 0.23701094477292742,
+      "grad_norm": 0.39787623286247253,
+      "learning_rate": 8.979436164848088e-07,
+      "loss": 0.7766,
+      "step": 471
+    },
+    {
+      "epoch": 0.23751415272361304,
+      "grad_norm": 0.42735952138900757,
+      "learning_rate": 8.372546218022747e-07,
+      "loss": 0.915,
+      "step": 472
+    },
+    {
+      "epoch": 0.23801736067429866,
+      "grad_norm": 0.38604122400283813,
+      "learning_rate": 7.786715955054203e-07,
+      "loss": 0.8366,
+      "step": 473
+    },
+    {
+      "epoch": 0.23852056862498427,
+      "grad_norm": 0.3937179148197174,
+      "learning_rate": 7.221970470961125e-07,
+      "loss": 0.8331,
+      "step": 474
+    },
+    {
+      "epoch": 0.2390237765756699,
+      "grad_norm": 0.39395299553871155,
+      "learning_rate": 6.678333957560512e-07,
+      "loss": 0.8811,
+      "step": 475
+    },
+    {
+      "epoch": 0.2395269845263555,
+      "grad_norm": 0.40626415610313416,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.8921,
+      "step": 476
+    },
+    {
+      "epoch": 0.24003019247704113,
+      "grad_norm": 0.48355770111083984,
+      "learning_rate": 5.654480087916303e-07,
+      "loss": 0.897,
+      "step": 477
+    },
+    {
+      "epoch": 0.24053340042772675,
+      "grad_norm": 0.4704931676387787,
+      "learning_rate": 5.174306590164879e-07,
+      "loss": 0.8862,
+      "step": 478
+    },
+    {
+      "epoch": 0.24103660837841237,
+      "grad_norm": 0.4246582090854645,
+      "learning_rate": 4.715329778211375e-07,
+      "loss": 0.7952,
+      "step": 479
+    },
+    {
+      "epoch": 0.241539816329098,
+      "grad_norm": 0.45792463421821594,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.8766,
+      "step": 480
+    },
+    {
+      "epoch": 0.2420430242797836,
+      "grad_norm": 0.470819354057312,
+      "learning_rate": 3.8610439470164737e-07,
+      "loss": 0.8196,
+      "step": 481
+    },
+    {
+      "epoch": 0.24254623223046926,
+      "grad_norm": 0.5227745175361633,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 0.8448,
+      "step": 482
+    },
+    {
+      "epoch": 0.24304944018115487,
+      "grad_norm": 0.5549777746200562,
+      "learning_rate": 3.09176897181096e-07,
+      "loss": 0.8646,
+      "step": 483
+    },
+    {
+      "epoch": 0.2435526481318405,
+      "grad_norm": 0.5375264286994934,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.8193,
+      "step": 484
+    },
+    {
+      "epoch": 0.2440558560825261,
+      "grad_norm": 0.5278798937797546,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 0.7679,
+      "step": 485
+    },
+    {
+      "epoch": 0.24455906403321173,
+      "grad_norm": 0.5947657227516174,
+      "learning_rate": 2.0975362126691712e-07,
+      "loss": 0.8244,
+      "step": 486
+    },
+    {
+      "epoch": 0.24506227198389735,
+      "grad_norm": 0.697528600692749,
+      "learning_rate": 1.8087642458373134e-07,
+      "loss": 0.8524,
+      "step": 487
+    },
+    {
+      "epoch": 0.24556547993458297,
+      "grad_norm": 0.6026344299316406,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 0.8393,
+      "step": 488
+    },
+    {
+      "epoch": 0.2460686878852686,
+      "grad_norm": 0.6663211584091187,
+      "learning_rate": 1.2952543313240472e-07,
+      "loss": 0.9127,
+      "step": 489
+    },
+    {
+      "epoch": 0.2465718958359542,
+      "grad_norm": 0.5937969088554382,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.7741,
+      "step": 490
+    },
+    {
+      "epoch": 0.24707510378663983,
+      "grad_norm": 0.6415567398071289,
+      "learning_rate": 8.671949076420882e-08,
+      "loss": 0.8909,
+      "step": 491
+    },
+    {
+      "epoch": 0.24757831173732545,
+      "grad_norm": 0.7068193554878235,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 0.8228,
+      "step": 492
+    },
+    {
+      "epoch": 0.24808151968801107,
+      "grad_norm": 0.8213286995887756,
+      "learning_rate": 5.246593205699424e-08,
+      "loss": 0.8736,
+      "step": 493
+    },
+    {
+      "epoch": 0.24858472763869668,
+      "grad_norm": 0.7606691122055054,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 0.8982,
+      "step": 494
+    },
+    {
+      "epoch": 0.2490879355893823,
+      "grad_norm": 0.7615821361541748,
+      "learning_rate": 2.6770626181715773e-08,
+      "loss": 1.0097,
+      "step": 495
+    },
+    {
+      "epoch": 0.24959114354006792,
+      "grad_norm": 0.882043182849884,
+      "learning_rate": 1.7133751222137007e-08,
+      "loss": 0.9326,
+      "step": 496
+    },
+    {
+      "epoch": 0.25009435149075354,
+      "grad_norm": 0.8523903489112854,
+      "learning_rate": 9.637975896759077e-09,
+      "loss": 0.9696,
+      "step": 497
+    },
+    {
+      "epoch": 0.2505975594414392,
+      "grad_norm": 1.0187333822250366,
+      "learning_rate": 4.2836212996499865e-09,
+      "loss": 0.8635,
+      "step": 498
+    },
+    {
+      "epoch": 0.2511007673921248,
+      "grad_norm": 1.1367398500442505,
+      "learning_rate": 1.0709167935385455e-09,
+      "loss": 0.967,
+      "step": 499
+    },
+    {
+      "epoch": 0.2516039753428104,
+      "grad_norm": 1.361627221107483,
+      "learning_rate": 0.0,
+      "loss": 0.8567,
+      "step": 500
+    },
+    {
+      "epoch": 0.2516039753428104,
+      "eval_loss": 0.8062341809272766,
+      "eval_runtime": 104.6938,
+      "eval_samples_per_second": 31.969,
+      "eval_steps_per_second": 7.995,
+      "step": 500
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.4280794742718464e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null