Training in progress, step 376, checkpoint
- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +572 -4
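The pytorch_model_fsdp_0/ and optimizer_0/ folders hold torch.distributed.checkpoint (DCP) shards, one __{rank}_0.distcp file per rank plus a .metadata index. As a rough sketch (not part of this commit, and assuming a recent PyTorch release, 2.2 or newer, where torch.distributed.checkpoint.format_utils is available), the sharded model state can be consolidated into a single torch.save file for offline inspection; the output path is illustrative:

    # Consolidate DCP shards into one .pt file (paths are illustrative).
    from torch.distributed.checkpoint.format_utils import dcp_to_torch_save

    dcp_to_torch_save(
        "last-checkpoint/pytorch_model_fsdp_0",  # directory containing .metadata and __N_0.distcp shards
        "consolidated_model.pt",                  # hypothetical output file
    )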
last-checkpoint/optimizer_0/.metadata
CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2341607ce9b81fbddc316e1d8ed745adebc0924533d3f31d7116e1338ca52548
 size 13934748
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d2d76e6d82da5288a2ef5759e596842c9b010e3c843177c0a0569fcc85be1fa7
 size 13999412
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c690ffe3102be826e29c5633cf8620ca47d1b8b819efc2742c27a8604739ff49
 size 13990904
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1485dbdd93d559396936d60e442a160868c25454e2f8eb093acbfacc2547006b
 size 13990904
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9f43d12007991353f51361573d6d7482f2e62e2ba4187b198fad307fac606fa9
 size 6966784
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d7e0918853c587646eb55ae02c94dac10dd95a4e905a8656aa368dc6e541224d
 size 6966784
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0158d971ba71808f5326cd56710c7e448844128b673df1a6f529bff95750524c
 size 6966784
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:78def40bb72508d5352010e3b2abe8d73620bbc2697a530d2c483328a80c449f
 size 6966784
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e302c9460dd4b2d18e32dcb2207c4813128526fb6cb1fc5ceb7324259f0491ba
 size 14960
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:96559dc2d5bf69154ad885b0fecd6a00ab728919e684f3c7a11e872f73da62b1
 size 14960
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:844c660102e4d575fd724e57758d180804c47275487517ac2966e44f0456ff72
 size 14960
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:20c48845b93131d80f6356e44142d40faf3a38bdc6caa9bdebf4e90c2b7ceda2
 size 14960
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8979d33fb7a17f61e829a30bf98bf52a2f74ab1c472a4e63d6f1ec93d04d0c66
 size 1064
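Each entry above is a Git LFS pointer file: the binary itself lives in LFS storage, and the pointer records only the spec version, the sha256 oid, and the byte size. As an illustrative sketch (not part of this commit), a downloaded blob can be checked against its pointer by hashing it the same way; the path and hash below are taken from the scheduler.pt pointer above:

    import hashlib

    def lfs_oid(path: str) -> str:
        """Return the sha256 hex digest Git LFS records for this file."""
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest()

    assert lfs_oid("last-checkpoint/scheduler.pt") == \
        "8979d33fb7a17f61e829a30bf98bf52a2f74ab1c472a4e63d6f1ec93d04d0c66"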
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0
+  "epoch": 1.0,
   "eval_steps": 20,
-  "global_step":
+  "global_step": 376,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2299,6 +2299,574 @@
       "eval_samples_per_second": 6.683,
       "eval_steps_per_second": 0.209,
       "step": 300
+    },
+    {
+      "epoch": 0.800531914893617,
+      "grad_norm": 3.4099960327148438,
+      "learning_rate": 2.332921335481205e-06,
+      "loss": 0.2715,
+      "step": 301
+    },
+    {
+      "epoch": 0.8031914893617021,
+      "grad_norm": 4.202554702758789,
+      "learning_rate": 2.2735820613083837e-06,
+      "loss": 0.2616,
+      "step": 302
+    },
+    {
+      "epoch": 0.8058510638297872,
+      "grad_norm": 2.95456862449646,
+      "learning_rate": 2.2149102719882044e-06,
+      "loss": 0.2455,
+      "step": 303
+    },
+    {
+      "epoch": 0.8085106382978723,
+      "grad_norm": 2.7879536151885986,
+      "learning_rate": 2.156911036173568e-06,
+      "loss": 0.2054,
+      "step": 304
+    },
+    {
+      "epoch": 0.8111702127659575,
+      "grad_norm": 2.4969985485076904,
+      "learning_rate": 2.0995893644155007e-06,
+      "loss": 0.2814,
+      "step": 305
+    },
+    {
+      "epoch": 0.8138297872340425,
+      "grad_norm": 3.3959643840789795,
+      "learning_rate": 2.0429502087303164e-06,
+      "loss": 0.2382,
+      "step": 306
+    },
+    {
+      "epoch": 0.8164893617021277,
+      "grad_norm": 2.825615882873535,
+      "learning_rate": 1.9869984621717888e-06,
+      "loss": 0.2808,
+      "step": 307
+    },
+    {
+      "epoch": 0.8191489361702128,
+      "grad_norm": 2.766301155090332,
+      "learning_rate": 1.931738958408457e-06,
+      "loss": 0.2371,
+      "step": 308
+    },
+    {
+      "epoch": 0.8218085106382979,
+      "grad_norm": 3.683234930038452,
+      "learning_rate": 1.8771764713060359e-06,
+      "loss": 0.2617,
+      "step": 309
+    },
+    {
+      "epoch": 0.824468085106383,
+      "grad_norm": 3.0581727027893066,
+      "learning_rate": 1.8233157145150183e-06,
+      "loss": 0.254,
+      "step": 310
+    },
+    {
+      "epoch": 0.8271276595744681,
+      "grad_norm": 3.316701889038086,
+      "learning_rate": 1.7701613410634367e-06,
+      "loss": 0.2596,
+      "step": 311
+    },
+    {
+      "epoch": 0.8297872340425532,
+      "grad_norm": 2.8315346240997314,
+      "learning_rate": 1.717717942954914e-06,
+      "loss": 0.222,
+      "step": 312
+    },
+    {
+      "epoch": 0.8324468085106383,
+      "grad_norm": 2.781020164489746,
+      "learning_rate": 1.6659900507719406e-06,
+      "loss": 0.2643,
+      "step": 313
+    },
+    {
+      "epoch": 0.8351063829787234,
+      "grad_norm": 2.389970302581787,
+      "learning_rate": 1.614982133284495e-06,
+      "loss": 0.2161,
+      "step": 314
+    },
+    {
+      "epoch": 0.8377659574468085,
+      "grad_norm": 3.4777987003326416,
+      "learning_rate": 1.5646985970639717e-06,
+      "loss": 0.3309,
+      "step": 315
+    },
+    {
+      "epoch": 0.8404255319148937,
+      "grad_norm": 4.487973690032959,
+      "learning_rate": 1.5151437861025032e-06,
+      "loss": 0.3284,
+      "step": 316
+    },
+    {
+      "epoch": 0.8430851063829787,
+      "grad_norm": 4.822957515716553,
+      "learning_rate": 1.466321981437694e-06,
+      "loss": 0.2033,
+      "step": 317
+    },
+    {
+      "epoch": 0.8457446808510638,
+      "grad_norm": 2.9255247116088867,
+      "learning_rate": 1.4182374007827605e-06,
+      "loss": 0.2528,
+      "step": 318
+    },
+    {
+      "epoch": 0.848404255319149,
+      "grad_norm": 2.9784889221191406,
+      "learning_rate": 1.3708941981621814e-06,
+      "loss": 0.2151,
+      "step": 319
+    },
+    {
+      "epoch": 0.851063829787234,
+      "grad_norm": 2.522810459136963,
+      "learning_rate": 1.324296463552821e-06,
+      "loss": 0.2333,
+      "step": 320
+    },
+    {
+      "epoch": 0.851063829787234,
+      "eval_accuracy": 0.831764705882353,
+      "eval_f1": 0.5545171339563862,
+      "eval_loss": 0.38777896761894226,
+      "eval_precision": 0.7416666666666667,
+      "eval_recall": 0.4427860696517413,
+      "eval_runtime": 34.5031,
+      "eval_samples_per_second": 6.492,
+      "eval_steps_per_second": 0.203,
+      "step": 320
+    },
+    {
+      "epoch": 0.8537234042553191,
+      "grad_norm": 2.794802665710449,
+      "learning_rate": 1.2784482225306061e-06,
+      "loss": 0.2338,
+      "step": 321
+    },
+    {
+      "epoch": 0.8563829787234043,
+      "grad_norm": 2.8740601539611816,
+      "learning_rate": 1.2333534359227383e-06,
+      "loss": 0.2526,
+      "step": 322
+    },
+    {
+      "epoch": 0.8590425531914894,
+      "grad_norm": 2.600721597671509,
+      "learning_rate": 1.1890159994655425e-06,
+      "loss": 0.2165,
+      "step": 323
+    },
+    {
+      "epoch": 0.8617021276595744,
+      "grad_norm": 2.781907796859741,
+      "learning_rate": 1.1454397434679022e-06,
+      "loss": 0.2414,
+      "step": 324
+    },
+    {
+      "epoch": 0.8643617021276596,
+      "grad_norm": 2.8299474716186523,
+      "learning_rate": 1.1026284324803493e-06,
+      "loss": 0.2389,
+      "step": 325
+    },
+    {
+      "epoch": 0.8670212765957447,
+      "grad_norm": 2.6625523567199707,
+      "learning_rate": 1.060585764969867e-06,
+      "loss": 0.2444,
+      "step": 326
+    },
+    {
+      "epoch": 0.8696808510638298,
+      "grad_norm": 3.0182435512542725,
+      "learning_rate": 1.0193153730003603e-06,
+      "loss": 0.2967,
+      "step": 327
+    },
+    {
+      "epoch": 0.8723404255319149,
+      "grad_norm": 2.5358083248138428,
+      "learning_rate": 9.788208219188932e-07,
+      "loss": 0.2091,
+      "step": 328
+    },
+    {
+      "epoch": 0.875,
+      "grad_norm": 3.2480201721191406,
+      "learning_rate": 9.391056100476736e-07,
+      "loss": 0.2195,
+      "step": 329
+    },
+    {
+      "epoch": 0.8776595744680851,
+      "grad_norm": 2.449801445007324,
+      "learning_rate": 9.001731683818338e-07,
+      "loss": 0.2316,
+      "step": 330
+    },
+    {
+      "epoch": 0.8803191489361702,
+      "grad_norm": 3.304652690887451,
+      "learning_rate": 8.620268602930271e-07,
+      "loss": 0.2719,
+      "step": 331
+    },
+    {
+      "epoch": 0.8829787234042553,
+      "grad_norm": 3.1013834476470947,
+      "learning_rate": 8.246699812388714e-07,
+      "loss": 0.2412,
+      "step": 332
+    },
+    {
+      "epoch": 0.8856382978723404,
+      "grad_norm": 2.4398679733276367,
+      "learning_rate": 7.881057584782448e-07,
+      "loss": 0.1909,
+      "step": 333
+    },
+    {
+      "epoch": 0.8882978723404256,
+      "grad_norm": 3.296792984008789,
+      "learning_rate": 7.523373507924947e-07,
+      "loss": 0.2592,
+      "step": 334
+    },
+    {
+      "epoch": 0.8909574468085106,
+      "grad_norm": 3.5089118480682373,
+      "learning_rate": 7.17367848212539e-07,
+      "loss": 0.2341,
+      "step": 335
+    },
+    {
+      "epoch": 0.8936170212765957,
+      "grad_norm": 2.9826953411102295,
+      "learning_rate": 6.83200271751927e-07,
+      "loss": 0.239,
+      "step": 336
+    },
+    {
+      "epoch": 0.8962765957446809,
+      "grad_norm": 2.965322732925415,
+      "learning_rate": 6.498375731458529e-07,
+      "loss": 0.242,
+      "step": 337
+    },
+    {
+      "epoch": 0.898936170212766,
+      "grad_norm": 2.855252504348755,
+      "learning_rate": 6.17282634596148e-07,
+      "loss": 0.2503,
+      "step": 338
+    },
+    {
+      "epoch": 0.901595744680851,
+      "grad_norm": 5.112611293792725,
+      "learning_rate": 5.85538268522301e-07,
+      "loss": 0.2665,
+      "step": 339
+    },
+    {
+      "epoch": 0.9042553191489362,
+      "grad_norm": 3.4850215911865234,
+      "learning_rate": 5.546072173184791e-07,
+      "loss": 0.2896,
+      "step": 340
+    },
+    {
+      "epoch": 0.9042553191489362,
+      "eval_accuracy": 0.8305882352941176,
+      "eval_f1": 0.55,
+      "eval_loss": 0.38858291506767273,
+      "eval_precision": 0.7394957983193278,
+      "eval_recall": 0.43781094527363185,
+      "eval_runtime": 34.3336,
+      "eval_samples_per_second": 6.524,
+      "eval_steps_per_second": 0.204,
+      "step": 340
+    },
+    {
+      "epoch": 0.9069148936170213,
+      "grad_norm": 2.3722422122955322,
+      "learning_rate": 5.244921531166247e-07,
+      "loss": 0.2334,
+      "step": 341
+    },
+    {
+      "epoch": 0.9095744680851063,
+      "grad_norm": 2.8881895542144775,
+      "learning_rate": 4.951956775556e-07,
+      "loss": 0.2339,
+      "step": 342
+    },
+    {
+      "epoch": 0.9122340425531915,
+      "grad_norm": 4.109971046447754,
+      "learning_rate": 4.667203215564431e-07,
+      "loss": 0.2837,
+      "step": 343
+    },
+    {
+      "epoch": 0.9148936170212766,
+      "grad_norm": 3.7027337551116943,
+      "learning_rate": 4.3906854510370245e-07,
+      "loss": 0.2862,
+      "step": 344
+    },
+    {
+      "epoch": 0.9175531914893617,
+      "grad_norm": 3.069493532180786,
+      "learning_rate": 4.1224273703294515e-07,
+      "loss": 0.2456,
+      "step": 345
+    },
+    {
+      "epoch": 0.9202127659574468,
+      "grad_norm": 2.9162609577178955,
+      "learning_rate": 3.862452148243623e-07,
+      "loss": 0.2633,
+      "step": 346
+    },
+    {
+      "epoch": 0.9228723404255319,
+      "grad_norm": 3.10223388671875,
+      "learning_rate": 3.610782244025768e-07,
+      "loss": 0.2165,
+      "step": 347
+    },
+    {
+      "epoch": 0.925531914893617,
+      "grad_norm": 3.3466663360595703,
+      "learning_rate": 3.367439399426087e-07,
+      "loss": 0.2748,
+      "step": 348
+    },
+    {
+      "epoch": 0.9281914893617021,
+      "grad_norm": 3.4505677223205566,
+      "learning_rate": 3.132444636820575e-07,
+      "loss": 0.2789,
+      "step": 349
+    },
+    {
+      "epoch": 0.9308510638297872,
+      "grad_norm": 3.7714152336120605,
+      "learning_rate": 2.905818257394799e-07,
+      "loss": 0.233,
+      "step": 350
+    },
+    {
+      "epoch": 0.9335106382978723,
+      "grad_norm": 5.176234722137451,
+      "learning_rate": 2.687579839390153e-07,
+      "loss": 0.2933,
+      "step": 351
+    },
+    {
+      "epoch": 0.9361702127659575,
+      "grad_norm": 2.8145923614501953,
+      "learning_rate": 2.4777482364124695e-07,
+      "loss": 0.2916,
+      "step": 352
+    },
+    {
+      "epoch": 0.9388297872340425,
+      "grad_norm": 2.452026605606079,
+      "learning_rate": 2.2763415758032316e-07,
+      "loss": 0.2072,
+      "step": 353
+    },
+    {
+      "epoch": 0.9414893617021277,
+      "grad_norm": 2.741774559020996,
+      "learning_rate": 2.0833772570736376e-07,
+      "loss": 0.2365,
+      "step": 354
+    },
+    {
+      "epoch": 0.9441489361702128,
+      "grad_norm": 2.6265206336975098,
+      "learning_rate": 1.8988719504013375e-07,
+      "loss": 0.2226,
+      "step": 355
+    },
+    {
+      "epoch": 0.9468085106382979,
+      "grad_norm": 4.149282932281494,
+      "learning_rate": 1.7228415951904165e-07,
+      "loss": 0.1923,
+      "step": 356
+    },
+    {
+      "epoch": 0.949468085106383,
+      "grad_norm": 2.389505624771118,
+      "learning_rate": 1.5553013986942645e-07,
+      "loss": 0.21,
+      "step": 357
+    },
+    {
+      "epoch": 0.9521276595744681,
+      "grad_norm": 4.067861557006836,
+      "learning_rate": 1.3962658347019819e-07,
+      "loss": 0.2497,
+      "step": 358
+    },
+    {
+      "epoch": 0.9547872340425532,
+      "grad_norm": 2.5128250122070312,
+      "learning_rate": 1.245748642287814e-07,
+      "loss": 0.2559,
+      "step": 359
+    },
+    {
+      "epoch": 0.9574468085106383,
+      "grad_norm": 2.755162477493286,
+      "learning_rate": 1.103762824624377e-07,
+      "loss": 0.2398,
+      "step": 360
+    },
+    {
+      "epoch": 0.9574468085106383,
+      "eval_accuracy": 0.8329411764705882,
+      "eval_f1": 0.5617283950617284,
+      "eval_loss": 0.38481393456459045,
+      "eval_precision": 0.7398373983739838,
+      "eval_recall": 0.4527363184079602,
+      "eval_runtime": 34.7008,
+      "eval_samples_per_second": 6.455,
+      "eval_steps_per_second": 0.202,
+      "step": 360
+    },
+    {
+      "epoch": 0.9601063829787234,
+      "grad_norm": 3.078138828277588,
+      "learning_rate": 9.70320647859213e-08,
+      "loss": 0.2091,
+      "step": 361
+    },
+    {
+      "epoch": 0.9627659574468085,
+      "grad_norm": 2.8632972240448,
+      "learning_rate": 8.454336400552154e-08,
+      "loss": 0.2513,
+      "step": 362
+    },
+    {
+      "epoch": 0.9654255319148937,
+      "grad_norm": 2.500767469406128,
+      "learning_rate": 7.291125901946027e-08,
+      "loss": 0.2346,
+      "step": 363
+    },
+    {
+      "epoch": 0.9680851063829787,
+      "grad_norm": 4.420257091522217,
+      "learning_rate": 6.21367547246976e-08,
+      "loss": 0.2701,
+      "step": 364
+    },
+    {
+      "epoch": 0.9707446808510638,
+      "grad_norm": 2.459460973739624,
+      "learning_rate": 5.2220781930111263e-08,
+      "loss": 0.2441,
+      "step": 365
+    },
+    {
+      "epoch": 0.973404255319149,
+      "grad_norm": 3.661996841430664,
+      "learning_rate": 4.316419727608434e-08,
+      "loss": 0.2704,
+      "step": 366
+    },
+    {
+      "epoch": 0.976063829787234,
+      "grad_norm": 3.0439155101776123,
+      "learning_rate": 3.4967783160507753e-08,
+      "loss": 0.2187,
+      "step": 367
+    },
+    {
+      "epoch": 0.9787234042553191,
+      "grad_norm": 3.629185914993286,
+      "learning_rate": 2.763224767117767e-08,
+      "loss": 0.3418,
+      "step": 368
+    },
+    {
+      "epoch": 0.9813829787234043,
+      "grad_norm": 2.30877423286438,
+      "learning_rate": 2.115822452463223e-08,
+      "loss": 0.2607,
+      "step": 369
+    },
+    {
+      "epoch": 0.9840425531914894,
+      "grad_norm": 3.398482084274292,
+      "learning_rate": 1.554627301140199e-08,
+      "loss": 0.2494,
+      "step": 370
+    },
+    {
+      "epoch": 0.9867021276595744,
+      "grad_norm": 3.0833022594451904,
+      "learning_rate": 1.0796877947691909e-08,
+      "loss": 0.2924,
+      "step": 371
+    },
+    {
+      "epoch": 0.9893617021276596,
+      "grad_norm": 2.702519655227661,
+      "learning_rate": 6.910449633501515e-09,
+      "loss": 0.2222,
+      "step": 372
+    },
+    {
+      "epoch": 0.9920212765957447,
+      "grad_norm": 3.0397112369537354,
+      "learning_rate": 3.887323817173272e-09,
+      "loss": 0.2145,
+      "step": 373
+    },
+    {
+      "epoch": 0.9946808510638298,
+      "grad_norm": 2.342505931854248,
+      "learning_rate": 1.7277616663946562e-09,
+      "loss": 0.2471,
+      "step": 374
+    },
+    {
+      "epoch": 0.9973404255319149,
+      "grad_norm": 2.674713611602783,
+      "learning_rate": 4.319497456273247e-10,
+      "loss": 0.2519,
+      "step": 375
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 4.508094310760498,
+      "learning_rate": 0.0,
+      "loss": 0.3025,
+      "step": 376
     }
   ],
   "logging_steps": 1,
@@ -2313,12 +2881,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 1.2170791543740826e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
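For reference, the values recorded in trainer_state.json (global_step, epoch, and the per-step log_history entries shown in this diff) can be read back programmatically once the file is fetched locally; a minimal sketch:

    import json

    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    print(state["global_step"], state["epoch"])  # 376 1.0

    # log_history holds one dict per logged step; eval rows carry eval_* keys.
    # The eval rows added in this commit are steps 320, 340 and 360.
    evals = [row for row in state["log_history"] if "eval_f1" in row]
    for row in evals[-3:]:
        print(row["step"], row["eval_loss"], row["eval_f1"])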