Training in progress, step 4500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1673342072
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:212c1f4aa56505d9d886b2ce32af1f73897b72544c03971b84af1ea1e779ede1
|
3 |
size 1673342072
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 194745274
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4ffa38fb5be6fffd0ce3ed64a7481e8be864f293de431d33c5627f72e76c98a
|
3 |
size 194745274
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a95d4423cecde4a78762bf58efa44aece85b1128c704d4a826647715bc2c3a6c
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8670c5c048d65fccc45f56703c267d59b5ec466e8f5777cf6ff01b0a32855ba
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 900,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -28039,6 +28039,3514 @@
|
|
28039 |
"learning_rate": 2.2271714922049e-05,
|
28040 |
"loss": 0.4815,
|
28041 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28042 |
}
|
28043 |
],
|
28044 |
"logging_steps": 1,
|
@@ -28053,12 +31561,12 @@
|
|
28053 |
"should_evaluate": false,
|
28054 |
"should_log": false,
|
28055 |
"should_save": true,
|
28056 |
-
"should_training_stop":
|
28057 |
},
|
28058 |
"attributes": {}
|
28059 |
}
|
28060 |
},
|
28061 |
-
"total_flos": 4.
|
28062 |
"train_batch_size": 1,
|
28063 |
"trial_name": null,
|
28064 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 900,
|
6 |
+
"global_step": 4500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
28039 |
"learning_rate": 2.2271714922049e-05,
|
28040 |
"loss": 0.4815,
|
28041 |
"step": 4000
|
28042 |
+
},
|
28043 |
+
{
|
28044 |
+
"epoch": 0.8891111111111111,
|
28045 |
+
"grad_norm": 0.5801368355751038,
|
28046 |
+
"learning_rate": 2.22271714922049e-05,
|
28047 |
+
"loss": 0.8694,
|
28048 |
+
"step": 4001
|
28049 |
+
},
|
28050 |
+
{
|
28051 |
+
"epoch": 0.8893333333333333,
|
28052 |
+
"grad_norm": 0.6287752985954285,
|
28053 |
+
"learning_rate": 2.21826280623608e-05,
|
28054 |
+
"loss": 1.1768,
|
28055 |
+
"step": 4002
|
28056 |
+
},
|
28057 |
+
{
|
28058 |
+
"epoch": 0.8895555555555555,
|
28059 |
+
"grad_norm": 0.8464820981025696,
|
28060 |
+
"learning_rate": 2.2138084632516704e-05,
|
28061 |
+
"loss": 2.4031,
|
28062 |
+
"step": 4003
|
28063 |
+
},
|
28064 |
+
{
|
28065 |
+
"epoch": 0.8897777777777778,
|
28066 |
+
"grad_norm": 0.878257691860199,
|
28067 |
+
"learning_rate": 2.209354120267261e-05,
|
28068 |
+
"loss": 2.1387,
|
28069 |
+
"step": 4004
|
28070 |
+
},
|
28071 |
+
{
|
28072 |
+
"epoch": 0.89,
|
28073 |
+
"grad_norm": 0.6297408938407898,
|
28074 |
+
"learning_rate": 2.2048997772828508e-05,
|
28075 |
+
"loss": 1.0515,
|
28076 |
+
"step": 4005
|
28077 |
+
},
|
28078 |
+
{
|
28079 |
+
"epoch": 0.8902222222222222,
|
28080 |
+
"grad_norm": 0.8279980421066284,
|
28081 |
+
"learning_rate": 2.200445434298441e-05,
|
28082 |
+
"loss": 1.992,
|
28083 |
+
"step": 4006
|
28084 |
+
},
|
28085 |
+
{
|
28086 |
+
"epoch": 0.8904444444444445,
|
28087 |
+
"grad_norm": 0.07900725305080414,
|
28088 |
+
"learning_rate": 2.1959910913140315e-05,
|
28089 |
+
"loss": 0.0116,
|
28090 |
+
"step": 4007
|
28091 |
+
},
|
28092 |
+
{
|
28093 |
+
"epoch": 0.8906666666666667,
|
28094 |
+
"grad_norm": 0.07973389327526093,
|
28095 |
+
"learning_rate": 2.1915367483296214e-05,
|
28096 |
+
"loss": 0.0113,
|
28097 |
+
"step": 4008
|
28098 |
+
},
|
28099 |
+
{
|
28100 |
+
"epoch": 0.8908888888888888,
|
28101 |
+
"grad_norm": 0.5878556370735168,
|
28102 |
+
"learning_rate": 2.1870824053452115e-05,
|
28103 |
+
"loss": 1.1704,
|
28104 |
+
"step": 4009
|
28105 |
+
},
|
28106 |
+
{
|
28107 |
+
"epoch": 0.8911111111111111,
|
28108 |
+
"grad_norm": 0.9955252408981323,
|
28109 |
+
"learning_rate": 2.182628062360802e-05,
|
28110 |
+
"loss": 1.8866,
|
28111 |
+
"step": 4010
|
28112 |
+
},
|
28113 |
+
{
|
28114 |
+
"epoch": 0.8913333333333333,
|
28115 |
+
"grad_norm": 0.876213788986206,
|
28116 |
+
"learning_rate": 2.178173719376392e-05,
|
28117 |
+
"loss": 1.9363,
|
28118 |
+
"step": 4011
|
28119 |
+
},
|
28120 |
+
{
|
28121 |
+
"epoch": 0.8915555555555555,
|
28122 |
+
"grad_norm": 0.8237855434417725,
|
28123 |
+
"learning_rate": 2.173719376391982e-05,
|
28124 |
+
"loss": 2.1091,
|
28125 |
+
"step": 4012
|
28126 |
+
},
|
28127 |
+
{
|
28128 |
+
"epoch": 0.8917777777777778,
|
28129 |
+
"grad_norm": 0.6529291868209839,
|
28130 |
+
"learning_rate": 2.1692650334075727e-05,
|
28131 |
+
"loss": 0.9921,
|
28132 |
+
"step": 4013
|
28133 |
+
},
|
28134 |
+
{
|
28135 |
+
"epoch": 0.892,
|
28136 |
+
"grad_norm": 0.9489926099777222,
|
28137 |
+
"learning_rate": 2.1648106904231625e-05,
|
28138 |
+
"loss": 1.8424,
|
28139 |
+
"step": 4014
|
28140 |
+
},
|
28141 |
+
{
|
28142 |
+
"epoch": 0.8922222222222222,
|
28143 |
+
"grad_norm": 0.973099946975708,
|
28144 |
+
"learning_rate": 2.1603563474387527e-05,
|
28145 |
+
"loss": 1.9839,
|
28146 |
+
"step": 4015
|
28147 |
+
},
|
28148 |
+
{
|
28149 |
+
"epoch": 0.8924444444444445,
|
28150 |
+
"grad_norm": 0.8978729248046875,
|
28151 |
+
"learning_rate": 2.1559020044543433e-05,
|
28152 |
+
"loss": 1.7646,
|
28153 |
+
"step": 4016
|
28154 |
+
},
|
28155 |
+
{
|
28156 |
+
"epoch": 0.8926666666666667,
|
28157 |
+
"grad_norm": 0.9603530764579773,
|
28158 |
+
"learning_rate": 2.1514476614699335e-05,
|
28159 |
+
"loss": 1.8374,
|
28160 |
+
"step": 4017
|
28161 |
+
},
|
28162 |
+
{
|
28163 |
+
"epoch": 0.8928888888888888,
|
28164 |
+
"grad_norm": 0.07031574100255966,
|
28165 |
+
"learning_rate": 2.1469933184855233e-05,
|
28166 |
+
"loss": 0.0148,
|
28167 |
+
"step": 4018
|
28168 |
+
},
|
28169 |
+
{
|
28170 |
+
"epoch": 0.8931111111111111,
|
28171 |
+
"grad_norm": 0.06550273299217224,
|
28172 |
+
"learning_rate": 2.142538975501114e-05,
|
28173 |
+
"loss": 0.0147,
|
28174 |
+
"step": 4019
|
28175 |
+
},
|
28176 |
+
{
|
28177 |
+
"epoch": 0.8933333333333333,
|
28178 |
+
"grad_norm": 0.06782650202512741,
|
28179 |
+
"learning_rate": 2.138084632516704e-05,
|
28180 |
+
"loss": 0.0149,
|
28181 |
+
"step": 4020
|
28182 |
+
},
|
28183 |
+
{
|
28184 |
+
"epoch": 0.8935555555555555,
|
28185 |
+
"grad_norm": 0.7483673095703125,
|
28186 |
+
"learning_rate": 2.133630289532294e-05,
|
28187 |
+
"loss": 0.9084,
|
28188 |
+
"step": 4021
|
28189 |
+
},
|
28190 |
+
{
|
28191 |
+
"epoch": 0.8937777777777778,
|
28192 |
+
"grad_norm": 0.9090237617492676,
|
28193 |
+
"learning_rate": 2.129175946547884e-05,
|
28194 |
+
"loss": 1.8329,
|
28195 |
+
"step": 4022
|
28196 |
+
},
|
28197 |
+
{
|
28198 |
+
"epoch": 0.894,
|
28199 |
+
"grad_norm": 0.7626523971557617,
|
28200 |
+
"learning_rate": 2.1247216035634746e-05,
|
28201 |
+
"loss": 0.9174,
|
28202 |
+
"step": 4023
|
28203 |
+
},
|
28204 |
+
{
|
28205 |
+
"epoch": 0.8942222222222223,
|
28206 |
+
"grad_norm": 0.6706441640853882,
|
28207 |
+
"learning_rate": 2.1202672605790645e-05,
|
28208 |
+
"loss": 0.8133,
|
28209 |
+
"step": 4024
|
28210 |
+
},
|
28211 |
+
{
|
28212 |
+
"epoch": 0.8944444444444445,
|
28213 |
+
"grad_norm": 0.9489988684654236,
|
28214 |
+
"learning_rate": 2.1158129175946547e-05,
|
28215 |
+
"loss": 2.003,
|
28216 |
+
"step": 4025
|
28217 |
+
},
|
28218 |
+
{
|
28219 |
+
"epoch": 0.8946666666666667,
|
28220 |
+
"grad_norm": 0.9574695825576782,
|
28221 |
+
"learning_rate": 2.1113585746102452e-05,
|
28222 |
+
"loss": 1.5621,
|
28223 |
+
"step": 4026
|
28224 |
+
},
|
28225 |
+
{
|
28226 |
+
"epoch": 0.8948888888888888,
|
28227 |
+
"grad_norm": 1.1038743257522583,
|
28228 |
+
"learning_rate": 2.1069042316258354e-05,
|
28229 |
+
"loss": 1.8653,
|
28230 |
+
"step": 4027
|
28231 |
+
},
|
28232 |
+
{
|
28233 |
+
"epoch": 0.8951111111111111,
|
28234 |
+
"grad_norm": 1.0262362957000732,
|
28235 |
+
"learning_rate": 2.1024498886414253e-05,
|
28236 |
+
"loss": 1.6652,
|
28237 |
+
"step": 4028
|
28238 |
+
},
|
28239 |
+
{
|
28240 |
+
"epoch": 0.8953333333333333,
|
28241 |
+
"grad_norm": 0.8741075396537781,
|
28242 |
+
"learning_rate": 2.0979955456570158e-05,
|
28243 |
+
"loss": 1.6058,
|
28244 |
+
"step": 4029
|
28245 |
+
},
|
28246 |
+
{
|
28247 |
+
"epoch": 0.8955555555555555,
|
28248 |
+
"grad_norm": 0.7687373161315918,
|
28249 |
+
"learning_rate": 2.093541202672606e-05,
|
28250 |
+
"loss": 0.8818,
|
28251 |
+
"step": 4030
|
28252 |
+
},
|
28253 |
+
{
|
28254 |
+
"epoch": 0.8957777777777778,
|
28255 |
+
"grad_norm": 0.06525861471891403,
|
28256 |
+
"learning_rate": 2.089086859688196e-05,
|
28257 |
+
"loss": 0.0177,
|
28258 |
+
"step": 4031
|
28259 |
+
},
|
28260 |
+
{
|
28261 |
+
"epoch": 0.896,
|
28262 |
+
"grad_norm": 0.6307370066642761,
|
28263 |
+
"learning_rate": 2.0846325167037864e-05,
|
28264 |
+
"loss": 0.7724,
|
28265 |
+
"step": 4032
|
28266 |
+
},
|
28267 |
+
{
|
28268 |
+
"epoch": 0.8962222222222223,
|
28269 |
+
"grad_norm": 1.1199438571929932,
|
28270 |
+
"learning_rate": 2.0801781737193766e-05,
|
28271 |
+
"loss": 1.7534,
|
28272 |
+
"step": 4033
|
28273 |
+
},
|
28274 |
+
{
|
28275 |
+
"epoch": 0.8964444444444445,
|
28276 |
+
"grad_norm": 0.9748408794403076,
|
28277 |
+
"learning_rate": 2.0757238307349665e-05,
|
28278 |
+
"loss": 1.6166,
|
28279 |
+
"step": 4034
|
28280 |
+
},
|
28281 |
+
{
|
28282 |
+
"epoch": 0.8966666666666666,
|
28283 |
+
"grad_norm": 0.0824805200099945,
|
28284 |
+
"learning_rate": 2.071269487750557e-05,
|
28285 |
+
"loss": 0.0188,
|
28286 |
+
"step": 4035
|
28287 |
+
},
|
28288 |
+
{
|
28289 |
+
"epoch": 0.8968888888888888,
|
28290 |
+
"grad_norm": 0.09000510722398758,
|
28291 |
+
"learning_rate": 2.0668151447661472e-05,
|
28292 |
+
"loss": 0.0188,
|
28293 |
+
"step": 4036
|
28294 |
+
},
|
28295 |
+
{
|
28296 |
+
"epoch": 0.8971111111111111,
|
28297 |
+
"grad_norm": 0.08561154454946518,
|
28298 |
+
"learning_rate": 2.0623608017817374e-05,
|
28299 |
+
"loss": 0.0185,
|
28300 |
+
"step": 4037
|
28301 |
+
},
|
28302 |
+
{
|
28303 |
+
"epoch": 0.8973333333333333,
|
28304 |
+
"grad_norm": 0.7661683559417725,
|
28305 |
+
"learning_rate": 2.0579064587973276e-05,
|
28306 |
+
"loss": 0.907,
|
28307 |
+
"step": 4038
|
28308 |
+
},
|
28309 |
+
{
|
28310 |
+
"epoch": 0.8975555555555556,
|
28311 |
+
"grad_norm": 0.9890311360359192,
|
28312 |
+
"learning_rate": 2.0534521158129178e-05,
|
28313 |
+
"loss": 1.3849,
|
28314 |
+
"step": 4039
|
28315 |
+
},
|
28316 |
+
{
|
28317 |
+
"epoch": 0.8977777777777778,
|
28318 |
+
"grad_norm": 0.7973209619522095,
|
28319 |
+
"learning_rate": 2.048997772828508e-05,
|
28320 |
+
"loss": 0.749,
|
28321 |
+
"step": 4040
|
28322 |
+
},
|
28323 |
+
{
|
28324 |
+
"epoch": 0.898,
|
28325 |
+
"grad_norm": 1.1026244163513184,
|
28326 |
+
"learning_rate": 2.044543429844098e-05,
|
28327 |
+
"loss": 1.7591,
|
28328 |
+
"step": 4041
|
28329 |
+
},
|
28330 |
+
{
|
28331 |
+
"epoch": 0.8982222222222223,
|
28332 |
+
"grad_norm": 1.1480908393859863,
|
28333 |
+
"learning_rate": 2.0400890868596884e-05,
|
28334 |
+
"loss": 1.9024,
|
28335 |
+
"step": 4042
|
28336 |
+
},
|
28337 |
+
{
|
28338 |
+
"epoch": 0.8984444444444445,
|
28339 |
+
"grad_norm": 1.0242488384246826,
|
28340 |
+
"learning_rate": 2.0356347438752786e-05,
|
28341 |
+
"loss": 1.4006,
|
28342 |
+
"step": 4043
|
28343 |
+
},
|
28344 |
+
{
|
28345 |
+
"epoch": 0.8986666666666666,
|
28346 |
+
"grad_norm": 1.117613434791565,
|
28347 |
+
"learning_rate": 2.0311804008908684e-05,
|
28348 |
+
"loss": 1.3118,
|
28349 |
+
"step": 4044
|
28350 |
+
},
|
28351 |
+
{
|
28352 |
+
"epoch": 0.8988888888888888,
|
28353 |
+
"grad_norm": 1.1382890939712524,
|
28354 |
+
"learning_rate": 2.026726057906459e-05,
|
28355 |
+
"loss": 1.3193,
|
28356 |
+
"step": 4045
|
28357 |
+
},
|
28358 |
+
{
|
28359 |
+
"epoch": 0.8991111111111111,
|
28360 |
+
"grad_norm": 1.2156895399093628,
|
28361 |
+
"learning_rate": 2.0222717149220492e-05,
|
28362 |
+
"loss": 1.1794,
|
28363 |
+
"step": 4046
|
28364 |
+
},
|
28365 |
+
{
|
28366 |
+
"epoch": 0.8993333333333333,
|
28367 |
+
"grad_norm": 0.6855819225311279,
|
28368 |
+
"learning_rate": 2.017817371937639e-05,
|
28369 |
+
"loss": 0.4808,
|
28370 |
+
"step": 4047
|
28371 |
+
},
|
28372 |
+
{
|
28373 |
+
"epoch": 0.8995555555555556,
|
28374 |
+
"grad_norm": 0.1426740288734436,
|
28375 |
+
"learning_rate": 2.0133630289532296e-05,
|
28376 |
+
"loss": 0.033,
|
28377 |
+
"step": 4048
|
28378 |
+
},
|
28379 |
+
{
|
28380 |
+
"epoch": 0.8997777777777778,
|
28381 |
+
"grad_norm": 1.0770491361618042,
|
28382 |
+
"learning_rate": 2.0089086859688198e-05,
|
28383 |
+
"loss": 0.8844,
|
28384 |
+
"step": 4049
|
28385 |
+
},
|
28386 |
+
{
|
28387 |
+
"epoch": 0.9,
|
28388 |
+
"grad_norm": 0.9471620917320251,
|
28389 |
+
"learning_rate": 2.00445434298441e-05,
|
28390 |
+
"loss": 0.9019,
|
28391 |
+
"step": 4050
|
28392 |
+
},
|
28393 |
+
{
|
28394 |
+
"epoch": 0.9002222222222223,
|
28395 |
+
"grad_norm": 0.8870931267738342,
|
28396 |
+
"learning_rate": 2e-05,
|
28397 |
+
"loss": 1.9518,
|
28398 |
+
"step": 4051
|
28399 |
+
},
|
28400 |
+
{
|
28401 |
+
"epoch": 0.9004444444444445,
|
28402 |
+
"grad_norm": 0.681877851486206,
|
28403 |
+
"learning_rate": 1.9955456570155904e-05,
|
28404 |
+
"loss": 1.0902,
|
28405 |
+
"step": 4052
|
28406 |
+
},
|
28407 |
+
{
|
28408 |
+
"epoch": 0.9006666666666666,
|
28409 |
+
"grad_norm": 0.8550397157669067,
|
28410 |
+
"learning_rate": 1.9910913140311806e-05,
|
28411 |
+
"loss": 2.1819,
|
28412 |
+
"step": 4053
|
28413 |
+
},
|
28414 |
+
{
|
28415 |
+
"epoch": 0.9008888888888889,
|
28416 |
+
"grad_norm": 0.5659412741661072,
|
28417 |
+
"learning_rate": 1.9866369710467708e-05,
|
28418 |
+
"loss": 1.309,
|
28419 |
+
"step": 4054
|
28420 |
+
},
|
28421 |
+
{
|
28422 |
+
"epoch": 0.9011111111111111,
|
28423 |
+
"grad_norm": 0.618087887763977,
|
28424 |
+
"learning_rate": 1.982182628062361e-05,
|
28425 |
+
"loss": 1.0288,
|
28426 |
+
"step": 4055
|
28427 |
+
},
|
28428 |
+
{
|
28429 |
+
"epoch": 0.9013333333333333,
|
28430 |
+
"grad_norm": 0.5965234637260437,
|
28431 |
+
"learning_rate": 1.977728285077951e-05,
|
28432 |
+
"loss": 1.0864,
|
28433 |
+
"step": 4056
|
28434 |
+
},
|
28435 |
+
{
|
28436 |
+
"epoch": 0.9015555555555556,
|
28437 |
+
"grad_norm": 0.8751803636550903,
|
28438 |
+
"learning_rate": 1.973273942093541e-05,
|
28439 |
+
"loss": 2.2079,
|
28440 |
+
"step": 4057
|
28441 |
+
},
|
28442 |
+
{
|
28443 |
+
"epoch": 0.9017777777777778,
|
28444 |
+
"grad_norm": 0.907996416091919,
|
28445 |
+
"learning_rate": 1.9688195991091315e-05,
|
28446 |
+
"loss": 2.0763,
|
28447 |
+
"step": 4058
|
28448 |
+
},
|
28449 |
+
{
|
28450 |
+
"epoch": 0.902,
|
28451 |
+
"grad_norm": 0.08640366792678833,
|
28452 |
+
"learning_rate": 1.9643652561247217e-05,
|
28453 |
+
"loss": 0.0116,
|
28454 |
+
"step": 4059
|
28455 |
+
},
|
28456 |
+
{
|
28457 |
+
"epoch": 0.9022222222222223,
|
28458 |
+
"grad_norm": 0.08815193176269531,
|
28459 |
+
"learning_rate": 1.959910913140312e-05,
|
28460 |
+
"loss": 0.0114,
|
28461 |
+
"step": 4060
|
28462 |
+
},
|
28463 |
+
{
|
28464 |
+
"epoch": 0.9024444444444445,
|
28465 |
+
"grad_norm": 0.08544806391000748,
|
28466 |
+
"learning_rate": 1.955456570155902e-05,
|
28467 |
+
"loss": 0.0115,
|
28468 |
+
"step": 4061
|
28469 |
+
},
|
28470 |
+
{
|
28471 |
+
"epoch": 0.9026666666666666,
|
28472 |
+
"grad_norm": 0.6461583375930786,
|
28473 |
+
"learning_rate": 1.9510022271714923e-05,
|
28474 |
+
"loss": 1.1449,
|
28475 |
+
"step": 4062
|
28476 |
+
},
|
28477 |
+
{
|
28478 |
+
"epoch": 0.9028888888888889,
|
28479 |
+
"grad_norm": 0.8666505813598633,
|
28480 |
+
"learning_rate": 1.9465478841870825e-05,
|
28481 |
+
"loss": 2.0281,
|
28482 |
+
"step": 4063
|
28483 |
+
},
|
28484 |
+
{
|
28485 |
+
"epoch": 0.9031111111111111,
|
28486 |
+
"grad_norm": 1.0003634691238403,
|
28487 |
+
"learning_rate": 1.9420935412026727e-05,
|
28488 |
+
"loss": 1.929,
|
28489 |
+
"step": 4064
|
28490 |
+
},
|
28491 |
+
{
|
28492 |
+
"epoch": 0.9033333333333333,
|
28493 |
+
"grad_norm": 1.1285505294799805,
|
28494 |
+
"learning_rate": 1.937639198218263e-05,
|
28495 |
+
"loss": 2.3214,
|
28496 |
+
"step": 4065
|
28497 |
+
},
|
28498 |
+
{
|
28499 |
+
"epoch": 0.9035555555555556,
|
28500 |
+
"grad_norm": 0.6442127823829651,
|
28501 |
+
"learning_rate": 1.933184855233853e-05,
|
28502 |
+
"loss": 0.79,
|
28503 |
+
"step": 4066
|
28504 |
+
},
|
28505 |
+
{
|
28506 |
+
"epoch": 0.9037777777777778,
|
28507 |
+
"grad_norm": 0.6966649889945984,
|
28508 |
+
"learning_rate": 1.9287305122494433e-05,
|
28509 |
+
"loss": 0.9614,
|
28510 |
+
"step": 4067
|
28511 |
+
},
|
28512 |
+
{
|
28513 |
+
"epoch": 0.904,
|
28514 |
+
"grad_norm": 1.0646114349365234,
|
28515 |
+
"learning_rate": 1.9242761692650335e-05,
|
28516 |
+
"loss": 2.0066,
|
28517 |
+
"step": 4068
|
28518 |
+
},
|
28519 |
+
{
|
28520 |
+
"epoch": 0.9042222222222223,
|
28521 |
+
"grad_norm": 1.0722988843917847,
|
28522 |
+
"learning_rate": 1.9198218262806237e-05,
|
28523 |
+
"loss": 1.8977,
|
28524 |
+
"step": 4069
|
28525 |
+
},
|
28526 |
+
{
|
28527 |
+
"epoch": 0.9044444444444445,
|
28528 |
+
"grad_norm": 0.9870444536209106,
|
28529 |
+
"learning_rate": 1.915367483296214e-05,
|
28530 |
+
"loss": 1.6182,
|
28531 |
+
"step": 4070
|
28532 |
+
},
|
28533 |
+
{
|
28534 |
+
"epoch": 0.9046666666666666,
|
28535 |
+
"grad_norm": 0.6224427819252014,
|
28536 |
+
"learning_rate": 1.910913140311804e-05,
|
28537 |
+
"loss": 0.7754,
|
28538 |
+
"step": 4071
|
28539 |
+
},
|
28540 |
+
{
|
28541 |
+
"epoch": 0.9048888888888889,
|
28542 |
+
"grad_norm": 0.06635406613349915,
|
28543 |
+
"learning_rate": 1.9064587973273943e-05,
|
28544 |
+
"loss": 0.0147,
|
28545 |
+
"step": 4072
|
28546 |
+
},
|
28547 |
+
{
|
28548 |
+
"epoch": 0.9051111111111111,
|
28549 |
+
"grad_norm": 0.07058946043252945,
|
28550 |
+
"learning_rate": 1.9020044543429845e-05,
|
28551 |
+
"loss": 0.0173,
|
28552 |
+
"step": 4073
|
28553 |
+
},
|
28554 |
+
{
|
28555 |
+
"epoch": 0.9053333333333333,
|
28556 |
+
"grad_norm": 0.6457788348197937,
|
28557 |
+
"learning_rate": 1.8975501113585747e-05,
|
28558 |
+
"loss": 0.8879,
|
28559 |
+
"step": 4074
|
28560 |
+
},
|
28561 |
+
{
|
28562 |
+
"epoch": 0.9055555555555556,
|
28563 |
+
"grad_norm": 1.0111377239227295,
|
28564 |
+
"learning_rate": 1.893095768374165e-05,
|
28565 |
+
"loss": 1.617,
|
28566 |
+
"step": 4075
|
28567 |
+
},
|
28568 |
+
{
|
28569 |
+
"epoch": 0.9057777777777778,
|
28570 |
+
"grad_norm": 0.9277496933937073,
|
28571 |
+
"learning_rate": 1.888641425389755e-05,
|
28572 |
+
"loss": 1.8035,
|
28573 |
+
"step": 4076
|
28574 |
+
},
|
28575 |
+
{
|
28576 |
+
"epoch": 0.906,
|
28577 |
+
"grad_norm": 1.0849852561950684,
|
28578 |
+
"learning_rate": 1.8841870824053453e-05,
|
28579 |
+
"loss": 1.6758,
|
28580 |
+
"step": 4077
|
28581 |
+
},
|
28582 |
+
{
|
28583 |
+
"epoch": 0.9062222222222223,
|
28584 |
+
"grad_norm": 1.02144455909729,
|
28585 |
+
"learning_rate": 1.8797327394209355e-05,
|
28586 |
+
"loss": 1.812,
|
28587 |
+
"step": 4078
|
28588 |
+
},
|
28589 |
+
{
|
28590 |
+
"epoch": 0.9064444444444445,
|
28591 |
+
"grad_norm": 1.1183116436004639,
|
28592 |
+
"learning_rate": 1.8752783964365257e-05,
|
28593 |
+
"loss": 1.8675,
|
28594 |
+
"step": 4079
|
28595 |
+
},
|
28596 |
+
{
|
28597 |
+
"epoch": 0.9066666666666666,
|
28598 |
+
"grad_norm": 1.3064316511154175,
|
28599 |
+
"learning_rate": 1.870824053452116e-05,
|
28600 |
+
"loss": 1.7242,
|
28601 |
+
"step": 4080
|
28602 |
+
},
|
28603 |
+
{
|
28604 |
+
"epoch": 0.9068888888888889,
|
28605 |
+
"grad_norm": 0.06701880693435669,
|
28606 |
+
"learning_rate": 1.866369710467706e-05,
|
28607 |
+
"loss": 0.0177,
|
28608 |
+
"step": 4081
|
28609 |
+
},
|
28610 |
+
{
|
28611 |
+
"epoch": 0.9071111111111111,
|
28612 |
+
"grad_norm": 0.06481373310089111,
|
28613 |
+
"learning_rate": 1.8619153674832963e-05,
|
28614 |
+
"loss": 0.0178,
|
28615 |
+
"step": 4082
|
28616 |
+
},
|
28617 |
+
{
|
28618 |
+
"epoch": 0.9073333333333333,
|
28619 |
+
"grad_norm": 0.7761397361755371,
|
28620 |
+
"learning_rate": 1.8574610244988865e-05,
|
28621 |
+
"loss": 0.7757,
|
28622 |
+
"step": 4083
|
28623 |
+
},
|
28624 |
+
{
|
28625 |
+
"epoch": 0.9075555555555556,
|
28626 |
+
"grad_norm": 1.0291235446929932,
|
28627 |
+
"learning_rate": 1.8530066815144767e-05,
|
28628 |
+
"loss": 1.6664,
|
28629 |
+
"step": 4084
|
28630 |
+
},
|
28631 |
+
{
|
28632 |
+
"epoch": 0.9077777777777778,
|
28633 |
+
"grad_norm": 0.7274791598320007,
|
28634 |
+
"learning_rate": 1.848552338530067e-05,
|
28635 |
+
"loss": 0.979,
|
28636 |
+
"step": 4085
|
28637 |
+
},
|
28638 |
+
{
|
28639 |
+
"epoch": 0.908,
|
28640 |
+
"grad_norm": 0.6331042647361755,
|
28641 |
+
"learning_rate": 1.844097995545657e-05,
|
28642 |
+
"loss": 0.6799,
|
28643 |
+
"step": 4086
|
28644 |
+
},
|
28645 |
+
{
|
28646 |
+
"epoch": 0.9082222222222223,
|
28647 |
+
"grad_norm": 0.08008535206317902,
|
28648 |
+
"learning_rate": 1.8396436525612473e-05,
|
28649 |
+
"loss": 0.018,
|
28650 |
+
"step": 4087
|
28651 |
+
},
|
28652 |
+
{
|
28653 |
+
"epoch": 0.9084444444444445,
|
28654 |
+
"grad_norm": 0.08232392370700836,
|
28655 |
+
"learning_rate": 1.8351893095768375e-05,
|
28656 |
+
"loss": 0.018,
|
28657 |
+
"step": 4088
|
28658 |
+
},
|
28659 |
+
{
|
28660 |
+
"epoch": 0.9086666666666666,
|
28661 |
+
"grad_norm": 0.6026217341423035,
|
28662 |
+
"learning_rate": 1.830734966592428e-05,
|
28663 |
+
"loss": 0.8012,
|
28664 |
+
"step": 4089
|
28665 |
+
},
|
28666 |
+
{
|
28667 |
+
"epoch": 0.9088888888888889,
|
28668 |
+
"grad_norm": 1.0574473142623901,
|
28669 |
+
"learning_rate": 1.826280623608018e-05,
|
28670 |
+
"loss": 1.7651,
|
28671 |
+
"step": 4090
|
28672 |
+
},
|
28673 |
+
{
|
28674 |
+
"epoch": 0.9091111111111111,
|
28675 |
+
"grad_norm": 1.1249449253082275,
|
28676 |
+
"learning_rate": 1.821826280623608e-05,
|
28677 |
+
"loss": 1.6539,
|
28678 |
+
"step": 4091
|
28679 |
+
},
|
28680 |
+
{
|
28681 |
+
"epoch": 0.9093333333333333,
|
28682 |
+
"grad_norm": 0.7003470659255981,
|
28683 |
+
"learning_rate": 1.8173719376391986e-05,
|
28684 |
+
"loss": 0.764,
|
28685 |
+
"step": 4092
|
28686 |
+
},
|
28687 |
+
{
|
28688 |
+
"epoch": 0.9095555555555556,
|
28689 |
+
"grad_norm": 1.0299309492111206,
|
28690 |
+
"learning_rate": 1.8129175946547884e-05,
|
28691 |
+
"loss": 1.3027,
|
28692 |
+
"step": 4093
|
28693 |
+
},
|
28694 |
+
{
|
28695 |
+
"epoch": 0.9097777777777778,
|
28696 |
+
"grad_norm": 0.21282123029232025,
|
28697 |
+
"learning_rate": 1.8084632516703786e-05,
|
28698 |
+
"loss": 0.0309,
|
28699 |
+
"step": 4094
|
28700 |
+
},
|
28701 |
+
{
|
28702 |
+
"epoch": 0.91,
|
28703 |
+
"grad_norm": 1.1035081148147583,
|
28704 |
+
"learning_rate": 1.804008908685969e-05,
|
28705 |
+
"loss": 1.2723,
|
28706 |
+
"step": 4095
|
28707 |
+
},
|
28708 |
+
{
|
28709 |
+
"epoch": 0.9102222222222223,
|
28710 |
+
"grad_norm": 0.9910405874252319,
|
28711 |
+
"learning_rate": 1.799554565701559e-05,
|
28712 |
+
"loss": 1.0153,
|
28713 |
+
"step": 4096
|
28714 |
+
},
|
28715 |
+
{
|
28716 |
+
"epoch": 0.9104444444444444,
|
28717 |
+
"grad_norm": 1.0081919431686401,
|
28718 |
+
"learning_rate": 1.7951002227171492e-05,
|
28719 |
+
"loss": 1.0348,
|
28720 |
+
"step": 4097
|
28721 |
+
},
|
28722 |
+
{
|
28723 |
+
"epoch": 0.9106666666666666,
|
28724 |
+
"grad_norm": 0.1447010040283203,
|
28725 |
+
"learning_rate": 1.7906458797327394e-05,
|
28726 |
+
"loss": 0.0327,
|
28727 |
+
"step": 4098
|
28728 |
+
},
|
28729 |
+
{
|
28730 |
+
"epoch": 0.9108888888888889,
|
28731 |
+
"grad_norm": 1.1237828731536865,
|
28732 |
+
"learning_rate": 1.7861915367483296e-05,
|
28733 |
+
"loss": 1.332,
|
28734 |
+
"step": 4099
|
28735 |
+
},
|
28736 |
+
{
|
28737 |
+
"epoch": 0.9111111111111111,
|
28738 |
+
"grad_norm": 1.0061198472976685,
|
28739 |
+
"learning_rate": 1.7817371937639198e-05,
|
28740 |
+
"loss": 0.9762,
|
28741 |
+
"step": 4100
|
28742 |
+
},
|
28743 |
+
{
|
28744 |
+
"epoch": 0.9113333333333333,
|
28745 |
+
"grad_norm": 0.045394111424684525,
|
28746 |
+
"learning_rate": 1.77728285077951e-05,
|
28747 |
+
"loss": 0.0105,
|
28748 |
+
"step": 4101
|
28749 |
+
},
|
28750 |
+
{
|
28751 |
+
"epoch": 0.9115555555555556,
|
28752 |
+
"grad_norm": 0.6273143291473389,
|
28753 |
+
"learning_rate": 1.7728285077951006e-05,
|
28754 |
+
"loss": 0.8231,
|
28755 |
+
"step": 4102
|
28756 |
+
},
|
28757 |
+
{
|
28758 |
+
"epoch": 0.9117777777777778,
|
28759 |
+
"grad_norm": 0.5369709730148315,
|
28760 |
+
"learning_rate": 1.7683741648106904e-05,
|
28761 |
+
"loss": 1.0971,
|
28762 |
+
"step": 4103
|
28763 |
+
},
|
28764 |
+
{
|
28765 |
+
"epoch": 0.912,
|
28766 |
+
"grad_norm": 0.841785728931427,
|
28767 |
+
"learning_rate": 1.7639198218262806e-05,
|
28768 |
+
"loss": 2.1345,
|
28769 |
+
"step": 4104
|
28770 |
+
},
|
28771 |
+
{
|
28772 |
+
"epoch": 0.9122222222222223,
|
28773 |
+
"grad_norm": 0.5022440552711487,
|
28774 |
+
"learning_rate": 1.759465478841871e-05,
|
28775 |
+
"loss": 1.0847,
|
28776 |
+
"step": 4105
|
28777 |
+
},
|
28778 |
+
{
|
28779 |
+
"epoch": 0.9124444444444444,
|
28780 |
+
"grad_norm": 0.5736976265907288,
|
28781 |
+
"learning_rate": 1.755011135857461e-05,
|
28782 |
+
"loss": 1.0705,
|
28783 |
+
"step": 4106
|
28784 |
+
},
|
28785 |
+
{
|
28786 |
+
"epoch": 0.9126666666666666,
|
28787 |
+
"grad_norm": 0.7846779227256775,
|
28788 |
+
"learning_rate": 1.7505567928730512e-05,
|
28789 |
+
"loss": 2.2198,
|
28790 |
+
"step": 4107
|
28791 |
+
},
|
28792 |
+
{
|
28793 |
+
"epoch": 0.9128888888888889,
|
28794 |
+
"grad_norm": 0.04545416682958603,
|
28795 |
+
"learning_rate": 1.7461024498886417e-05,
|
28796 |
+
"loss": 0.0105,
|
28797 |
+
"step": 4108
|
28798 |
+
},
|
28799 |
+
{
|
28800 |
+
"epoch": 0.9131111111111111,
|
28801 |
+
"grad_norm": 0.8995314240455627,
|
28802 |
+
"learning_rate": 1.7416481069042316e-05,
|
28803 |
+
"loss": 2.0983,
|
28804 |
+
"step": 4109
|
28805 |
+
},
|
28806 |
+
{
|
28807 |
+
"epoch": 0.9133333333333333,
|
28808 |
+
"grad_norm": 0.08467597514390945,
|
28809 |
+
"learning_rate": 1.7371937639198218e-05,
|
28810 |
+
"loss": 0.0111,
|
28811 |
+
"step": 4110
|
28812 |
+
},
|
28813 |
+
{
|
28814 |
+
"epoch": 0.9135555555555556,
|
28815 |
+
"grad_norm": 0.06848177313804626,
|
28816 |
+
"learning_rate": 1.732739420935412e-05,
|
28817 |
+
"loss": 0.0109,
|
28818 |
+
"step": 4111
|
28819 |
+
},
|
28820 |
+
{
|
28821 |
+
"epoch": 0.9137777777777778,
|
28822 |
+
"grad_norm": 0.6615252494812012,
|
28823 |
+
"learning_rate": 1.7282850779510025e-05,
|
28824 |
+
"loss": 1.0423,
|
28825 |
+
"step": 4112
|
28826 |
+
},
|
28827 |
+
{
|
28828 |
+
"epoch": 0.914,
|
28829 |
+
"grad_norm": 0.8934789896011353,
|
28830 |
+
"learning_rate": 1.7238307349665924e-05,
|
28831 |
+
"loss": 1.8382,
|
28832 |
+
"step": 4113
|
28833 |
+
},
|
28834 |
+
{
|
28835 |
+
"epoch": 0.9142222222222223,
|
28836 |
+
"grad_norm": 0.8137645125389099,
|
28837 |
+
"learning_rate": 1.7193763919821826e-05,
|
28838 |
+
"loss": 1.9163,
|
28839 |
+
"step": 4114
|
28840 |
+
},
|
28841 |
+
{
|
28842 |
+
"epoch": 0.9144444444444444,
|
28843 |
+
"grad_norm": 0.8993197083473206,
|
28844 |
+
"learning_rate": 1.714922048997773e-05,
|
28845 |
+
"loss": 2.1383,
|
28846 |
+
"step": 4115
|
28847 |
+
},
|
28848 |
+
{
|
28849 |
+
"epoch": 0.9146666666666666,
|
28850 |
+
"grad_norm": 0.908676028251648,
|
28851 |
+
"learning_rate": 1.710467706013363e-05,
|
28852 |
+
"loss": 2.0524,
|
28853 |
+
"step": 4116
|
28854 |
+
},
|
28855 |
+
{
|
28856 |
+
"epoch": 0.9148888888888889,
|
28857 |
+
"grad_norm": 0.6348316669464111,
|
28858 |
+
"learning_rate": 1.7060133630289532e-05,
|
28859 |
+
"loss": 0.9211,
|
28860 |
+
"step": 4117
|
28861 |
+
},
|
28862 |
+
{
|
28863 |
+
"epoch": 0.9151111111111111,
|
28864 |
+
"grad_norm": 0.10803266614675522,
|
28865 |
+
"learning_rate": 1.7015590200445437e-05,
|
28866 |
+
"loss": 0.0177,
|
28867 |
+
"step": 4118
|
28868 |
+
},
|
28869 |
+
{
|
28870 |
+
"epoch": 0.9153333333333333,
|
28871 |
+
"grad_norm": 0.5778976678848267,
|
28872 |
+
"learning_rate": 1.6971046770601336e-05,
|
28873 |
+
"loss": 0.7947,
|
28874 |
+
"step": 4119
|
28875 |
+
},
|
28876 |
+
{
|
28877 |
+
"epoch": 0.9155555555555556,
|
28878 |
+
"grad_norm": 0.9023910164833069,
|
28879 |
+
"learning_rate": 1.6926503340757238e-05,
|
28880 |
+
"loss": 1.6568,
|
28881 |
+
"step": 4120
|
28882 |
+
},
|
28883 |
+
{
|
28884 |
+
"epoch": 0.9157777777777778,
|
28885 |
+
"grad_norm": 0.7427157759666443,
|
28886 |
+
"learning_rate": 1.6881959910913143e-05,
|
28887 |
+
"loss": 0.6653,
|
28888 |
+
"step": 4121
|
28889 |
+
},
|
28890 |
+
{
|
28891 |
+
"epoch": 0.916,
|
28892 |
+
"grad_norm": 0.8236956000328064,
|
28893 |
+
"learning_rate": 1.683741648106904e-05,
|
28894 |
+
"loss": 1.0835,
|
28895 |
+
"step": 4122
|
28896 |
+
},
|
28897 |
+
{
|
28898 |
+
"epoch": 0.9162222222222223,
|
28899 |
+
"grad_norm": 0.788445234298706,
|
28900 |
+
"learning_rate": 1.6792873051224944e-05,
|
28901 |
+
"loss": 1.1082,
|
28902 |
+
"step": 4123
|
28903 |
+
},
|
28904 |
+
{
|
28905 |
+
"epoch": 0.9164444444444444,
|
28906 |
+
"grad_norm": 0.7391776442527771,
|
28907 |
+
"learning_rate": 1.674832962138085e-05,
|
28908 |
+
"loss": 1.0042,
|
28909 |
+
"step": 4124
|
28910 |
+
},
|
28911 |
+
{
|
28912 |
+
"epoch": 0.9166666666666666,
|
28913 |
+
"grad_norm": 0.993009626865387,
|
28914 |
+
"learning_rate": 1.670378619153675e-05,
|
28915 |
+
"loss": 1.7091,
|
28916 |
+
"step": 4125
|
28917 |
+
},
|
28918 |
+
{
|
28919 |
+
"epoch": 0.9168888888888889,
|
28920 |
+
"grad_norm": 1.1671327352523804,
|
28921 |
+
"learning_rate": 1.665924276169265e-05,
|
28922 |
+
"loss": 1.8704,
|
28923 |
+
"step": 4126
|
28924 |
+
},
|
28925 |
+
{
|
28926 |
+
"epoch": 0.9171111111111111,
|
28927 |
+
"grad_norm": 0.9321463704109192,
|
28928 |
+
"learning_rate": 1.6614699331848555e-05,
|
28929 |
+
"loss": 1.3912,
|
28930 |
+
"step": 4127
|
28931 |
+
},
|
28932 |
+
{
|
28933 |
+
"epoch": 0.9173333333333333,
|
28934 |
+
"grad_norm": 0.7201982140541077,
|
28935 |
+
"learning_rate": 1.6570155902004457e-05,
|
28936 |
+
"loss": 1.0362,
|
28937 |
+
"step": 4128
|
28938 |
+
},
|
28939 |
+
{
|
28940 |
+
"epoch": 0.9175555555555556,
|
28941 |
+
"grad_norm": 0.06468725949525833,
|
28942 |
+
"learning_rate": 1.6525612472160355e-05,
|
28943 |
+
"loss": 0.0177,
|
28944 |
+
"step": 4129
|
28945 |
+
},
|
28946 |
+
{
|
28947 |
+
"epoch": 0.9177777777777778,
|
28948 |
+
"grad_norm": 0.06377862393856049,
|
28949 |
+
"learning_rate": 1.6481069042316257e-05,
|
28950 |
+
"loss": 0.0175,
|
28951 |
+
"step": 4130
|
28952 |
+
},
|
28953 |
+
{
|
28954 |
+
"epoch": 0.918,
|
28955 |
+
"grad_norm": 1.2018589973449707,
|
28956 |
+
"learning_rate": 1.6436525612472163e-05,
|
28957 |
+
"loss": 2.088,
|
28958 |
+
"step": 4131
|
28959 |
+
},
|
28960 |
+
{
|
28961 |
+
"epoch": 0.9182222222222223,
|
28962 |
+
"grad_norm": 0.08640787750482559,
|
28963 |
+
"learning_rate": 1.639198218262806e-05,
|
28964 |
+
"loss": 0.0178,
|
28965 |
+
"step": 4132
|
28966 |
+
},
|
28967 |
+
{
|
28968 |
+
"epoch": 0.9184444444444444,
|
28969 |
+
"grad_norm": 0.07626676559448242,
|
28970 |
+
"learning_rate": 1.6347438752783963e-05,
|
28971 |
+
"loss": 0.0179,
|
28972 |
+
"step": 4133
|
28973 |
+
},
|
28974 |
+
{
|
28975 |
+
"epoch": 0.9186666666666666,
|
28976 |
+
"grad_norm": 0.684622585773468,
|
28977 |
+
"learning_rate": 1.630289532293987e-05,
|
28978 |
+
"loss": 0.9346,
|
28979 |
+
"step": 4134
|
28980 |
+
},
|
28981 |
+
{
|
28982 |
+
"epoch": 0.9188888888888889,
|
28983 |
+
"grad_norm": 1.07980215549469,
|
28984 |
+
"learning_rate": 1.625835189309577e-05,
|
28985 |
+
"loss": 1.8093,
|
28986 |
+
"step": 4135
|
28987 |
+
},
|
28988 |
+
{
|
28989 |
+
"epoch": 0.9191111111111111,
|
28990 |
+
"grad_norm": 1.0103257894515991,
|
28991 |
+
"learning_rate": 1.621380846325167e-05,
|
28992 |
+
"loss": 1.3954,
|
28993 |
+
"step": 4136
|
28994 |
+
},
|
28995 |
+
{
|
28996 |
+
"epoch": 0.9193333333333333,
|
28997 |
+
"grad_norm": 1.0367659330368042,
|
28998 |
+
"learning_rate": 1.6169265033407574e-05,
|
28999 |
+
"loss": 1.665,
|
29000 |
+
"step": 4137
|
29001 |
+
},
|
29002 |
+
{
|
29003 |
+
"epoch": 0.9195555555555556,
|
29004 |
+
"grad_norm": 1.13039231300354,
|
29005 |
+
"learning_rate": 1.6124721603563476e-05,
|
29006 |
+
"loss": 1.3939,
|
29007 |
+
"step": 4138
|
29008 |
+
},
|
29009 |
+
{
|
29010 |
+
"epoch": 0.9197777777777778,
|
29011 |
+
"grad_norm": 1.2525602579116821,
|
29012 |
+
"learning_rate": 1.6080178173719375e-05,
|
29013 |
+
"loss": 1.7406,
|
29014 |
+
"step": 4139
|
29015 |
+
},
|
29016 |
+
{
|
29017 |
+
"epoch": 0.92,
|
29018 |
+
"grad_norm": 1.0886310338974,
|
29019 |
+
"learning_rate": 1.603563474387528e-05,
|
29020 |
+
"loss": 1.447,
|
29021 |
+
"step": 4140
|
29022 |
+
},
|
29023 |
+
{
|
29024 |
+
"epoch": 0.9202222222222223,
|
29025 |
+
"grad_norm": 1.0128674507141113,
|
29026 |
+
"learning_rate": 1.5991091314031182e-05,
|
29027 |
+
"loss": 1.3665,
|
29028 |
+
"step": 4141
|
29029 |
+
},
|
29030 |
+
{
|
29031 |
+
"epoch": 0.9204444444444444,
|
29032 |
+
"grad_norm": 1.087297797203064,
|
29033 |
+
"learning_rate": 1.594654788418708e-05,
|
29034 |
+
"loss": 1.2043,
|
29035 |
+
"step": 4142
|
29036 |
+
},
|
29037 |
+
{
|
29038 |
+
"epoch": 0.9206666666666666,
|
29039 |
+
"grad_norm": 1.0681723356246948,
|
29040 |
+
"learning_rate": 1.5902004454342986e-05,
|
29041 |
+
"loss": 1.3818,
|
29042 |
+
"step": 4143
|
29043 |
+
},
|
29044 |
+
{
|
29045 |
+
"epoch": 0.9208888888888889,
|
29046 |
+
"grad_norm": 0.8169934153556824,
|
29047 |
+
"learning_rate": 1.5857461024498888e-05,
|
29048 |
+
"loss": 0.7076,
|
29049 |
+
"step": 4144
|
29050 |
+
},
|
29051 |
+
{
|
29052 |
+
"epoch": 0.9211111111111111,
|
29053 |
+
"grad_norm": 1.1659146547317505,
|
29054 |
+
"learning_rate": 1.581291759465479e-05,
|
29055 |
+
"loss": 1.3053,
|
29056 |
+
"step": 4145
|
29057 |
+
},
|
29058 |
+
{
|
29059 |
+
"epoch": 0.9213333333333333,
|
29060 |
+
"grad_norm": 1.0384572744369507,
|
29061 |
+
"learning_rate": 1.5768374164810692e-05,
|
29062 |
+
"loss": 1.1864,
|
29063 |
+
"step": 4146
|
29064 |
+
},
|
29065 |
+
{
|
29066 |
+
"epoch": 0.9215555555555556,
|
29067 |
+
"grad_norm": 1.0471240282058716,
|
29068 |
+
"learning_rate": 1.5723830734966594e-05,
|
29069 |
+
"loss": 1.066,
|
29070 |
+
"step": 4147
|
29071 |
+
},
|
29072 |
+
{
|
29073 |
+
"epoch": 0.9217777777777778,
|
29074 |
+
"grad_norm": 0.7413065433502197,
|
29075 |
+
"learning_rate": 1.5679287305122496e-05,
|
29076 |
+
"loss": 0.5414,
|
29077 |
+
"step": 4148
|
29078 |
+
},
|
29079 |
+
{
|
29080 |
+
"epoch": 0.922,
|
29081 |
+
"grad_norm": 0.9853238463401794,
|
29082 |
+
"learning_rate": 1.5634743875278395e-05,
|
29083 |
+
"loss": 0.9216,
|
29084 |
+
"step": 4149
|
29085 |
+
},
|
29086 |
+
{
|
29087 |
+
"epoch": 0.9222222222222223,
|
29088 |
+
"grad_norm": 0.9708325266838074,
|
29089 |
+
"learning_rate": 1.55902004454343e-05,
|
29090 |
+
"loss": 0.8306,
|
29091 |
+
"step": 4150
|
29092 |
+
},
|
29093 |
+
{
|
29094 |
+
"epoch": 0.9224444444444444,
|
29095 |
+
"grad_norm": 0.565495491027832,
|
29096 |
+
"learning_rate": 1.5545657015590202e-05,
|
29097 |
+
"loss": 1.0385,
|
29098 |
+
"step": 4151
|
29099 |
+
},
|
29100 |
+
{
|
29101 |
+
"epoch": 0.9226666666666666,
|
29102 |
+
"grad_norm": 0.046501629054546356,
|
29103 |
+
"learning_rate": 1.55011135857461e-05,
|
29104 |
+
"loss": 0.0103,
|
29105 |
+
"step": 4152
|
29106 |
+
},
|
29107 |
+
{
|
29108 |
+
"epoch": 0.9228888888888889,
|
29109 |
+
"grad_norm": 0.5920565128326416,
|
29110 |
+
"learning_rate": 1.5456570155902006e-05,
|
29111 |
+
"loss": 1.1167,
|
29112 |
+
"step": 4153
|
29113 |
+
},
|
29114 |
+
{
|
29115 |
+
"epoch": 0.9231111111111111,
|
29116 |
+
"grad_norm": 0.6111573576927185,
|
29117 |
+
"learning_rate": 1.5412026726057908e-05,
|
29118 |
+
"loss": 0.9939,
|
29119 |
+
"step": 4154
|
29120 |
+
},
|
29121 |
+
{
|
29122 |
+
"epoch": 0.9233333333333333,
|
29123 |
+
"grad_norm": 0.5594994425773621,
|
29124 |
+
"learning_rate": 1.5367483296213807e-05,
|
29125 |
+
"loss": 1.1777,
|
29126 |
+
"step": 4155
|
29127 |
+
},
|
29128 |
+
{
|
29129 |
+
"epoch": 0.9235555555555556,
|
29130 |
+
"grad_norm": 0.05240090563893318,
|
29131 |
+
"learning_rate": 1.5322939866369712e-05,
|
29132 |
+
"loss": 0.0107,
|
29133 |
+
"step": 4156
|
29134 |
+
},
|
29135 |
+
{
|
29136 |
+
"epoch": 0.9237777777777778,
|
29137 |
+
"grad_norm": 0.8332452178001404,
|
29138 |
+
"learning_rate": 1.5278396436525614e-05,
|
29139 |
+
"loss": 2.1969,
|
29140 |
+
"step": 4157
|
29141 |
+
},
|
29142 |
+
{
|
29143 |
+
"epoch": 0.924,
|
29144 |
+
"grad_norm": 0.60703045129776,
|
29145 |
+
"learning_rate": 1.5233853006681514e-05,
|
29146 |
+
"loss": 0.8806,
|
29147 |
+
"step": 4158
|
29148 |
+
},
|
29149 |
+
{
|
29150 |
+
"epoch": 0.9242222222222222,
|
29151 |
+
"grad_norm": 0.0834372490644455,
|
29152 |
+
"learning_rate": 1.5189309576837418e-05,
|
29153 |
+
"loss": 0.0109,
|
29154 |
+
"step": 4159
|
29155 |
+
},
|
29156 |
+
{
|
29157 |
+
"epoch": 0.9244444444444444,
|
29158 |
+
"grad_norm": 0.06800226867198944,
|
29159 |
+
"learning_rate": 1.514476614699332e-05,
|
29160 |
+
"loss": 0.0111,
|
29161 |
+
"step": 4160
|
29162 |
+
},
|
29163 |
+
{
|
29164 |
+
"epoch": 0.9246666666666666,
|
29165 |
+
"grad_norm": 0.0833854079246521,
|
29166 |
+
"learning_rate": 1.510022271714922e-05,
|
29167 |
+
"loss": 0.0112,
|
29168 |
+
"step": 4161
|
29169 |
+
},
|
29170 |
+
{
|
29171 |
+
"epoch": 0.9248888888888889,
|
29172 |
+
"grad_norm": 0.6188771724700928,
|
29173 |
+
"learning_rate": 1.5055679287305124e-05,
|
29174 |
+
"loss": 0.9193,
|
29175 |
+
"step": 4162
|
29176 |
+
},
|
29177 |
+
{
|
29178 |
+
"epoch": 0.9251111111111111,
|
29179 |
+
"grad_norm": 0.9110550284385681,
|
29180 |
+
"learning_rate": 1.5011135857461026e-05,
|
29181 |
+
"loss": 1.8064,
|
29182 |
+
"step": 4163
|
29183 |
+
},
|
29184 |
+
{
|
29185 |
+
"epoch": 0.9253333333333333,
|
29186 |
+
"grad_norm": 0.9889512658119202,
|
29187 |
+
"learning_rate": 1.4966592427616926e-05,
|
29188 |
+
"loss": 2.1231,
|
29189 |
+
"step": 4164
|
29190 |
+
},
|
29191 |
+
{
|
29192 |
+
"epoch": 0.9255555555555556,
|
29193 |
+
"grad_norm": 0.700734555721283,
|
29194 |
+
"learning_rate": 1.4922048997772831e-05,
|
29195 |
+
"loss": 0.9772,
|
29196 |
+
"step": 4165
|
29197 |
+
},
|
29198 |
+
{
|
29199 |
+
"epoch": 0.9257777777777778,
|
29200 |
+
"grad_norm": 0.10837709903717041,
|
29201 |
+
"learning_rate": 1.4877505567928732e-05,
|
29202 |
+
"loss": 0.0171,
|
29203 |
+
"step": 4166
|
29204 |
+
},
|
29205 |
+
{
|
29206 |
+
"epoch": 0.926,
|
29207 |
+
"grad_norm": 0.11039458960294724,
|
29208 |
+
"learning_rate": 1.4832962138084634e-05,
|
29209 |
+
"loss": 0.017,
|
29210 |
+
"step": 4167
|
29211 |
+
},
|
29212 |
+
{
|
29213 |
+
"epoch": 0.9262222222222222,
|
29214 |
+
"grad_norm": 1.0548503398895264,
|
29215 |
+
"learning_rate": 1.4788418708240534e-05,
|
29216 |
+
"loss": 2.02,
|
29217 |
+
"step": 4168
|
29218 |
+
},
|
29219 |
+
{
|
29220 |
+
"epoch": 0.9264444444444444,
|
29221 |
+
"grad_norm": 1.027597427368164,
|
29222 |
+
"learning_rate": 1.4743875278396438e-05,
|
29223 |
+
"loss": 2.0137,
|
29224 |
+
"step": 4169
|
29225 |
+
},
|
29226 |
+
{
|
29227 |
+
"epoch": 0.9266666666666666,
|
29228 |
+
"grad_norm": 0.9742117524147034,
|
29229 |
+
"learning_rate": 1.469933184855234e-05,
|
29230 |
+
"loss": 1.6216,
|
29231 |
+
"step": 4170
|
29232 |
+
},
|
29233 |
+
{
|
29234 |
+
"epoch": 0.9268888888888889,
|
29235 |
+
"grad_norm": 1.1034184694290161,
|
29236 |
+
"learning_rate": 1.465478841870824e-05,
|
29237 |
+
"loss": 1.9359,
|
29238 |
+
"step": 4171
|
29239 |
+
},
|
29240 |
+
{
|
29241 |
+
"epoch": 0.9271111111111111,
|
29242 |
+
"grad_norm": 0.9604677557945251,
|
29243 |
+
"learning_rate": 1.4610244988864143e-05,
|
29244 |
+
"loss": 1.9562,
|
29245 |
+
"step": 4172
|
29246 |
+
},
|
29247 |
+
{
|
29248 |
+
"epoch": 0.9273333333333333,
|
29249 |
+
"grad_norm": 0.07002148032188416,
|
29250 |
+
"learning_rate": 1.4565701559020045e-05,
|
29251 |
+
"loss": 0.0148,
|
29252 |
+
"step": 4173
|
29253 |
+
},
|
29254 |
+
{
|
29255 |
+
"epoch": 0.9275555555555556,
|
29256 |
+
"grad_norm": 0.07596537470817566,
|
29257 |
+
"learning_rate": 1.4521158129175946e-05,
|
29258 |
+
"loss": 0.015,
|
29259 |
+
"step": 4174
|
29260 |
+
},
|
29261 |
+
{
|
29262 |
+
"epoch": 0.9277777777777778,
|
29263 |
+
"grad_norm": 0.07176879793405533,
|
29264 |
+
"learning_rate": 1.447661469933185e-05,
|
29265 |
+
"loss": 0.0153,
|
29266 |
+
"step": 4175
|
29267 |
+
},
|
29268 |
+
{
|
29269 |
+
"epoch": 0.928,
|
29270 |
+
"grad_norm": 0.6465597152709961,
|
29271 |
+
"learning_rate": 1.4432071269487751e-05,
|
29272 |
+
"loss": 0.8603,
|
29273 |
+
"step": 4176
|
29274 |
+
},
|
29275 |
+
{
|
29276 |
+
"epoch": 0.9282222222222222,
|
29277 |
+
"grad_norm": 0.7567986249923706,
|
29278 |
+
"learning_rate": 1.4387527839643652e-05,
|
29279 |
+
"loss": 0.974,
|
29280 |
+
"step": 4177
|
29281 |
+
},
|
29282 |
+
{
|
29283 |
+
"epoch": 0.9284444444444444,
|
29284 |
+
"grad_norm": 0.9745081067085266,
|
29285 |
+
"learning_rate": 1.4342984409799557e-05,
|
29286 |
+
"loss": 1.7431,
|
29287 |
+
"step": 4178
|
29288 |
+
},
|
29289 |
+
{
|
29290 |
+
"epoch": 0.9286666666666666,
|
29291 |
+
"grad_norm": 1.2606123685836792,
|
29292 |
+
"learning_rate": 1.4298440979955457e-05,
|
29293 |
+
"loss": 1.8718,
|
29294 |
+
"step": 4179
|
29295 |
+
},
|
29296 |
+
{
|
29297 |
+
"epoch": 0.9288888888888889,
|
29298 |
+
"grad_norm": 1.018596887588501,
|
29299 |
+
"learning_rate": 1.425389755011136e-05,
|
29300 |
+
"loss": 1.7782,
|
29301 |
+
"step": 4180
|
29302 |
+
},
|
29303 |
+
{
|
29304 |
+
"epoch": 0.9291111111111111,
|
29305 |
+
"grad_norm": 0.961146891117096,
|
29306 |
+
"learning_rate": 1.4209354120267263e-05,
|
29307 |
+
"loss": 1.6335,
|
29308 |
+
"step": 4181
|
29309 |
+
},
|
29310 |
+
{
|
29311 |
+
"epoch": 0.9293333333333333,
|
29312 |
+
"grad_norm": 0.9671225547790527,
|
29313 |
+
"learning_rate": 1.4164810690423163e-05,
|
29314 |
+
"loss": 1.507,
|
29315 |
+
"step": 4182
|
29316 |
+
},
|
29317 |
+
{
|
29318 |
+
"epoch": 0.9295555555555556,
|
29319 |
+
"grad_norm": 0.06404636800289154,
|
29320 |
+
"learning_rate": 1.4120267260579065e-05,
|
29321 |
+
"loss": 0.0178,
|
29322 |
+
"step": 4183
|
29323 |
+
},
|
29324 |
+
{
|
29325 |
+
"epoch": 0.9297777777777778,
|
29326 |
+
"grad_norm": 0.06602248549461365,
|
29327 |
+
"learning_rate": 1.4075723830734965e-05,
|
29328 |
+
"loss": 0.0179,
|
29329 |
+
"step": 4184
|
29330 |
+
},
|
29331 |
+
{
|
29332 |
+
"epoch": 0.93,
|
29333 |
+
"grad_norm": 0.06565021723508835,
|
29334 |
+
"learning_rate": 1.4031180400890869e-05,
|
29335 |
+
"loss": 0.0178,
|
29336 |
+
"step": 4185
|
29337 |
+
},
|
29338 |
+
{
|
29339 |
+
"epoch": 0.9302222222222222,
|
29340 |
+
"grad_norm": 1.20155930519104,
|
29341 |
+
"learning_rate": 1.3986636971046771e-05,
|
29342 |
+
"loss": 1.839,
|
29343 |
+
"step": 4186
|
29344 |
+
},
|
29345 |
+
{
|
29346 |
+
"epoch": 0.9304444444444444,
|
29347 |
+
"grad_norm": 0.08105297386646271,
|
29348 |
+
"learning_rate": 1.3942093541202671e-05,
|
29349 |
+
"loss": 0.0171,
|
29350 |
+
"step": 4187
|
29351 |
+
},
|
29352 |
+
{
|
29353 |
+
"epoch": 0.9306666666666666,
|
29354 |
+
"grad_norm": 0.07562917470932007,
|
29355 |
+
"learning_rate": 1.3897550111358577e-05,
|
29356 |
+
"loss": 0.0173,
|
29357 |
+
"step": 4188
|
29358 |
+
},
|
29359 |
+
{
|
29360 |
+
"epoch": 0.9308888888888889,
|
29361 |
+
"grad_norm": 0.9316315054893494,
|
29362 |
+
"learning_rate": 1.3853006681514477e-05,
|
29363 |
+
"loss": 0.9604,
|
29364 |
+
"step": 4189
|
29365 |
+
},
|
29366 |
+
{
|
29367 |
+
"epoch": 0.9311111111111111,
|
29368 |
+
"grad_norm": 1.0534974336624146,
|
29369 |
+
"learning_rate": 1.3808463251670379e-05,
|
29370 |
+
"loss": 1.5912,
|
29371 |
+
"step": 4190
|
29372 |
+
},
|
29373 |
+
{
|
29374 |
+
"epoch": 0.9313333333333333,
|
29375 |
+
"grad_norm": 1.1811798810958862,
|
29376 |
+
"learning_rate": 1.3763919821826283e-05,
|
29377 |
+
"loss": 1.76,
|
29378 |
+
"step": 4191
|
29379 |
+
},
|
29380 |
+
{
|
29381 |
+
"epoch": 0.9315555555555556,
|
29382 |
+
"grad_norm": 1.158895492553711,
|
29383 |
+
"learning_rate": 1.3719376391982183e-05,
|
29384 |
+
"loss": 1.6761,
|
29385 |
+
"step": 4192
|
29386 |
+
},
|
29387 |
+
{
|
29388 |
+
"epoch": 0.9317777777777778,
|
29389 |
+
"grad_norm": 1.1721011400222778,
|
29390 |
+
"learning_rate": 1.3674832962138085e-05,
|
29391 |
+
"loss": 1.3907,
|
29392 |
+
"step": 4193
|
29393 |
+
},
|
29394 |
+
{
|
29395 |
+
"epoch": 0.932,
|
29396 |
+
"grad_norm": 0.180902361869812,
|
29397 |
+
"learning_rate": 1.3630289532293989e-05,
|
29398 |
+
"loss": 0.0291,
|
29399 |
+
"step": 4194
|
29400 |
+
},
|
29401 |
+
{
|
29402 |
+
"epoch": 0.9322222222222222,
|
29403 |
+
"grad_norm": 0.17803487181663513,
|
29404 |
+
"learning_rate": 1.3585746102449889e-05,
|
29405 |
+
"loss": 0.029,
|
29406 |
+
"step": 4195
|
29407 |
+
},
|
29408 |
+
{
|
29409 |
+
"epoch": 0.9324444444444444,
|
29410 |
+
"grad_norm": 1.2288343906402588,
|
29411 |
+
"learning_rate": 1.354120267260579e-05,
|
29412 |
+
"loss": 1.5034,
|
29413 |
+
"step": 4196
|
29414 |
+
},
|
29415 |
+
{
|
29416 |
+
"epoch": 0.9326666666666666,
|
29417 |
+
"grad_norm": 1.1452394723892212,
|
29418 |
+
"learning_rate": 1.3496659242761694e-05,
|
29419 |
+
"loss": 1.3718,
|
29420 |
+
"step": 4197
|
29421 |
+
},
|
29422 |
+
{
|
29423 |
+
"epoch": 0.9328888888888889,
|
29424 |
+
"grad_norm": 1.1846433877944946,
|
29425 |
+
"learning_rate": 1.3452115812917596e-05,
|
29426 |
+
"loss": 1.1493,
|
29427 |
+
"step": 4198
|
29428 |
+
},
|
29429 |
+
{
|
29430 |
+
"epoch": 0.9331111111111111,
|
29431 |
+
"grad_norm": 1.074758529663086,
|
29432 |
+
"learning_rate": 1.3407572383073497e-05,
|
29433 |
+
"loss": 1.3701,
|
29434 |
+
"step": 4199
|
29435 |
+
},
|
29436 |
+
{
|
29437 |
+
"epoch": 0.9333333333333333,
|
29438 |
+
"grad_norm": 1.0819156169891357,
|
29439 |
+
"learning_rate": 1.33630289532294e-05,
|
29440 |
+
"loss": 0.4173,
|
29441 |
+
"step": 4200
|
29442 |
+
},
|
29443 |
+
{
|
29444 |
+
"epoch": 0.9335555555555556,
|
29445 |
+
"grad_norm": 0.04741929471492767,
|
29446 |
+
"learning_rate": 1.3318485523385302e-05,
|
29447 |
+
"loss": 0.0103,
|
29448 |
+
"step": 4201
|
29449 |
+
},
|
29450 |
+
{
|
29451 |
+
"epoch": 0.9337777777777778,
|
29452 |
+
"grad_norm": 0.5368507504463196,
|
29453 |
+
"learning_rate": 1.3273942093541203e-05,
|
29454 |
+
"loss": 0.9904,
|
29455 |
+
"step": 4202
|
29456 |
+
},
|
29457 |
+
{
|
29458 |
+
"epoch": 0.934,
|
29459 |
+
"grad_norm": 0.9232761859893799,
|
29460 |
+
"learning_rate": 1.3229398663697105e-05,
|
29461 |
+
"loss": 2.0784,
|
29462 |
+
"step": 4203
|
29463 |
+
},
|
29464 |
+
{
|
29465 |
+
"epoch": 0.9342222222222222,
|
29466 |
+
"grad_norm": 0.8134653568267822,
|
29467 |
+
"learning_rate": 1.3184855233853008e-05,
|
29468 |
+
"loss": 1.8798,
|
29469 |
+
"step": 4204
|
29470 |
+
},
|
29471 |
+
{
|
29472 |
+
"epoch": 0.9344444444444444,
|
29473 |
+
"grad_norm": 0.6048870086669922,
|
29474 |
+
"learning_rate": 1.3140311804008909e-05,
|
29475 |
+
"loss": 1.0963,
|
29476 |
+
"step": 4205
|
29477 |
+
},
|
29478 |
+
{
|
29479 |
+
"epoch": 0.9346666666666666,
|
29480 |
+
"grad_norm": 0.6360740065574646,
|
29481 |
+
"learning_rate": 1.309576837416481e-05,
|
29482 |
+
"loss": 1.0909,
|
29483 |
+
"step": 4206
|
29484 |
+
},
|
29485 |
+
{
|
29486 |
+
"epoch": 0.9348888888888889,
|
29487 |
+
"grad_norm": 0.6409539580345154,
|
29488 |
+
"learning_rate": 1.3051224944320714e-05,
|
29489 |
+
"loss": 0.9653,
|
29490 |
+
"step": 4207
|
29491 |
+
},
|
29492 |
+
{
|
29493 |
+
"epoch": 0.9351111111111111,
|
29494 |
+
"grad_norm": 0.8436147570610046,
|
29495 |
+
"learning_rate": 1.3006681514476614e-05,
|
29496 |
+
"loss": 2.0953,
|
29497 |
+
"step": 4208
|
29498 |
+
},
|
29499 |
+
{
|
29500 |
+
"epoch": 0.9353333333333333,
|
29501 |
+
"grad_norm": 0.06498900055885315,
|
29502 |
+
"learning_rate": 1.2962138084632516e-05,
|
29503 |
+
"loss": 0.0108,
|
29504 |
+
"step": 4209
|
29505 |
+
},
|
29506 |
+
{
|
29507 |
+
"epoch": 0.9355555555555556,
|
29508 |
+
"grad_norm": 0.06997820734977722,
|
29509 |
+
"learning_rate": 1.291759465478842e-05,
|
29510 |
+
"loss": 0.0104,
|
29511 |
+
"step": 4210
|
29512 |
+
},
|
29513 |
+
{
|
29514 |
+
"epoch": 0.9357777777777778,
|
29515 |
+
"grad_norm": 0.07132133841514587,
|
29516 |
+
"learning_rate": 1.2873051224944322e-05,
|
29517 |
+
"loss": 0.0105,
|
29518 |
+
"step": 4211
|
29519 |
+
},
|
29520 |
+
{
|
29521 |
+
"epoch": 0.936,
|
29522 |
+
"grad_norm": 0.880042552947998,
|
29523 |
+
"learning_rate": 1.2828507795100222e-05,
|
29524 |
+
"loss": 2.0466,
|
29525 |
+
"step": 4212
|
29526 |
+
},
|
29527 |
+
{
|
29528 |
+
"epoch": 0.9362222222222222,
|
29529 |
+
"grad_norm": 0.9125528931617737,
|
29530 |
+
"learning_rate": 1.2783964365256126e-05,
|
29531 |
+
"loss": 1.4838,
|
29532 |
+
"step": 4213
|
29533 |
+
},
|
29534 |
+
{
|
29535 |
+
"epoch": 0.9364444444444444,
|
29536 |
+
"grad_norm": 0.8474193811416626,
|
29537 |
+
"learning_rate": 1.2739420935412028e-05,
|
29538 |
+
"loss": 1.9134,
|
29539 |
+
"step": 4214
|
29540 |
+
},
|
29541 |
+
{
|
29542 |
+
"epoch": 0.9366666666666666,
|
29543 |
+
"grad_norm": 0.9230953454971313,
|
29544 |
+
"learning_rate": 1.2694877505567928e-05,
|
29545 |
+
"loss": 1.9544,
|
29546 |
+
"step": 4215
|
29547 |
+
},
|
29548 |
+
{
|
29549 |
+
"epoch": 0.9368888888888889,
|
29550 |
+
"grad_norm": 0.9458275437355042,
|
29551 |
+
"learning_rate": 1.2650334075723832e-05,
|
29552 |
+
"loss": 1.9736,
|
29553 |
+
"step": 4216
|
29554 |
+
},
|
29555 |
+
{
|
29556 |
+
"epoch": 0.9371111111111111,
|
29557 |
+
"grad_norm": 1.3373515605926514,
|
29558 |
+
"learning_rate": 1.2605790645879734e-05,
|
29559 |
+
"loss": 2.2127,
|
29560 |
+
"step": 4217
|
29561 |
+
},
|
29562 |
+
{
|
29563 |
+
"epoch": 0.9373333333333334,
|
29564 |
+
"grad_norm": 0.09605936706066132,
|
29565 |
+
"learning_rate": 1.2561247216035634e-05,
|
29566 |
+
"loss": 0.0156,
|
29567 |
+
"step": 4218
|
29568 |
+
},
|
29569 |
+
{
|
29570 |
+
"epoch": 0.9375555555555556,
|
29571 |
+
"grad_norm": 0.9786912798881531,
|
29572 |
+
"learning_rate": 1.251670378619154e-05,
|
29573 |
+
"loss": 1.8977,
|
29574 |
+
"step": 4219
|
29575 |
+
},
|
29576 |
+
{
|
29577 |
+
"epoch": 0.9377777777777778,
|
29578 |
+
"grad_norm": 0.9807332754135132,
|
29579 |
+
"learning_rate": 1.247216035634744e-05,
|
29580 |
+
"loss": 1.8869,
|
29581 |
+
"step": 4220
|
29582 |
+
},
|
29583 |
+
{
|
29584 |
+
"epoch": 0.938,
|
29585 |
+
"grad_norm": 0.8643897175788879,
|
29586 |
+
"learning_rate": 1.2427616926503342e-05,
|
29587 |
+
"loss": 1.6852,
|
29588 |
+
"step": 4221
|
29589 |
+
},
|
29590 |
+
{
|
29591 |
+
"epoch": 0.9382222222222222,
|
29592 |
+
"grad_norm": 1.1636970043182373,
|
29593 |
+
"learning_rate": 1.2383073496659244e-05,
|
29594 |
+
"loss": 1.5879,
|
29595 |
+
"step": 4222
|
29596 |
+
},
|
29597 |
+
{
|
29598 |
+
"epoch": 0.9384444444444444,
|
29599 |
+
"grad_norm": 0.0700439065694809,
|
29600 |
+
"learning_rate": 1.2338530066815146e-05,
|
29601 |
+
"loss": 0.015,
|
29602 |
+
"step": 4223
|
29603 |
+
},
|
29604 |
+
{
|
29605 |
+
"epoch": 0.9386666666666666,
|
29606 |
+
"grad_norm": 0.07256254553794861,
|
29607 |
+
"learning_rate": 1.2293986636971048e-05,
|
29608 |
+
"loss": 0.015,
|
29609 |
+
"step": 4224
|
29610 |
+
},
|
29611 |
+
{
|
29612 |
+
"epoch": 0.9388888888888889,
|
29613 |
+
"grad_norm": 0.6184701919555664,
|
29614 |
+
"learning_rate": 1.224944320712695e-05,
|
29615 |
+
"loss": 0.8797,
|
29616 |
+
"step": 4225
|
29617 |
+
},
|
29618 |
+
{
|
29619 |
+
"epoch": 0.9391111111111111,
|
29620 |
+
"grad_norm": 0.9730594158172607,
|
29621 |
+
"learning_rate": 1.2204899777282852e-05,
|
29622 |
+
"loss": 1.6952,
|
29623 |
+
"step": 4226
|
29624 |
+
},
|
29625 |
+
{
|
29626 |
+
"epoch": 0.9393333333333334,
|
29627 |
+
"grad_norm": 0.8093335628509521,
|
29628 |
+
"learning_rate": 1.2160356347438754e-05,
|
29629 |
+
"loss": 1.7324,
|
29630 |
+
"step": 4227
|
29631 |
+
},
|
29632 |
+
{
|
29633 |
+
"epoch": 0.9395555555555556,
|
29634 |
+
"grad_norm": 1.0205293893814087,
|
29635 |
+
"learning_rate": 1.2115812917594656e-05,
|
29636 |
+
"loss": 1.6117,
|
29637 |
+
"step": 4228
|
29638 |
+
},
|
29639 |
+
{
|
29640 |
+
"epoch": 0.9397777777777778,
|
29641 |
+
"grad_norm": 0.9411073327064514,
|
29642 |
+
"learning_rate": 1.2071269487750557e-05,
|
29643 |
+
"loss": 1.6911,
|
29644 |
+
"step": 4229
|
29645 |
+
},
|
29646 |
+
{
|
29647 |
+
"epoch": 0.94,
|
29648 |
+
"grad_norm": 1.1085401773452759,
|
29649 |
+
"learning_rate": 1.202672605790646e-05,
|
29650 |
+
"loss": 1.822,
|
29651 |
+
"step": 4230
|
29652 |
+
},
|
29653 |
+
{
|
29654 |
+
"epoch": 0.9402222222222222,
|
29655 |
+
"grad_norm": 1.0736027956008911,
|
29656 |
+
"learning_rate": 1.1982182628062361e-05,
|
29657 |
+
"loss": 1.7295,
|
29658 |
+
"step": 4231
|
29659 |
+
},
|
29660 |
+
{
|
29661 |
+
"epoch": 0.9404444444444444,
|
29662 |
+
"grad_norm": 1.0651301145553589,
|
29663 |
+
"learning_rate": 1.1937639198218263e-05,
|
29664 |
+
"loss": 1.6569,
|
29665 |
+
"step": 4232
|
29666 |
+
},
|
29667 |
+
{
|
29668 |
+
"epoch": 0.9406666666666667,
|
29669 |
+
"grad_norm": 0.9908804297447205,
|
29670 |
+
"learning_rate": 1.1893095768374165e-05,
|
29671 |
+
"loss": 1.7012,
|
29672 |
+
"step": 4233
|
29673 |
+
},
|
29674 |
+
{
|
29675 |
+
"epoch": 0.9408888888888889,
|
29676 |
+
"grad_norm": 0.9733399748802185,
|
29677 |
+
"learning_rate": 1.1848552338530067e-05,
|
29678 |
+
"loss": 1.7654,
|
29679 |
+
"step": 4234
|
29680 |
+
},
|
29681 |
+
{
|
29682 |
+
"epoch": 0.9411111111111111,
|
29683 |
+
"grad_norm": 0.8500748872756958,
|
29684 |
+
"learning_rate": 1.180400890868597e-05,
|
29685 |
+
"loss": 0.8944,
|
29686 |
+
"step": 4235
|
29687 |
+
},
|
29688 |
+
{
|
29689 |
+
"epoch": 0.9413333333333334,
|
29690 |
+
"grad_norm": 0.7698972821235657,
|
29691 |
+
"learning_rate": 1.1759465478841871e-05,
|
29692 |
+
"loss": 0.6535,
|
29693 |
+
"step": 4236
|
29694 |
+
},
|
29695 |
+
{
|
29696 |
+
"epoch": 0.9415555555555556,
|
29697 |
+
"grad_norm": 0.976285994052887,
|
29698 |
+
"learning_rate": 1.1714922048997773e-05,
|
29699 |
+
"loss": 1.5464,
|
29700 |
+
"step": 4237
|
29701 |
+
},
|
29702 |
+
{
|
29703 |
+
"epoch": 0.9417777777777778,
|
29704 |
+
"grad_norm": 0.07256097346544266,
|
29705 |
+
"learning_rate": 1.1670378619153675e-05,
|
29706 |
+
"loss": 0.0177,
|
29707 |
+
"step": 4238
|
29708 |
+
},
|
29709 |
+
{
|
29710 |
+
"epoch": 0.942,
|
29711 |
+
"grad_norm": 0.8034363389015198,
|
29712 |
+
"learning_rate": 1.1625835189309577e-05,
|
29713 |
+
"loss": 0.7093,
|
29714 |
+
"step": 4239
|
29715 |
+
},
|
29716 |
+
{
|
29717 |
+
"epoch": 0.9422222222222222,
|
29718 |
+
"grad_norm": 1.131561279296875,
|
29719 |
+
"learning_rate": 1.158129175946548e-05,
|
29720 |
+
"loss": 1.6093,
|
29721 |
+
"step": 4240
|
29722 |
+
},
|
29723 |
+
{
|
29724 |
+
"epoch": 0.9424444444444444,
|
29725 |
+
"grad_norm": 0.6806484460830688,
|
29726 |
+
"learning_rate": 1.1536748329621381e-05,
|
29727 |
+
"loss": 0.5989,
|
29728 |
+
"step": 4241
|
29729 |
+
},
|
29730 |
+
{
|
29731 |
+
"epoch": 0.9426666666666667,
|
29732 |
+
"grad_norm": 0.7716354727745056,
|
29733 |
+
"learning_rate": 1.1492204899777285e-05,
|
29734 |
+
"loss": 0.8757,
|
29735 |
+
"step": 4242
|
29736 |
+
},
|
29737 |
+
{
|
29738 |
+
"epoch": 0.9428888888888889,
|
29739 |
+
"grad_norm": 1.0691800117492676,
|
29740 |
+
"learning_rate": 1.1447661469933185e-05,
|
29741 |
+
"loss": 1.525,
|
29742 |
+
"step": 4243
|
29743 |
+
},
|
29744 |
+
{
|
29745 |
+
"epoch": 0.9431111111111111,
|
29746 |
+
"grad_norm": 1.1259671449661255,
|
29747 |
+
"learning_rate": 1.1403118040089087e-05,
|
29748 |
+
"loss": 1.3549,
|
29749 |
+
"step": 4244
|
29750 |
+
},
|
29751 |
+
{
|
29752 |
+
"epoch": 0.9433333333333334,
|
29753 |
+
"grad_norm": 1.147953987121582,
|
29754 |
+
"learning_rate": 1.1358574610244989e-05,
|
29755 |
+
"loss": 1.5969,
|
29756 |
+
"step": 4245
|
29757 |
+
},
|
29758 |
+
{
|
29759 |
+
"epoch": 0.9435555555555556,
|
29760 |
+
"grad_norm": 0.932826578617096,
|
29761 |
+
"learning_rate": 1.1314031180400891e-05,
|
29762 |
+
"loss": 1.3558,
|
29763 |
+
"step": 4246
|
29764 |
+
},
|
29765 |
+
{
|
29766 |
+
"epoch": 0.9437777777777778,
|
29767 |
+
"grad_norm": 1.2732888460159302,
|
29768 |
+
"learning_rate": 1.1269487750556795e-05,
|
29769 |
+
"loss": 1.2856,
|
29770 |
+
"step": 4247
|
29771 |
+
},
|
29772 |
+
{
|
29773 |
+
"epoch": 0.944,
|
29774 |
+
"grad_norm": 1.4992557764053345,
|
29775 |
+
"learning_rate": 1.1224944320712695e-05,
|
29776 |
+
"loss": 1.1909,
|
29777 |
+
"step": 4248
|
29778 |
+
},
|
29779 |
+
{
|
29780 |
+
"epoch": 0.9442222222222222,
|
29781 |
+
"grad_norm": 0.13996893167495728,
|
29782 |
+
"learning_rate": 1.1180400890868597e-05,
|
29783 |
+
"loss": 0.0324,
|
29784 |
+
"step": 4249
|
29785 |
+
},
|
29786 |
+
{
|
29787 |
+
"epoch": 0.9444444444444444,
|
29788 |
+
"grad_norm": 1.2993894815444946,
|
29789 |
+
"learning_rate": 1.11358574610245e-05,
|
29790 |
+
"loss": 1.1957,
|
29791 |
+
"step": 4250
|
29792 |
+
},
|
29793 |
+
{
|
29794 |
+
"epoch": 0.9446666666666667,
|
29795 |
+
"grad_norm": 0.5980258584022522,
|
29796 |
+
"learning_rate": 1.10913140311804e-05,
|
29797 |
+
"loss": 1.0878,
|
29798 |
+
"step": 4251
|
29799 |
+
},
|
29800 |
+
{
|
29801 |
+
"epoch": 0.9448888888888889,
|
29802 |
+
"grad_norm": 0.044246070086956024,
|
29803 |
+
"learning_rate": 1.1046770601336305e-05,
|
29804 |
+
"loss": 0.0103,
|
29805 |
+
"step": 4252
|
29806 |
+
},
|
29807 |
+
{
|
29808 |
+
"epoch": 0.9451111111111111,
|
29809 |
+
"grad_norm": 0.04438783973455429,
|
29810 |
+
"learning_rate": 1.1002227171492205e-05,
|
29811 |
+
"loss": 0.0103,
|
29812 |
+
"step": 4253
|
29813 |
+
},
|
29814 |
+
{
|
29815 |
+
"epoch": 0.9453333333333334,
|
29816 |
+
"grad_norm": 0.5702335834503174,
|
29817 |
+
"learning_rate": 1.0957683741648107e-05,
|
29818 |
+
"loss": 1.2407,
|
29819 |
+
"step": 4254
|
29820 |
+
},
|
29821 |
+
{
|
29822 |
+
"epoch": 0.9455555555555556,
|
29823 |
+
"grad_norm": 0.5994575023651123,
|
29824 |
+
"learning_rate": 1.091314031180401e-05,
|
29825 |
+
"loss": 0.974,
|
29826 |
+
"step": 4255
|
29827 |
+
},
|
29828 |
+
{
|
29829 |
+
"epoch": 0.9457777777777778,
|
29830 |
+
"grad_norm": 0.916534423828125,
|
29831 |
+
"learning_rate": 1.086859688195991e-05,
|
29832 |
+
"loss": 2.1087,
|
29833 |
+
"step": 4256
|
29834 |
+
},
|
29835 |
+
{
|
29836 |
+
"epoch": 0.946,
|
29837 |
+
"grad_norm": 0.8901073336601257,
|
29838 |
+
"learning_rate": 1.0824053452115813e-05,
|
29839 |
+
"loss": 2.4281,
|
29840 |
+
"step": 4257
|
29841 |
+
},
|
29842 |
+
{
|
29843 |
+
"epoch": 0.9462222222222222,
|
29844 |
+
"grad_norm": 0.9570392966270447,
|
29845 |
+
"learning_rate": 1.0779510022271716e-05,
|
29846 |
+
"loss": 1.9183,
|
29847 |
+
"step": 4258
|
29848 |
+
},
|
29849 |
+
{
|
29850 |
+
"epoch": 0.9464444444444444,
|
29851 |
+
"grad_norm": 0.880129873752594,
|
29852 |
+
"learning_rate": 1.0734966592427617e-05,
|
29853 |
+
"loss": 2.15,
|
29854 |
+
"step": 4259
|
29855 |
+
},
|
29856 |
+
{
|
29857 |
+
"epoch": 0.9466666666666667,
|
29858 |
+
"grad_norm": 0.06885567307472229,
|
29859 |
+
"learning_rate": 1.069042316258352e-05,
|
29860 |
+
"loss": 0.0106,
|
29861 |
+
"step": 4260
|
29862 |
+
},
|
29863 |
+
{
|
29864 |
+
"epoch": 0.9468888888888889,
|
29865 |
+
"grad_norm": 0.8540828824043274,
|
29866 |
+
"learning_rate": 1.064587973273942e-05,
|
29867 |
+
"loss": 1.9726,
|
29868 |
+
"step": 4261
|
29869 |
+
},
|
29870 |
+
{
|
29871 |
+
"epoch": 0.9471111111111111,
|
29872 |
+
"grad_norm": 0.9908187985420227,
|
29873 |
+
"learning_rate": 1.0601336302895323e-05,
|
29874 |
+
"loss": 2.4167,
|
29875 |
+
"step": 4262
|
29876 |
+
},
|
29877 |
+
{
|
29878 |
+
"epoch": 0.9473333333333334,
|
29879 |
+
"grad_norm": 0.9318075180053711,
|
29880 |
+
"learning_rate": 1.0556792873051226e-05,
|
29881 |
+
"loss": 1.9713,
|
29882 |
+
"step": 4263
|
29883 |
+
},
|
29884 |
+
{
|
29885 |
+
"epoch": 0.9475555555555556,
|
29886 |
+
"grad_norm": 0.9172812700271606,
|
29887 |
+
"learning_rate": 1.0512249443207126e-05,
|
29888 |
+
"loss": 1.9912,
|
29889 |
+
"step": 4264
|
29890 |
+
},
|
29891 |
+
{
|
29892 |
+
"epoch": 0.9477777777777778,
|
29893 |
+
"grad_norm": 0.6781771779060364,
|
29894 |
+
"learning_rate": 1.046770601336303e-05,
|
29895 |
+
"loss": 1.0824,
|
29896 |
+
"step": 4265
|
29897 |
+
},
|
29898 |
+
{
|
29899 |
+
"epoch": 0.948,
|
29900 |
+
"grad_norm": 0.7084238529205322,
|
29901 |
+
"learning_rate": 1.0423162583518932e-05,
|
29902 |
+
"loss": 0.9626,
|
29903 |
+
"step": 4266
|
29904 |
+
},
|
29905 |
+
{
|
29906 |
+
"epoch": 0.9482222222222222,
|
29907 |
+
"grad_norm": 1.27622389793396,
|
29908 |
+
"learning_rate": 1.0378619153674832e-05,
|
29909 |
+
"loss": 1.8587,
|
29910 |
+
"step": 4267
|
29911 |
+
},
|
29912 |
+
{
|
29913 |
+
"epoch": 0.9484444444444444,
|
29914 |
+
"grad_norm": 0.9670615792274475,
|
29915 |
+
"learning_rate": 1.0334075723830736e-05,
|
29916 |
+
"loss": 1.6965,
|
29917 |
+
"step": 4268
|
29918 |
+
},
|
29919 |
+
{
|
29920 |
+
"epoch": 0.9486666666666667,
|
29921 |
+
"grad_norm": 0.07310232520103455,
|
29922 |
+
"learning_rate": 1.0289532293986638e-05,
|
29923 |
+
"loss": 0.0157,
|
29924 |
+
"step": 4269
|
29925 |
+
},
|
29926 |
+
{
|
29927 |
+
"epoch": 0.9488888888888889,
|
29928 |
+
"grad_norm": 0.9369098544120789,
|
29929 |
+
"learning_rate": 1.024498886414254e-05,
|
29930 |
+
"loss": 1.7478,
|
29931 |
+
"step": 4270
|
29932 |
+
},
|
29933 |
+
{
|
29934 |
+
"epoch": 0.9491111111111111,
|
29935 |
+
"grad_norm": 0.7294790148735046,
|
29936 |
+
"learning_rate": 1.0200445434298442e-05,
|
29937 |
+
"loss": 1.1187,
|
29938 |
+
"step": 4271
|
29939 |
+
},
|
29940 |
+
{
|
29941 |
+
"epoch": 0.9493333333333334,
|
29942 |
+
"grad_norm": 0.9501145482063293,
|
29943 |
+
"learning_rate": 1.0155902004454342e-05,
|
29944 |
+
"loss": 1.6452,
|
29945 |
+
"step": 4272
|
29946 |
+
},
|
29947 |
+
{
|
29948 |
+
"epoch": 0.9495555555555556,
|
29949 |
+
"grad_norm": 1.1477452516555786,
|
29950 |
+
"learning_rate": 1.0111358574610246e-05,
|
29951 |
+
"loss": 1.8428,
|
29952 |
+
"step": 4273
|
29953 |
+
},
|
29954 |
+
{
|
29955 |
+
"epoch": 0.9497777777777778,
|
29956 |
+
"grad_norm": 0.9523744583129883,
|
29957 |
+
"learning_rate": 1.0066815144766148e-05,
|
29958 |
+
"loss": 1.8126,
|
29959 |
+
"step": 4274
|
29960 |
+
},
|
29961 |
+
{
|
29962 |
+
"epoch": 0.95,
|
29963 |
+
"grad_norm": 0.9534154534339905,
|
29964 |
+
"learning_rate": 1.002227171492205e-05,
|
29965 |
+
"loss": 1.7667,
|
29966 |
+
"step": 4275
|
29967 |
+
},
|
29968 |
+
{
|
29969 |
+
"epoch": 0.9502222222222222,
|
29970 |
+
"grad_norm": 1.2276133298873901,
|
29971 |
+
"learning_rate": 9.977728285077952e-06,
|
29972 |
+
"loss": 1.901,
|
29973 |
+
"step": 4276
|
29974 |
+
},
|
29975 |
+
{
|
29976 |
+
"epoch": 0.9504444444444444,
|
29977 |
+
"grad_norm": 0.7759425044059753,
|
29978 |
+
"learning_rate": 9.933184855233854e-06,
|
29979 |
+
"loss": 0.7691,
|
29980 |
+
"step": 4277
|
29981 |
+
},
|
29982 |
+
{
|
29983 |
+
"epoch": 0.9506666666666667,
|
29984 |
+
"grad_norm": 0.06685524433851242,
|
29985 |
+
"learning_rate": 9.888641425389756e-06,
|
29986 |
+
"loss": 0.0175,
|
29987 |
+
"step": 4278
|
29988 |
+
},
|
29989 |
+
{
|
29990 |
+
"epoch": 0.9508888888888889,
|
29991 |
+
"grad_norm": 0.7249411940574646,
|
29992 |
+
"learning_rate": 9.844097995545658e-06,
|
29993 |
+
"loss": 0.983,
|
29994 |
+
"step": 4279
|
29995 |
+
},
|
29996 |
+
{
|
29997 |
+
"epoch": 0.9511111111111111,
|
29998 |
+
"grad_norm": 0.6953089237213135,
|
29999 |
+
"learning_rate": 9.79955456570156e-06,
|
30000 |
+
"loss": 0.9221,
|
30001 |
+
"step": 4280
|
30002 |
+
},
|
30003 |
+
{
|
30004 |
+
"epoch": 0.9513333333333334,
|
30005 |
+
"grad_norm": 1.0122225284576416,
|
30006 |
+
"learning_rate": 9.755011135857462e-06,
|
30007 |
+
"loss": 1.6027,
|
30008 |
+
"step": 4281
|
30009 |
+
},
|
30010 |
+
{
|
30011 |
+
"epoch": 0.9515555555555556,
|
30012 |
+
"grad_norm": 1.0647549629211426,
|
30013 |
+
"learning_rate": 9.710467706013364e-06,
|
30014 |
+
"loss": 1.6956,
|
30015 |
+
"step": 4282
|
30016 |
+
},
|
30017 |
+
{
|
30018 |
+
"epoch": 0.9517777777777777,
|
30019 |
+
"grad_norm": 1.0427286624908447,
|
30020 |
+
"learning_rate": 9.665924276169266e-06,
|
30021 |
+
"loss": 1.6593,
|
30022 |
+
"step": 4283
|
30023 |
+
},
|
30024 |
+
{
|
30025 |
+
"epoch": 0.952,
|
30026 |
+
"grad_norm": 0.07722879201173782,
|
30027 |
+
"learning_rate": 9.621380846325168e-06,
|
30028 |
+
"loss": 0.0186,
|
30029 |
+
"step": 4284
|
30030 |
+
},
|
30031 |
+
{
|
30032 |
+
"epoch": 0.9522222222222222,
|
30033 |
+
"grad_norm": 0.7623983025550842,
|
30034 |
+
"learning_rate": 9.57683741648107e-06,
|
30035 |
+
"loss": 0.9881,
|
30036 |
+
"step": 4285
|
30037 |
+
},
|
30038 |
+
{
|
30039 |
+
"epoch": 0.9524444444444444,
|
30040 |
+
"grad_norm": 1.0861274003982544,
|
30041 |
+
"learning_rate": 9.532293986636972e-06,
|
30042 |
+
"loss": 1.5431,
|
30043 |
+
"step": 4286
|
30044 |
+
},
|
30045 |
+
{
|
30046 |
+
"epoch": 0.9526666666666667,
|
30047 |
+
"grad_norm": 0.9466423988342285,
|
30048 |
+
"learning_rate": 9.487750556792873e-06,
|
30049 |
+
"loss": 1.5866,
|
30050 |
+
"step": 4287
|
30051 |
+
},
|
30052 |
+
{
|
30053 |
+
"epoch": 0.9528888888888889,
|
30054 |
+
"grad_norm": 0.7006486058235168,
|
30055 |
+
"learning_rate": 9.443207126948775e-06,
|
30056 |
+
"loss": 0.8612,
|
30057 |
+
"step": 4288
|
30058 |
+
},
|
30059 |
+
{
|
30060 |
+
"epoch": 0.9531111111111111,
|
30061 |
+
"grad_norm": 0.9445701241493225,
|
30062 |
+
"learning_rate": 9.398663697104677e-06,
|
30063 |
+
"loss": 1.4541,
|
30064 |
+
"step": 4289
|
30065 |
+
},
|
30066 |
+
{
|
30067 |
+
"epoch": 0.9533333333333334,
|
30068 |
+
"grad_norm": 1.2460566759109497,
|
30069 |
+
"learning_rate": 9.35412026726058e-06,
|
30070 |
+
"loss": 1.6395,
|
30071 |
+
"step": 4290
|
30072 |
+
},
|
30073 |
+
{
|
30074 |
+
"epoch": 0.9535555555555556,
|
30075 |
+
"grad_norm": 1.0960421562194824,
|
30076 |
+
"learning_rate": 9.309576837416481e-06,
|
30077 |
+
"loss": 1.3166,
|
30078 |
+
"step": 4291
|
30079 |
+
},
|
30080 |
+
{
|
30081 |
+
"epoch": 0.9537777777777777,
|
30082 |
+
"grad_norm": 1.22000253200531,
|
30083 |
+
"learning_rate": 9.265033407572383e-06,
|
30084 |
+
"loss": 1.4362,
|
30085 |
+
"step": 4292
|
30086 |
+
},
|
30087 |
+
{
|
30088 |
+
"epoch": 0.954,
|
30089 |
+
"grad_norm": 1.0577735900878906,
|
30090 |
+
"learning_rate": 9.220489977728285e-06,
|
30091 |
+
"loss": 1.2362,
|
30092 |
+
"step": 4293
|
30093 |
+
},
|
30094 |
+
{
|
30095 |
+
"epoch": 0.9542222222222222,
|
30096 |
+
"grad_norm": 1.261118769645691,
|
30097 |
+
"learning_rate": 9.175946547884187e-06,
|
30098 |
+
"loss": 1.5433,
|
30099 |
+
"step": 4294
|
30100 |
+
},
|
30101 |
+
{
|
30102 |
+
"epoch": 0.9544444444444444,
|
30103 |
+
"grad_norm": 1.0835603475570679,
|
30104 |
+
"learning_rate": 9.13140311804009e-06,
|
30105 |
+
"loss": 1.0706,
|
30106 |
+
"step": 4295
|
30107 |
+
},
|
30108 |
+
{
|
30109 |
+
"epoch": 0.9546666666666667,
|
30110 |
+
"grad_norm": 0.1414426565170288,
|
30111 |
+
"learning_rate": 9.086859688195993e-06,
|
30112 |
+
"loss": 0.0324,
|
30113 |
+
"step": 4296
|
30114 |
+
},
|
30115 |
+
{
|
30116 |
+
"epoch": 0.9548888888888889,
|
30117 |
+
"grad_norm": 0.15208975970745087,
|
30118 |
+
"learning_rate": 9.042316258351893e-06,
|
30119 |
+
"loss": 0.0327,
|
30120 |
+
"step": 4297
|
30121 |
+
},
|
30122 |
+
{
|
30123 |
+
"epoch": 0.9551111111111111,
|
30124 |
+
"grad_norm": 0.942937970161438,
|
30125 |
+
"learning_rate": 8.997772828507795e-06,
|
30126 |
+
"loss": 0.9045,
|
30127 |
+
"step": 4298
|
30128 |
+
},
|
30129 |
+
{
|
30130 |
+
"epoch": 0.9553333333333334,
|
30131 |
+
"grad_norm": 0.9884275197982788,
|
30132 |
+
"learning_rate": 8.953229398663697e-06,
|
30133 |
+
"loss": 0.9895,
|
30134 |
+
"step": 4299
|
30135 |
+
},
|
30136 |
+
{
|
30137 |
+
"epoch": 0.9555555555555556,
|
30138 |
+
"grad_norm": 0.936667263507843,
|
30139 |
+
"learning_rate": 8.908685968819599e-06,
|
30140 |
+
"loss": 0.776,
|
30141 |
+
"step": 4300
|
30142 |
+
},
|
30143 |
+
{
|
30144 |
+
"epoch": 0.9557777777777777,
|
30145 |
+
"grad_norm": 0.6337212324142456,
|
30146 |
+
"learning_rate": 8.864142538975503e-06,
|
30147 |
+
"loss": 1.1042,
|
30148 |
+
"step": 4301
|
30149 |
+
},
|
30150 |
+
{
|
30151 |
+
"epoch": 0.956,
|
30152 |
+
"grad_norm": 0.8414755463600159,
|
30153 |
+
"learning_rate": 8.819599109131403e-06,
|
30154 |
+
"loss": 1.9703,
|
30155 |
+
"step": 4302
|
30156 |
+
},
|
30157 |
+
{
|
30158 |
+
"epoch": 0.9562222222222222,
|
30159 |
+
"grad_norm": 0.5726562738418579,
|
30160 |
+
"learning_rate": 8.775055679287305e-06,
|
30161 |
+
"loss": 0.8849,
|
30162 |
+
"step": 4303
|
30163 |
+
},
|
30164 |
+
{
|
30165 |
+
"epoch": 0.9564444444444444,
|
30166 |
+
"grad_norm": 0.6391728520393372,
|
30167 |
+
"learning_rate": 8.730512249443209e-06,
|
30168 |
+
"loss": 1.1577,
|
30169 |
+
"step": 4304
|
30170 |
+
},
|
30171 |
+
{
|
30172 |
+
"epoch": 0.9566666666666667,
|
30173 |
+
"grad_norm": 0.5593711137771606,
|
30174 |
+
"learning_rate": 8.685968819599109e-06,
|
30175 |
+
"loss": 0.8922,
|
30176 |
+
"step": 4305
|
30177 |
+
},
|
30178 |
+
{
|
30179 |
+
"epoch": 0.9568888888888889,
|
30180 |
+
"grad_norm": 0.7331346869468689,
|
30181 |
+
"learning_rate": 8.641425389755013e-06,
|
30182 |
+
"loss": 1.1386,
|
30183 |
+
"step": 4306
|
30184 |
+
},
|
30185 |
+
{
|
30186 |
+
"epoch": 0.9571111111111111,
|
30187 |
+
"grad_norm": 0.5739585757255554,
|
30188 |
+
"learning_rate": 8.596881959910913e-06,
|
30189 |
+
"loss": 0.8375,
|
30190 |
+
"step": 4307
|
30191 |
+
},
|
30192 |
+
{
|
30193 |
+
"epoch": 0.9573333333333334,
|
30194 |
+
"grad_norm": 0.07684678584337234,
|
30195 |
+
"learning_rate": 8.552338530066815e-06,
|
30196 |
+
"loss": 0.011,
|
30197 |
+
"step": 4308
|
30198 |
+
},
|
30199 |
+
{
|
30200 |
+
"epoch": 0.9575555555555556,
|
30201 |
+
"grad_norm": 0.07993515580892563,
|
30202 |
+
"learning_rate": 8.507795100222719e-06,
|
30203 |
+
"loss": 0.0111,
|
30204 |
+
"step": 4309
|
30205 |
+
},
|
30206 |
+
{
|
30207 |
+
"epoch": 0.9577777777777777,
|
30208 |
+
"grad_norm": 0.6196415424346924,
|
30209 |
+
"learning_rate": 8.463251670378619e-06,
|
30210 |
+
"loss": 1.0166,
|
30211 |
+
"step": 4310
|
30212 |
+
},
|
30213 |
+
{
|
30214 |
+
"epoch": 0.958,
|
30215 |
+
"grad_norm": 0.8932988047599792,
|
30216 |
+
"learning_rate": 8.41870824053452e-06,
|
30217 |
+
"loss": 2.3441,
|
30218 |
+
"step": 4311
|
30219 |
+
},
|
30220 |
+
{
|
30221 |
+
"epoch": 0.9582222222222222,
|
30222 |
+
"grad_norm": 1.2204405069351196,
|
30223 |
+
"learning_rate": 8.374164810690424e-06,
|
30224 |
+
"loss": 2.0743,
|
30225 |
+
"step": 4312
|
30226 |
+
},
|
30227 |
+
{
|
30228 |
+
"epoch": 0.9584444444444444,
|
30229 |
+
"grad_norm": 0.9031944870948792,
|
30230 |
+
"learning_rate": 8.329621380846325e-06,
|
30231 |
+
"loss": 1.9911,
|
30232 |
+
"step": 4313
|
30233 |
+
},
|
30234 |
+
{
|
30235 |
+
"epoch": 0.9586666666666667,
|
30236 |
+
"grad_norm": 1.0615592002868652,
|
30237 |
+
"learning_rate": 8.285077951002228e-06,
|
30238 |
+
"loss": 2.3094,
|
30239 |
+
"step": 4314
|
30240 |
+
},
|
30241 |
+
{
|
30242 |
+
"epoch": 0.9588888888888889,
|
30243 |
+
"grad_norm": 0.931339681148529,
|
30244 |
+
"learning_rate": 8.240534521158129e-06,
|
30245 |
+
"loss": 1.8745,
|
30246 |
+
"step": 4315
|
30247 |
+
},
|
30248 |
+
{
|
30249 |
+
"epoch": 0.9591111111111111,
|
30250 |
+
"grad_norm": 1.068681240081787,
|
30251 |
+
"learning_rate": 8.19599109131403e-06,
|
30252 |
+
"loss": 2.1088,
|
30253 |
+
"step": 4316
|
30254 |
+
},
|
30255 |
+
{
|
30256 |
+
"epoch": 0.9593333333333334,
|
30257 |
+
"grad_norm": 0.9199005365371704,
|
30258 |
+
"learning_rate": 8.151447661469934e-06,
|
30259 |
+
"loss": 1.9663,
|
30260 |
+
"step": 4317
|
30261 |
+
},
|
30262 |
+
{
|
30263 |
+
"epoch": 0.9595555555555556,
|
30264 |
+
"grad_norm": 0.9643719792366028,
|
30265 |
+
"learning_rate": 8.106904231625835e-06,
|
30266 |
+
"loss": 2.2083,
|
30267 |
+
"step": 4318
|
30268 |
+
},
|
30269 |
+
{
|
30270 |
+
"epoch": 0.9597777777777777,
|
30271 |
+
"grad_norm": 1.026021122932434,
|
30272 |
+
"learning_rate": 8.062360801781738e-06,
|
30273 |
+
"loss": 1.688,
|
30274 |
+
"step": 4319
|
30275 |
+
},
|
30276 |
+
{
|
30277 |
+
"epoch": 0.96,
|
30278 |
+
"grad_norm": 0.9671936631202698,
|
30279 |
+
"learning_rate": 8.01781737193764e-06,
|
30280 |
+
"loss": 2.0219,
|
30281 |
+
"step": 4320
|
30282 |
+
},
|
30283 |
+
{
|
30284 |
+
"epoch": 0.9602222222222222,
|
30285 |
+
"grad_norm": 0.9827919006347656,
|
30286 |
+
"learning_rate": 7.97327394209354e-06,
|
30287 |
+
"loss": 1.6401,
|
30288 |
+
"step": 4321
|
30289 |
+
},
|
30290 |
+
{
|
30291 |
+
"epoch": 0.9604444444444444,
|
30292 |
+
"grad_norm": 0.07093919813632965,
|
30293 |
+
"learning_rate": 7.928730512249444e-06,
|
30294 |
+
"loss": 0.0157,
|
30295 |
+
"step": 4322
|
30296 |
+
},
|
30297 |
+
{
|
30298 |
+
"epoch": 0.9606666666666667,
|
30299 |
+
"grad_norm": 0.07090691477060318,
|
30300 |
+
"learning_rate": 7.884187082405346e-06,
|
30301 |
+
"loss": 0.0154,
|
30302 |
+
"step": 4323
|
30303 |
+
},
|
30304 |
+
{
|
30305 |
+
"epoch": 0.9608888888888889,
|
30306 |
+
"grad_norm": 0.06990091502666473,
|
30307 |
+
"learning_rate": 7.839643652561248e-06,
|
30308 |
+
"loss": 0.0153,
|
30309 |
+
"step": 4324
|
30310 |
+
},
|
30311 |
+
{
|
30312 |
+
"epoch": 0.9611111111111111,
|
30313 |
+
"grad_norm": 0.06884946674108505,
|
30314 |
+
"learning_rate": 7.79510022271715e-06,
|
30315 |
+
"loss": 0.0152,
|
30316 |
+
"step": 4325
|
30317 |
+
},
|
30318 |
+
{
|
30319 |
+
"epoch": 0.9613333333333334,
|
30320 |
+
"grad_norm": 0.8022354245185852,
|
30321 |
+
"learning_rate": 7.75055679287305e-06,
|
30322 |
+
"loss": 0.8116,
|
30323 |
+
"step": 4326
|
30324 |
+
},
|
30325 |
+
{
|
30326 |
+
"epoch": 0.9615555555555556,
|
30327 |
+
"grad_norm": 1.062786340713501,
|
30328 |
+
"learning_rate": 7.706013363028954e-06,
|
30329 |
+
"loss": 1.7179,
|
30330 |
+
"step": 4327
|
30331 |
+
},
|
30332 |
+
{
|
30333 |
+
"epoch": 0.9617777777777777,
|
30334 |
+
"grad_norm": 1.0791099071502686,
|
30335 |
+
"learning_rate": 7.661469933184856e-06,
|
30336 |
+
"loss": 1.7474,
|
30337 |
+
"step": 4328
|
30338 |
+
},
|
30339 |
+
{
|
30340 |
+
"epoch": 0.962,
|
30341 |
+
"grad_norm": 1.0296615362167358,
|
30342 |
+
"learning_rate": 7.616926503340757e-06,
|
30343 |
+
"loss": 1.6771,
|
30344 |
+
"step": 4329
|
30345 |
+
},
|
30346 |
+
{
|
30347 |
+
"epoch": 0.9622222222222222,
|
30348 |
+
"grad_norm": 1.076456904411316,
|
30349 |
+
"learning_rate": 7.57238307349666e-06,
|
30350 |
+
"loss": 1.8226,
|
30351 |
+
"step": 4330
|
30352 |
+
},
|
30353 |
+
{
|
30354 |
+
"epoch": 0.9624444444444444,
|
30355 |
+
"grad_norm": 0.9866617321968079,
|
30356 |
+
"learning_rate": 7.527839643652562e-06,
|
30357 |
+
"loss": 1.887,
|
30358 |
+
"step": 4331
|
30359 |
+
},
|
30360 |
+
{
|
30361 |
+
"epoch": 0.9626666666666667,
|
30362 |
+
"grad_norm": 0.06448253244161606,
|
30363 |
+
"learning_rate": 7.483296213808463e-06,
|
30364 |
+
"loss": 0.0173,
|
30365 |
+
"step": 4332
|
30366 |
+
},
|
30367 |
+
{
|
30368 |
+
"epoch": 0.9628888888888889,
|
30369 |
+
"grad_norm": 0.06699193269014359,
|
30370 |
+
"learning_rate": 7.438752783964366e-06,
|
30371 |
+
"loss": 0.0175,
|
30372 |
+
"step": 4333
|
30373 |
+
},
|
30374 |
+
{
|
30375 |
+
"epoch": 0.9631111111111111,
|
30376 |
+
"grad_norm": 0.7605263590812683,
|
30377 |
+
"learning_rate": 7.394209354120267e-06,
|
30378 |
+
"loss": 0.8152,
|
30379 |
+
"step": 4334
|
30380 |
+
},
|
30381 |
+
{
|
30382 |
+
"epoch": 0.9633333333333334,
|
30383 |
+
"grad_norm": 1.077652096748352,
|
30384 |
+
"learning_rate": 7.34966592427617e-06,
|
30385 |
+
"loss": 1.8169,
|
30386 |
+
"step": 4335
|
30387 |
+
},
|
30388 |
+
{
|
30389 |
+
"epoch": 0.9635555555555556,
|
30390 |
+
"grad_norm": 0.07532133162021637,
|
30391 |
+
"learning_rate": 7.305122494432072e-06,
|
30392 |
+
"loss": 0.0182,
|
30393 |
+
"step": 4336
|
30394 |
+
},
|
30395 |
+
{
|
30396 |
+
"epoch": 0.9637777777777777,
|
30397 |
+
"grad_norm": 0.07628657668828964,
|
30398 |
+
"learning_rate": 7.260579064587973e-06,
|
30399 |
+
"loss": 0.0177,
|
30400 |
+
"step": 4337
|
30401 |
+
},
|
30402 |
+
{
|
30403 |
+
"epoch": 0.964,
|
30404 |
+
"grad_norm": 0.07550018280744553,
|
30405 |
+
"learning_rate": 7.216035634743876e-06,
|
30406 |
+
"loss": 0.0181,
|
30407 |
+
"step": 4338
|
30408 |
+
},
|
30409 |
+
{
|
30410 |
+
"epoch": 0.9642222222222222,
|
30411 |
+
"grad_norm": 0.9071139097213745,
|
30412 |
+
"learning_rate": 7.1714922048997785e-06,
|
30413 |
+
"loss": 0.8412,
|
30414 |
+
"step": 4339
|
30415 |
+
},
|
30416 |
+
{
|
30417 |
+
"epoch": 0.9644444444444444,
|
30418 |
+
"grad_norm": 0.9595382213592529,
|
30419 |
+
"learning_rate": 7.12694877505568e-06,
|
30420 |
+
"loss": 1.4946,
|
30421 |
+
"step": 4340
|
30422 |
+
},
|
30423 |
+
{
|
30424 |
+
"epoch": 0.9646666666666667,
|
30425 |
+
"grad_norm": 1.0608892440795898,
|
30426 |
+
"learning_rate": 7.082405345211582e-06,
|
30427 |
+
"loss": 1.6542,
|
30428 |
+
"step": 4341
|
30429 |
+
},
|
30430 |
+
{
|
30431 |
+
"epoch": 0.9648888888888889,
|
30432 |
+
"grad_norm": 1.1697642803192139,
|
30433 |
+
"learning_rate": 7.037861915367483e-06,
|
30434 |
+
"loss": 1.7757,
|
30435 |
+
"step": 4342
|
30436 |
+
},
|
30437 |
+
{
|
30438 |
+
"epoch": 0.9651111111111111,
|
30439 |
+
"grad_norm": 1.0154902935028076,
|
30440 |
+
"learning_rate": 6.9933184855233855e-06,
|
30441 |
+
"loss": 1.2555,
|
30442 |
+
"step": 4343
|
30443 |
+
},
|
30444 |
+
{
|
30445 |
+
"epoch": 0.9653333333333334,
|
30446 |
+
"grad_norm": 1.1377027034759521,
|
30447 |
+
"learning_rate": 6.948775055679288e-06,
|
30448 |
+
"loss": 1.7557,
|
30449 |
+
"step": 4344
|
30450 |
+
},
|
30451 |
+
{
|
30452 |
+
"epoch": 0.9655555555555555,
|
30453 |
+
"grad_norm": 1.1074367761611938,
|
30454 |
+
"learning_rate": 6.9042316258351895e-06,
|
30455 |
+
"loss": 1.5758,
|
30456 |
+
"step": 4345
|
30457 |
+
},
|
30458 |
+
{
|
30459 |
+
"epoch": 0.9657777777777777,
|
30460 |
+
"grad_norm": 0.6881236433982849,
|
30461 |
+
"learning_rate": 6.8596881959910914e-06,
|
30462 |
+
"loss": 0.7293,
|
30463 |
+
"step": 4346
|
30464 |
+
},
|
30465 |
+
{
|
30466 |
+
"epoch": 0.966,
|
30467 |
+
"grad_norm": 0.18515631556510925,
|
30468 |
+
"learning_rate": 6.815144766146994e-06,
|
30469 |
+
"loss": 0.0304,
|
30470 |
+
"step": 4347
|
30471 |
+
},
|
30472 |
+
{
|
30473 |
+
"epoch": 0.9662222222222222,
|
30474 |
+
"grad_norm": 1.1112456321716309,
|
30475 |
+
"learning_rate": 6.770601336302895e-06,
|
30476 |
+
"loss": 1.2363,
|
30477 |
+
"step": 4348
|
30478 |
+
},
|
30479 |
+
{
|
30480 |
+
"epoch": 0.9664444444444444,
|
30481 |
+
"grad_norm": 0.8793594837188721,
|
30482 |
+
"learning_rate": 6.726057906458798e-06,
|
30483 |
+
"loss": 0.8094,
|
30484 |
+
"step": 4349
|
30485 |
+
},
|
30486 |
+
{
|
30487 |
+
"epoch": 0.9666666666666667,
|
30488 |
+
"grad_norm": 0.9809714555740356,
|
30489 |
+
"learning_rate": 6.6815144766147e-06,
|
30490 |
+
"loss": 0.7478,
|
30491 |
+
"step": 4350
|
30492 |
+
},
|
30493 |
+
{
|
30494 |
+
"epoch": 0.9668888888888889,
|
30495 |
+
"grad_norm": 0.6749547123908997,
|
30496 |
+
"learning_rate": 6.636971046770601e-06,
|
30497 |
+
"loss": 1.0962,
|
30498 |
+
"step": 4351
|
30499 |
+
},
|
30500 |
+
{
|
30501 |
+
"epoch": 0.9671111111111111,
|
30502 |
+
"grad_norm": 0.8518489599227905,
|
30503 |
+
"learning_rate": 6.592427616926504e-06,
|
30504 |
+
"loss": 2.0981,
|
30505 |
+
"step": 4352
|
30506 |
+
},
|
30507 |
+
{
|
30508 |
+
"epoch": 0.9673333333333334,
|
30509 |
+
"grad_norm": 0.5904682874679565,
|
30510 |
+
"learning_rate": 6.547884187082405e-06,
|
30511 |
+
"loss": 0.8928,
|
30512 |
+
"step": 4353
|
30513 |
+
},
|
30514 |
+
{
|
30515 |
+
"epoch": 0.9675555555555555,
|
30516 |
+
"grad_norm": 0.6970412731170654,
|
30517 |
+
"learning_rate": 6.503340757238307e-06,
|
30518 |
+
"loss": 1.1904,
|
30519 |
+
"step": 4354
|
30520 |
+
},
|
30521 |
+
{
|
30522 |
+
"epoch": 0.9677777777777777,
|
30523 |
+
"grad_norm": 0.8849335312843323,
|
30524 |
+
"learning_rate": 6.45879732739421e-06,
|
30525 |
+
"loss": 1.9197,
|
30526 |
+
"step": 4355
|
30527 |
+
},
|
30528 |
+
{
|
30529 |
+
"epoch": 0.968,
|
30530 |
+
"grad_norm": 0.7975565791130066,
|
30531 |
+
"learning_rate": 6.414253897550111e-06,
|
30532 |
+
"loss": 2.0422,
|
30533 |
+
"step": 4356
|
30534 |
+
},
|
30535 |
+
{
|
30536 |
+
"epoch": 0.9682222222222222,
|
30537 |
+
"grad_norm": 0.898343026638031,
|
30538 |
+
"learning_rate": 6.369710467706014e-06,
|
30539 |
+
"loss": 2.0284,
|
30540 |
+
"step": 4357
|
30541 |
+
},
|
30542 |
+
{
|
30543 |
+
"epoch": 0.9684444444444444,
|
30544 |
+
"grad_norm": 0.08013878762722015,
|
30545 |
+
"learning_rate": 6.325167037861916e-06,
|
30546 |
+
"loss": 0.011,
|
30547 |
+
"step": 4358
|
30548 |
+
},
|
30549 |
+
{
|
30550 |
+
"epoch": 0.9686666666666667,
|
30551 |
+
"grad_norm": 0.0778137668967247,
|
30552 |
+
"learning_rate": 6.280623608017817e-06,
|
30553 |
+
"loss": 0.011,
|
30554 |
+
"step": 4359
|
30555 |
+
},
|
30556 |
+
{
|
30557 |
+
"epoch": 0.9688888888888889,
|
30558 |
+
"grad_norm": 0.075262151658535,
|
30559 |
+
"learning_rate": 6.23608017817372e-06,
|
30560 |
+
"loss": 0.0111,
|
30561 |
+
"step": 4360
|
30562 |
+
},
|
30563 |
+
{
|
30564 |
+
"epoch": 0.9691111111111111,
|
30565 |
+
"grad_norm": 0.8569869995117188,
|
30566 |
+
"learning_rate": 6.191536748329622e-06,
|
30567 |
+
"loss": 2.1552,
|
30568 |
+
"step": 4361
|
30569 |
+
},
|
30570 |
+
{
|
30571 |
+
"epoch": 0.9693333333333334,
|
30572 |
+
"grad_norm": 0.7958348393440247,
|
30573 |
+
"learning_rate": 6.146993318485524e-06,
|
30574 |
+
"loss": 2.1074,
|
30575 |
+
"step": 4362
|
30576 |
+
},
|
30577 |
+
{
|
30578 |
+
"epoch": 0.9695555555555555,
|
30579 |
+
"grad_norm": 0.861589789390564,
|
30580 |
+
"learning_rate": 6.102449888641426e-06,
|
30581 |
+
"loss": 1.8603,
|
30582 |
+
"step": 4363
|
30583 |
+
},
|
30584 |
+
{
|
30585 |
+
"epoch": 0.9697777777777777,
|
30586 |
+
"grad_norm": 0.870924174785614,
|
30587 |
+
"learning_rate": 6.057906458797328e-06,
|
30588 |
+
"loss": 1.6482,
|
30589 |
+
"step": 4364
|
30590 |
+
},
|
30591 |
+
{
|
30592 |
+
"epoch": 0.97,
|
30593 |
+
"grad_norm": 0.6811211109161377,
|
30594 |
+
"learning_rate": 6.01336302895323e-06,
|
30595 |
+
"loss": 0.9431,
|
30596 |
+
"step": 4365
|
30597 |
+
},
|
30598 |
+
{
|
30599 |
+
"epoch": 0.9702222222222222,
|
30600 |
+
"grad_norm": 1.2624512910842896,
|
30601 |
+
"learning_rate": 5.968819599109132e-06,
|
30602 |
+
"loss": 1.2395,
|
30603 |
+
"step": 4366
|
30604 |
+
},
|
30605 |
+
{
|
30606 |
+
"epoch": 0.9704444444444444,
|
30607 |
+
"grad_norm": 0.6082854866981506,
|
30608 |
+
"learning_rate": 5.924276169265034e-06,
|
30609 |
+
"loss": 0.9055,
|
30610 |
+
"step": 4367
|
30611 |
+
},
|
30612 |
+
{
|
30613 |
+
"epoch": 0.9706666666666667,
|
30614 |
+
"grad_norm": 0.990861713886261,
|
30615 |
+
"learning_rate": 5.879732739420936e-06,
|
30616 |
+
"loss": 1.96,
|
30617 |
+
"step": 4368
|
30618 |
+
},
|
30619 |
+
{
|
30620 |
+
"epoch": 0.9708888888888889,
|
30621 |
+
"grad_norm": 0.99041748046875,
|
30622 |
+
"learning_rate": 5.835189309576838e-06,
|
30623 |
+
"loss": 1.6842,
|
30624 |
+
"step": 4369
|
30625 |
+
},
|
30626 |
+
{
|
30627 |
+
"epoch": 0.9711111111111111,
|
30628 |
+
"grad_norm": 0.9319810271263123,
|
30629 |
+
"learning_rate": 5.79064587973274e-06,
|
30630 |
+
"loss": 1.9125,
|
30631 |
+
"step": 4370
|
30632 |
+
},
|
30633 |
+
{
|
30634 |
+
"epoch": 0.9713333333333334,
|
30635 |
+
"grad_norm": 1.01008141040802,
|
30636 |
+
"learning_rate": 5.746102449888642e-06,
|
30637 |
+
"loss": 1.9553,
|
30638 |
+
"step": 4371
|
30639 |
+
},
|
30640 |
+
{
|
30641 |
+
"epoch": 0.9715555555555555,
|
30642 |
+
"grad_norm": 0.7167505025863647,
|
30643 |
+
"learning_rate": 5.7015590200445435e-06,
|
30644 |
+
"loss": 0.972,
|
30645 |
+
"step": 4372
|
30646 |
+
},
|
30647 |
+
{
|
30648 |
+
"epoch": 0.9717777777777777,
|
30649 |
+
"grad_norm": 0.07200965285301208,
|
30650 |
+
"learning_rate": 5.6570155902004455e-06,
|
30651 |
+
"loss": 0.0151,
|
30652 |
+
"step": 4373
|
30653 |
+
},
|
30654 |
+
{
|
30655 |
+
"epoch": 0.972,
|
30656 |
+
"grad_norm": 0.6893488764762878,
|
30657 |
+
"learning_rate": 5.6124721603563475e-06,
|
30658 |
+
"loss": 0.8677,
|
30659 |
+
"step": 4374
|
30660 |
+
},
|
30661 |
+
{
|
30662 |
+
"epoch": 0.9722222222222222,
|
30663 |
+
"grad_norm": 0.12305945158004761,
|
30664 |
+
"learning_rate": 5.56792873051225e-06,
|
30665 |
+
"loss": 0.0204,
|
30666 |
+
"step": 4375
|
30667 |
+
},
|
30668 |
+
{
|
30669 |
+
"epoch": 0.9724444444444444,
|
30670 |
+
"grad_norm": 0.9432768821716309,
|
30671 |
+
"learning_rate": 5.523385300668152e-06,
|
30672 |
+
"loss": 1.7875,
|
30673 |
+
"step": 4376
|
30674 |
+
},
|
30675 |
+
{
|
30676 |
+
"epoch": 0.9726666666666667,
|
30677 |
+
"grad_norm": 1.0131165981292725,
|
30678 |
+
"learning_rate": 5.478841870824053e-06,
|
30679 |
+
"loss": 1.6501,
|
30680 |
+
"step": 4377
|
30681 |
+
},
|
30682 |
+
{
|
30683 |
+
"epoch": 0.9728888888888889,
|
30684 |
+
"grad_norm": 1.0048753023147583,
|
30685 |
+
"learning_rate": 5.434298440979955e-06,
|
30686 |
+
"loss": 1.6584,
|
30687 |
+
"step": 4378
|
30688 |
+
},
|
30689 |
+
{
|
30690 |
+
"epoch": 0.9731111111111111,
|
30691 |
+
"grad_norm": 0.9927910566329956,
|
30692 |
+
"learning_rate": 5.389755011135858e-06,
|
30693 |
+
"loss": 1.9151,
|
30694 |
+
"step": 4379
|
30695 |
+
},
|
30696 |
+
{
|
30697 |
+
"epoch": 0.9733333333333334,
|
30698 |
+
"grad_norm": 0.7287546396255493,
|
30699 |
+
"learning_rate": 5.34521158129176e-06,
|
30700 |
+
"loss": 0.7935,
|
30701 |
+
"step": 4380
|
30702 |
+
},
|
30703 |
+
{
|
30704 |
+
"epoch": 0.9735555555555555,
|
30705 |
+
"grad_norm": 0.06438437104225159,
|
30706 |
+
"learning_rate": 5.300668151447661e-06,
|
30707 |
+
"loss": 0.0174,
|
30708 |
+
"step": 4381
|
30709 |
+
},
|
30710 |
+
{
|
30711 |
+
"epoch": 0.9737777777777777,
|
30712 |
+
"grad_norm": 0.7063573002815247,
|
30713 |
+
"learning_rate": 5.256124721603563e-06,
|
30714 |
+
"loss": 0.794,
|
30715 |
+
"step": 4382
|
30716 |
+
},
|
30717 |
+
{
|
30718 |
+
"epoch": 0.974,
|
30719 |
+
"grad_norm": 0.07574823498725891,
|
30720 |
+
"learning_rate": 5.211581291759466e-06,
|
30721 |
+
"loss": 0.0178,
|
30722 |
+
"step": 4383
|
30723 |
+
},
|
30724 |
+
{
|
30725 |
+
"epoch": 0.9742222222222222,
|
30726 |
+
"grad_norm": 0.0726298987865448,
|
30727 |
+
"learning_rate": 5.167037861915368e-06,
|
30728 |
+
"loss": 0.0177,
|
30729 |
+
"step": 4384
|
30730 |
+
},
|
30731 |
+
{
|
30732 |
+
"epoch": 0.9744444444444444,
|
30733 |
+
"grad_norm": 0.7331129312515259,
|
30734 |
+
"learning_rate": 5.12249443207127e-06,
|
30735 |
+
"loss": 0.7402,
|
30736 |
+
"step": 4385
|
30737 |
+
},
|
30738 |
+
{
|
30739 |
+
"epoch": 0.9746666666666667,
|
30740 |
+
"grad_norm": 1.0017316341400146,
|
30741 |
+
"learning_rate": 5.077951002227171e-06,
|
30742 |
+
"loss": 1.6145,
|
30743 |
+
"step": 4386
|
30744 |
+
},
|
30745 |
+
{
|
30746 |
+
"epoch": 0.9748888888888889,
|
30747 |
+
"grad_norm": 0.9680055379867554,
|
30748 |
+
"learning_rate": 5.033407572383074e-06,
|
30749 |
+
"loss": 1.4337,
|
30750 |
+
"step": 4387
|
30751 |
+
},
|
30752 |
+
{
|
30753 |
+
"epoch": 0.9751111111111112,
|
30754 |
+
"grad_norm": 1.0141950845718384,
|
30755 |
+
"learning_rate": 4.988864142538976e-06,
|
30756 |
+
"loss": 1.549,
|
30757 |
+
"step": 4388
|
30758 |
+
},
|
30759 |
+
{
|
30760 |
+
"epoch": 0.9753333333333334,
|
30761 |
+
"grad_norm": 0.7489122748374939,
|
30762 |
+
"learning_rate": 4.944320712694878e-06,
|
30763 |
+
"loss": 0.6078,
|
30764 |
+
"step": 4389
|
30765 |
+
},
|
30766 |
+
{
|
30767 |
+
"epoch": 0.9755555555555555,
|
30768 |
+
"grad_norm": 0.9311794638633728,
|
30769 |
+
"learning_rate": 4.89977728285078e-06,
|
30770 |
+
"loss": 0.6076,
|
30771 |
+
"step": 4390
|
30772 |
+
},
|
30773 |
+
{
|
30774 |
+
"epoch": 0.9757777777777777,
|
30775 |
+
"grad_norm": 1.1416817903518677,
|
30776 |
+
"learning_rate": 4.855233853006682e-06,
|
30777 |
+
"loss": 1.7661,
|
30778 |
+
"step": 4391
|
30779 |
+
},
|
30780 |
+
{
|
30781 |
+
"epoch": 0.976,
|
30782 |
+
"grad_norm": 1.3029440641403198,
|
30783 |
+
"learning_rate": 4.810690423162584e-06,
|
30784 |
+
"loss": 1.5154,
|
30785 |
+
"step": 4392
|
30786 |
+
},
|
30787 |
+
{
|
30788 |
+
"epoch": 0.9762222222222222,
|
30789 |
+
"grad_norm": 1.1306506395339966,
|
30790 |
+
"learning_rate": 4.766146993318486e-06,
|
30791 |
+
"loss": 1.382,
|
30792 |
+
"step": 4393
|
30793 |
+
},
|
30794 |
+
{
|
30795 |
+
"epoch": 0.9764444444444444,
|
30796 |
+
"grad_norm": 0.9537327885627747,
|
30797 |
+
"learning_rate": 4.721603563474388e-06,
|
30798 |
+
"loss": 1.432,
|
30799 |
+
"step": 4394
|
30800 |
+
},
|
30801 |
+
{
|
30802 |
+
"epoch": 0.9766666666666667,
|
30803 |
+
"grad_norm": 0.9183233976364136,
|
30804 |
+
"learning_rate": 4.67706013363029e-06,
|
30805 |
+
"loss": 1.214,
|
30806 |
+
"step": 4395
|
30807 |
+
},
|
30808 |
+
{
|
30809 |
+
"epoch": 0.9768888888888889,
|
30810 |
+
"grad_norm": 0.9410824775695801,
|
30811 |
+
"learning_rate": 4.632516703786192e-06,
|
30812 |
+
"loss": 1.1194,
|
30813 |
+
"step": 4396
|
30814 |
+
},
|
30815 |
+
{
|
30816 |
+
"epoch": 0.9771111111111112,
|
30817 |
+
"grad_norm": 1.0940196514129639,
|
30818 |
+
"learning_rate": 4.587973273942094e-06,
|
30819 |
+
"loss": 1.1115,
|
30820 |
+
"step": 4397
|
30821 |
+
},
|
30822 |
+
{
|
30823 |
+
"epoch": 0.9773333333333334,
|
30824 |
+
"grad_norm": 1.0624735355377197,
|
30825 |
+
"learning_rate": 4.5434298440979965e-06,
|
30826 |
+
"loss": 1.0935,
|
30827 |
+
"step": 4398
|
30828 |
+
},
|
30829 |
+
{
|
30830 |
+
"epoch": 0.9775555555555555,
|
30831 |
+
"grad_norm": 1.0532846450805664,
|
30832 |
+
"learning_rate": 4.498886414253898e-06,
|
30833 |
+
"loss": 1.1252,
|
30834 |
+
"step": 4399
|
30835 |
+
},
|
30836 |
+
{
|
30837 |
+
"epoch": 0.9777777777777777,
|
30838 |
+
"grad_norm": 1.0047916173934937,
|
30839 |
+
"learning_rate": 4.4543429844097995e-06,
|
30840 |
+
"loss": 0.6994,
|
30841 |
+
"step": 4400
|
30842 |
+
},
|
30843 |
+
{
|
30844 |
+
"epoch": 0.978,
|
30845 |
+
"grad_norm": 0.7360401153564453,
|
30846 |
+
"learning_rate": 4.4097995545657015e-06,
|
30847 |
+
"loss": 1.1339,
|
30848 |
+
"step": 4401
|
30849 |
+
},
|
30850 |
+
{
|
30851 |
+
"epoch": 0.9782222222222222,
|
30852 |
+
"grad_norm": 0.8057011961936951,
|
30853 |
+
"learning_rate": 4.365256124721604e-06,
|
30854 |
+
"loss": 2.1264,
|
30855 |
+
"step": 4402
|
30856 |
+
},
|
30857 |
+
{
|
30858 |
+
"epoch": 0.9784444444444444,
|
30859 |
+
"grad_norm": 0.6433674097061157,
|
30860 |
+
"learning_rate": 4.320712694877506e-06,
|
30861 |
+
"loss": 1.1829,
|
30862 |
+
"step": 4403
|
30863 |
+
},
|
30864 |
+
{
|
30865 |
+
"epoch": 0.9786666666666667,
|
30866 |
+
"grad_norm": 0.5455031991004944,
|
30867 |
+
"learning_rate": 4.2761692650334074e-06,
|
30868 |
+
"loss": 1.2065,
|
30869 |
+
"step": 4404
|
30870 |
+
},
|
30871 |
+
{
|
30872 |
+
"epoch": 0.9788888888888889,
|
30873 |
+
"grad_norm": 0.883698582649231,
|
30874 |
+
"learning_rate": 4.231625835189309e-06,
|
30875 |
+
"loss": 2.0593,
|
30876 |
+
"step": 4405
|
30877 |
+
},
|
30878 |
+
{
|
30879 |
+
"epoch": 0.9791111111111112,
|
30880 |
+
"grad_norm": 0.6390405297279358,
|
30881 |
+
"learning_rate": 4.187082405345212e-06,
|
30882 |
+
"loss": 0.9819,
|
30883 |
+
"step": 4406
|
30884 |
+
},
|
30885 |
+
{
|
30886 |
+
"epoch": 0.9793333333333333,
|
30887 |
+
"grad_norm": 0.8261483311653137,
|
30888 |
+
"learning_rate": 4.142538975501114e-06,
|
30889 |
+
"loss": 2.308,
|
30890 |
+
"step": 4407
|
30891 |
+
},
|
30892 |
+
{
|
30893 |
+
"epoch": 0.9795555555555555,
|
30894 |
+
"grad_norm": 0.6975874900817871,
|
30895 |
+
"learning_rate": 4.097995545657015e-06,
|
30896 |
+
"loss": 1.0839,
|
30897 |
+
"step": 4408
|
30898 |
+
},
|
30899 |
+
{
|
30900 |
+
"epoch": 0.9797777777777777,
|
30901 |
+
"grad_norm": 0.6761125326156616,
|
30902 |
+
"learning_rate": 4.053452115812917e-06,
|
30903 |
+
"loss": 0.9341,
|
30904 |
+
"step": 4409
|
30905 |
+
},
|
30906 |
+
{
|
30907 |
+
"epoch": 0.98,
|
30908 |
+
"grad_norm": 0.7890470027923584,
|
30909 |
+
"learning_rate": 4.00890868596882e-06,
|
30910 |
+
"loss": 1.901,
|
30911 |
+
"step": 4410
|
30912 |
+
},
|
30913 |
+
{
|
30914 |
+
"epoch": 0.9802222222222222,
|
30915 |
+
"grad_norm": 0.9101024866104126,
|
30916 |
+
"learning_rate": 3.964365256124722e-06,
|
30917 |
+
"loss": 1.9368,
|
30918 |
+
"step": 4411
|
30919 |
+
},
|
30920 |
+
{
|
30921 |
+
"epoch": 0.9804444444444445,
|
30922 |
+
"grad_norm": 0.9786936640739441,
|
30923 |
+
"learning_rate": 3.919821826280624e-06,
|
30924 |
+
"loss": 2.1794,
|
30925 |
+
"step": 4412
|
30926 |
+
},
|
30927 |
+
{
|
30928 |
+
"epoch": 0.9806666666666667,
|
30929 |
+
"grad_norm": 0.7119241952896118,
|
30930 |
+
"learning_rate": 3.875278396436525e-06,
|
30931 |
+
"loss": 1.0641,
|
30932 |
+
"step": 4413
|
30933 |
+
},
|
30934 |
+
{
|
30935 |
+
"epoch": 0.9808888888888889,
|
30936 |
+
"grad_norm": 0.09762410819530487,
|
30937 |
+
"learning_rate": 3.830734966592428e-06,
|
30938 |
+
"loss": 0.0164,
|
30939 |
+
"step": 4414
|
30940 |
+
},
|
30941 |
+
{
|
30942 |
+
"epoch": 0.9811111111111112,
|
30943 |
+
"grad_norm": 0.7070305943489075,
|
30944 |
+
"learning_rate": 3.78619153674833e-06,
|
30945 |
+
"loss": 0.9271,
|
30946 |
+
"step": 4415
|
30947 |
+
},
|
30948 |
+
{
|
30949 |
+
"epoch": 0.9813333333333333,
|
30950 |
+
"grad_norm": 0.9111929535865784,
|
30951 |
+
"learning_rate": 3.7416481069042315e-06,
|
30952 |
+
"loss": 1.7253,
|
30953 |
+
"step": 4416
|
30954 |
+
},
|
30955 |
+
{
|
30956 |
+
"epoch": 0.9815555555555555,
|
30957 |
+
"grad_norm": 1.0224978923797607,
|
30958 |
+
"learning_rate": 3.6971046770601335e-06,
|
30959 |
+
"loss": 2.0652,
|
30960 |
+
"step": 4417
|
30961 |
+
},
|
30962 |
+
{
|
30963 |
+
"epoch": 0.9817777777777777,
|
30964 |
+
"grad_norm": 1.2484158277511597,
|
30965 |
+
"learning_rate": 3.652561247216036e-06,
|
30966 |
+
"loss": 2.263,
|
30967 |
+
"step": 4418
|
30968 |
+
},
|
30969 |
+
{
|
30970 |
+
"epoch": 0.982,
|
30971 |
+
"grad_norm": 0.6740500926971436,
|
30972 |
+
"learning_rate": 3.608017817371938e-06,
|
30973 |
+
"loss": 0.8257,
|
30974 |
+
"step": 4419
|
30975 |
+
},
|
30976 |
+
{
|
30977 |
+
"epoch": 0.9822222222222222,
|
30978 |
+
"grad_norm": 0.06990643590688705,
|
30979 |
+
"learning_rate": 3.56347438752784e-06,
|
30980 |
+
"loss": 0.0152,
|
30981 |
+
"step": 4420
|
30982 |
+
},
|
30983 |
+
{
|
30984 |
+
"epoch": 0.9824444444444445,
|
30985 |
+
"grad_norm": 0.728216826915741,
|
30986 |
+
"learning_rate": 3.5189309576837414e-06,
|
30987 |
+
"loss": 0.8901,
|
30988 |
+
"step": 4421
|
30989 |
+
},
|
30990 |
+
{
|
30991 |
+
"epoch": 0.9826666666666667,
|
30992 |
+
"grad_norm": 0.06907333433628082,
|
30993 |
+
"learning_rate": 3.474387527839644e-06,
|
30994 |
+
"loss": 0.0152,
|
30995 |
+
"step": 4422
|
30996 |
+
},
|
30997 |
+
{
|
30998 |
+
"epoch": 0.9828888888888889,
|
30999 |
+
"grad_norm": 0.792972981929779,
|
31000 |
+
"learning_rate": 3.4298440979955457e-06,
|
31001 |
+
"loss": 0.8383,
|
31002 |
+
"step": 4423
|
31003 |
+
},
|
31004 |
+
{
|
31005 |
+
"epoch": 0.9831111111111112,
|
31006 |
+
"grad_norm": 0.9240522384643555,
|
31007 |
+
"learning_rate": 3.3853006681514477e-06,
|
31008 |
+
"loss": 1.9004,
|
31009 |
+
"step": 4424
|
31010 |
+
},
|
31011 |
+
{
|
31012 |
+
"epoch": 0.9833333333333333,
|
31013 |
+
"grad_norm": 0.9684634208679199,
|
31014 |
+
"learning_rate": 3.34075723830735e-06,
|
31015 |
+
"loss": 1.7412,
|
31016 |
+
"step": 4425
|
31017 |
+
},
|
31018 |
+
{
|
31019 |
+
"epoch": 0.9835555555555555,
|
31020 |
+
"grad_norm": 1.075197696685791,
|
31021 |
+
"learning_rate": 3.296213808463252e-06,
|
31022 |
+
"loss": 1.4785,
|
31023 |
+
"step": 4426
|
31024 |
+
},
|
31025 |
+
{
|
31026 |
+
"epoch": 0.9837777777777778,
|
31027 |
+
"grad_norm": 0.9526484608650208,
|
31028 |
+
"learning_rate": 3.2516703786191536e-06,
|
31029 |
+
"loss": 1.6998,
|
31030 |
+
"step": 4427
|
31031 |
+
},
|
31032 |
+
{
|
31033 |
+
"epoch": 0.984,
|
31034 |
+
"grad_norm": 0.995002269744873,
|
31035 |
+
"learning_rate": 3.2071269487750556e-06,
|
31036 |
+
"loss": 1.5562,
|
31037 |
+
"step": 4428
|
31038 |
+
},
|
31039 |
+
{
|
31040 |
+
"epoch": 0.9842222222222222,
|
31041 |
+
"grad_norm": 1.0168581008911133,
|
31042 |
+
"learning_rate": 3.162583518930958e-06,
|
31043 |
+
"loss": 1.722,
|
31044 |
+
"step": 4429
|
31045 |
+
},
|
31046 |
+
{
|
31047 |
+
"epoch": 0.9844444444444445,
|
31048 |
+
"grad_norm": 0.06682226806879044,
|
31049 |
+
"learning_rate": 3.11804008908686e-06,
|
31050 |
+
"loss": 0.0176,
|
31051 |
+
"step": 4430
|
31052 |
+
},
|
31053 |
+
{
|
31054 |
+
"epoch": 0.9846666666666667,
|
31055 |
+
"grad_norm": 0.06983762979507446,
|
31056 |
+
"learning_rate": 3.073496659242762e-06,
|
31057 |
+
"loss": 0.0173,
|
31058 |
+
"step": 4431
|
31059 |
+
},
|
31060 |
+
{
|
31061 |
+
"epoch": 0.9848888888888889,
|
31062 |
+
"grad_norm": 0.06633459031581879,
|
31063 |
+
"learning_rate": 3.028953229398664e-06,
|
31064 |
+
"loss": 0.0174,
|
31065 |
+
"step": 4432
|
31066 |
+
},
|
31067 |
+
{
|
31068 |
+
"epoch": 0.9851111111111112,
|
31069 |
+
"grad_norm": 0.06410173326730728,
|
31070 |
+
"learning_rate": 2.984409799554566e-06,
|
31071 |
+
"loss": 0.0175,
|
31072 |
+
"step": 4433
|
31073 |
+
},
|
31074 |
+
{
|
31075 |
+
"epoch": 0.9853333333333333,
|
31076 |
+
"grad_norm": 0.8998127579689026,
|
31077 |
+
"learning_rate": 2.939866369710468e-06,
|
31078 |
+
"loss": 1.6707,
|
31079 |
+
"step": 4434
|
31080 |
+
},
|
31081 |
+
{
|
31082 |
+
"epoch": 0.9855555555555555,
|
31083 |
+
"grad_norm": 0.08308030664920807,
|
31084 |
+
"learning_rate": 2.89532293986637e-06,
|
31085 |
+
"loss": 0.0179,
|
31086 |
+
"step": 4435
|
31087 |
+
},
|
31088 |
+
{
|
31089 |
+
"epoch": 0.9857777777777778,
|
31090 |
+
"grad_norm": 0.8417572379112244,
|
31091 |
+
"learning_rate": 2.8507795100222718e-06,
|
31092 |
+
"loss": 0.966,
|
31093 |
+
"step": 4436
|
31094 |
+
},
|
31095 |
+
{
|
31096 |
+
"epoch": 0.986,
|
31097 |
+
"grad_norm": 1.395193338394165,
|
31098 |
+
"learning_rate": 2.8062360801781737e-06,
|
31099 |
+
"loss": 1.8783,
|
31100 |
+
"step": 4437
|
31101 |
+
},
|
31102 |
+
{
|
31103 |
+
"epoch": 0.9862222222222222,
|
31104 |
+
"grad_norm": 0.9416733384132385,
|
31105 |
+
"learning_rate": 2.761692650334076e-06,
|
31106 |
+
"loss": 1.5385,
|
31107 |
+
"step": 4438
|
31108 |
+
},
|
31109 |
+
{
|
31110 |
+
"epoch": 0.9864444444444445,
|
31111 |
+
"grad_norm": 1.100425362586975,
|
31112 |
+
"learning_rate": 2.7171492204899777e-06,
|
31113 |
+
"loss": 1.7733,
|
31114 |
+
"step": 4439
|
31115 |
+
},
|
31116 |
+
{
|
31117 |
+
"epoch": 0.9866666666666667,
|
31118 |
+
"grad_norm": 0.7649857401847839,
|
31119 |
+
"learning_rate": 2.67260579064588e-06,
|
31120 |
+
"loss": 0.7851,
|
31121 |
+
"step": 4440
|
31122 |
+
},
|
31123 |
+
{
|
31124 |
+
"epoch": 0.9868888888888889,
|
31125 |
+
"grad_norm": 1.1875056028366089,
|
31126 |
+
"learning_rate": 2.6280623608017816e-06,
|
31127 |
+
"loss": 1.6325,
|
31128 |
+
"step": 4441
|
31129 |
+
},
|
31130 |
+
{
|
31131 |
+
"epoch": 0.9871111111111112,
|
31132 |
+
"grad_norm": 1.1401832103729248,
|
31133 |
+
"learning_rate": 2.583518930957684e-06,
|
31134 |
+
"loss": 1.6937,
|
31135 |
+
"step": 4442
|
31136 |
+
},
|
31137 |
+
{
|
31138 |
+
"epoch": 0.9873333333333333,
|
31139 |
+
"grad_norm": 1.1035478115081787,
|
31140 |
+
"learning_rate": 2.5389755011135856e-06,
|
31141 |
+
"loss": 1.526,
|
31142 |
+
"step": 4443
|
31143 |
+
},
|
31144 |
+
{
|
31145 |
+
"epoch": 0.9875555555555555,
|
31146 |
+
"grad_norm": 0.8037136793136597,
|
31147 |
+
"learning_rate": 2.494432071269488e-06,
|
31148 |
+
"loss": 0.7856,
|
31149 |
+
"step": 4444
|
31150 |
+
},
|
31151 |
+
{
|
31152 |
+
"epoch": 0.9877777777777778,
|
31153 |
+
"grad_norm": 1.0584372282028198,
|
31154 |
+
"learning_rate": 2.44988864142539e-06,
|
31155 |
+
"loss": 1.3084,
|
31156 |
+
"step": 4445
|
31157 |
+
},
|
31158 |
+
{
|
31159 |
+
"epoch": 0.988,
|
31160 |
+
"grad_norm": 0.1836099475622177,
|
31161 |
+
"learning_rate": 2.405345211581292e-06,
|
31162 |
+
"loss": 0.0299,
|
31163 |
+
"step": 4446
|
31164 |
+
},
|
31165 |
+
{
|
31166 |
+
"epoch": 0.9882222222222222,
|
31167 |
+
"grad_norm": 1.108872413635254,
|
31168 |
+
"learning_rate": 2.360801781737194e-06,
|
31169 |
+
"loss": 1.0455,
|
31170 |
+
"step": 4447
|
31171 |
+
},
|
31172 |
+
{
|
31173 |
+
"epoch": 0.9884444444444445,
|
31174 |
+
"grad_norm": 0.6207655072212219,
|
31175 |
+
"learning_rate": 2.316258351893096e-06,
|
31176 |
+
"loss": 0.4939,
|
31177 |
+
"step": 4448
|
31178 |
+
},
|
31179 |
+
{
|
31180 |
+
"epoch": 0.9886666666666667,
|
31181 |
+
"grad_norm": 0.14554363489151,
|
31182 |
+
"learning_rate": 2.2717149220489982e-06,
|
31183 |
+
"loss": 0.0318,
|
31184 |
+
"step": 4449
|
31185 |
+
},
|
31186 |
+
{
|
31187 |
+
"epoch": 0.9888888888888889,
|
31188 |
+
"grad_norm": 1.2572603225708008,
|
31189 |
+
"learning_rate": 2.2271714922048998e-06,
|
31190 |
+
"loss": 1.1098,
|
31191 |
+
"step": 4450
|
31192 |
+
},
|
31193 |
+
{
|
31194 |
+
"epoch": 0.9891111111111112,
|
31195 |
+
"grad_norm": 0.04474545270204544,
|
31196 |
+
"learning_rate": 2.182628062360802e-06,
|
31197 |
+
"loss": 0.0103,
|
31198 |
+
"step": 4451
|
31199 |
+
},
|
31200 |
+
{
|
31201 |
+
"epoch": 0.9893333333333333,
|
31202 |
+
"grad_norm": 0.04636682942509651,
|
31203 |
+
"learning_rate": 2.1380846325167037e-06,
|
31204 |
+
"loss": 0.0101,
|
31205 |
+
"step": 4452
|
31206 |
+
},
|
31207 |
+
{
|
31208 |
+
"epoch": 0.9895555555555555,
|
31209 |
+
"grad_norm": 0.5653097033500671,
|
31210 |
+
"learning_rate": 2.093541202672606e-06,
|
31211 |
+
"loss": 1.0041,
|
31212 |
+
"step": 4453
|
31213 |
+
},
|
31214 |
+
{
|
31215 |
+
"epoch": 0.9897777777777778,
|
31216 |
+
"grad_norm": 0.4789440333843231,
|
31217 |
+
"learning_rate": 2.0489977728285077e-06,
|
31218 |
+
"loss": 0.979,
|
31219 |
+
"step": 4454
|
31220 |
+
},
|
31221 |
+
{
|
31222 |
+
"epoch": 0.99,
|
31223 |
+
"grad_norm": 0.8047142028808594,
|
31224 |
+
"learning_rate": 2.00445434298441e-06,
|
31225 |
+
"loss": 2.0886,
|
31226 |
+
"step": 4455
|
31227 |
+
},
|
31228 |
+
{
|
31229 |
+
"epoch": 0.9902222222222222,
|
31230 |
+
"grad_norm": 0.8989213109016418,
|
31231 |
+
"learning_rate": 1.959910913140312e-06,
|
31232 |
+
"loss": 2.1387,
|
31233 |
+
"step": 4456
|
31234 |
+
},
|
31235 |
+
{
|
31236 |
+
"epoch": 0.9904444444444445,
|
31237 |
+
"grad_norm": 0.06995019316673279,
|
31238 |
+
"learning_rate": 1.915367483296214e-06,
|
31239 |
+
"loss": 0.0109,
|
31240 |
+
"step": 4457
|
31241 |
+
},
|
31242 |
+
{
|
31243 |
+
"epoch": 0.9906666666666667,
|
31244 |
+
"grad_norm": 0.07215920835733414,
|
31245 |
+
"learning_rate": 1.8708240534521158e-06,
|
31246 |
+
"loss": 0.0108,
|
31247 |
+
"step": 4458
|
31248 |
+
},
|
31249 |
+
{
|
31250 |
+
"epoch": 0.9908888888888889,
|
31251 |
+
"grad_norm": 0.07202310115098953,
|
31252 |
+
"learning_rate": 1.826280623608018e-06,
|
31253 |
+
"loss": 0.0109,
|
31254 |
+
"step": 4459
|
31255 |
+
},
|
31256 |
+
{
|
31257 |
+
"epoch": 0.9911111111111112,
|
31258 |
+
"grad_norm": 0.9508035778999329,
|
31259 |
+
"learning_rate": 1.78173719376392e-06,
|
31260 |
+
"loss": 2.2415,
|
31261 |
+
"step": 4460
|
31262 |
+
},
|
31263 |
+
{
|
31264 |
+
"epoch": 0.9913333333333333,
|
31265 |
+
"grad_norm": 0.891727864742279,
|
31266 |
+
"learning_rate": 1.737193763919822e-06,
|
31267 |
+
"loss": 1.9116,
|
31268 |
+
"step": 4461
|
31269 |
+
},
|
31270 |
+
{
|
31271 |
+
"epoch": 0.9915555555555555,
|
31272 |
+
"grad_norm": 1.0234503746032715,
|
31273 |
+
"learning_rate": 1.6926503340757238e-06,
|
31274 |
+
"loss": 2.0408,
|
31275 |
+
"step": 4462
|
31276 |
+
},
|
31277 |
+
{
|
31278 |
+
"epoch": 0.9917777777777778,
|
31279 |
+
"grad_norm": 0.8998834490776062,
|
31280 |
+
"learning_rate": 1.648106904231626e-06,
|
31281 |
+
"loss": 2.0895,
|
31282 |
+
"step": 4463
|
31283 |
+
},
|
31284 |
+
{
|
31285 |
+
"epoch": 0.992,
|
31286 |
+
"grad_norm": 0.9309079051017761,
|
31287 |
+
"learning_rate": 1.6035634743875278e-06,
|
31288 |
+
"loss": 1.9546,
|
31289 |
+
"step": 4464
|
31290 |
+
},
|
31291 |
+
{
|
31292 |
+
"epoch": 0.9922222222222222,
|
31293 |
+
"grad_norm": 0.903396725654602,
|
31294 |
+
"learning_rate": 1.55902004454343e-06,
|
31295 |
+
"loss": 1.0776,
|
31296 |
+
"step": 4465
|
31297 |
+
},
|
31298 |
+
{
|
31299 |
+
"epoch": 0.9924444444444445,
|
31300 |
+
"grad_norm": 1.0036734342575073,
|
31301 |
+
"learning_rate": 1.514476614699332e-06,
|
31302 |
+
"loss": 1.7439,
|
31303 |
+
"step": 4466
|
31304 |
+
},
|
31305 |
+
{
|
31306 |
+
"epoch": 0.9926666666666667,
|
31307 |
+
"grad_norm": 0.9246737957000732,
|
31308 |
+
"learning_rate": 1.469933184855234e-06,
|
31309 |
+
"loss": 1.7637,
|
31310 |
+
"step": 4467
|
31311 |
+
},
|
31312 |
+
{
|
31313 |
+
"epoch": 0.9928888888888889,
|
31314 |
+
"grad_norm": 1.0618118047714233,
|
31315 |
+
"learning_rate": 1.4253897550111359e-06,
|
31316 |
+
"loss": 1.9589,
|
31317 |
+
"step": 4468
|
31318 |
+
},
|
31319 |
+
{
|
31320 |
+
"epoch": 0.9931111111111111,
|
31321 |
+
"grad_norm": 1.1122076511383057,
|
31322 |
+
"learning_rate": 1.380846325167038e-06,
|
31323 |
+
"loss": 1.9023,
|
31324 |
+
"step": 4469
|
31325 |
+
},
|
31326 |
+
{
|
31327 |
+
"epoch": 0.9933333333333333,
|
31328 |
+
"grad_norm": 1.027601957321167,
|
31329 |
+
"learning_rate": 1.33630289532294e-06,
|
31330 |
+
"loss": 1.9814,
|
31331 |
+
"step": 4470
|
31332 |
+
},
|
31333 |
+
{
|
31334 |
+
"epoch": 0.9935555555555555,
|
31335 |
+
"grad_norm": 0.06850501894950867,
|
31336 |
+
"learning_rate": 1.291759465478842e-06,
|
31337 |
+
"loss": 0.0152,
|
31338 |
+
"step": 4471
|
31339 |
+
},
|
31340 |
+
{
|
31341 |
+
"epoch": 0.9937777777777778,
|
31342 |
+
"grad_norm": 0.067985400557518,
|
31343 |
+
"learning_rate": 1.247216035634744e-06,
|
31344 |
+
"loss": 0.0152,
|
31345 |
+
"step": 4472
|
31346 |
+
},
|
31347 |
+
{
|
31348 |
+
"epoch": 0.994,
|
31349 |
+
"grad_norm": 1.0229130983352661,
|
31350 |
+
"learning_rate": 1.202672605790646e-06,
|
31351 |
+
"loss": 1.6158,
|
31352 |
+
"step": 4473
|
31353 |
+
},
|
31354 |
+
{
|
31355 |
+
"epoch": 0.9942222222222222,
|
31356 |
+
"grad_norm": 0.6642321944236755,
|
31357 |
+
"learning_rate": 1.158129175946548e-06,
|
31358 |
+
"loss": 0.7592,
|
31359 |
+
"step": 4474
|
31360 |
+
},
|
31361 |
+
{
|
31362 |
+
"epoch": 0.9944444444444445,
|
31363 |
+
"grad_norm": 1.025769829750061,
|
31364 |
+
"learning_rate": 1.1135857461024499e-06,
|
31365 |
+
"loss": 1.8864,
|
31366 |
+
"step": 4475
|
31367 |
+
},
|
31368 |
+
{
|
31369 |
+
"epoch": 0.9946666666666667,
|
31370 |
+
"grad_norm": 1.1777735948562622,
|
31371 |
+
"learning_rate": 1.0690423162583519e-06,
|
31372 |
+
"loss": 1.9098,
|
31373 |
+
"step": 4476
|
31374 |
+
},
|
31375 |
+
{
|
31376 |
+
"epoch": 0.9948888888888889,
|
31377 |
+
"grad_norm": 1.0232651233673096,
|
31378 |
+
"learning_rate": 1.0244988864142538e-06,
|
31379 |
+
"loss": 1.6622,
|
31380 |
+
"step": 4477
|
31381 |
+
},
|
31382 |
+
{
|
31383 |
+
"epoch": 0.9951111111111111,
|
31384 |
+
"grad_norm": 1.0267844200134277,
|
31385 |
+
"learning_rate": 9.79955456570156e-07,
|
31386 |
+
"loss": 1.8175,
|
31387 |
+
"step": 4478
|
31388 |
+
},
|
31389 |
+
{
|
31390 |
+
"epoch": 0.9953333333333333,
|
31391 |
+
"grad_norm": 0.7749679684638977,
|
31392 |
+
"learning_rate": 9.354120267260579e-07,
|
31393 |
+
"loss": 0.906,
|
31394 |
+
"step": 4479
|
31395 |
+
},
|
31396 |
+
{
|
31397 |
+
"epoch": 0.9955555555555555,
|
31398 |
+
"grad_norm": 0.06536448746919632,
|
31399 |
+
"learning_rate": 8.9086859688196e-07,
|
31400 |
+
"loss": 0.0173,
|
31401 |
+
"step": 4480
|
31402 |
+
},
|
31403 |
+
{
|
31404 |
+
"epoch": 0.9957777777777778,
|
31405 |
+
"grad_norm": 0.6798564195632935,
|
31406 |
+
"learning_rate": 8.463251670378619e-07,
|
31407 |
+
"loss": 0.7955,
|
31408 |
+
"step": 4481
|
31409 |
+
},
|
31410 |
+
{
|
31411 |
+
"epoch": 0.996,
|
31412 |
+
"grad_norm": 0.06655056774616241,
|
31413 |
+
"learning_rate": 8.017817371937639e-07,
|
31414 |
+
"loss": 0.0176,
|
31415 |
+
"step": 4482
|
31416 |
+
},
|
31417 |
+
{
|
31418 |
+
"epoch": 0.9962222222222222,
|
31419 |
+
"grad_norm": 0.7525641918182373,
|
31420 |
+
"learning_rate": 7.57238307349666e-07,
|
31421 |
+
"loss": 0.8103,
|
31422 |
+
"step": 4483
|
31423 |
+
},
|
31424 |
+
{
|
31425 |
+
"epoch": 0.9964444444444445,
|
31426 |
+
"grad_norm": 0.6724408268928528,
|
31427 |
+
"learning_rate": 7.126948775055679e-07,
|
31428 |
+
"loss": 0.7957,
|
31429 |
+
"step": 4484
|
31430 |
+
},
|
31431 |
+
{
|
31432 |
+
"epoch": 0.9966666666666667,
|
31433 |
+
"grad_norm": 0.99349445104599,
|
31434 |
+
"learning_rate": 6.6815144766147e-07,
|
31435 |
+
"loss": 1.691,
|
31436 |
+
"step": 4485
|
31437 |
+
},
|
31438 |
+
{
|
31439 |
+
"epoch": 0.9968888888888889,
|
31440 |
+
"grad_norm": 1.0608917474746704,
|
31441 |
+
"learning_rate": 6.23608017817372e-07,
|
31442 |
+
"loss": 1.6244,
|
31443 |
+
"step": 4486
|
31444 |
+
},
|
31445 |
+
{
|
31446 |
+
"epoch": 0.9971111111111111,
|
31447 |
+
"grad_norm": 0.07752467691898346,
|
31448 |
+
"learning_rate": 5.79064587973274e-07,
|
31449 |
+
"loss": 0.018,
|
31450 |
+
"step": 4487
|
31451 |
+
},
|
31452 |
+
{
|
31453 |
+
"epoch": 0.9973333333333333,
|
31454 |
+
"grad_norm": 0.7708075046539307,
|
31455 |
+
"learning_rate": 5.345211581291759e-07,
|
31456 |
+
"loss": 0.8414,
|
31457 |
+
"step": 4488
|
31458 |
+
},
|
31459 |
+
{
|
31460 |
+
"epoch": 0.9975555555555555,
|
31461 |
+
"grad_norm": 0.6976569890975952,
|
31462 |
+
"learning_rate": 4.89977728285078e-07,
|
31463 |
+
"loss": 0.7637,
|
31464 |
+
"step": 4489
|
31465 |
+
},
|
31466 |
+
{
|
31467 |
+
"epoch": 0.9977777777777778,
|
31468 |
+
"grad_norm": 1.0548564195632935,
|
31469 |
+
"learning_rate": 4.4543429844098e-07,
|
31470 |
+
"loss": 1.4826,
|
31471 |
+
"step": 4490
|
31472 |
+
},
|
31473 |
+
{
|
31474 |
+
"epoch": 0.998,
|
31475 |
+
"grad_norm": 0.6447573900222778,
|
31476 |
+
"learning_rate": 4.0089086859688195e-07,
|
31477 |
+
"loss": 0.6006,
|
31478 |
+
"step": 4491
|
31479 |
+
},
|
31480 |
+
{
|
31481 |
+
"epoch": 0.9982222222222222,
|
31482 |
+
"grad_norm": 1.2821402549743652,
|
31483 |
+
"learning_rate": 3.5634743875278397e-07,
|
31484 |
+
"loss": 1.8731,
|
31485 |
+
"step": 4492
|
31486 |
+
},
|
31487 |
+
{
|
31488 |
+
"epoch": 0.9984444444444445,
|
31489 |
+
"grad_norm": 1.1518702507019043,
|
31490 |
+
"learning_rate": 3.11804008908686e-07,
|
31491 |
+
"loss": 1.5884,
|
31492 |
+
"step": 4493
|
31493 |
+
},
|
31494 |
+
{
|
31495 |
+
"epoch": 0.9986666666666667,
|
31496 |
+
"grad_norm": 1.11997389793396,
|
31497 |
+
"learning_rate": 2.6726057906458796e-07,
|
31498 |
+
"loss": 1.4486,
|
31499 |
+
"step": 4494
|
31500 |
+
},
|
31501 |
+
{
|
31502 |
+
"epoch": 0.9988888888888889,
|
31503 |
+
"grad_norm": 1.1092532873153687,
|
31504 |
+
"learning_rate": 2.2271714922049e-07,
|
31505 |
+
"loss": 1.3308,
|
31506 |
+
"step": 4495
|
31507 |
+
},
|
31508 |
+
{
|
31509 |
+
"epoch": 0.9991111111111111,
|
31510 |
+
"grad_norm": 0.17926262319087982,
|
31511 |
+
"learning_rate": 1.7817371937639199e-07,
|
31512 |
+
"loss": 0.0297,
|
31513 |
+
"step": 4496
|
31514 |
+
},
|
31515 |
+
{
|
31516 |
+
"epoch": 0.9993333333333333,
|
31517 |
+
"grad_norm": 1.144982933998108,
|
31518 |
+
"learning_rate": 1.3363028953229398e-07,
|
31519 |
+
"loss": 1.2413,
|
31520 |
+
"step": 4497
|
31521 |
+
},
|
31522 |
+
{
|
31523 |
+
"epoch": 0.9995555555555555,
|
31524 |
+
"grad_norm": 1.1863489151000977,
|
31525 |
+
"learning_rate": 8.908685968819599e-08,
|
31526 |
+
"loss": 1.1693,
|
31527 |
+
"step": 4498
|
31528 |
+
},
|
31529 |
+
{
|
31530 |
+
"epoch": 0.9997777777777778,
|
31531 |
+
"grad_norm": 0.7812955975532532,
|
31532 |
+
"learning_rate": 4.4543429844097996e-08,
|
31533 |
+
"loss": 0.5973,
|
31534 |
+
"step": 4499
|
31535 |
+
},
|
31536 |
+
{
|
31537 |
+
"epoch": 1.0,
|
31538 |
+
"grad_norm": 1.0512616634368896,
|
31539 |
+
"learning_rate": 0.0,
|
31540 |
+
"loss": 0.8343,
|
31541 |
+
"step": 4500
|
31542 |
+
},
|
31543 |
+
{
|
31544 |
+
"epoch": 1.0,
|
31545 |
+
"eval_loss": 1.1682192087173462,
|
31546 |
+
"eval_runtime": 240.8872,
|
31547 |
+
"eval_samples_per_second": 4.151,
|
31548 |
+
"eval_steps_per_second": 4.151,
|
31549 |
+
"step": 4500
|
31550 |
}
|
31551 |
],
|
31552 |
"logging_steps": 1,
|
|
|
31561 |
"should_evaluate": false,
|
31562 |
"should_log": false,
|
31563 |
"should_save": true,
|
31564 |
+
"should_training_stop": true
|
31565 |
},
|
31566 |
"attributes": {}
|
31567 |
}
|
31568 |
},
|
31569 |
+
"total_flos": 4.847769692985754e+16,
|
31570 |
"train_batch_size": 1,
|
31571 |
"trial_name": null,
|
31572 |
"trial_params": null
|