abaddon182 commited on
Commit
2fb415a
·
verified ·
1 Parent(s): 0bf742f

Training in progress, step 2633, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0a522ed20fbdcfb113dac066fed8d5b62b7ac84e9b723a4d5198b0889376683
3
  size 645975704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:249855c09c45849f7c73a6e694c5b1ef53724d710c791981cb8a57e2e47d8bb0
3
  size 645975704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82d998e6f69f3908d5c4ad510978d8ef138168d04280863066e625fc1c9faf28
3
  size 328468852
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb86dc30259e5769411b56c74f8cfbbc600b1b6d0721871505a9b32b9f758a58
3
  size 328468852
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bec98415c91702db8febabe5fa4b06fa655e77182dea80a3c11ffa1ab4776c8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ec212340c0f9a97e8e05869b4820cabc71039c05580684b502c1f1e5bdf06e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8edf9ab4b0b1e6233954c0a1ce2a991dbd70522c8d64dd611a655fbe0011ac0d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:033d2edcf91400e7e05df9655654c43012ce54b45d76ed7b95e8c0821772d728
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.2001230716705322,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2550",
4
- "epoch": 0.9685689868008737,
5
  "eval_steps": 150,
6
- "global_step": 2550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1936,6 +1936,62 @@
1936
  "eval_samples_per_second": 26.423,
1937
  "eval_steps_per_second": 6.609,
1938
  "step": 2550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1939
  }
1940
  ],
1941
  "logging_steps": 10,
@@ -1959,12 +2015,12 @@
1959
  "should_evaluate": false,
1960
  "should_log": false,
1961
  "should_save": true,
1962
- "should_training_stop": false
1963
  },
1964
  "attributes": {}
1965
  }
1966
  },
1967
- "total_flos": 1.812908073811968e+18,
1968
  "train_batch_size": 8,
1969
  "trial_name": null,
1970
  "trial_params": null
 
1
  {
2
  "best_metric": 1.2001230716705322,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2550",
4
+ "epoch": 1.000094957743804,
5
  "eval_steps": 150,
6
+ "global_step": 2633,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1936
  "eval_samples_per_second": 26.423,
1937
  "eval_steps_per_second": 6.609,
1938
  "step": 2550
1939
+ },
1940
+ {
1941
+ "epoch": 0.9723672965530339,
1942
+ "grad_norm": 1.9546929597854614,
1943
+ "learning_rate": 1.969478955326509e-07,
1944
+ "loss": 1.3771,
1945
+ "step": 2560
1946
+ },
1947
+ {
1948
+ "epoch": 0.9761656063051942,
1949
+ "grad_norm": 2.411525249481201,
1950
+ "learning_rate": 1.4670994081297795e-07,
1951
+ "loss": 1.2084,
1952
+ "step": 2570
1953
+ },
1954
+ {
1955
+ "epoch": 0.9799639160573544,
1956
+ "grad_norm": 2.34065580368042,
1957
+ "learning_rate": 1.0384660197125806e-07,
1958
+ "loss": 1.0336,
1959
+ "step": 2580
1960
+ },
1961
+ {
1962
+ "epoch": 0.9837622258095148,
1963
+ "grad_norm": 3.2528553009033203,
1964
+ "learning_rate": 6.836421962531648e-08,
1965
+ "loss": 1.1792,
1966
+ "step": 2590
1967
+ },
1968
+ {
1969
+ "epoch": 0.987560535561675,
1970
+ "grad_norm": 3.4945614337921143,
1971
+ "learning_rate": 4.026804255491401e-08,
1972
+ "loss": 1.0964,
1973
+ "step": 2600
1974
+ },
1975
+ {
1976
+ "epoch": 0.9913588453138353,
1977
+ "grad_norm": 2.077958106994629,
1978
+ "learning_rate": 1.9562226925340378e-08,
1979
+ "loss": 1.3392,
1980
+ "step": 2610
1981
+ },
1982
+ {
1983
+ "epoch": 0.9951571550659957,
1984
+ "grad_norm": 2.580904722213745,
1985
+ "learning_rate": 6.249835672594851e-09,
1986
+ "loss": 1.1596,
1987
+ "step": 2620
1988
+ },
1989
+ {
1990
+ "epoch": 0.9989554648181559,
1991
+ "grad_norm": 3.186758279800415,
1992
+ "learning_rate": 3.328380502876094e-10,
1993
+ "loss": 1.1276,
1994
+ "step": 2630
1995
  }
1996
  ],
1997
  "logging_steps": 10,
 
2015
  "should_evaluate": false,
2016
  "should_log": false,
2017
  "should_save": true,
2018
+ "should_training_stop": true
2019
  },
2020
  "attributes": {}
2021
  }
2022
  },
2023
+ "total_flos": 1.8718053692001485e+18,
2024
  "train_batch_size": 8,
2025
  "trial_name": null,
2026
  "trial_params": null