End of training
Browse files- .gitignore +1 -0
- config.json +33 -0
- generation_config.json +6 -0
- pytorch_model.bin +3 -0
- runs/Jun02_17-44-14_12a7977882ab/1685727861.9010558/events.out.tfevents.1685727861.12a7977882ab.2624.1 +3 -0
- runs/Jun02_17-44-14_12a7977882ab/events.out.tfevents.1685727861.12a7977882ab.2624.0 +3 -0
- runs/Jun02_17-44-44_12a7977882ab/1685727884.0527675/events.out.tfevents.1685727884.12a7977882ab.2624.3 +3 -0
- runs/Jun02_17-44-44_12a7977882ab/events.out.tfevents.1685727884.12a7977882ab.2624.2 +3 -0
- runs/Jun02_17-45-58_12a7977882ab/1685727958.5564306/events.out.tfevents.1685727958.12a7977882ab.2624.5 +3 -0
- runs/Jun02_17-45-58_12a7977882ab/events.out.tfevents.1685727958.12a7977882ab.2624.4 +3 -0
- runs/Jun02_17-51-29_12a7977882ab/1685728289.0756934/events.out.tfevents.1685728289.12a7977882ab.2624.7 +3 -0
- runs/Jun02_17-51-29_12a7977882ab/events.out.tfevents.1685728289.12a7977882ab.2624.6 +3 -0
- runs/Jun02_17-51-45_12a7977882ab/1685728305.2496407/events.out.tfevents.1685728305.12a7977882ab.2624.9 +3 -0
- runs/Jun02_17-51-45_12a7977882ab/events.out.tfevents.1685728305.12a7977882ab.2624.8 +3 -0
- runs/Jun02_17-52-24_12a7977882ab/1685728344.4435043/events.out.tfevents.1685728344.12a7977882ab.5378.1 +3 -0
- runs/Jun02_17-52-24_12a7977882ab/events.out.tfevents.1685728344.12a7977882ab.5378.0 +3 -0
- runs/Jun02_17-52-58_12a7977882ab/1685728378.2396302/events.out.tfevents.1685728378.12a7977882ab.5378.3 +3 -0
- runs/Jun02_17-52-58_12a7977882ab/events.out.tfevents.1685728378.12a7977882ab.5378.2 +3 -0
- runs/Jun02_17-53-56_12a7977882ab/1685728436.214853/events.out.tfevents.1685728436.12a7977882ab.5378.5 +3 -0
- runs/Jun02_17-53-56_12a7977882ab/events.out.tfevents.1685728436.12a7977882ab.5378.4 +3 -0
- runs/Jun02_17-54-24_12a7977882ab/1685728464.7410722/events.out.tfevents.1685728464.12a7977882ab.5378.7 +3 -0
- runs/Jun02_17-54-24_12a7977882ab/events.out.tfevents.1685728464.12a7977882ab.5378.6 +3 -0
- runs/Jun02_17-57-14_12a7977882ab/1685728641.2848952/events.out.tfevents.1685728641.12a7977882ab.6747.1 +3 -0
- runs/Jun02_17-57-14_12a7977882ab/events.out.tfevents.1685728641.12a7977882ab.6747.0 +3 -0
- runs/Jun02_18-02-50_12a7977882ab/1685728970.8927755/events.out.tfevents.1685728970.12a7977882ab.6747.3 +3 -0
- runs/Jun02_18-02-50_12a7977882ab/events.out.tfevents.1685728970.12a7977882ab.6747.2 +3 -0
- runs/Jun02_18-09-33_12a7977882ab/1685729373.6836426/events.out.tfevents.1685729373.12a7977882ab.6747.5 +3 -0
- runs/Jun02_18-09-33_12a7977882ab/events.out.tfevents.1685729373.12a7977882ab.6747.4 +3 -0
- special_tokens_map.json +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +5 -0
- training_args.bin +3 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-*/
|
config.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "./falcon-mini-shakespeare",
|
3 |
+
"alibi": false,
|
4 |
+
"apply_residual_connection_post_layernorm": false,
|
5 |
+
"architectures": [
|
6 |
+
"RWForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"auto_map": {
|
10 |
+
"AutoConfig": "configuration_RW.RWConfig",
|
11 |
+
"AutoModel": "modelling_RW.RWModel",
|
12 |
+
"AutoModelForCausalLM": "modelling_RW.RWForCausalLM",
|
13 |
+
"AutoModelForQuestionAnswering": "modelling_RW.RWForQuestionAnswering",
|
14 |
+
"AutoModelForSequenceClassification": "modelling_RW.RWForSequenceClassification",
|
15 |
+
"AutoModelForTokenClassification": "modelling_RW.RWForTokenClassification"
|
16 |
+
},
|
17 |
+
"bias": false,
|
18 |
+
"bos_token_id": 11,
|
19 |
+
"eos_token_id": 11,
|
20 |
+
"hidden_dropout": 0.0,
|
21 |
+
"hidden_size": 128,
|
22 |
+
"initializer_range": 0.02,
|
23 |
+
"layer_norm_epsilon": 1e-05,
|
24 |
+
"model_type": "RefinedWebModel",
|
25 |
+
"multi_query": true,
|
26 |
+
"n_head": 4,
|
27 |
+
"n_layer": 8,
|
28 |
+
"parallel_attn": true,
|
29 |
+
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.28.0",
|
31 |
+
"use_cache": true,
|
32 |
+
"vocab_size": 65024
|
33 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 11,
|
4 |
+
"eos_token_id": 11,
|
5 |
+
"transformers_version": "4.28.0"
|
6 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65201c8be4a370c39b1eb7e59d8e608ef6c600e868c897d37b53334e15fce1c3
|
3 |
+
size 38823498
|
runs/Jun02_17-44-14_12a7977882ab/1685727861.9010558/events.out.tfevents.1685727861.12a7977882ab.2624.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:050953e233b870b056dd8c55b5a1ec94a2ab07972a4244d92398f5ce13de4d43
|
3 |
+
size 5894
|
runs/Jun02_17-44-14_12a7977882ab/events.out.tfevents.1685727861.12a7977882ab.2624.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9250dae0e78896aab73bfbb1d676df8b84472a40d0773131dd1877f25481f0b
|
3 |
+
size 4319
|
runs/Jun02_17-44-44_12a7977882ab/1685727884.0527675/events.out.tfevents.1685727884.12a7977882ab.2624.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83c81d29ee90b97ffea0133e7020a6b30b812dff9ba3031705ea1a3ab6a00d70
|
3 |
+
size 5894
|
runs/Jun02_17-44-44_12a7977882ab/events.out.tfevents.1685727884.12a7977882ab.2624.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84977d1a41c8b46f0b73c18c46af2022b61420ff57a807623f4ae6f8737e92b1
|
3 |
+
size 4667
|
runs/Jun02_17-45-58_12a7977882ab/1685727958.5564306/events.out.tfevents.1685727958.12a7977882ab.2624.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1088b74785d87f03c5d8b6f57b54db03d1081d182de45cf4e363b1f16184fd4
|
3 |
+
size 5894
|
runs/Jun02_17-45-58_12a7977882ab/events.out.tfevents.1685727958.12a7977882ab.2624.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3c2aea5b2921ffd2a5fe19e477e192b45d8b149ff735d63a32e8e2edce0d983
|
3 |
+
size 5149
|
runs/Jun02_17-51-29_12a7977882ab/1685728289.0756934/events.out.tfevents.1685728289.12a7977882ab.2624.7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60b0731efe2922f44992acb7fbdf5a7c657aa6ecff0411a2c1816c144e0e07a1
|
3 |
+
size 5894
|
runs/Jun02_17-51-29_12a7977882ab/events.out.tfevents.1685728289.12a7977882ab.2624.6
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f50b37baa4a597ea6dc09394aeae33d8bab5db3fdbdfee88765e0c79e145caf7
|
3 |
+
size 4184
|
runs/Jun02_17-51-45_12a7977882ab/1685728305.2496407/events.out.tfevents.1685728305.12a7977882ab.2624.9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:052c9a801adf9cccb3d8e31ac49ac2fb685ce8bab31d9951f4eb136a16e49505
|
3 |
+
size 5894
|
runs/Jun02_17-51-45_12a7977882ab/events.out.tfevents.1685728305.12a7977882ab.2624.8
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d8c2d18e66af7ff61b73274c233859a5470b7bf00b8610d59ed05ba514b9092
|
3 |
+
size 4184
|
runs/Jun02_17-52-24_12a7977882ab/1685728344.4435043/events.out.tfevents.1685728344.12a7977882ab.5378.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0299e1ce344362585903bef2f27eb6b4bc5878a4fe4de177f61ae272e61be09b
|
3 |
+
size 5894
|
runs/Jun02_17-52-24_12a7977882ab/events.out.tfevents.1685728344.12a7977882ab.5378.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d6051af793d60698eb9d7a8300f9a6b706efb8eef85ec61ec3be3bc26641a48
|
3 |
+
size 4367
|
runs/Jun02_17-52-58_12a7977882ab/1685728378.2396302/events.out.tfevents.1685728378.12a7977882ab.5378.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc7a69d8831b598c26c7fbe854135cdc9976e7e38de92923c0b99efd51783dfb
|
3 |
+
size 5894
|
runs/Jun02_17-52-58_12a7977882ab/events.out.tfevents.1685728378.12a7977882ab.5378.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf4afe479ec9a6c62b21289702b4754ff90ba90bdbb80246807d1cbe40aaf193
|
3 |
+
size 4367
|
runs/Jun02_17-53-56_12a7977882ab/1685728436.214853/events.out.tfevents.1685728436.12a7977882ab.5378.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:def939c746abbfd08a2113ccc215fa2df0605d12eb7ecbf110c6205db93ebed2
|
3 |
+
size 5894
|
runs/Jun02_17-53-56_12a7977882ab/events.out.tfevents.1685728436.12a7977882ab.5378.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8c8412f2cd70c3ebfc39e390998473ba4342aafa4fba024ea84f0147f718bf0
|
3 |
+
size 4367
|
runs/Jun02_17-54-24_12a7977882ab/1685728464.7410722/events.out.tfevents.1685728464.12a7977882ab.5378.7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28d7b80b81c42e63da3fcf0575aa7a5c61fa2f6c88dccb7da1f36087e118cd33
|
3 |
+
size 5894
|
runs/Jun02_17-54-24_12a7977882ab/events.out.tfevents.1685728464.12a7977882ab.5378.6
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc939bbec37d02d5f425a43a1f7794848efd3146fd0be7281b017a3625a3260f
|
3 |
+
size 4367
|
runs/Jun02_17-57-14_12a7977882ab/1685728641.2848952/events.out.tfevents.1685728641.12a7977882ab.6747.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:792a5b42d899f5aae27d71412276238d27f239447ded42ab61b528e992ca1f6b
|
3 |
+
size 5894
|
runs/Jun02_17-57-14_12a7977882ab/events.out.tfevents.1685728641.12a7977882ab.6747.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:feedd08873dbaf58e62b4c72c755fab035a2522e61000f63ad171fd50d8154ec
|
3 |
+
size 5149
|
runs/Jun02_18-02-50_12a7977882ab/1685728970.8927755/events.out.tfevents.1685728970.12a7977882ab.6747.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfcda3838237a636a82a24a1e0b2a2741470c39a4193c91e41f3785b8da1d474
|
3 |
+
size 5894
|
runs/Jun02_18-02-50_12a7977882ab/events.out.tfevents.1685728970.12a7977882ab.6747.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97e534f0e39c4c6c4b2ec420172f8de7d90cafd8c45d8f3027ee4b1edc082176
|
3 |
+
size 5149
|
runs/Jun02_18-09-33_12a7977882ab/1685729373.6836426/events.out.tfevents.1685729373.12a7977882ab.6747.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fccb043bdd4f1d176d32abd21f1190774f95703e6c1623481177e7916ef0f57
|
3 |
+
size 5894
|
runs/Jun02_18-09-33_12a7977882ab/events.out.tfevents.1685729373.12a7977882ab.6747.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14474a337f6b8d047a9e680cbff6a5ad0ac0959bc3d36358761d7293811b4d3c
|
3 |
+
size 5149
|
special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"pad_token": "<|endoftext|>"
|
3 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"clean_up_tokenization_spaces": true,
|
3 |
+
"model_max_length": 1000000000000000019884624838656,
|
4 |
+
"tokenizer_class": "PreTrainedTokenizerFast"
|
5 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16d53dbe99d774263dd0230459246f3106eef426139b6637bc91a7f8c2ec5e69
|
3 |
+
size 3579
|