tttx /

PEFT · Safetensors · llama · alignment-handbook · trl · sft · Generated from Trainer

aadityap committed on
Commit 1d11ba9 · verified · 1 Parent(s): 17e6708

Model save
README.md CHANGED
@@ -1,14 +1,11 @@
 ---
 library_name: peft
-license: llama3.1
-base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
+license: mit
+base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
 tags:
-- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
-datasets:
-- tttx/fake_dataset_prompt_3072_response_15360_data_size_1000
 model-index:
 - name: dummy_lora_ft_3k_1k
   results: []
@@ -19,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # dummy_lora_ft_3k_1k
 
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on the tttx/fake_dataset_prompt_3072_response_15360_data_size_1000 dataset.
+This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 12.0219
+- Loss: nan
 
 ## Model description
 
@@ -48,7 +45,7 @@ The following hyperparameters were used during training:
 - num_devices: 8
 - total_train_batch_size: 16
 - total_eval_batch_size: 8
-- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
 - num_epochs: 1
@@ -57,7 +54,7 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 12.0266 | 1.0 | 7 | 12.0219 |
+| 0.0 | 1.0 | 7 | nan |
 
 
 ### Framework versions
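
Taken together, the updated card describes a LoRA adapter trained on top of DeepSeek-R1-Distill-Llama-8B. Below is a minimal sketch (not from the repo) of loading such an adapter with PEFT; the adapter repo id is a guess built from the org and model name above and may not match the actual path.

```python
# Illustrative only: "tttx/dummy_lora_ft_3k_1k" is a hypothetical adapter repo id.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
adapter_id = "tttx/dummy_lora_ft_3k_1k"  # assumed, not stated in this commit

tokenizer = AutoTokenizer.from_pretrained(adapter_id)  # carries the DeepSeek special tokens
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype="auto", device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)    # applies adapter_model.safetensors

inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
print(tokenizer.decode(model.generate(inputs, max_new_tokens=32)[0]))
```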
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+  "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "up_proj",
-    "down_proj",
-    "o_proj",
     "gate_proj",
+    "up_proj",
     "q_proj",
-    "k_proj"
+    "v_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:710e7db52e15b2d4bedc9f2832ae4d3214857d0b20a86b5fdf772f27b1965831
+oid sha256:572c67b37990c48135b147d9de32b9150ce997af7fbc4a7d91ecd5d76381f513
 size 335605144
all_results.json CHANGED
@@ -1,14 +1,9 @@
 {
     "epoch": 1.0,
-    "eval_loss": 12.021870613098145,
-    "eval_runtime": 2.9731,
-    "eval_samples": 1,
-    "eval_samples_per_second": 0.336,
-    "eval_steps_per_second": 0.336,
     "total_flos": 54979878453248.0,
-    "train_loss": 12.068599019731794,
-    "train_runtime": 98.6323,
+    "train_loss": 0.0,
+    "train_runtime": 98.4374,
     "train_samples": 100,
-    "train_samples_per_second": 1.014,
+    "train_samples_per_second": 1.016,
     "train_steps_per_second": 0.071
 }
special_tokens_map.json CHANGED
@@ -1,17 +1,23 @@
 {
   "bos_token": {
-    "content": "<|begin_of_text|>",
+    "content": "<|begin▁of▁sentence|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "<|eot_id|>",
+    "content": "<|end▁of▁sentence|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|eot_id|>"
+  "pad_token": {
+    "content": "<|end▁of▁sentence|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
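
A quick, illustrative way to confirm the new special-token mapping after loading the tokenizer; the token strings are taken from the diff above.

```python
# Sanity check of the DeepSeek special tokens (illustrative).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-8B")
print(tok.bos_token)  # <|begin▁of▁sentence|>
print(tok.eos_token)  # <|end▁of▁sentence|>
print(tok.pad_token)  # <|end▁of▁sentence|> — pad is now a full token entry shared with eos
```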
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
-size 17209920
+oid sha256:8b7eba2882b4c9427d5009106a0889d1fe145dac8155be0d71c0ead141d0b6e9
+size 17209630
tokenizer_config.json CHANGED
@@ -1,7 +1,10 @@
 {
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
     "128000": {
-      "content": "<|begin_of_text|>",
+      "content": "<|begin▁of▁sentence|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -9,7 +12,7 @@
       "special": true
     },
     "128001": {
-      "content": "<|end_of_text|>",
+      "content": "<|end▁of▁sentence|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -89,39 +92,39 @@
       "special": true
     },
     "128011": {
-      "content": "<|reserved_special_token_3|>",
+      "content": "<|User|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
-      "special": true
+      "special": false
     },
     "128012": {
-      "content": "<|reserved_special_token_4|>",
+      "content": "<|Assistant|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
-      "special": true
+      "special": false
     },
     "128013": {
-      "content": "<|reserved_special_token_5|>",
+      "content": "<think>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
-      "special": true
+      "special": false
     },
     "128014": {
-      "content": "<|reserved_special_token_6|>",
+      "content": "</think>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
-      "special": true
+      "special": false
     },
     "128015": {
-      "content": "<|reserved_special_token_7|>",
+      "content": "<|▁pad▁|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -2049,15 +2052,15 @@
       "special": true
     }
   },
-  "bos_token": "<|begin_of_text|>",
- "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "<|eot_id|>",
-  "model_input_names": [
-    "input_ids",
-    "attention_mask"
-  ],
-  "model_max_length": 131072,
-  "pad_token": "<|eot_id|>",
-  "tokenizer_class": "PreTrainedTokenizerFast"
+  "bos_token": "<|begin▁of▁sentence|>",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|end▁of▁sentence|>",
+  "legacy": true,
+  "model_max_length": 16384,
+  "pad_token": "<|end▁of▁sentence|>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": null,
+  "use_default_system_prompt": false
 }
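
For reference, the new chat_template produces DeepSeek-R1-style turns: user and assistant turns are wrapped in <|User|> and <|Assistant|> markers, and any <think> ... </think> block is stripped from prior assistant turns. A small sketch of exercising it follows; the example messages are invented.

```python
# Illustrative use of the DeepSeek-R1 chat template shipped in tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-8B")
messages = [
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "<think>simple arithmetic</think>4"},
    {"role": "user", "content": "And 3 + 3?"},
]
text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Roughly: <|begin▁of▁sentence|><|User|>What is 2 + 2?<|Assistant|>4<|end▁of▁sentence|><|User|>And 3 + 3?<|Assistant|>
print(text)
```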
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
     "total_flos": 54979878453248.0,
-    "train_loss": 12.068599019731794,
-    "train_runtime": 98.6323,
+    "train_loss": 0.0,
+    "train_runtime": 98.4374,
     "train_samples": 100,
-    "train_samples_per_second": 1.014,
+    "train_samples_per_second": 1.016,
     "train_steps_per_second": 0.071
 }
trainer_state.json CHANGED
@@ -10,68 +10,68 @@
   "log_history": [
     {
       "epoch": 0.14285714285714285,
-      "grad_norm": 1.7077542993921089,
+      "grad_norm": NaN,
       "learning_rate": 1e-05,
-      "loss": 12.097,
+      "loss": 0.0,
       "step": 1
     },
     {
       "epoch": 0.2857142857142857,
-      "grad_norm": 1.7074484255367977,
+      "grad_norm": NaN,
       "learning_rate": 9.330127018922195e-06,
-      "loss": 12.0961,
+      "loss": 0.0,
       "step": 2
     },
     {
       "epoch": 0.42857142857142855,
-      "grad_norm": 1.6617689590107814,
+      "grad_norm": NaN,
       "learning_rate": 7.500000000000001e-06,
-      "loss": 12.0896,
+      "loss": 0.0,
       "step": 3
     },
     {
       "epoch": 0.5714285714285714,
-      "grad_norm": 1.595470347683705,
+      "grad_norm": NaN,
       "learning_rate": 5e-06,
-      "loss": 12.0741,
+      "loss": 0.0,
       "step": 4
     },
     {
       "epoch": 0.7142857142857143,
-      "grad_norm": 1.5340129983112094,
+      "grad_norm": NaN,
       "learning_rate": 2.5000000000000015e-06,
-      "loss": 12.0549,
+      "loss": 0.0,
       "step": 5
     },
     {
       "epoch": 0.8571428571428571,
-      "grad_norm": 1.5128298580811501,
+      "grad_norm": NaN,
       "learning_rate": 6.698729810778065e-07,
-      "loss": 12.0419,
+      "loss": 0.0,
       "step": 6
     },
     {
       "epoch": 1.0,
-      "grad_norm": 1.4539367376130352,
+      "grad_norm": NaN,
       "learning_rate": 0.0,
-      "loss": 12.0266,
+      "loss": 0.0,
       "step": 7
     },
     {
       "epoch": 1.0,
-      "eval_loss": 12.021870613098145,
-      "eval_runtime": 2.2193,
-      "eval_samples_per_second": 0.451,
-      "eval_steps_per_second": 0.451,
+      "eval_loss": NaN,
+      "eval_runtime": 2.1348,
+      "eval_samples_per_second": 0.468,
+      "eval_steps_per_second": 0.468,
       "step": 7
     },
     {
       "epoch": 1.0,
       "step": 7,
       "total_flos": 54979878453248.0,
-      "train_loss": 12.068599019731794,
-      "train_runtime": 98.6323,
-      "train_samples_per_second": 1.014,
+      "train_loss": 0.0,
+      "train_runtime": 98.4374,
+      "train_samples_per_second": 1.016,
       "train_steps_per_second": 0.071
     }
   ],
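
Given the NaN grad_norm and zero losses recorded above, a small sketch for scanning a trainer_state.json like this one for suspect steps; the file path is illustrative.

```python
# Flag log_history entries with NaN gradients or exactly-zero training loss.
import json
import math

with open("trainer_state.json") as f:  # path assumed
    state = json.load(f)

for entry in state["log_history"]:
    loss = entry.get("loss")
    grad = entry.get("grad_norm")
    if (isinstance(grad, float) and math.isnan(grad)) or loss == 0.0:
        print(f"step {entry.get('step')}: loss={loss}, grad_norm={grad}")
```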
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e3da11af7b93c43ba25acad580174e987cff97bbdd8c637ad70ad7a32f31a46
+oid sha256:d8bb58d2317c0163d1c837d18ece577bc4085e9a9e50708b7c0407d71980d437
 size 7224