willtensora committed on
Commit d086ed6 · verified · 1 Parent(s): 5b4a5eb

Training in progress, step 18

00000000-0000-0000-0000-000000000000.yml ADDED
@@ -0,0 +1,50 @@
+ base_model: peft-internal-testing/tiny-dummy-qwen2
+ batch_size: 8
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - format: custom
+   path: argilla/databricks-dolly-15k-curated-en
+   type:
+     field_input: original-instruction
+     field_instruction: original-instruction
+     field_output: original-response
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/test-repo
+ hub_strategy: checkpoint
+ learning_rate: 0.002
+ load_best_model_at_end: true
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 1
+ micro_batch_size: 1
+ model_type: AutoModelForCausalLM
+ num_epochs: 100
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 8
+ tokenizer_type: Qwen2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.001
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: peft-internal-testing/tiny-dummy-qwen2-argilla/databricks-dolly-15k-curated-en
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
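The `type:` block in the config above maps dataset columns (`original-instruction`, `original-response`) onto an Alpaca-style prompt via the `format` / `no_input_format` strings. Below is a minimal sketch of that rendering logic under the usual Axolotl user-defined-format semantics; the example row is made up for illustration.

```python
# Sketch only: approximates how an Axolotl user-defined `type:` block renders one row.
# Field names come from the config above; the row contents are hypothetical.
row = {
    "original-instruction": "Summarise the passage in one sentence.",
    "original-response": "A one-sentence summary.",
}

fmt = "{instruction} {input}"      # `format` from the config
no_input_fmt = "{instruction}"     # `no_input_format` from the config

instruction = row["original-instruction"]  # field_instruction
inp = row.get("original-instruction", "")  # field_input (same column in this config)
prompt = (
    fmt.format(instruction=instruction, input=inp)
    if inp
    else no_input_fmt.format(instruction=instruction)
)
target = row["original-response"]          # field_output

print(prompt)
print("->", target)
```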
03a659ff-e350-4bb9-8ff3-8c658a5d0dff.yml ADDED
@@ -0,0 +1,52 @@
+ base_model: fxmarty/tiny-llama-fast-tokenizer
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - fc6136aac03f618a_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/fc6136aac03f618a_train_data.json
+   type:
+     field_instruction: text
+     field_output: title
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ special_tokens:
+   pad_token: </s>
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: fxmarty/tiny-llama-fast-tokenizer-/workspace/input_data/fc6136aac03f618a_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
077fd330-87f9-4bc4-b449-7713fbdaf1b0.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/mistral-7b-v0.3
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - ca0152973425c947_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/ca0152973425c947_train_data.json
+   type:
+     field_input: code
+     field_instruction: func_name
+     field_output: docstring
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/5a2f5ce6-446b-4282-bb4d-9ee4e970231f
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/mistral-7b-v0.3-/tmp/ca0152973425c947_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
21315ae5-16ee-43cd-9612-743524060933.yml ADDED
@@ -0,0 +1,50 @@
+ base_model: unsloth/Meta-Llama-3.1-8B
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 562fa3aeea07046a_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/562fa3aeea07046a_train_data.json
+   type:
+     field_instruction: prompt
+     field_output: text
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/c4596edc-efad-4776-86a1-caa06bffcada
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: PreTrainedTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/Meta-Llama-3.1-8B-/workspace/input_data/562fa3aeea07046a_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
40f27435-f59d-488f-b2d6-01e356d79c48.yml ADDED
@@ -0,0 +1,50 @@
+ base_model: Qwen/Qwen2-1.5B-Instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - df925134bb2c32b8_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/df925134bb2c32b8_train_data.json
+   type:
+     field_instruction: prompt
+     field_output: amoral
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/ba640bbe-3257-40d8-88fe-26152f412bb7
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: Qwen2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: Qwen/Qwen2-1.5B-Instruct-/tmp/df925134bb2c32b8_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
427d02be-6008-4556-9a5e-9c7cb7503058.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/Phi-3.5-mini-instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 7e5b54272524b996_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/7e5b54272524b996_train_data.json
+   type:
+     field_input: input
+     field_instruction: instruction
+     field_output: output
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/ae26a9e9-089e-4d4a-b592-d8935df7c18d
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/Phi-3.5-mini-instruct-/workspace/input_data/7e5b54272524b996_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
54c39bbc-809b-4c67-a254-0e03a4884b4e.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/codegemma-7b-it
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 2ebe89763cb3150d_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/2ebe89763cb3150d_train_data.json
+   type:
+     field_input: input
+     field_instruction: instruction
+     field_output: output
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/a0bc38f8-fcd3-4d7e-9a3f-3aa2e8a4204f
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: GemmaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/codegemma-7b-it-/tmp/2ebe89763cb3150d_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
5ff7bf5f-96dc-43dd-aeeb-560c0ab78db8.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: NousResearch/Hermes-3-Llama-3.1-8B
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 69447058613b41d8_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/69447058613b41d8_train_data.json
+   type:
+     field_input: sectionParentTitre
+     field_instruction: title_main
+     field_output: texte
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/942aa5fc-b540-46ce-b482-e38c4f637264
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: PreTrainedTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: NousResearch/Hermes-3-Llama-3.1-8B-/workspace/input_data/69447058613b41d8_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
63345f8a-4ec9-47f0-9956-6eaa52b2c2a6.yml ADDED
@@ -0,0 +1,52 @@
+ base_model: heegyu/WizardVicuna-open-llama-3b-v2
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - fe9267419ea75ad2_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/fe9267419ea75ad2_train_data.json
+   type:
+     field_instruction: ca_topic
+     field_output: article
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/7114c34f-852f-43da-b985-b7f0b6d6d724
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ special_tokens:
+   pad_token: </s>
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: heegyu/WizardVicuna-open-llama-3b-v2-/tmp/fe9267419ea75ad2_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
6c7ae056-3b4d-460b-ba7b-a4000f32b3f1.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/gemma-2-2b
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - b98d5b59c20c6595_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/b98d5b59c20c6595_train_data.json
+   type:
+     field_input: metadata
+     field_instruction: text
+     field_output: tags_str
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/429ee307-6dd2-4dd7-9e1d-7384d807a3df
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: GemmaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/gemma-2-2b-/tmp/b98d5b59c20c6595_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
75b21ca4-feab-4bdd-92b0-ea6d90dfa18f.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: Qwen/Qwen2.5-1.5B-Instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - c6adcdcb593a3ee4_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/c6adcdcb593a3ee4_train_data.json
+   type:
+     field_input: abstract
+     field_instruction: question_en_origin
+     field_output: answer_en_origin
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/2faf844e-4a0a-4d23-95f4-a055e4864133
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: Qwen2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: Qwen/Qwen2.5-1.5B-Instruct-/workspace/input_data/c6adcdcb593a3ee4_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
879db250-c3f5-4d43-a7c5-c5a456ae5803.yml ADDED
@@ -0,0 +1,50 @@
+ base_model: unsloth/Qwen2.5-Coder-1.5B-Instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 4d85b564dafa38db_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/4d85b564dafa38db_train_data.json
+   type:
+     field_instruction: prompt
+     field_output: response
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/876ff803-5357-4240-8766-c54166515403
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: Qwen2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/Qwen2.5-Coder-1.5B-Instruct-/workspace/input_data/4d85b564dafa38db_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
8910478d-79cf-499e-8fed-7a2142f7ee60.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/Phi-3-medium-4k-instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - f6199f34ade98809_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/f6199f34ade98809_train_data.json
+   type:
+     field_input: choices
+     field_instruction: question
+     field_output: answer
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/2d37ba50-cd70-4895-be62-3477f5193e86
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/Phi-3-medium-4k-instruct-/tmp/f6199f34ade98809_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
README.md ADDED
@@ -0,0 +1,122 @@
+ ---
+ library_name: transformers
+ license: apache-2.0
+ base_model: unsloth/SmolLM-135M
+ tags:
+ - axolotl
+ - generated_from_trainer
+ model-index:
+ - name: 09370687-f28e-45e5-91f6-f87011850a94
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.1`
+ ```yaml
+ base_model: unsloth/SmolLM-135M
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 658988857b0a29c9_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/658988857b0a29c9_train_data.json
+   type:
+     field_input: choices
+     field_instruction: subject
+     field_output: question
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/09370687-f28e-45e5-91f6-f87011850a94
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: GPT2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/SmolLM-135M-/workspace/input_data/658988857b0a29c9_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
+
+ ```
+
+ </details><br>
+
+ # 09370687-f28e-45e5-91f6-f87011850a94
+
+ This model is a fine-tuned version of [unsloth/SmolLM-135M](https://huggingface.co/unsloth/SmolLM-135M) on the None dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0002
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - total_train_batch_size: 32
+ - total_eval_batch_size: 32
+ - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+ - lr_scheduler_type: cosine
+ - training_steps: 1
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | No log | 0.5 | 1 | 2.4449 |
+
+
+ ### Framework versions
+
+ - Transformers 4.46.0
+ - Pytorch 2.5.0+cu124
+ - Datasets 3.0.1
+ - Tokenizers 0.20.1
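Assuming the checkpoint named by `hub_model_id` above has been pushed and is public, it should load like any other `transformers` causal LM. A minimal usage sketch:

```python
# Sketch: load the fine-tuned checkpoint referenced by hub_model_id in the config above.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "willtensora/09370687-f28e-45e5-91f6-f87011850a94"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

# Training used format '{instruction} {input}' with `subject` as instruction and `choices` as input,
# so a prompt in that shape is the most faithful probe (example values are hypothetical).
inputs = tokenizer("chemistry ['acid', 'base', 'salt', 'ester']", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```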
added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "fxmarty/tiny-llama-fast-tokenizer",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "eos_token_id": 2,
+   "head_dim": 4,
+   "hidden_act": "silu",
+   "hidden_size": 16,
+   "initializer_range": 0.02,
+   "intermediate_size": 64,
+   "max_position_embeddings": 2048,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 4,
+   "num_hidden_layers": 2,
+   "num_key_value_heads": 4,
+   "pad_token_id": -1,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.46.0",
+   "use_cache": false,
+   "vocab_size": 32000
+ }
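These values describe a deliberately tiny two-layer test architecture (hidden size 16). A sketch of rebuilding the same shape directly from the fields above with `transformers` (randomly initialised, not the trained weights):

```python
# Sketch: instantiate the tiny test architecture described by config.json above.
from transformers import LlamaConfig, LlamaForCausalLM

cfg = LlamaConfig(
    hidden_size=16,
    intermediate_size=64,
    num_hidden_layers=2,
    num_attention_heads=4,
    num_key_value_heads=4,
    max_position_embeddings=2048,
    rms_norm_eps=1e-06,
    vocab_size=32000,
    tie_word_embeddings=False,
)
model = LlamaForCausalLM(cfg)
print(f"{sum(p.numel() for p in model.parameters()):,} parameters")
# Roughly one million parameters, consistent with the ~2 MB model.safetensors file later in this commit at bf16.
```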
da9e44b3-e4fb-4905-9c7c-6b03aad6b593.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/SmolLM2-360M-Instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - f1ccd02a885008e6_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/f1ccd02a885008e6_train_data.json
+   type:
+     field_input: target
+     field_instruction: user
+     field_output: assistant
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/3da0a03a-adbb-42e3-8fd7-bd7c0b1d3e9f
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: GPT2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/SmolLM2-360M-Instruct-/tmp/f1ccd02a885008e6_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
ee62f35d-1a99-4f1c-a69c-c91bc444b71f.yml ADDED
@@ -0,0 +1,53 @@
+ base_model: EleutherAI/pythia-1b
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - b2a4966d9a5c880e_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/b2a4966d9a5c880e_train_data.json
+   type:
+     field_input: input
+     field_instruction: instruction
+     field_output: output
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/ee937811-31d0-4e11-944a-f4f8e06309d2
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ special_tokens:
+   pad_token: <|endoftext|>
+ tokenizer_type: GPTNeoXTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: EleutherAI/pythia-1b-/workspace/input_data/b2a4966d9a5c880e_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
ef61f40b-eca8-4670-964b-fdd3d1d0f066.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/SmolLM-135M
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 658988857b0a29c9_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/658988857b0a29c9_train_data.json
+   type:
+     field_input: choices
+     field_instruction: subject
+     field_output: question
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/09370687-f28e-45e5-91f6-f87011850a94
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: GPT2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/SmolLM-135M-/workspace/input_data/658988857b0a29c9_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 0,
+   "do_sample": true,
+   "eos_token_id": 0,
+   "transformers_version": "4.46.0"
+ }
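For reference, the same generation defaults (sampling enabled, BOS and EOS both mapped to id 0) can be expressed as a `transformers` object; a minimal sketch:

```python
# Sketch: the generation defaults above, expressed as a transformers GenerationConfig.
from transformers import GenerationConfig

gen_cfg = GenerationConfig(do_sample=True, bos_token_id=0, eos_token_id=0)
print(gen_cfg)  # prints the equivalent generation_config.json contents
```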
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:812c47eb394f5b1421d47e7eef591e1ff951c789cc31fb4b5d68a8b1837eac7b
+ size 2066752
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34aef3a9bd77f14e9e2815e5fa6d6d034134c670e5a5c3fe9a7310af9a216c7a
+ size 269122058
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response: ' + message['content'] + eos_token}}{% endif %}{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 2048,
+   "pad_token": "</s>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false,
+   "use_fast": true
+ }
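The `chat_template` above is a Jinja template that wraps user turns as `### Instruction:` and assistant turns as `### Response:`. A minimal rendering sketch, assuming the tokenizer files from this commit are available in a local checkout of the repo:

```python
# Sketch: render the Alpaca-style chat_template shipped in tokenizer_config.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")  # path to a local clone of this repo (assumption)
messages = [
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "4"},
]
print(tokenizer.apply_chat_template(messages, tokenize=False))
# Expected shape: "### Instruction: What is 2 + 2?\n\n### Response: 4</s>"
```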
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:492137edc8d6ffcc24eda892257e04d2830cf1f5da1bcb04692598ac2e8b1ce1
+ size 6584
vocab.json ADDED
The diff for this file is too large to render. See raw diff