RoyJoy committed on
Commit 820ef99 · verified · Parent: 622cd67

End of training

Files changed (2)
  1. README.md +15 -15
  2. adapter_model.bin +1 -1
README.md CHANGED
@@ -6,7 +6,7 @@ tags:
 - axolotl
 - generated_from_trainer
 model-index:
-- name: d5631ce9-d8a3-4745-8025-ac825332fe39
+- name: f404ad90-a578-4849-9a7c-c0a4c00d37f5
   results: []
 ---
 
@@ -24,14 +24,14 @@ bf16: auto
 dataset_prepared_path: null
 datasets:
 - data_files:
-  - 26e357e10ca56cb1_train_data.json
+  - 8eaf7cf861deb379_train_data.json
   ds_type: json
   format: custom
-  path: /workspace/input_data/26e357e10ca56cb1_train_data.json
+  path: /workspace/input_data/8eaf7cf861deb379_train_data.json
   type:
-    field_input: input
-    field_instruction: instruction
-    field_output: output
+    field_input: text
+    field_instruction: task_name
+    field_output: hypothesis
     format: '{instruction} {input}'
     no_input_format: '{instruction}'
     system_format: '{system}'
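
The field remapping in this hunk points axolotl's custom prompt format at NLI-style columns (task_name fills the instruction slot, text the input slot, hypothesis the target). As a rough illustration of what the format strings imply, here is a minimal Python sketch, assuming plain `str.format` substitution and a hypothetical record (not taken from the dataset):

```python
# Hypothetical record shaped like the new field mapping (illustration only).
record = {
    "task_name": "recognize textual entailment",  # field_instruction
    "text": "The cat sat on the mat.",            # field_input
    "hypothesis": "There is a cat on the mat.",   # field_output (target)
}

FORMAT = "{instruction} {input}"   # format
NO_INPUT_FORMAT = "{instruction}"  # no_input_format

def render_prompt(rec: dict) -> str:
    """Render one prompt the way the custom format strings imply."""
    instruction, inp = rec.get("task_name", ""), rec.get("text", "")
    template = FORMAT if inp else NO_INPUT_FORMAT
    return template.format(instruction=instruction, input=inp)

print(render_prompt(record))
# -> recognize textual entailment The cat sat on the mat.
```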
@@ -49,7 +49,7 @@ fsdp_config: null
 gradient_accumulation_steps: 4
 gradient_checkpointing: false
 group_by_length: false
-hub_model_id: sn56t0/d5631ce9-d8a3-4745-8025-ac825332fe39
+hub_model_id: sn56t0/f404ad90-a578-4849-9a7c-c0a4c00d37f5
 learning_rate: 0.0002
 load_in_4bit: false
 load_in_8bit: false
@@ -63,7 +63,7 @@ lora_r: 32
 lora_target_linear: true
 lr_scheduler: cosine
 micro_batch_size: 2
-mlflow_experiment_name: /tmp/26e357e10ca56cb1_train_data.json
+mlflow_experiment_name: /tmp/8eaf7cf861deb379_train_data.json
 model_type: AutoModelForCausalLM
 num_epochs: 2
 optimizer: adamw_bnb_8bit
@@ -72,7 +72,7 @@ pad_to_sequence_len: null
 resume_from_checkpoint: null
 sample_packing: false
 saves_per_epoch: 1
-seed: 1401236165
+seed: 3849342454
 sequence_len: 2048
 shuffle: true
 special_tokens: null
@@ -88,7 +88,7 @@ wandb_log_model: null
 wandb_mode: disabled
 wandb_name: null
 wandb_project: god
-wandb_run: tmck
+wandb_run: v8sh
 wandb_runid: null
 wandb_watch: null
 warmup_steps: 10
@@ -99,7 +99,7 @@ xformers_attention: null
 
 </details><br>
 
-# d5631ce9-d8a3-4745-8025-ac825332fe39
+# f404ad90-a578-4849-9a7c-c0a4c00d37f5
 
 This model is a fine-tuned version of [unsloth/Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/unsloth/Qwen2.5-Coder-1.5B-Instruct) on the None dataset.
 It achieves the following results on the evaluation set:
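
For reference, the adapter produced by this run can be attached to the base model named above via peft. A minimal sketch, assuming the weights in this commit are what was pushed to the hub_model_id from the config:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "unsloth/Qwen2.5-Coder-1.5B-Instruct"
ADAPTER = "sn56t0/f404ad90-a578-4849-9a7c-c0a4c00d37f5"  # hub_model_id from the config

tokenizer = AutoTokenizer.from_pretrained(BASE)
model = AutoModelForCausalLM.from_pretrained(BASE)
model = PeftModel.from_pretrained(model, ADAPTER)  # attach the LoRA adapter

# Prompt shaped like the '{instruction} {input}' template (hypothetical content).
prompt = "recognize textual entailment The cat sat on the mat."
inputs = tokenizer(prompt, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```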
@@ -125,7 +125,7 @@ The following hyperparameters were used during training:
 - learning_rate: 0.0002
 - train_batch_size: 2
 - eval_batch_size: 2
-- seed: 1401236165
+- seed: 3849342454
 - distributed_type: multi-GPU
 - num_devices: 4
 - gradient_accumulation_steps: 4
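
(Only the seed changes in this hunk; the effective batch size implied by the unchanged values is train_batch_size 2 × 4 devices × 4 gradient-accumulation steps = 32 examples per optimizer step.)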
@@ -140,9 +140,9 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 1.5045 | 0.0007 | 1 | nan |
-| 2.4695 | 1.0 | 1538 | nan |
-| 0.9065 | 2.0 | 3076 | nan |
+| 5.5005 | 0.0016 | 1 | nan |
+| 3.4486 | 0.9992 | 630 | nan |
+| 4.2775 | 1.9984 | 1260 | nan |
 
 
 ### Framework versions
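
(Sanity check on the new table: at an effective batch of 32, roughly 630 steps per epoch implies about 630 × 32 ≈ 20,160 training examples, versus about 1538 × 32 ≈ 49,216 for the previous dataset; validation loss is nan at every eval in both runs.)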
 
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4267f264d25119c67946db0bc0ea60275bf60e30326d3955a951d53b5ddc0e4
+oid sha256:9409d67e841f4b1c60ec6005fc640b5341c6e1b15434e81cc2fed9bc29ccdd8b
 size 147859242
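
Git stores only this LFS pointer; the new adapter weights replace the old ones at the same 147859242-byte size. A minimal sketch of verifying a downloaded adapter_model.bin against the pointer's oid and size (the local path is hypothetical):

```python
import hashlib
import os

# Values from the new LFS pointer above.
EXPECTED_OID = "9409d67e841f4b1c60ec6005fc640b5341c6e1b15434e81cc2fed9bc29ccdd8b"
EXPECTED_SIZE = 147859242

def verify_lfs_object(path: str) -> bool:
    """Check a local file against the pointer's sha256 oid and byte size."""
    if os.path.getsize(path) != EXPECTED_SIZE:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks to avoid loading the whole file into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == EXPECTED_OID

print(verify_lfs_object("adapter_model.bin"))  # hypothetical local path
```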