End of training

- README.md +15 -15
- adapter_model.bin +1 -1

README.md
CHANGED
@@ -6,7 +6,7 @@ tags:
 - axolotl
 - generated_from_trainer
 model-index:
-- name:
+- name: f404ad90-a578-4849-9a7c-c0a4c00d37f5
   results: []
 ---
 
@@ -24,14 +24,14 @@ bf16: auto
 dataset_prepared_path: null
 datasets:
 - data_files:
-  -
+  - 8eaf7cf861deb379_train_data.json
   ds_type: json
   format: custom
-  path: /workspace/input_data/
+  path: /workspace/input_data/8eaf7cf861deb379_train_data.json
   type:
-    field_input:
-    field_instruction:
-    field_output:
+    field_input: text
+    field_instruction: task_name
+    field_output: hypothesis
     format: '{instruction} {input}'
     no_input_format: '{instruction}'
     system_format: '{system}'
@@ -49,7 +49,7 @@ fsdp_config: null
 gradient_accumulation_steps: 4
 gradient_checkpointing: false
 group_by_length: false
-hub_model_id: sn56t0/
+hub_model_id: sn56t0/f404ad90-a578-4849-9a7c-c0a4c00d37f5
 learning_rate: 0.0002
 load_in_4bit: false
 load_in_8bit: false
@@ -63,7 +63,7 @@ lora_r: 32
 lora_target_linear: true
 lr_scheduler: cosine
 micro_batch_size: 2
-mlflow_experiment_name: /tmp/
+mlflow_experiment_name: /tmp/8eaf7cf861deb379_train_data.json
 model_type: AutoModelForCausalLM
 num_epochs: 2
 optimizer: adamw_bnb_8bit
@@ -72,7 +72,7 @@ pad_to_sequence_len: null
 resume_from_checkpoint: null
 sample_packing: false
 saves_per_epoch: 1
-seed:
+seed: 3849342454
 sequence_len: 2048
 shuffle: true
 special_tokens: null
@@ -88,7 +88,7 @@ wandb_log_model: null
 wandb_mode: disabled
 wandb_name: null
 wandb_project: god
-wandb_run:
+wandb_run: v8sh
 wandb_runid: null
 wandb_watch: null
 warmup_steps: 10
@@ -99,7 +99,7 @@ xformers_attention: null
 
 </details><br>
 
-#
+# f404ad90-a578-4849-9a7c-c0a4c00d37f5
 
 This model is a fine-tuned version of [unsloth/Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/unsloth/Qwen2.5-Coder-1.5B-Instruct) on the None dataset.
 It achieves the following results on the evaluation set:
@@ -125,7 +125,7 @@ The following hyperparameters were used during training:
 - learning_rate: 0.0002
 - train_batch_size: 2
 - eval_batch_size: 2
-- seed:
+- seed: 3849342454
 - distributed_type: multi-GPU
 - num_devices: 4
 - gradient_accumulation_steps: 4
@@ -140,9 +140,9 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-
-
-
+| 5.5005        | 0.0016 | 1    | nan             |
+| 3.4486        | 0.9992 | 630  | nan             |
+| 4.2775        | 1.9984 | 1260 | nan             |
 
 
 ### Framework versions
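For context on what the new `datasets.type` block in this diff does: axolotl's custom prompt format maps each JSON record's `task_name`, `text`, and `hypothesis` fields onto the `{instruction}`, `{input}`, and `{output}` slots, then renders the prompt with `format` (falling back to `no_input_format` when the input field is empty). A minimal sketch of that rendering under those assumptions — the sample record values here are hypothetical, only the field names come from the config:

```python
# Sketch of how the config's custom format renders one training example.
# Field names (task_name/text/hypothesis) are from the diff above;
# the record contents are made up for illustration.
record = {"task_name": "nli", "text": "premise/hypothesis pair ...", "hypothesis": "entailment"}

fmt = "{instruction} {input}"     # format
no_input_fmt = "{instruction}"    # no_input_format

instruction = record["task_name"]   # field_instruction: task_name
inp = record.get("text", "")        # field_input: text
target = record["hypothesis"]       # field_output: hypothesis

prompt = (fmt.format(instruction=instruction, input=inp)
          if inp else no_input_fmt.format(instruction=instruction))
print(prompt, "->", target)
```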
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9409d67e841f4b1c60ec6005fc640b5341c6e1b15434e81cc2fed9bc29ccdd8b
 size 147859242
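The `adapter_model.bin` change is a Git LFS pointer update: `oid sha256:` is the SHA-256 of the real 147 MB adapter file, so only the LoRA weights changed in this commit. A minimal usage sketch for applying this adapter on top of the base model named in the card, assuming `transformers` and `peft` are installed:

```python
# Sketch: load the LoRA adapter from this repo onto its base model.
# Repo ids come from the card/config above; everything else is standard
# transformers + peft usage.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "unsloth/Qwen2.5-Coder-1.5B-Instruct"              # base model from the card
adapter_id = "sn56t0/f404ad90-a578-4849-9a7c-c0a4c00d37f5"   # hub_model_id from the config

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id)
model = PeftModel.from_pretrained(base, adapter_id)  # fetches adapter_model.bin via LFS

inputs = tokenizer("example prompt", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))

# To check a downloaded adapter against the LFS pointer above:
# import hashlib
# hashlib.sha256(open("adapter_model.bin", "rb").read()).hexdigest()
# should equal 9409d67e841f4b1c60ec6005fc640b5341c6e1b15434e81cc2fed9bc29ccdd8b
```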