tttx/dummy_lora_ft_3k_1k

PEFT · Safetensors · llama · alignment-handbook · trl · sft · Generated from Trainer
Commit 714be9d (verified) · committed by aadityap · 1 parent: 1d11ba9

End of training

Files changed (4):
  1. README.md +4 -1
  2. all_results.json +5 -0
  3. config.json +2 -6
  4. eval_results.json +4 -4
README.md CHANGED
@@ -3,9 +3,12 @@ library_name: peft
 license: mit
 base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
 tags:
+- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
+datasets:
+- tttx/fake_dataset_prompt_3072_response_15360_data_size_1000
 model-index:
 - name: dummy_lora_ft_3k_1k
   results: []
@@ -16,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # dummy_lora_ft_3k_1k
 
-This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) on an unknown dataset.
+This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) on the tttx/fake_dataset_prompt_3072_response_15360_data_size_1000 dataset.
 It achieves the following results on the evaluation set:
 - Loss: nan
 
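For context on how a card like this is typically consumed: below is a minimal sketch of attaching the LoRA adapter to its base model with peft and transformers. The hub id tttx/dummy_lora_ft_3k_1k is an assumption inferred from the model-index name above, and the dtype choice is illustrative.

```python
# Minimal sketch, not the training code: load the DeepSeek base model and
# attach this repo's LoRA adapter on top of it with peft.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
adapter_id = "tttx/dummy_lora_ft_3k_1k"  # assumed hub id, from the model-index name

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, adapter_id)  # reads adapter_config.json + weights
model.eval()
```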
all_results.json CHANGED
@@ -1,5 +1,10 @@
 {
     "epoch": 1.0,
+    "eval_loss": NaN,
+    "eval_runtime": 1.9779,
+    "eval_samples": 1,
+    "eval_samples_per_second": 0.506,
+    "eval_steps_per_second": 0.506,
     "total_flos": 54979878453248.0,
     "train_loss": 0.0,
     "train_runtime": 98.4374,
config.json CHANGED
@@ -1,17 +1,13 @@
 {
   "_attn_implementation_autoset": true,
-  "_name_or_path": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+  "_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "eos_token_id": [
-    128001,
-    128008,
-    128009
-  ],
+  "eos_token_id": 128001,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 4096,
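A quick way to sanity-check the corrected token ids is to read the config back with transformers; a minimal sketch, assuming the published config matches the diff above:

```python
# Minimal sketch: confirm the config now carries the DeepSeek distill's
# single eos id (128001) instead of Llama-3.1-Instruct's id list.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-8B")
print(config.bos_token_id)  # 128000
print(config.eos_token_id)  # 128001, not [128001, 128008, 128009]
```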
eval_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "eval_loss": 12.021870613098145,
-    "eval_runtime": 2.9731,
+    "eval_loss": NaN,
+    "eval_runtime": 1.9779,
     "eval_samples": 1,
-    "eval_samples_per_second": 0.336,
-    "eval_steps_per_second": 0.336
+    "eval_samples_per_second": 0.506,
+    "eval_steps_per_second": 0.506
 }
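One note on the metric files in this commit: the bare NaN token is Python's json extension (allow_nan=True by default), not strict JSON, and a NaN eval_loss propagates into any derived metric such as perplexity. A minimal sketch, assuming the file is on disk under the name shown:

```python
# Minimal sketch: Python's json reads the bare NaN token back as float("nan");
# strict JSON parsers would reject these files.
import json
import math

with open("eval_results.json") as f:
    eval_results = json.load(f)

loss = eval_results["eval_loss"]
print(math.isnan(loss))  # True
# For causal-LM eval, perplexity = exp(loss); math.exp propagates NaN.
print(math.exp(loss))    # nan
```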