tanliboy committed on
Commit
107b694
·
verified ·
1 Parent(s): f0b2c3a

End of training

Browse files
README.md CHANGED
@@ -3,10 +3,17 @@ library_name: transformers
3
  license: llama3.2
4
  base_model: tanliboy/llama-3.2-3b-sft
5
  tags:
 
 
 
 
6
  - trl
7
  - dpo
8
  - alignment-handbook
9
  - generated_from_trainer
 
 
 
10
  model-index:
11
  - name: llama-3.2-3b-dpo
12
  results: []
@@ -17,17 +24,17 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  # llama-3.2-3b-dpo
19
 
20
- This model is a fine-tuned version of [tanliboy/llama-3.2-3b-sft](https://huggingface.co/tanliboy/llama-3.2-3b-sft) on the None dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 0.6284
23
- - Rewards/chosen: 0.8504
24
- - Rewards/rejected: -3.7058
25
- - Rewards/accuracies: 0.7437
26
- - Rewards/margins: 4.5562
27
- - Logps/rejected: -368.9125
28
- - Logps/chosen: -337.3143
29
- - Logits/rejected: 0.4571
30
- - Logits/chosen: 0.3820
31
 
32
  ## Model description
33
 
 
3
  license: llama3.2
4
  base_model: tanliboy/llama-3.2-3b-sft
5
  tags:
6
+ - alignment-handbook
7
+ - trl
8
+ - dpo
9
+ - generated_from_trainer
10
  - trl
11
  - dpo
12
  - alignment-handbook
13
  - generated_from_trainer
14
+ datasets:
15
+ - HuggingFaceH4/orca_dpo_pairs
16
+ - HuggingFaceH4/ultrafeedback_binarized
17
  model-index:
18
  - name: llama-3.2-3b-dpo
19
  results: []
 
24
 
25
  # llama-3.2-3b-dpo
26
 
27
+ This model is a fine-tuned version of [tanliboy/llama-3.2-3b-sft](https://huggingface.co/tanliboy/llama-3.2-3b-sft) on the HuggingFaceH4/orca_dpo_pairs and the HuggingFaceH4/ultrafeedback_binarized datasets.
28
  It achieves the following results on the evaluation set:
29
+ - Loss: 0.6289
30
+ - Rewards/chosen: 0.7479
31
+ - Rewards/rejected: -3.8379
32
+ - Rewards/accuracies: 0.7405
33
+ - Rewards/margins: 4.5857
34
+ - Logps/rejected: -370.2327
35
+ - Logps/chosen: -338.3392
36
+ - Logits/rejected: 0.4475
37
+ - Logits/chosen: 0.3731
38
 
39
  ## Model description
40
 
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_logits/chosen": 0.3599458634853363,
4
- "eval_logits/rejected": 0.514390230178833,
5
- "eval_logps/chosen": -483.24920654296875,
6
- "eval_logps/rejected": -561.0516357421875,
7
- "eval_loss": 0.4862797260284424,
8
- "eval_rewards/accuracies": 0.7215189933776855,
9
- "eval_rewards/chosen": -1.453184962272644,
10
- "eval_rewards/margins": 1.1619733572006226,
11
- "eval_rewards/rejected": -2.6151583194732666,
12
- "eval_runtime": 56.3308,
13
  "eval_samples": 2500,
14
- "eval_samples_per_second": 44.381,
15
- "eval_steps_per_second": 1.402,
16
  "total_flos": 0.0,
17
  "train_loss": 0.5009220597491634,
18
  "train_runtime": 6227.6413,
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_logits/chosen": 0.3730663061141968,
4
+ "eval_logits/rejected": 0.4475269019603729,
5
+ "eval_logps/chosen": -338.3392028808594,
6
+ "eval_logps/rejected": -370.232666015625,
7
+ "eval_loss": 0.6289177536964417,
8
+ "eval_rewards/accuracies": 0.7405063509941101,
9
+ "eval_rewards/chosen": 0.7478683590888977,
10
+ "eval_rewards/margins": 4.585729122161865,
11
+ "eval_rewards/rejected": -3.8378612995147705,
12
+ "eval_runtime": 70.1775,
13
  "eval_samples": 2500,
14
+ "eval_samples_per_second": 35.624,
15
+ "eval_steps_per_second": 1.126,
16
  "total_flos": 0.0,
17
  "train_loss": 0.5009220597491634,
18
  "train_runtime": 6227.6413,
config.json CHANGED
@@ -35,6 +35,6 @@
35
  "tie_word_embeddings": true,
36
  "torch_dtype": "bfloat16",
37
  "transformers_version": "4.44.2",
38
- "use_cache": false,
39
  "vocab_size": 128256
40
  }
 
35
  "tie_word_embeddings": true,
36
  "torch_dtype": "bfloat16",
37
  "transformers_version": "4.44.2",
38
+ "use_cache": true,
39
  "vocab_size": 128256
40
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 0.999564649542882,
3
- "eval_logits/chosen": 0.3599458634853363,
4
- "eval_logits/rejected": 0.514390230178833,
5
- "eval_logps/chosen": -483.24920654296875,
6
- "eval_logps/rejected": -561.0516357421875,
7
- "eval_loss": 0.4862797260284424,
8
- "eval_rewards/accuracies": 0.7215189933776855,
9
- "eval_rewards/chosen": -1.453184962272644,
10
- "eval_rewards/margins": 1.1619733572006226,
11
- "eval_rewards/rejected": -2.6151583194732666,
12
- "eval_runtime": 56.3308,
13
  "eval_samples": 2500,
14
- "eval_samples_per_second": 44.381,
15
- "eval_steps_per_second": 1.402
16
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_logits/chosen": 0.3730663061141968,
4
+ "eval_logits/rejected": 0.4475269019603729,
5
+ "eval_logps/chosen": -338.3392028808594,
6
+ "eval_logps/rejected": -370.232666015625,
7
+ "eval_loss": 0.6289177536964417,
8
+ "eval_rewards/accuracies": 0.7405063509941101,
9
+ "eval_rewards/chosen": 0.7478683590888977,
10
+ "eval_rewards/margins": 4.585729122161865,
11
+ "eval_rewards/rejected": -3.8378612995147705,
12
+ "eval_runtime": 70.1775,
13
  "eval_samples": 2500,
14
+ "eval_samples_per_second": 35.624,
15
+ "eval_steps_per_second": 1.126
16
  }
runs/Sep29_22-34-07_action-graph-trainer/events.out.tfevents.1727656053.action-graph-trainer.718565.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71c573641baaf0e3920cfaf4291e3c5827addd7c33a74c2c3a9e9e8b40a32325
3
+ size 828