TrevorJS committed
Commit 6139024 • 1 Parent(s): 7377daf

TrevorJS/mtg-phi-1_5-2-dpo
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-license: llama2
-base_model: codellama/CodeLlama-7b-hf
+license: other
+base_model: microsoft/phi-1_5
 tags:
 - generated_from_trainer
 model-index:
@@ -13,17 +13,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # dpo
 
-This model is a fine-tuned version of [codellama/CodeLlama-7b-hf](https://huggingface.co/codellama/CodeLlama-7b-hf) on the None dataset.
+This model is a fine-tuned version of [microsoft/phi-1_5](https://huggingface.co/microsoft/phi-1_5) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0010
-- Rewards/chosen: -5.5160
-- Rewards/rejected: -16.3854
-- Rewards/accuracies: 0.9991
-- Rewards/margins: 10.8694
-- Logps/rejected: -189.5804
-- Logps/chosen: -75.4962
-- Logits/rejected: -1.9168
-- Logits/chosen: -2.2651
+- Loss: 0.0000
+- Rewards/chosen: -8.4849
+- Rewards/rejected: -25.9483
+- Rewards/accuracies: 1.0
+- Rewards/margins: 17.4633
+- Logps/rejected: -293.3352
+- Logps/chosen: -152.1862
+- Logits/rejected: -0.9014
+- Logits/chosen: -0.4994
 
 ## Model description
 
@@ -51,27 +51,42 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 100
-- training_steps: 1000
+- training_steps: 2500
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.0937 | 0.07 | 100 | 0.1058 | -5.7722 | -13.1291 | 0.9515 | 7.3569 | -157.0180 | -78.0587 | -1.5497 | -1.7945 |
-| 0.0442 | 0.15 | 200 | 0.0392 | -4.5353 | -13.5199 | 0.9835 | 8.9845 | -160.9253 | -65.6901 | -2.3349 | -2.6709 |
-| 0.0165 | 0.22 | 300 | 0.0186 | -5.9014 | -14.9526 | 0.9939 | 9.0511 | -175.2523 | -79.3511 | -2.1283 | -2.4222 |
-| 0.0016 | 0.29 | 400 | 0.0239 | -7.2028 | -18.5000 | 0.9922 | 11.2973 | -210.7271 | -92.3644 | -1.9903 | -2.3400 |
-| 0.0014 | 0.36 | 500 | 0.0065 | -5.1522 | -15.5259 | 0.9983 | 10.3737 | -180.9857 | -71.8588 | -1.6803 | -2.0312 |
-| 0.0006 | 0.44 | 600 | 0.0038 | -5.0346 | -14.9248 | 0.9991 | 9.8902 | -174.9747 | -70.6829 | -2.1480 | -2.4897 |
-| 0.0003 | 0.51 | 700 | 0.0018 | -5.2732 | -16.3551 | 0.9991 | 11.0819 | -189.2777 | -73.0690 | -1.9657 | -2.3112 |
-| 0.0002 | 0.58 | 800 | 0.0016 | -5.5844 | -16.3258 | 1.0 | 10.7414 | -188.9845 | -76.1804 | -1.9284 | -2.2717 |
-| 0.0004 | 0.66 | 900 | 0.0011 | -5.5422 | -16.3921 | 0.9991 | 10.8499 | -189.6474 | -75.7582 | -1.9152 | -2.2631 |
-| 0.0008 | 0.73 | 1000 | 0.0010 | -5.5160 | -16.3854 | 0.9991 | 10.8694 | -189.5804 | -75.4962 | -1.9168 | -2.2651 |
+| 0.0318 | 0.07 | 100 | 0.0384 | -0.3956 | -7.7708 | 0.9835 | 7.3753 | -111.5607 | -71.2923 | 1.1941 | 1.0925 |
+| 0.0187 | 0.15 | 200 | 0.0196 | -2.0328 | -10.9862 | 0.9922 | 8.9535 | -143.7145 | -87.6645 | -0.8539 | -0.9067 |
+| 0.0101 | 0.22 | 300 | 0.0351 | -2.7345 | -12.1219 | 0.9896 | 9.3874 | -155.0717 | -94.6821 | 0.4420 | 0.5220 |
+| 0.046 | 0.29 | 400 | 0.0199 | -6.6027 | -18.5556 | 0.9922 | 11.9529 | -219.4086 | -133.3638 | -2.3908 | -2.0500 |
+| 0.0005 | 0.36 | 500 | 0.0101 | -6.4299 | -20.5496 | 0.9965 | 14.1197 | -239.3484 | -131.6356 | -1.0029 | -0.6334 |
+| 0.0003 | 0.44 | 600 | 0.0092 | -9.0181 | -23.0513 | 0.9965 | 14.0332 | -264.3652 | -157.5181 | -1.6334 | -1.1488 |
+| 0.0004 | 0.51 | 700 | 0.0043 | -5.7377 | -21.3127 | 0.9991 | 15.5749 | -246.9788 | -124.7142 | -0.8477 | -0.4037 |
+| 0.0001 | 0.58 | 800 | 0.0040 | -8.9021 | -23.9436 | 0.9991 | 15.0415 | -273.2885 | -156.3581 | 0.2782 | 0.8244 |
+| 0.0001 | 0.66 | 900 | 0.0031 | -9.3191 | -24.3563 | 0.9991 | 15.0371 | -277.4149 | -160.5282 | -0.7279 | -0.2168 |
+| 0.002 | 0.73 | 1000 | 0.0066 | -6.8680 | -23.5822 | 0.9974 | 16.7142 | -269.6745 | -136.0172 | -0.6629 | 0.2962 |
+| 0.0002 | 0.8 | 1100 | 0.0015 | -9.1417 | -27.6276 | 0.9991 | 18.4859 | -310.1280 | -158.7536 | -1.2030 | -0.5215 |
+| 0.0823 | 0.87 | 1200 | 0.0057 | -4.4568 | -18.4378 | 0.9974 | 13.9810 | -218.2306 | -111.9051 | 0.2236 | 0.7934 |
+| 0.0 | 0.95 | 1300 | 0.0171 | -8.1530 | -25.5603 | 0.9983 | 17.4073 | -289.4550 | -148.8665 | -1.2413 | -0.9611 |
+| 0.0007 | 1.02 | 1400 | 0.0019 | -7.9402 | -25.1905 | 0.9983 | 17.2503 | -285.7569 | -146.7384 | -1.2325 | -0.8924 |
+| 0.0002 | 1.09 | 1500 | 0.0010 | -8.1543 | -25.2960 | 0.9991 | 17.1417 | -286.8122 | -148.8794 | -1.0005 | -0.6261 |
+| 0.0 | 1.17 | 1600 | 0.0010 | -8.4019 | -25.6275 | 0.9991 | 17.2256 | -290.1275 | -151.3556 | -1.0850 | -0.7170 |
+| 0.0 | 1.24 | 1700 | 0.0011 | -8.8691 | -26.2284 | 0.9991 | 17.3593 | -296.1366 | -156.0278 | -1.1426 | -0.7830 |
+| 0.0 | 1.31 | 1800 | 0.0010 | -9.2896 | -26.9277 | 0.9991 | 17.6381 | -303.1297 | -160.2331 | -1.1169 | -0.7512 |
+| 0.0001 | 1.39 | 1900 | 0.0011 | -9.2869 | -26.9301 | 0.9991 | 17.6432 | -303.1532 | -160.2053 | -1.1213 | -0.7560 |
+| 0.0 | 1.46 | 2000 | 0.0008 | -8.4453 | -25.9094 | 0.9991 | 17.4641 | -292.9459 | -151.7894 | -0.8854 | -0.4791 |
+| 0.0 | 1.53 | 2100 | 0.0007 | -8.4600 | -25.9284 | 0.9991 | 17.4684 | -293.1361 | -151.9364 | -0.8893 | -0.4835 |
+| 0.0 | 1.6 | 2200 | 0.0000 | -8.4501 | -25.9071 | 1.0 | 17.4569 | -292.9228 | -151.8381 | -0.8823 | -0.4759 |
+| 0.0 | 1.68 | 2300 | 0.0000 | -8.4800 | -25.9444 | 1.0 | 17.4644 | -293.2967 | -152.1372 | -0.8982 | -0.4964 |
+| 0.0 | 1.75 | 2400 | 0.0000 | -8.4864 | -25.9459 | 1.0 | 17.4596 | -293.3117 | -152.2005 | -0.9013 | -0.4999 |
+| 0.0 | 1.82 | 2500 | 0.0000 | -8.4849 | -25.9483 | 1.0 | 17.4633 | -293.3352 | -152.1862 | -0.9014 | -0.4994 |
 
 
 ### Framework versions
 
-- Transformers 4.33.1
+- Transformers 4.33.2
 - Pytorch 2.0.1+cu118
 - Datasets 2.14.5
 - Tokenizers 0.13.3
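The updated card stops at the framework versions and shows no usage code, so here is a minimal inference sketch under stated assumptions: the adapter id is taken from this page, `trust_remote_code=True` is what the 4.33-era phi-1_5 remote modeling code required, and the prompt is only a guess from the repo name. For reading the table above: TRL's `Rewards/*` columns are the implicit DPO rewards, i.e. beta-scaled log-probability ratios of the policy against the frozen reference model, so a final margin of about 17 with accuracy 1.0 means every evaluation pair is fully separated.

```python
# Hypothetical usage sketch (not part of this commit): load the base model,
# then attach this repo's LoRA adapter with PEFT.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-1_5",
    trust_remote_code=True,  # phi-1_5 shipped custom modeling code at Transformers 4.33
)
model = PeftModel.from_pretrained(base, "TrevorJS/mtg-phi-1_5-2-dpo")
model.eval()

tokenizer = AutoTokenizer.from_pretrained("TrevorJS/mtg-phi-1_5-2-dpo")
prompt = "Design a blue instant that counters a spell:\n"  # made-up prompt
inputs = tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```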
adapter_config.json CHANGED
@@ -7,20 +7,31 @@
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
-  "lora_alpha": 16,
+  "lora_alpha": 64,
   "lora_dropout": 0.05,
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 8,
+  "r": 64,
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj",
-    "k_proj",
-    "o_proj",
-    "gate_proj",
-    "up_proj",
-    "down_proj"
+    "layers.0.wte",
+    "layers.20.mixer.Wqkv",
+    "layers.21.mixer.Wqkv",
+    "layers.22.mixer.Wqkv",
+    "layers.23.mixer.Wqkv",
+    "layers.24.mixer.Wqkv",
+    "layers.20.mixer.out_proj",
+    "layers.21.mixer.out_proj",
+    "layers.22.mixer.out_proj",
+    "layers.23.mixer.out_proj",
+    "layers.24.mixer.out_proj",
+    "layers.11.mlp.fc1",
+    "layers.17.mlp.fc1",
+    "layers.24.mlp.fc1",
+    "layers.11.mlp.fc2",
+    "layers.17.mlp.fc2",
+    "layers.24.mlp.fc2",
+    "layers.25.linear"
   ],
   "task_type": "CAUSAL_LM"
 }
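The new `target_modules` follow phi-1_5's remote code, which (as of the 4.33-era release) lays the network out as a flat `layers` sequence: the token embedding at `layers.0.wte`, transformer blocks after it, each with a fused attention projection `mixer.Wqkv`, its `mixer.out_proj`, and an `mlp.fc1`/`mlp.fc2` pair, and the LM head's `linear` at index 25. Hence the move away from CodeLlama's per-matrix `q_proj`/`k_proj`/`v_proj` names. A hedged sketch, not from the commit, for discovering these names yourself:

```python
# List the phi-1_5 module names that the new target_modules point at.
# Assumes the 4.33-era remote code; names may differ in later releases.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("microsoft/phi-1_5", trust_remote_code=True)
suffixes = ("mixer.Wqkv", "mixer.out_proj", "mlp.fc1", "mlp.fc2", ".linear", ".wte")
for name, module in model.named_modules():
    if name.endswith(suffixes):
        print(name, "->", type(module).__name__)
```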
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87ea7e6e6940a3bffa6ba4ec1c1a45920c5cee96163f0ebf2df01b1b3bc9a6ab
-size 80122381
+oid sha256:24650f2e31518c8264b666c94a4ceedc45bcfdf7e1cc9a75109160b5cf4b56e9
+size 29373021
added_tokens.json ADDED
@@ -0,0 +1,40 @@
+{
+  "\t\t": 50294,
+  "\t\t\t": 50293,
+  "\t\t\t\t": 50292,
+  "\t\t\t\t\t": 50291,
+  "\t\t\t\t\t\t": 50290,
+  "\t\t\t\t\t\t\t": 50289,
+  "\t\t\t\t\t\t\t\t": 50288,
+  "\t\t\t\t\t\t\t\t\t": 50287,
+  "  ": 50286,
+  "   ": 50285,
+  "    ": 50284,
+  "     ": 50283,
+  "      ": 50282,
+  "       ": 50281,
+  "        ": 50280,
+  "         ": 50279,
+  "          ": 50278,
+  "           ": 50277,
+  "            ": 50276,
+  "             ": 50275,
+  "              ": 50274,
+  "               ": 50273,
+  "                ": 50272,
+  "                 ": 50271,
+  "                  ": 50270,
+  "                   ": 50269,
+  "                    ": 50268,
+  "                     ": 50267,
+  "                      ": 50266,
+  "                       ": 50265,
+  "                        ": 50264,
+  "                         ": 50263,
+  "                          ": 50262,
+  "                           ": 50261,
+  "                            ": 50260,
+  "                             ": 50259,
+  "                              ": 50258,
+  "                               ": 50257
+}
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,30 +1,6 @@
 {
-  "additional_special_tokens": [
-    "▁<PRE>",
-    "▁<MID>",
-    "▁<SUF>",
-    "▁<EOT>"
-  ],
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "</s>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
 }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,44 +1,9 @@
 {
-  "additional_special_tokens": [
-    "▁<PRE>",
-    "▁<MID>",
-    "▁<SUF>",
-    "▁<EOT>"
-  ],
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "clean_up_tokenization_spaces": false,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eot_token": "▁<EOT>",
-  "fill_token": "<FILL_ME>",
-  "legacy": null,
-  "middle_token": "▁<MID>",
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": null,
-  "prefix_token": "▁<PRE>",
-  "sp_model_kwargs": {},
-  "suffix_token": "▁<SUF>",
-  "tokenizer_class": "CodeLlamaTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "use_default_system_prompt": false
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 2048,
+  "tokenizer_class": "CodeGenTokenizer",
+  "unk_token": "<|endoftext|>"
 }
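With `CodeGenTokenizer` in place, all four special tokens resolve to `<|endoftext|>` (id 50256 in the GPT-2/CodeGen vocabulary), and the whitespace-run entries from `added_tokens.json` occupy ids 50257-50294. A quick hedged sanity check, assuming the repo id from this page:

```python
# Sanity-check the swapped tokenizer (sketch, not part of the commit).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("TrevorJS/mtg-phi-1_5-2-dpo")
print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)  # all <|endoftext|>
print(tok.eos_token_id)      # expected 50256
print(tok.model_max_length)  # 2048, per tokenizer_config.json
```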
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0789fb5cf14abad8e57dc19f2dafc11a51d431e860bcf6d1215e733113e8c29
+oid sha256:515ba53b3d6464b6b6b3a1eac8d50fbe3b3fc34481b9a260b8edb4b52ba76de5
 size 4027
vocab.json ADDED
The diff for this file is too large to render. See raw diff