TrevorJS/mtg-phi-1_5-2-dpo
Browse files- README.md +39 -24
- adapter_config.json +20 -9
- adapter_model.bin +2 -2
- added_tokens.json +40 -0
- merges.txt +0 -0
- special_tokens_map.json +4 -28
- tokenizer.json +0 -0
- tokenizer_config.json +7 -42
- training_args.bin +1 -1
- vocab.json +0 -0
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
license:
|
3 |
-
base_model:
|
4 |
tags:
|
5 |
- generated_from_trainer
|
6 |
model-index:
|
@@ -13,17 +13,17 @@ should probably proofread and complete it, then remove this comment. -->
|
|
13 |
|
14 |
# dpo
|
15 |
|
16 |
-
This model is a fine-tuned version of [
|
17 |
It achieves the following results on the evaluation set:
|
18 |
-
- Loss: 0.
|
19 |
-
- Rewards/chosen: -
|
20 |
-
- Rewards/rejected: -
|
21 |
-
- Rewards/accuracies: 0
|
22 |
-
- Rewards/margins:
|
23 |
-
- Logps/rejected: -
|
24 |
-
- Logps/chosen: -
|
25 |
-
- Logits/rejected: -
|
26 |
-
- Logits/chosen: -
|
27 |
|
28 |
## Model description
|
29 |
|
@@ -51,27 +51,42 @@ The following hyperparameters were used during training:
|
|
51 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
52 |
- lr_scheduler_type: cosine
|
53 |
- lr_scheduler_warmup_steps: 100
|
54 |
-
- training_steps:
|
55 |
|
56 |
### Training results
|
57 |
|
58 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
59 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
60 |
-
| 0.
|
61 |
-
| 0.
|
62 |
-
| 0.
|
63 |
-
| 0.
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
-
| 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
|
72 |
### Framework versions
|
73 |
|
74 |
-
- Transformers 4.33.
|
75 |
- Pytorch 2.0.1+cu118
|
76 |
- Datasets 2.14.5
|
77 |
- Tokenizers 0.13.3
|
|
|
1 |
---
|
2 |
+
license: other
|
3 |
+
base_model: microsoft/phi-1_5
|
4 |
tags:
|
5 |
- generated_from_trainer
|
6 |
model-index:
|
|
|
13 |
|
14 |
# dpo
|
15 |
|
16 |
+
This model is a fine-tuned version of [microsoft/phi-1_5](https://huggingface.co/microsoft/phi-1_5) on the None dataset.
|
17 |
It achieves the following results on the evaluation set:
|
18 |
+
- Loss: 0.0000
|
19 |
+
- Rewards/chosen: -8.4849
|
20 |
+
- Rewards/rejected: -25.9483
|
21 |
+
- Rewards/accuracies: 1.0
|
22 |
+
- Rewards/margins: 17.4633
|
23 |
+
- Logps/rejected: -293.3352
|
24 |
+
- Logps/chosen: -152.1862
|
25 |
+
- Logits/rejected: -0.9014
|
26 |
+
- Logits/chosen: -0.4994
|
27 |
|
28 |
## Model description
|
29 |
|
|
|
51 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
52 |
- lr_scheduler_type: cosine
|
53 |
- lr_scheduler_warmup_steps: 100
|
54 |
+
- training_steps: 2500
|
55 |
|
56 |
### Training results
|
57 |
|
58 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
59 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
60 |
+
| 0.0318 | 0.07 | 100 | 0.0384 | -0.3956 | -7.7708 | 0.9835 | 7.3753 | -111.5607 | -71.2923 | 1.1941 | 1.0925 |
|
61 |
+
| 0.0187 | 0.15 | 200 | 0.0196 | -2.0328 | -10.9862 | 0.9922 | 8.9535 | -143.7145 | -87.6645 | -0.8539 | -0.9067 |
|
62 |
+
| 0.0101 | 0.22 | 300 | 0.0351 | -2.7345 | -12.1219 | 0.9896 | 9.3874 | -155.0717 | -94.6821 | 0.4420 | 0.5220 |
|
63 |
+
| 0.046 | 0.29 | 400 | 0.0199 | -6.6027 | -18.5556 | 0.9922 | 11.9529 | -219.4086 | -133.3638 | -2.3908 | -2.0500 |
|
64 |
+
| 0.0005 | 0.36 | 500 | 0.0101 | -6.4299 | -20.5496 | 0.9965 | 14.1197 | -239.3484 | -131.6356 | -1.0029 | -0.6334 |
|
65 |
+
| 0.0003 | 0.44 | 600 | 0.0092 | -9.0181 | -23.0513 | 0.9965 | 14.0332 | -264.3652 | -157.5181 | -1.6334 | -1.1488 |
|
66 |
+
| 0.0004 | 0.51 | 700 | 0.0043 | -5.7377 | -21.3127 | 0.9991 | 15.5749 | -246.9788 | -124.7142 | -0.8477 | -0.4037 |
|
67 |
+
| 0.0001 | 0.58 | 800 | 0.0040 | -8.9021 | -23.9436 | 0.9991 | 15.0415 | -273.2885 | -156.3581 | 0.2782 | 0.8244 |
|
68 |
+
| 0.0001 | 0.66 | 900 | 0.0031 | -9.3191 | -24.3563 | 0.9991 | 15.0371 | -277.4149 | -160.5282 | -0.7279 | -0.2168 |
|
69 |
+
| 0.002 | 0.73 | 1000 | 0.0066 | -6.8680 | -23.5822 | 0.9974 | 16.7142 | -269.6745 | -136.0172 | -0.6629 | 0.2962 |
|
70 |
+
| 0.0002 | 0.8 | 1100 | 0.0015 | -9.1417 | -27.6276 | 0.9991 | 18.4859 | -310.1280 | -158.7536 | -1.2030 | -0.5215 |
|
71 |
+
| 0.0823 | 0.87 | 1200 | 0.0057 | -4.4568 | -18.4378 | 0.9974 | 13.9810 | -218.2306 | -111.9051 | 0.2236 | 0.7934 |
|
72 |
+
| 0.0 | 0.95 | 1300 | 0.0171 | -8.1530 | -25.5603 | 0.9983 | 17.4073 | -289.4550 | -148.8665 | -1.2413 | -0.9611 |
|
73 |
+
| 0.0007 | 1.02 | 1400 | 0.0019 | -7.9402 | -25.1905 | 0.9983 | 17.2503 | -285.7569 | -146.7384 | -1.2325 | -0.8924 |
|
74 |
+
| 0.0002 | 1.09 | 1500 | 0.0010 | -8.1543 | -25.2960 | 0.9991 | 17.1417 | -286.8122 | -148.8794 | -1.0005 | -0.6261 |
|
75 |
+
| 0.0 | 1.17 | 1600 | 0.0010 | -8.4019 | -25.6275 | 0.9991 | 17.2256 | -290.1275 | -151.3556 | -1.0850 | -0.7170 |
|
76 |
+
| 0.0 | 1.24 | 1700 | 0.0011 | -8.8691 | -26.2284 | 0.9991 | 17.3593 | -296.1366 | -156.0278 | -1.1426 | -0.7830 |
|
77 |
+
| 0.0 | 1.31 | 1800 | 0.0010 | -9.2896 | -26.9277 | 0.9991 | 17.6381 | -303.1297 | -160.2331 | -1.1169 | -0.7512 |
|
78 |
+
| 0.0001 | 1.39 | 1900 | 0.0011 | -9.2869 | -26.9301 | 0.9991 | 17.6432 | -303.1532 | -160.2053 | -1.1213 | -0.7560 |
|
79 |
+
| 0.0 | 1.46 | 2000 | 0.0008 | -8.4453 | -25.9094 | 0.9991 | 17.4641 | -292.9459 | -151.7894 | -0.8854 | -0.4791 |
|
80 |
+
| 0.0 | 1.53 | 2100 | 0.0007 | -8.4600 | -25.9284 | 0.9991 | 17.4684 | -293.1361 | -151.9364 | -0.8893 | -0.4835 |
|
81 |
+
| 0.0 | 1.6 | 2200 | 0.0000 | -8.4501 | -25.9071 | 1.0 | 17.4569 | -292.9228 | -151.8381 | -0.8823 | -0.4759 |
|
82 |
+
| 0.0 | 1.68 | 2300 | 0.0000 | -8.4800 | -25.9444 | 1.0 | 17.4644 | -293.2967 | -152.1372 | -0.8982 | -0.4964 |
|
83 |
+
| 0.0 | 1.75 | 2400 | 0.0000 | -8.4864 | -25.9459 | 1.0 | 17.4596 | -293.3117 | -152.2005 | -0.9013 | -0.4999 |
|
84 |
+
| 0.0 | 1.82 | 2500 | 0.0000 | -8.4849 | -25.9483 | 1.0 | 17.4633 | -293.3352 | -152.1862 | -0.9014 | -0.4994 |
|
85 |
|
86 |
|
87 |
### Framework versions
|
88 |
|
89 |
+
- Transformers 4.33.2
|
90 |
- Pytorch 2.0.1+cu118
|
91 |
- Datasets 2.14.5
|
92 |
- Tokenizers 0.13.3
|
adapter_config.json
CHANGED
@@ -7,20 +7,31 @@
|
|
7 |
"init_lora_weights": true,
|
8 |
"layers_pattern": null,
|
9 |
"layers_to_transform": null,
|
10 |
-
"lora_alpha":
|
11 |
"lora_dropout": 0.05,
|
12 |
"modules_to_save": null,
|
13 |
"peft_type": "LORA",
|
14 |
-
"r":
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"
|
18 |
-
"
|
19 |
-
"
|
20 |
-
"
|
21 |
-
"
|
22 |
-
"
|
23 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
7 |
"init_lora_weights": true,
|
8 |
"layers_pattern": null,
|
9 |
"layers_to_transform": null,
|
10 |
+
"lora_alpha": 64,
|
11 |
"lora_dropout": 0.05,
|
12 |
"modules_to_save": null,
|
13 |
"peft_type": "LORA",
|
14 |
+
"r": 64,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
+
"layers.0.wte",
|
18 |
+
"layers.20.mixer.Wqkv",
|
19 |
+
"layers.21.mixer.Wqkv",
|
20 |
+
"layers.22.mixer.Wqkv",
|
21 |
+
"layers.23.mixer.Wqkv",
|
22 |
+
"layers.24.mixer.Wqkv",
|
23 |
+
"layers.20.mixer.out_proj",
|
24 |
+
"layers.21.mixer.out_proj",
|
25 |
+
"layers.22.mixer.out_proj",
|
26 |
+
"layers.23.mixer.out_proj",
|
27 |
+
"layers.24.mixer.out_proj",
|
28 |
+
"layers.11.mlp.fc1",
|
29 |
+
"layers.17.mlp.fc1",
|
30 |
+
"layers.24.mlp.fc1",
|
31 |
+
"layers.11.mlp.fc2",
|
32 |
+
"layers.17.mlp.fc2",
|
33 |
+
"layers.24.mlp.fc2",
|
34 |
+
"layers.25.linear"
|
35 |
],
|
36 |
"task_type": "CAUSAL_LM"
|
37 |
}
|
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24650f2e31518c8264b666c94a4ceedc45bcfdf7e1cc9a75109160b5cf4b56e9
|
3 |
+
size 29373021
|
added_tokens.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"\t\t": 50294,
|
3 |
+
"\t\t\t": 50293,
|
4 |
+
"\t\t\t\t": 50292,
|
5 |
+
"\t\t\t\t\t": 50291,
|
6 |
+
"\t\t\t\t\t\t": 50290,
|
7 |
+
"\t\t\t\t\t\t\t": 50289,
|
8 |
+
"\t\t\t\t\t\t\t\t": 50288,
|
9 |
+
"\t\t\t\t\t\t\t\t\t": 50287,
|
10 |
+
"  ": 50286,
|
11 |
+
"   ": 50285,
|
12 |
+
"    ": 50284,
|
13 |
+
"     ": 50283,
|
14 |
+
"      ": 50282,
|
15 |
+
"       ": 50281,
|
16 |
+
"        ": 50280,
|
17 |
+
"         ": 50279,
|
18 |
+
"          ": 50278,
|
19 |
+
"           ": 50277,
|
20 |
+
"            ": 50276,
|
21 |
+
"             ": 50275,
|
22 |
+
"              ": 50274,
|
23 |
+
"               ": 50273,
|
24 |
+
"                ": 50272,
|
25 |
+
"                 ": 50271,
|
26 |
+
"                  ": 50270,
|
27 |
+
"                   ": 50269,
|
28 |
+
"                    ": 50268,
|
29 |
+
"                     ": 50267,
|
30 |
+
"                      ": 50266,
|
31 |
+
"                       ": 50265,
|
32 |
+
"                        ": 50264,
|
33 |
+
"                         ": 50263,
|
34 |
+
"                          ": 50262,
|
35 |
+
"                           ": 50261,
|
36 |
+
"                            ": 50260,
|
37 |
+
"                             ": 50259,
|
38 |
+
"                              ": 50258,
|
39 |
+
"                               ": 50257
|
40 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
CHANGED
@@ -1,30 +1,6 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
"▁<EOT>"
|
7 |
-
],
|
8 |
-
"bos_token": {
|
9 |
-
"content": "<s>",
|
10 |
-
"lstrip": false,
|
11 |
-
"normalized": true,
|
12 |
-
"rstrip": false,
|
13 |
-
"single_word": false
|
14 |
-
},
|
15 |
-
"eos_token": {
|
16 |
-
"content": "</s>",
|
17 |
-
"lstrip": false,
|
18 |
-
"normalized": true,
|
19 |
-
"rstrip": false,
|
20 |
-
"single_word": false
|
21 |
-
},
|
22 |
-
"pad_token": "</s>",
|
23 |
-
"unk_token": {
|
24 |
-
"content": "<unk>",
|
25 |
-
"lstrip": false,
|
26 |
-
"normalized": true,
|
27 |
-
"rstrip": false,
|
28 |
-
"single_word": false
|
29 |
-
}
|
30 |
}
|
|
|
1 |
{
|
2 |
+
"bos_token": "<|endoftext|>",
|
3 |
+
"eos_token": "<|endoftext|>",
|
4 |
+
"pad_token": "<|endoftext|>",
|
5 |
+
"unk_token": "<|endoftext|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
}
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
CHANGED
@@ -1,44 +1,9 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
"
|
9 |
-
"__type": "AddedToken",
|
10 |
-
"content": "<s>",
|
11 |
-
"lstrip": false,
|
12 |
-
"normalized": true,
|
13 |
-
"rstrip": false,
|
14 |
-
"single_word": false
|
15 |
-
},
|
16 |
-
"clean_up_tokenization_spaces": false,
|
17 |
-
"eos_token": {
|
18 |
-
"__type": "AddedToken",
|
19 |
-
"content": "</s>",
|
20 |
-
"lstrip": false,
|
21 |
-
"normalized": true,
|
22 |
-
"rstrip": false,
|
23 |
-
"single_word": false
|
24 |
-
},
|
25 |
-
"eot_token": "▁<EOT>",
|
26 |
-
"fill_token": "<FILL_ME>",
|
27 |
-
"legacy": null,
|
28 |
-
"middle_token": "▁<MID>",
|
29 |
-
"model_max_length": 1000000000000000019884624838656,
|
30 |
-
"pad_token": null,
|
31 |
-
"prefix_token": "▁<PRE>",
|
32 |
-
"sp_model_kwargs": {},
|
33 |
-
"suffix_token": "▁<SUF>",
|
34 |
-
"tokenizer_class": "CodeLlamaTokenizer",
|
35 |
-
"unk_token": {
|
36 |
-
"__type": "AddedToken",
|
37 |
-
"content": "<unk>",
|
38 |
-
"lstrip": false,
|
39 |
-
"normalized": true,
|
40 |
-
"rstrip": false,
|
41 |
-
"single_word": false
|
42 |
-
},
|
43 |
-
"use_default_system_prompt": false
|
44 |
}
|
|
|
1 |
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": "<|endoftext|>",
|
4 |
+
"clean_up_tokenization_spaces": true,
|
5 |
+
"eos_token": "<|endoftext|>",
|
6 |
+
"model_max_length": 2048,
|
7 |
+
"tokenizer_class": "CodeGenTokenizer",
|
8 |
+
"unk_token": "<|endoftext|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:515ba53b3d6464b6b6b3a1eac8d50fbe3b3fc34481b9a260b8edb4b52ba76de5
|
3 |
size 4027
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|