TrevorJS committed on
Commit
7377daf
β€’
1 Parent(s): 5ce43aa

TrevorJS/mtg-code-llama-7b-dpo

README.md ADDED
@@ -0,0 +1,77 @@
+ ---
+ license: llama2
+ base_model: codellama/CodeLlama-7b-hf
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: dpo
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # dpo
+
+ This model is a fine-tuned version of [codellama/CodeLlama-7b-hf](https://huggingface.co/codellama/CodeLlama-7b-hf) on an unspecified dataset.
+ It achieves the following results on the evaluation set (the note after this list explains how the DPO reward metrics are defined):
+ - Loss: 0.0010
+ - Rewards/chosen: -5.5160
+ - Rewards/rejected: -16.3854
+ - Rewards/accuracies: 0.9991
+ - Rewards/margins: 10.8694
+ - Logps/rejected: -189.5804
+ - Logps/chosen: -75.4962
+ - Logits/rejected: -1.9168
+ - Logits/chosen: -2.2651
+
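+ For reference, the `Rewards/*` metrics follow the DPO convention: a completion's implicit reward is `beta * (logp_policy - logp_ref)`, `Rewards/margins` is the mean chosen-minus-rejected gap, and `Rewards/accuracies` is the fraction of pairs where the chosen completion scores higher. A minimal sketch (`beta=0.1` is TRL's default and an assumption here; the log-prob values are illustrative, not from this run):
+
+ ```python
+ # Sketch of the DPO implicit reward behind the Rewards/* metrics
+ # (Rafailov et al., 2023). All values below are illustrative.
+ import torch
+
+ def implicit_reward(policy_logps, ref_logps, beta=0.1):
+     # reward = beta * (log pi_theta(y|x) - log pi_ref(y|x))
+     return beta * (policy_logps - ref_logps)
+
+ chosen = implicit_reward(torch.tensor([-75.5]), torch.tensor([-20.0]))
+ rejected = implicit_reward(torch.tensor([-189.6]), torch.tensor([-25.0]))
+ margin = (chosen - rejected).mean()            # cf. Rewards/margins
+ accuracy = (chosen > rejected).float().mean()  # cf. Rewards/accuracies
+ ```
+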
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training; a hedged sketch of the equivalent `TrainingArguments` follows the list:
+ - learning_rate: 0.0005
+ - train_batch_size: 4
+ - eval_batch_size: 1
+ - seed: 42
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 16
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 100
+ - training_steps: 1000
+
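+ A hedged reconstruction of the run configuration implied by the list above (the actual training script is not part of this commit, and the use of `trl`'s `DPOTrainer` is an assumption based on the reported `Rewards/*` metrics):
+
+ ```python
+ # Hedged sketch: TrainingArguments matching the hyperparameters above.
+ # Adam betas=(0.9, 0.999) and epsilon=1e-08 are the transformers defaults.
+ from transformers import TrainingArguments
+
+ training_args = TrainingArguments(
+     output_dir="dpo",
+     learning_rate=5e-4,
+     per_device_train_batch_size=4,
+     per_device_eval_batch_size=1,
+     gradient_accumulation_steps=4,  # 4 x 4 = 16 total train batch size
+     seed=42,
+     lr_scheduler_type="cosine",
+     warmup_steps=100,
+     max_steps=1000,
+ )
+ # trainer = DPOTrainer(model, ref_model, args=training_args,
+ #                      train_dataset=..., tokenizer=tokenizer)  # trl, assumed
+ ```
+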
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+ | 0.0937        | 0.07  | 100  | 0.1058          | -5.7722        | -13.1291         | 0.9515             | 7.3569          | -157.0180      | -78.0587     | -1.5497         | -1.7945       |
+ | 0.0442        | 0.15  | 200  | 0.0392          | -4.5353        | -13.5199         | 0.9835             | 8.9845          | -160.9253      | -65.6901     | -2.3349         | -2.6709       |
+ | 0.0165        | 0.22  | 300  | 0.0186          | -5.9014        | -14.9526         | 0.9939             | 9.0511          | -175.2523      | -79.3511     | -2.1283         | -2.4222       |
+ | 0.0016        | 0.29  | 400  | 0.0239          | -7.2028        | -18.5000         | 0.9922             | 11.2973         | -210.7271      | -92.3644     | -1.9903         | -2.3400       |
+ | 0.0014        | 0.36  | 500  | 0.0065          | -5.1522        | -15.5259         | 0.9983             | 10.3737         | -180.9857      | -71.8588     | -1.6803         | -2.0312       |
+ | 0.0006        | 0.44  | 600  | 0.0038          | -5.0346        | -14.9248         | 0.9991             | 9.8902          | -174.9747      | -70.6829     | -2.1480         | -2.4897       |
+ | 0.0003        | 0.51  | 700  | 0.0018          | -5.2732        | -16.3551         | 0.9991             | 11.0819         | -189.2777      | -73.0690     | -1.9657         | -2.3112       |
+ | 0.0002        | 0.58  | 800  | 0.0016          | -5.5844        | -16.3258         | 1.0                | 10.7414         | -188.9845      | -76.1804     | -1.9284         | -2.2717       |
+ | 0.0004        | 0.66  | 900  | 0.0011          | -5.5422        | -16.3921         | 0.9991             | 10.8499         | -189.6474      | -75.7582     | -1.9152         | -2.2631       |
+ | 0.0008        | 0.73  | 1000 | 0.0010          | -5.5160        | -16.3854         | 0.9991             | 10.8694         | -189.5804      | -75.4962     | -1.9168         | -2.2651       |
+
+
+ ### Framework versions
+
+ - Transformers 4.33.1
+ - Pytorch 2.0.1+cu118
+ - Datasets 2.14.5
+ - Tokenizers 0.13.3
adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": null,
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "v_proj",
+     "k_proj",
+     "o_proj",
+     "gate_proj",
+     "up_proj",
+     "down_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
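
The config above describes a LoRA adapter (r=8, alpha=16, dropout 0.05) over all attention and MLP projection matrices. Since `base_model_name_or_path` is `null`, the base model must be supplied explicitly when loading; a minimal sketch (the repo id is inferred from this repository's name):

```python
# Minimal sketch for loading this LoRA adapter with peft.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-hf", torch_dtype=torch.float16, device_map="auto"
)
model = PeftModel.from_pretrained(base, "TrevorJS/mtg-code-llama-7b-dpo")
tokenizer = AutoTokenizer.from_pretrained("TrevorJS/mtg-code-llama-7b-dpo")
```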
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87ea7e6e6940a3bffa6ba4ec1c1a45920c5cee96163f0ebf2df01b1b3bc9a6ab
+ size 80122381
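
The file above is a git-lfs pointer; the ~80 MB adapter weights themselves live in LFS storage. A quick way to confirm a download matches the pointer's digest:

```python
# Verify a downloaded adapter_model.bin against the LFS pointer's sha256.
import hashlib

expected = "87ea7e6e6940a3bffa6ba4ec1c1a45920c5cee96163f0ebf2df01b1b3bc9a6ab"
with open("adapter_model.bin", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert digest == expected, "checksum mismatch (pointer-only or partial download?)"
```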
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "additional_special_tokens": [
+     "▁<PRE>",
+     "▁<MID>",
+     "▁<SUF>",
+     "▁<EOT>"
+   ],
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "</s>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "additional_special_tokens": [
+     "▁<PRE>",
+     "▁<MID>",
+     "▁<SUF>",
+     "▁<EOT>"
+   ],
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": false,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eot_token": "▁<EOT>",
+   "fill_token": "<FILL_ME>",
+   "legacy": null,
+   "middle_token": "▁<MID>",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": null,
+   "prefix_token": "▁<PRE>",
+   "sp_model_kwargs": {},
+   "suffix_token": "▁<SUF>",
+   "tokenizer_class": "CodeLlamaTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "use_default_system_prompt": false
+ }
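
The `prefix_token`/`middle_token`/`suffix_token` entries plus `fill_token: "<FILL_ME>"` mean the tokenizer supports CodeLlama's fill-in-the-middle format: a prompt containing `<FILL_ME>` is rewritten into the `▁<PRE>`/`▁<SUF>`/`▁<MID>` infilling layout. A sketch (the prompt is illustrative, and whether this DPO adapter preserves infilling quality is untested here):

```python
# Sketch of CodeLlama fill-in-the-middle via the <FILL_ME> sentinel.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("TrevorJS/mtg-code-llama-7b-dpo")
prompt = "def remove_non_ascii(s: str) -> str:\n    <FILL_ME>\n    return result"
inputs = tokenizer(prompt, return_tensors="pt")
# generated = model.generate(**inputs, max_new_tokens=64)  # model from the peft sketch above
```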
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e0789fb5cf14abad8e57dc19f2dafc11a51d431e860bcf6d1215e733113e8c29
+ size 4027
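
`training_args.bin` is the `TrainingArguments` object the `Trainer` saves alongside its outputs; it can be inspected directly (it is a pickle, so only load files you trust, with a compatible transformers version installed):

```python
# Inspect the serialized TrainingArguments (pickle: load only trusted files).
import torch

args = torch.load("training_args.bin")
print(args.learning_rate, args.max_steps, args.lr_scheduler_type)
```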