piuzha committed 16 days ago

Commit

a14b4fb

•

0 Parent(s):

upload

Browse files

Files changed (26) hide show

.gitattributes +36 -0
README.md +103 -0
config.json +25 -0
generation_config.json +6 -0
moxin-chat-7b.gguf +3 -0
pytorch_model-00001.bin +3 -0
pytorch_model-00002.bin +3 -0
pytorch_model-00003.bin +3 -0
pytorch_model-00004.bin +3 -0
pytorch_model-00005.bin +3 -0
pytorch_model-00006.bin +3 -0
pytorch_model-00007.bin +3 -0
pytorch_model-00008.bin +3 -0
pytorch_model-00009.bin +3 -0
pytorch_model-00010.bin +3 -0
pytorch_model-00011.bin +3 -0
pytorch_model-00012.bin +3 -0
pytorch_model-00013.bin +3 -0
pytorch_model-00014.bin +3 -0
pytorch_model-00015.bin +3 -0
pytorch_model-00016.bin +3 -0
pytorch_model.bin.index.json +334 -0
special_tokens_map.json +23 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +44 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.gguf filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,103 @@

+---
+license: apache-2.0
+---
+<h1 align="center"> Moxin Chat 7B </h1>
+<p align="center"> <a href="https://github.com/moxin-org/Moxin-LLM">Home Page</a> &nbsp;&nbsp; | &nbsp;&nbsp; <a href="https://github.com/moxin-org/Moxin-LLM/blob/main/report/Moxin_7B.pdf">Technical Report</a> &nbsp;&nbsp; | &nbsp;&nbsp; <a href="https://huggingface.co/moxin-org/moxin-llm-7b">Base Model</a> &nbsp;&nbsp; | &nbsp;&nbsp; <a href="https://huggingface.co/moxin-org/moxin-chat-7b">Chat Model</a>  </p>
+## Model
+You can download our base 7B model from this [link](https://huggingface.co/moxin-org/moxin-llm-7b) and our chat 7B model from this [link](https://huggingface.co/moxin-org/moxin-chat-7b).
+## Inference
+You can use the following code to run inference with the model. The example loads the model from the Hugging Face Hub; to use a local copy instead, set `model_name` to the directory where the model is saved (e.g. './model/').
+```python
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+torch.backends.cuda.enable_mem_efficient_sdp(False)
+torch.backends.cuda.enable_flash_sdp(False)
+model_name = 'moxin-org/moxin-chat-7b'
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+        trust_remote_code=True,
+    )
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer = tokenizer,
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
+)
+prompt = "Can you explain the concept of regularization in machine learning?"
+sequences = pipe(
+    prompt,
+    do_sample=True,
+    max_new_tokens=1000,
+    temperature=0.7,
+    top_k=50,
+    top_p=0.95,
+    num_return_sequences=1,
+)
+print(sequences[0]['generated_text'])
+```
+## Chat template
+## Evaluation
+We test the performance of our model with [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness). The evaluation results on common datasets are shown below. We test on AI2 Reasoning Challenge (25-shot), HellaSwag (10-shot), MMLU (5-shot), and Winogrande (5-shot).
+|          Models         | ARC-C | HellaSwag |  MMLU | WinoGrande |  Ave  |
+|:----------------------:|:-----:|:---------:|:-----:|:---------:|:-----:|
+|    Mistral-7B   | 57.59 |   83.25   | 62.42 |   78.77   | 70.51 |
+|     LLaMA 3.1-8B     | 54.61 |   81.95   | 65.16 |   77.35   | 69.77 |
+|      LLaMA 3-8B      | 55.46 |   82.09   | 65.29 |   77.82   | 70.17 |
+|      LLaMA 2-7B      | 49.74 |   78.94   | 45.89 |   74.27   | 62.21 |
+|       Qwen 2-7B      | 57.68 |   80.76   | 70.42 |   77.43   | 71.57 |
+|       gemma-7b       | 56.48 |   82.31   | 63.02 |    78.3   | 70.03 |
+|    internlm2.5-7b    | 54.78 |    79.7   | 68.17 |    80.9   | 70.89 |
+|     Baichuan2-7B     | 47.87 |   73.89   | 54.13 |    70.8   | 61.67 |
+|        Yi-1.5-9B       | 58.36 |   80.36   | 69.54 |   77.53   | 71.48 |
+|  Moxin-7B-original | 53.75 |   75.46   | 59.43 |   70.32   | 64.74 |
+| Moxin-7B-finetuned | 59.47 |   83.08   | 60.97 |   78.69   | 70.55 |
+We also test the zero-shot performance on AI2 Reasoning Challenge (ARC-C, 0-shot), AI2 Reasoning Easy (ARC-E, 0-shot), HellaSwag (0-shot), PIQA (0-shot), and Winogrande (0-shot). The results are shown below.
+|      Models       	| HellaSwag 	| WinoGrande 	|  PIQA 	| ARC-E 	| ARC-C 	|  Ave  	|
+|:-----------------:|:---------:|:---------:|:-----:|:-----:|:-----:|:-----:|
+| Mistral-7B 	|   80.39   	|    73.4   	| 82.15 	| 78.28 	| 52.22 	| 73.29 	|
+|     LLaMA 2-7B    	|   75.99   	|   69.06   	| 79.11 	| 74.54 	| 46.42 	| 69.02 	|
+|    LLaMA 2-13B    	|   79.37   	|   72.22   	| 80.52 	|  77.4 	| 49.06 	| 71.71 	|
+|    LLaMA 3.1-8B   	|   78.92   	|   74.19   	| 81.12 	| 81.06 	| 53.67 	| 73.79 	|
+|      gemma-7b     	|   80.45   	|   73.72   	|  80.9 	| 79.97 	|  54.1 	| 73.83 	|
+|     Qwen v2-7B    	|    78.9   	|   72.38   	| 79.98 	| 74.71 	| 50.09 	| 71.21 	|
+|   internlm2.5-7b  	|   79.14   	|    77.9   	| 80.52 	| 76.16 	| 51.37 	| 73.02 	|
+|    Baichuan2-7B   	|   72.25   	|   67.17   	| 77.26 	| 72.98 	| 42.15 	| 66.36 	|
+|     Yi-1.5-9B     	|   77.86   	|   73.01   	| 80.74 	| 79.04 	| 55.03 	| 73.14 	|
+|    deepseek-7b    	|   76.13   	|   69.77   	| 79.76 	| 71.04 	|  44.8 	|  68.3 	|
+| Moxin-7B-original 	|   72.06   	|   66.31   	| 78.07 	| 71.47 	| 48.15 	| 67.21 	|
+| Moxin-7B-finetuned 	|   80.03   	|   75.17   	| 82.24 	| 81.12 	| 58.64 	| 75.44 	|

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "Moxin-chat",
+  "architectures": [
+    "MistralForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "model_type": "mistral",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 10000.0,
+  "sliding_window": 4096,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.34.0",
+  "use_cache": true,
+  "vocab_size": 32000
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.34.0"
+}

moxin-chat-7b.gguf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8129919414b519683cd4ebc140ccd19997afb80144960b9304d3c6a15716d163
+size 16229696704

pytorch_model-00001.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1f5702c04992ac171e4e7cd0576099f359888163cd6db2d3f38ceea7ece6897
+size 1017140493

pytorch_model-00002.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e3af6540c7b3281da70181a8d090eae1e274791b08be0f9d39cf93878073f86
+size 1040244373

pytorch_model-00003.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e6db8a91458c8da3d960e98bd5e43175ec4a4422e68cf75c1bc3f97d126bfb9
+size 1023449521

pytorch_model-00004.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e85b29c00c0a564a7019ea74eed40cdef20d4573b5edea2b8996c68c9a3f5f91
+size 989894824

pytorch_model-00005.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c1b2ef96bdc075e68f770271b0c2096d80bef120e37a306ba9c07fc2e508a73
+size 1040244373

pytorch_model-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b16f2d358f2b518996625dcbef50447f5b4cb8d8e5cdbc839aae12eb4f2b7e1
+size 1023449585

pytorch_model-00007.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:340b6ba2d2be2daf6f9b9d64803445b1dc69825a1afe2d7ff8bc360ac01fbc7f
+size 989894824

pytorch_model-00008.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d87fdccfc33d1ab01cc3ae40afbc224b786acea40d63ca86c09a0b3b2012a50c
+size 1040244373

pytorch_model-00009.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5207b89fb981e402f3093d57b9a8b2e9ae42b97c0824066e29c82bf53db4af8d
+size 1023449585

pytorch_model-00010.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5400c092714924f1e7ec332189da56e8853f3be0f4faf9cdd418ca1e114f952
+size 989894824

pytorch_model-00011.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b738633863df5d75ea6cc9a4443c8fe456360e84900377168565aff427003dbc
+size 1040244373

pytorch_model-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ee1ec9e0aba30597659255172fa7fa8425913314448c91246bb073cac900f9a
+size 1023449585

pytorch_model-00013.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52e37238ca11b88f9a031dc63449e1e6dafa46264a2c4cffff52dbb4a79fc388
+size 989894824

pytorch_model-00014.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d60738c69a8bcf9362f794ba982bbfaaaee02e06fd444beb82fe89dff14c2aa3
+size 1040244373

pytorch_model-00015.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3f8c20b21075b3d6e11424a0cb014793aa662a30f93ad1d3d12f80dda7ba670
+size 1023449585

pytorch_model-00016.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4aad5422a6cb5efe3a60790275d16c0041781eabaa73e4392084f827cf1737cc
+size 933278660

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,334 @@

+{
+  "metadata": {
+    "total_size": 15476.5703125
+  },
+  "weight_map": {
+    "model.embed_tokens.weight": "pytorch_model-00001.bin",
+    "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.input_layernorm.weight": "pytorch_model-00001.bin",
+    "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001.bin",
+    "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.1.mlp.down_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.1.input_layernorm.weight": "pytorch_model-00002.bin",
+    "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00002.bin",
+    "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.2.mlp.up_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.2.mlp.down_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.2.input_layernorm.weight": "pytorch_model-00002.bin",
+    "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00002.bin",
+    "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.3.mlp.up_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.3.mlp.down_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.3.input_layernorm.weight": "pytorch_model-00002.bin",
+    "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00002.bin",
+    "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.4.mlp.up_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.4.mlp.down_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.4.input_layernorm.weight": "pytorch_model-00003.bin",
+    "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00003.bin",
+    "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.5.mlp.up_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.5.mlp.down_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.5.input_layernorm.weight": "pytorch_model-00003.bin",
+    "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00003.bin",
+    "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.6.mlp.up_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.6.mlp.down_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.6.input_layernorm.weight": "pytorch_model-00004.bin",
+    "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00004.bin",
+    "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.7.mlp.up_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.7.mlp.down_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.7.input_layernorm.weight": "pytorch_model-00004.bin",
+    "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00004.bin",
+    "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.8.mlp.up_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.8.mlp.down_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.8.input_layernorm.weight": "pytorch_model-00005.bin",
+    "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00005.bin",
+    "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.9.mlp.up_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.9.mlp.down_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.9.input_layernorm.weight": "pytorch_model-00005.bin",
+    "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00005.bin",
+    "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.10.mlp.up_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.10.mlp.down_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.10.input_layernorm.weight": "pytorch_model-00005.bin",
+    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00005.bin",
+    "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.11.mlp.up_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.11.mlp.down_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.11.input_layernorm.weight": "pytorch_model-00006.bin",
+    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00006.bin",
+    "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.12.mlp.up_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.12.mlp.down_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.12.input_layernorm.weight": "pytorch_model-00006.bin",
+    "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00006.bin",
+    "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.13.mlp.up_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.13.mlp.down_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.13.input_layernorm.weight": "pytorch_model-00007.bin",
+    "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00007.bin",
+    "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.mlp.up_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.mlp.down_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.input_layernorm.weight": "pytorch_model-00007.bin",
+    "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00007.bin",
+    "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.15.mlp.up_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.15.mlp.down_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.15.input_layernorm.weight": "pytorch_model-00008.bin",
+    "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00008.bin",
+    "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.16.mlp.up_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.16.mlp.down_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.16.input_layernorm.weight": "pytorch_model-00008.bin",
+    "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00008.bin",
+    "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.17.mlp.up_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.17.mlp.down_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.17.input_layernorm.weight": "pytorch_model-00008.bin",
+    "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00008.bin",
+    "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.18.mlp.up_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.18.mlp.down_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.18.input_layernorm.weight": "pytorch_model-00009.bin",
+    "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00009.bin",
+    "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.19.mlp.up_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.19.mlp.down_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.19.input_layernorm.weight": "pytorch_model-00009.bin",
+    "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00009.bin",
+    "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.20.mlp.up_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.20.mlp.down_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.20.input_layernorm.weight": "pytorch_model-00010.bin",
+    "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00010.bin",
+    "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.21.mlp.up_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.21.mlp.down_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.21.input_layernorm.weight": "pytorch_model-00010.bin",
+    "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00010.bin",
+    "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.22.input_layernorm.weight": "pytorch_model-00011.bin",
+    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00011.bin",
+    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.23.input_layernorm.weight": "pytorch_model-00011.bin",
+    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00011.bin",
+    "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.24.mlp.up_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.24.mlp.down_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.24.input_layernorm.weight": "pytorch_model-00011.bin",
+    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00011.bin",
+    "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.25.mlp.up_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.25.mlp.down_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.25.input_layernorm.weight": "pytorch_model-00012.bin",
+    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00012.bin",
+    "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.26.mlp.up_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.26.mlp.down_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.26.input_layernorm.weight": "pytorch_model-00012.bin",
+    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00012.bin",
+    "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.27.mlp.up_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.27.mlp.down_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.27.input_layernorm.weight": "pytorch_model-00013.bin",
+    "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00013.bin",
+    "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.28.mlp.up_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.28.mlp.down_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.28.input_layernorm.weight": "pytorch_model-00013.bin",
+    "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00013.bin",
+    "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.29.mlp.up_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.29.mlp.down_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.29.input_layernorm.weight": "pytorch_model-00014.bin",
+    "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00014.bin",
+    "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.30.mlp.up_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.30.mlp.down_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.30.input_layernorm.weight": "pytorch_model-00014.bin",
+    "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00014.bin",
+    "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.31.mlp.up_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.31.mlp.down_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.31.input_layernorm.weight": "pytorch_model-00014.bin",
+    "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00014.bin",
+    "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.32.mlp.gate_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.32.mlp.up_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.32.mlp.down_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.32.input_layernorm.weight": "pytorch_model-00015.bin",
+    "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00015.bin",
+    "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.33.mlp.gate_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.33.mlp.up_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.33.mlp.down_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.33.input_layernorm.weight": "pytorch_model-00015.bin",
+    "model.layers.33.post_attention_layernorm.weight": "pytorch_model-00015.bin",
+    "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.34.mlp.gate_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.34.mlp.up_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.34.mlp.down_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.34.input_layernorm.weight": "pytorch_model-00016.bin",
+    "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00016.bin",
+    "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.35.mlp.gate_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.35.mlp.up_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.35.mlp.down_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.35.input_layernorm.weight": "pytorch_model-00016.bin",
+    "model.layers.35.post_attention_layernorm.weight": "pytorch_model-00016.bin",
+    "model.norm.weight": "pytorch_model-00016.bin",
+    "lm_head.weight": "pytorch_model-00016.bin"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "chat_template": "{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n    {%- endif %}\n    {%- if message['role'] == 'user' %}\n        {%- if loop.first and system_message is defined %}\n            {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n        {%- else %}\n            {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {{- ' ' + message['content'] + eos_token}}\n    {%- else %}\n        {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n    {%- endif %}\n{%- endfor %}\n",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}