zzzmahesh committed
Commit 938d259 · verified · 1 Parent(s): a242e04

Upload LlamaForCausalLM
README.md CHANGED
@@ -1,13 +1,13 @@
 ---
+base_model: meta-llama/Meta-Llama-3-8B-Instruct
+language:
+- en
+license: llama3.1
+pipeline_tag: text-generation
 tags:
 - int8
 - w8a8
 - text-generation
-language:
-- en
-pipeline_tag: text-generation
-license: llama3.1
-base_model: meta-llama/Meta-Llama-3-8B-Instruct
 ---
 
 # Meta-Llama-3-8B-Instruct-quantized.w4a4
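The README change above only reorders the YAML front-matter keys alphabetically; no metadata is added or removed. A minimal sketch (assuming PyYAML is available) showing that the two orderings parse to the same mapping:

```python
import yaml

old_front_matter = """
tags:
- int8
- w8a8
- text-generation
language:
- en
pipeline_tag: text-generation
license: llama3.1
base_model: meta-llama/Meta-Llama-3-8B-Instruct
"""

new_front_matter = """
base_model: meta-llama/Meta-Llama-3-8B-Instruct
language:
- en
license: llama3.1
pipeline_tag: text-generation
tags:
- int8
- w8a8
- text-generation
"""

# YAML mappings are unordered, so the reordering is semantically a no-op.
assert yaml.safe_load(old_front_matter) == yaml.safe_load(new_front_matter)
```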
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "./Meta-Llama-3-8B-Instruct-quantized.w8a8",
+  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -18,36 +18,12 @@
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
-  "quantization_config": {
-    "batch_size": 1,
-    "bits": 8,
-    "block_name_to_quantize": null,
-    "cache_block_outputs": true,
-    "damp_percent": 0.01,
-    "dataset": null,
-    "desc_act": true,
-    "exllama_config": {
-      "version": 1
-    },
-    "group_size": -1,
-    "max_input_length": null,
-    "model_seqlen": null,
-    "module_name_preceding_first_block": null,
-    "modules_in_block_to_quantize": null,
-    "pad_token_id": null,
-    "quant_method": "gptq",
-    "sym": true,
-    "tokenizer": null,
-    "true_sequential": true,
-    "use_cuda_fp16": false,
-    "use_exllama": true
-  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.42.4",
+  "transformers_version": "4.43.4",
   "use_cache": true,
   "vocab_size": 128256
 }
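Two things change in config.json: `_name_or_path` now points at the upstream base model instead of a local w8a8 directory, and the GPTQ `quantization_config` block is removed, so `transformers` will no longer route this checkpoint through its GPTQ loading path. A minimal sketch of what that implies at load time; the repo id below is hypothetical and stands in for wherever this commit lives:

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "zzzmahesh/Meta-Llama-3-8B-Instruct-quantized.w4a4"  # hypothetical repo id

config = AutoConfig.from_pretrained(repo_id)
# After this commit the config carries no quantization_config, so the
# GPTQ backend in transformers is never initialized for these weights.
assert getattr(config, "quantization_config", None) is None

# The checkpoint therefore loads as an ordinary LlamaForCausalLM,
# matching "torch_dtype": "float16" in the config.
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.float16)
```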
generation_config.json CHANGED
@@ -1,6 +1,12 @@
 {
-  "_from_model_config": true,
   "bos_token_id": 128000,
-  "eos_token_id": 128009,
-  "transformers_version": "4.42.4"
+  "do_sample": true,
+  "eos_token_id": [
+    128001,
+    128009
+  ],
+  "max_length": 4096,
+  "temperature": 0.6,
+  "top_p": 0.9,
+  "transformers_version": "4.43.4"
 }
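generation_config.json moves from a single stop token to the standard Llama-3-Instruct sampling setup: `do_sample` with temperature 0.6 and top-p 0.9, and both 128001 (`<|end_of_text|>`) and 128009 (`<|eot_id|>`) as stop ids. A minimal sketch of how `generate` picks these defaults up when no overrides are passed (same hypothetical repo id as above):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "zzzmahesh/Meta-Llama-3-8B-Instruct-quantized.w4a4"  # hypothetical repo id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.float16)

messages = [{"role": "user", "content": "Give a one-line summary of Llama 3."}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)

# With no sampling arguments given, generate() falls back to the defaults in
# generation_config.json: do_sample=True, temperature=0.6, top_p=0.9,
# eos_token_id=[128001, 128009].
outputs = model.generate(inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```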
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5f3b86d455d87619e371e85a64e7b468e6ff0cf3dd93c4e40acef3644e70181
-size 4986235568
+oid sha256:f38dc229cd79c33c127a791782f9e78039fc57d10cc5148b23206de1d95e097a
+size 4999427512
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:306b103893f9aae093fa46f957c893ee07337b704de597333dee083f72dd94e8
-size 4104182696
+oid sha256:1eaf3136acc221ee97d29dbfce52e7abce46501745337fe6da05f5e13fd89fbb
+size 3559551840
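Both shard diffs touch only git-lfs pointer files, which identify the stored object by its sha256 digest and byte size. A minimal sketch that re-verifies locally downloaded shards against the pointer fields from this commit:

```python
import hashlib
from pathlib import Path

# Pointer fields copied from the commit above.
EXPECTED = {
    "model-00001-of-00002.safetensors": (
        "f38dc229cd79c33c127a791782f9e78039fc57d10cc5148b23206de1d95e097a",
        4999427512,
    ),
    "model-00002-of-00002.safetensors": (
        "1eaf3136acc221ee97d29dbfce52e7abce46501745337fe6da05f5e13fd89fbb",
        3559551840,
    ),
}

def verify(path: Path, sha256: str, size: int) -> None:
    assert path.stat().st_size == size, f"size mismatch for {path.name}"
    h = hashlib.sha256()
    with path.open("rb") as f:
        # Hash in 1 MiB chunks; the shards are multi-GB files.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    assert h.hexdigest() == sha256, f"sha256 mismatch for {path.name}"

for name, (digest, size) in EXPECTED.items():
    verify(Path(name), digest, size)
    print(f"{name}: OK")
```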
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
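model.safetensors.index.json maps every tensor name to the shard that stores it, so a re-shard like this one rewrites most of the file, which is why the diff is too large to render. A minimal sketch for inspecting the new index after downloading it locally:

```python
import json
from collections import Counter
from pathlib import Path

index = json.loads(Path("model.safetensors.index.json").read_text())

# Total checkpoint size in bytes, from the standard "metadata" block.
print(index["metadata"]["total_size"])

# Tensor count per shard, from the "weight_map" block.
print(Counter(index["weight_map"].values()))
```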