Malekhmem committed on
Commit 4d17494 · verified · 1 Parent(s): 51f4653
Files changed (1)
  1. config.json +12 -2
config.json CHANGED
@@ -21,5 +21,15 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.42.0.dev0",
   "use_cache": true,
-  "vocab_size": 32768
-}
+  "vocab_size": 32768,
+
+  "quantization_config": {
+    "dtype": "int4",
+    "approach": "dynamic",
+    "bits": 4,
+    "layer_norm": false,
+    "activation_quantization": true,
+    "weight_quantization": true,
+    "weight_scaling": "learned"
+  }
+}
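The added quantization_config block describes 4-bit ("int4") dynamic quantization of both weights and activations, with learned scaling for the weights and layer normalization left unquantized. These keys do not appear to match the quant_method-based quantization configs that transformers loads automatically, so the minimal sketch below only parses config.json with the standard json module and checks the new fields; the local file path and the specific checks are illustrative assumptions, not part of the commit.

import json

# Minimal check of the fields added in this commit. Parsing config.json with
# the standard json module avoids assuming any particular transformers
# quantization backend understands these keys.
with open("config.json", "r", encoding="utf-8") as f:  # path assumes a local checkout of the repo
    config = json.load(f)

quant = config.get("quantization_config", {})

print("vocab_size:", config.get("vocab_size"))   # 32768
print("dtype:", quant.get("dtype"))              # "int4"
print("approach:", quant.get("approach"))        # "dynamic"
print("bits:", quant.get("bits"))                # 4

# Sanity checks on the new block before wiring it into any quantization code.
assert quant.get("bits") == 4
assert quant.get("weight_quantization") is True
assert quant.get("activation_quantization") is True

If these fields are meant to drive an actual int4 path at load time, the loading code has to read them explicitly as above, since stock transformers quantization configs are normally keyed on a quant_method entry that is absent here.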