Malekhmem committed on
Commit 4d17494 · verified · 1 Parent(s): 51f4653
Files changed (1)
  1. config.json +12 -2
config.json CHANGED
@@ -21,5 +21,15 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.42.0.dev0",
   "use_cache": true,
-  "vocab_size": 32768
-}
+  "vocab_size": 32768,
+
+  "quantization_config": {
+    "dtype": "int4",
+    "approach": "dynamic",
+    "bits": 4,
+    "layer_norm": false,
+    "activation_quantization": true,
+    "weight_quantization": true,
+    "weight_scaling": "learned"
+  }
+}
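The added quantization_config block describes 4-bit ("int4") dynamic quantization of both weights and activations, with learned scaling for the weights and layer normalization left unquantized. These keys do not appear to match the quant_method-based quantization configs that transformers loads automatically, so the minimal sketch below only parses config.json with the standard json module and checks the new fields; the local file path and the specific checks are illustrative assumptions, not part of the commit.

import json

# Minimal check of the fields added in this commit. Parsing config.json with
# the standard json module avoids assuming any particular transformers
# quantization backend understands these keys.
with open("config.json", "r", encoding="utf-8") as f:  # path assumes a local checkout of the repo
    config = json.load(f)

quant = config.get("quantization_config", {})

print("vocab_size:", config.get("vocab_size"))   # 32768
print("dtype:", quant.get("dtype"))              # "int4"
print("approach:", quant.get("approach"))        # "dynamic"
print("bits:", quant.get("bits"))                # 4

# Sanity checks on the new block before wiring it into any quantization code.
assert quant.get("bits") == 4
assert quant.get("weight_quantization") is True
assert quant.get("activation_quantization") is True

If these fields are meant to drive an actual int4 path at load time, the loading code has to read them explicitly as above, since stock transformers quantization configs are normally keyed on a quant_method entry that is absent here.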