from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

MODEL_PATH = "DeepSeek-V3-1B-Test"
QUANT_PATH = "DeepSeek-V3-1B-Test-AWQ"

# 4-bit AWQ settings with 128-element groups and the GEMM kernel layout.
# The MLA projection `self_attn.kv_a_proj_with_mqa` is excluded and kept
# in full precision.
QUANT_CONFIG = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
    "modules_to_not_convert": ["self_attn.kv_a_proj_with_mqa"],
}


def main():
    # Load the full-precision model and its tokenizer.
    model = AutoAWQForCausalLM.from_pretrained(
        MODEL_PATH, low_cpu_mem_usage=True, use_cache=False
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, legacy=True)

    # Run AWQ calibration and quantize the weights in place.
    model.quantize(
        tokenizer,
        quant_config=QUANT_CONFIG,
    )

    # Persist the quantized weights and the tokenizer side by side.
    model.save_quantized(QUANT_PATH)
    tokenizer.save_pretrained(QUANT_PATH)
    print(f"Model is quantized and saved at \"{QUANT_PATH}\".")


if __name__ == "__main__":
    main()
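
# A minimal sketch of loading the quantized checkpoint back for inference,
# kept commented out so this script still only performs quantization.
# It assumes a CUDA device is available; the prompt and generation settings
# below are illustrative and not part of the original script.
#
# model = AutoAWQForCausalLM.from_quantized(QUANT_PATH)
# tokenizer = AutoTokenizer.from_pretrained(QUANT_PATH)
# inputs = tokenizer("Hello, world!", return_tensors="pt").to("cuda")
# output = model.generate(**inputs, max_new_tokens=32)
# print(tokenizer.decode(output[0], skip_special_tokens=True))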