File size: 743 Bytes
cdc3f9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
# Location the unquantized model and tokenizer are loaded from.
MODEL_PATH = "DeepSeek-V3-1B-Test"

# Destination for the quantized model and the tokenizer files.
QUANT_PATH = "DeepSeek-V3-1B-Test-AWQ"

# Settings handed to ``model.quantize`` as ``quant_config``.
QUANT_CONFIG = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
    # Going by the key name, modules matching these entries are left
    # unconverted -- confirm exact matching rules against the AutoAWQ docs.
    "modules_to_not_convert": ["self_attn.kv_a_proj_with_mqa"],
}
def main():
    """Quantize the model at MODEL_PATH and write the result to QUANT_PATH.

    Loads the model and its tokenizer from MODEL_PATH, calls the model's
    ``quantize`` method with the tokenizer and QUANT_CONFIG, then saves the
    quantized model and the tokenizer under QUANT_PATH and prints a
    confirmation message.
    """
    load_options = {"low_cpu_mem_usage": True, "use_cache": False}
    awq_model = AutoAWQForCausalLM.from_pretrained(MODEL_PATH, **load_options)
    tok = AutoTokenizer.from_pretrained(MODEL_PATH, legacy=True)

    awq_model.quantize(tok, quant_config=QUANT_CONFIG)

    # Model and tokenizer are both written to the same output location.
    awq_model.save_quantized(QUANT_PATH)
    tok.save_pretrained(QUANT_PATH)

    print(f"Model is quantized and saved at \"{QUANT_PATH}\".")
# Run the quantization only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|