File size: 743 Bytes
cdc3f9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

# Source (fp16/bf16) checkpoint directory and output directory for the AWQ model.
MODEL_PATH = "DeepSeek-V3-1B-Test"
QUANT_PATH = "DeepSeek-V3-1B-Test-AWQ"
# AutoAWQ quantization settings: 4-bit weights, group size 128, asymmetric
# (zero-point) quantization with the GEMM kernel layout. The MLA latent
# projection ("self_attn.kv_a_proj_with_mqa") is excluded from quantization —
# presumably because quantizing it hurts accuracy; TODO confirm.
QUANT_CONFIG = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM", "modules_to_not_convert": ["self_attn.kv_a_proj_with_mqa"]}

def main():
    """Quantize the model at MODEL_PATH with AutoAWQ and save it to QUANT_PATH.

    Loads the tokenizer and the full-precision checkpoint, runs AWQ
    calibration/quantization with QUANT_CONFIG, then writes the quantized
    weights and the tokenizer files to QUANT_PATH.
    """
    # Tokenizer is needed by AutoAWQ for calibration data preprocessing.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, legacy=True)
    # use_cache=False: no KV cache needed during calibration;
    # low_cpu_mem_usage=True keeps peak RAM down while loading weights.
    model = AutoAWQForCausalLM.from_pretrained(
        MODEL_PATH,
        low_cpu_mem_usage=True,
        use_cache=False,
    )

    model.quantize(tokenizer, quant_config=QUANT_CONFIG)

    # Persist both the quantized weights and the tokenizer so the output
    # directory is a self-contained, loadable checkpoint.
    model.save_quantized(QUANT_PATH)
    tokenizer.save_pretrained(QUANT_PATH)
    print(f"Model is quantized and saved at \"{QUANT_PATH}\".")

# Run quantization only when executed as a script, not on import.
if __name__ == "__main__":
    main()