|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
import torch |
|
from pathlib import Path |
|
import time |
|
# Two checkpoint directories are available. Exactly one is selected below, so
# switching checkpoints is a one-line change instead of a shadowed assignment.
MODEL_DIR_BF16 = "/home/mmnga/ssd/llm-data/DeepSeek-V3-bf16"
MODEL_DIR_SLICE = "/home/mmnga/ssd/llm-data/DeepSeek-V3-slice"

# Directory the model and tokenizer are loaded from.
model_dir_name = MODEL_DIR_SLICE
model_dir_path = Path(model_dir_name)
|
|
|
# Option shared by both loaders. trust_remote_code=True allows transformers to
# execute the custom Python code shipped with the checkpoint, so only point
# this at a directory you trust.
_loader_options = {"trust_remote_code": True}

# Causal-LM weights, requested in bfloat16 and mapped onto the CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_dir_name,
    torch_dtype=torch.bfloat16,
    device_map="cpu",
    **_loader_options,
)

# Tokenizer loaded from the same checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(model_dir_name, **_loader_options)
|
|
|
# Benchmark prompts; the loop further down sends each entry as one user turn.
questions = ["データサイエンスの分野で今後注目される技術や手法について教えてください。"]
|
|
|
|
|
# Run every benchmark question through the model once, printing the rendered
# prompt, the decoded output, and the elapsed time for that question.
for question in questions:
    # Chat transcript: a fixed system instruction followed by the question.
    messages = [
        {"role": "system", "content": "全て日本語で返してください。"},
        {"role": "user", "content": question},
    ]

    # Render the transcript to text (kept as text so it can be printed below).
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The chat template is expected to emit its own special tokens, so do not
    # let the tokenizer add them a second time (avoids e.g. a duplicated BOS).
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)

    # perf_counter is monotonic, so the elapsed value cannot be skewed by
    # wall-clock adjustments during a long CPU generation.
    start_time = time.perf_counter()
    print("計測開始 ------------------------------")
    print(f"prompt: {prompt}")

    print("CALCULATE_START:--------------------------------")

    outputs = model.generate(
        inputs.input_ids,
        # pad_token_id equals eos_token_id here, so the mask cannot be inferred
        # from the ids; pass the tokenizer's mask explicitly (None if absent).
        attention_mask=inputs.get("attention_mask"),
        # max_length bounds prompt tokens + generated tokens together.
        max_length=256,
        pad_token_id=tokenizer.eos_token_id,
    )
    print("CALCULATE_END:--------------------------------")
    print("TEXT_START:--------------------------------")
    # outputs[0] is the full sequence (prompt followed by the continuation).
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
    print("TEXT_END:--------------------------------")

    end_time = time.perf_counter()
    print("")
    print("計測終了 ------------------------------")
    print(f"処理時間: {end_time - start_time:.2f}秒")
|
|