import sys | |
import torch | |
import model.configuration_deepseek as cds | |
import model.modeling_deepseek as mds | |
from safetensors.torch import load_file | |
def main():
    """Run one forward pass of a standalone DeepseekV3Attention and print a slice.

    Steps:
      1. Load the model configuration from the ``model`` directory.
      2. Build the attention module under the ``meta`` device.
      3. Load ``model/weights.safetensors`` onto CUDA and assign those
         tensors to the module (``assign=True``, ``strict=True``).
      4. Load ``model/nan_input.safetensors`` onto CUDA and pass its entries
         to the module as keyword arguments.
      5. Print ``result[0][0][163]``.

    The forward pass runs under ``torch.no_grad()`` and goes through the
    module's ``__call__`` rather than ``forward`` directly, so registered
    hooks (if any) are honoured and no autograd graph is recorded.
    """
    config = cds.DeepseekV3Config.from_pretrained("model")

    # Construct under the meta device; the real tensors come from the
    # checkpoint and replace the module's parameters via assign=True below.
    with torch.device("meta"):
        model = mds.DeepseekV3Attention(config)

    state_dict = load_file("model/weights.safetensors", device="cuda")
    # strict=True: the checkpoint keys must match the module's keys exactly.
    model.load_state_dict(state_dict, assign=True, strict=True)

    # NOTE(review): judging by the file name, these inputs presumably
    # reproduce a NaN in the output -- confirm against the issue being debugged.
    inputs = load_file("model/nan_input.safetensors", device="cuda")

    # Inference only: skip autograd bookkeeping, and call the module itself
    # so that nn.Module.__call__ (and any hooks) are not bypassed.
    with torch.no_grad():
        result = model(**inputs)

    # NOTE(review): presumably result[0] is the attention output and this
    # selects batch 0, position 163 -- confirm against the model's forward().
    print(result[0][0][163])
# Script entry point: run main() and turn Ctrl-C into a short message plus a
# non-zero exit status instead of a traceback.
if __name__ == "__main__":
    exit_status = 0
    try:
        main()
    except KeyboardInterrupt:
        print("\nScript interrupted by user, exiting...")
        exit_status = 1
    # Only force an exit when interrupted; a normal run falls off the end.
    if exit_status:
        sys.exit(exit_status)