"""Run a single DeepseekV3Attention forward pass on saved inputs.

Loads the attention weights from ``model/weights.safetensors`` and the
keyword inputs from ``model/nan_input.safetensors`` (both placed on CUDA),
runs one forward pass without gradient tracking, and prints one slice of
the first output.

NOTE(review): the input file name suggests this is a NaN reproduction
script -- confirm with the author.
"""

import sys

import torch
from safetensors.torch import load_file

import model.configuration_deepseek as cds
import model.modeling_deepseek as mds


# Called form ``no_grad()``: the bare ``@torch.no_grad`` decorator is only
# accepted by newer PyTorch releases, the called form works everywhere.
@torch.no_grad()
def main():
    """Build the attention module, load weights and inputs, print one slice."""
    config = cds.DeepseekV3Config.from_pretrained("model")

    # Construct on the meta device so no parameter storage is allocated;
    # the real tensors are attached by load_state_dict(assign=True) below.
    with torch.device("meta"):
        attention = mds.DeepseekV3Attention(config)

    weights = load_file("model/weights.safetensors", device="cuda")
    # strict=True: fail if the checkpoint keys do not match the module exactly.
    attention.load_state_dict(weights, assign=True, strict=True)

    # The saved tensors are passed to the module as keyword arguments, so the
    # keys of the file must match the forward() parameter names.
    inputs = load_file("model/nan_input.safetensors", device="cuda")

    # Call the module rather than .forward() so nn.Module hooks are honoured.
    result = attention(**inputs)

    # NOTE(review): 163 is a hard-coded index into the first output;
    # presumably the position being inspected -- confirm.
    print(result[0][0][163])


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nScript interrupted by user, exiting...")
        sys.exit(1)