kz919 committed
Commit 07c2cc6 · verified · 1 parent: 4667b5a

Update app.py

Files changed (1): app.py (+2, -2)
app.py CHANGED
@@ -5,7 +5,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 # Load the model and tokenizer locally
 model_name = "kz919/QwQ-0.5B-Distilled-SFT"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
 
 # Define the function to handle chat responses
 @spaces.GPU
@@ -17,7 +17,7 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
     prompt += f"User: {message}\nAssistant:"
 
     # Tokenize the input prompt
-    inputs = tokenizer(prompt, return_tensors="pt")
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
 
     # Generate a response
     outputs = model.generate(
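
The change fixes a device mismatch: the model is moved to the GPU, so the tokenized inputs must be moved there as well before model.generate is called, or PyTorch raises a device-mismatch RuntimeError. Below is a minimal standalone sketch of that placement pattern. The torch.cuda.is_available() guard and the sample prompt are assumptions added for illustration; the committed code hardcodes "cuda" because the @spaces.GPU decorator guarantees a GPU at call time.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "kz919/QwQ-0.5B-Distilled-SFT"
# Assumption: fall back to CPU for local testing; the Space itself always has CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

prompt = "User: Hello\nAssistant:"  # hypothetical prompt for illustration
# Inputs must live on the same device as the model's weights.
inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))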