kz919 committed on
Commit 3a75081 · verified · 1 Parent(s): abf656c

Update app.py

Files changed (1)
  1. app.py  +2 -7
app.py CHANGED
@@ -6,13 +6,8 @@ model_name = "kz919/QwQ-0.5B-Distilled-SFT"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 
-# Ensure the model runs on GPU if available
-import torch
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
-
-
 # Define the function to handle chat responses
+@spaces.GPU
 def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
     # Prepare the prompt by combining history and system messages
     prompt = system_message + "\n"
@@ -21,7 +16,7 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
     prompt += f"User: {message}\nAssistant:"
 
     # Tokenize the input prompt
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    inputs = tokenizer(prompt, return_tensors="pt")
 
     # Generate a response
     outputs = model.generate(
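
For context, here is a minimal sketch of the full app.py after this commit. Only the model name, the respond signature, the @spaces.GPU decorator, and the tokenizer call are taken from the diff; the history loop, the generation arguments, and the Gradio ChatInterface wiring are assumptions about the surrounding file, not part of the change itself.

# Sketch of app.py after this commit (assumed scaffolding, see note above)
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "kz919/QwQ-0.5B-Distilled-SFT"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Define the function to handle chat responses
@spaces.GPU  # on a ZeroGPU Space, requests a GPU for the duration of each call
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    # Prepare the prompt by combining history and system messages
    prompt = system_message + "\n"
    for user_msg, assistant_msg in history:  # assumed (user, assistant) tuple format
        prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # Tokenize the input prompt (no manual .to(device) after this change)
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate a response (sampling arguments assumed, not shown in the diff)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Assumed Gradio wiring matching the extra parameters in the respond signature
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()

The net effect of the commit is to replace the manual torch.device bookkeeping with the Spaces ZeroGPU decorator, which is why both the device setup block and the .to(device) call on the tokenized inputs are removed.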