sagar007 committed
Commit 5355d21 · verified · 1 parent: 9c3589b

Update app.py

Files changed (1)
  1. app.py +5 -10
app.py CHANGED
```diff
@@ -1,7 +1,8 @@
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel, PeftConfig
+import spaces
 
 # Load model and tokenizer
 MODEL_PATH = "sagar007/phi2_finetune"
@@ -9,16 +10,9 @@ MODEL_PATH = "sagar007/phi2_finetune"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
 tokenizer.pad_token = tokenizer.eos_token
 
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_use_double_quant=False
-)
-
 base_model = AutoModelForCausalLM.from_pretrained(
     "microsoft/phi-2",
-    quantization_config=bnb_config,
+    torch_dtype=torch.float32,  # Use float32 for CPU
     device_map="auto",
     trust_remote_code=True
 )
@@ -27,9 +21,10 @@ peft_config = PeftConfig.from_pretrained(MODEL_PATH)
 model = PeftModel.from_pretrained(base_model, MODEL_PATH)
 model.eval()
 
+@spaces.GPU(duration=60)
 def generate_response(instruction, max_length=512):
     prompt = f"Instruction: {instruction}\nResponse:"
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    inputs = tokenizer(prompt, return_tensors="pt")
 
     with torch.no_grad():
         outputs = model.generate(
```
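In short, the commit drops the 4-bit BitsAndBytes quantization (which needs CUDA available at load time) in favor of float32 weights plus ZeroGPU's `@spaces.GPU` decorator, which attaches a GPU only for the duration of each generation call; that is also why the explicit `.to(model.device)` on the inputs is removed. The last hunk cuts off mid-call at `model.generate(`. For context, here is a minimal sketch of how the decorated function plausibly continues; the decoding arguments and the post-processing below are illustrative assumptions, not part of this commit, and `tokenizer`/`model` are the objects built earlier in app.py:

```python
import torch
import spaces

@spaces.GPU(duration=60)  # ZeroGPU: attach a GPU for up to 60 s per call
def generate_response(instruction, max_length=512):
    prompt = f"Instruction: {instruction}\nResponse:"
    # Inputs stay on CPU here; no .to(model.device), matching the diff.
    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            pad_token_id=tokenizer.eos_token_id,  # assumed; avoids pad warnings
            do_sample=True,                       # assumed decoding settings
            temperature=0.7,
        )

    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return only the model's continuation after the "Response:" marker.
    return text.split("Response:")[-1].strip()
```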
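The Gradio UI itself sits outside the hunks shown (only `import gradio as gr` is visible). A minimal sketch of how `generate_response` is typically wired up in a Space; the actual layout, labels, and title in app.py may differ:

```python
import gradio as gr

# Hypothetical UI wiring; the real interface in app.py is not shown in this diff.
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Instruction", lines=4),
    outputs=gr.Textbox(label="Response"),
    title="Phi-2 fine-tune demo",
)

if __name__ == "__main__":
    demo.launch()
```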