chandhron committed on
Commit 7a8f074 · 1 Parent(s): ac076e2

Create app.py

Files changed (1)
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+ import torch
+ from peft import PeftModel, PeftConfig
+ base_model = "TinyPixel/Llama-2-7B-bf16-sharded"
+ tuned_adapter = "newronai/llama-2-7b-QLoRA-Trial1"
+
+ # 4-bit NF4 quantization so the 7B base model fits in limited GPU memory
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16,
+ )
+
+ config = PeftConfig.from_pretrained(tuned_adapter)
+ model = AutoModelForCausalLM.from_pretrained(
+     base_model,
+     cache_dir="cache",  # local download cache (use_cache="cache" appears to be a typo for this)
+     quantization_config=bnb_config,
+     device_map="auto",  # assumes a CUDA device; inputs are moved to "cuda" below
+ )
+
+ # Attach the QLoRA adapter weights on top of the quantized base model
+ model = PeftModel.from_pretrained(model, tuned_adapter)
+ print("Model Downloaded")
+
+ tokenizer = AutoTokenizer.from_pretrained(base_model, cache_dir="cache")
+ tokenizer.pad_token = tokenizer.eos_token
+ print("Tokenizer Ready")
+
+ def question_answer(context, question):
+     # Combine the context paragraph and the question into a single prompt
+     prompt = f"{context}\n\n{question}"
+     input_tokens = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
+     output = model.generate(input_tokens, max_new_tokens=256)  # cap the answer length
+     output_text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
+     return output_text
+
+ gr.Interface(
+     fn=question_answer,
+     inputs=[gr.Textbox(lines=7, label="Context Paragraph"),
+             gr.Textbox(lines=2, label="Question")],
+     outputs=[gr.Textbox(label="Answer")],
+ ).launch()