AthuKawale commited on
Commit
c1e0503
·
1 Parent(s): ff1b4ac

edited app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Load a 4-bit (NF4) quantized Llama-2 base model plus the Guanaco dataset,
then run a short smoke-test generation.

Side effects at import/run time: downloads the dataset and model weights from
the Hugging Face Hub, and prints the generated text to stdout.
"""

import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Instruction-tuning dataset. It is loaded here but not used further in this
# script — presumably consumed by a fine-tuning step elsewhere; verify.
dataset_name = "timdettmers/openassistant-guanaco"
dataset = load_dataset(dataset_name, split="train")

# Quantization configuration: 4-bit NF4 weights with fp16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Download the sharded base model with the 4-bit quantization applied.
model_name = "TinyPixel/Llama-2-7B-bf16-sharded"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
)
# Disable the KV cache (commonly done in preparation for fine-tuning with
# gradient checkpointing; harmless for the one-off generation below).
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Llama tokenizers ship without a pad token; reuse EOS so padding works.
tokenizer.pad_token = tokenizer.eos_token

# Quick generation smoke test.
text = "What is a large language model?"
# Fall back to CPU when no GPU is present instead of crashing on .to("cuda:0").
device = "cuda:0" if torch.cuda.is_available() else "cpu"
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))