Spaces:

AthuKawale
/

ChatBot

Sleeping

ChatBot / app.py

removed peft

60870dc over 1 year ago

1.03 kB

	import torch
	from datasets import load_dataset

	# Keep the result of the CUDA probe. The original evaluated this call and
	# discarded the answer, so a host without a usable GPU went unreported and
	# the success message below was printed regardless.
	cuda_available = torch.cuda.is_available()
	print("executed successfully")
	if not cuda_available:
		print("warning: torch.cuda.is_available() returned False; no CUDA device is visible")

	# Download (or reuse from the local cache) the "train" split of the
	# OpenAssistant Guanaco dataset from the Hugging Face Hub.
	# NOTE(review): nothing in the visible script reads `dataset` afterwards —
	# confirm whether this load is still needed.
	dataset_name = "timdettmers/openassistant-guanaco"
	dataset = load_dataset(dataset_name, split="train")

	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

	# Quantization configuration: load weights in 4-bit using the "nf4"
	# quantization type, with float16 as the compute dtype.
	bnb_config = BitsAndBytesConfig(
		bnb_4bit_compute_dtype=torch.float16,
		bnb_4bit_quant_type="nf4",
		load_in_4bit=True,
	)

	# Fetch the causal language model identified by `model_name` and load it
	# with the quantization settings held in `bnb_config`.
	# NOTE(review): trust_remote_code=True lets code shipped in the model
	# repository run locally — confirm this repository actually requires it.
	model_name = "TinyPixel/Llama-2-7B-bf16-sharded"
	model = AutoModelForCausalLM.from_pretrained(
		model_name,
		trust_remote_code=True,
		quantization_config=bnb_config,
	)
	# NOTE(review): presumably a leftover from a fine-tuning setup; verify it is
	# still wanted now that the script only generates text.
	model.config.use_cache = False

	# Load the matching tokenizer and reuse its end-of-sequence token as the
	# padding token.
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
	tokenizer.pad_token = tokenizer.eos_token

	# Smoke test: run one sample prompt through the model and print the decoded
	# completion (at most 50 newly generated tokens).
	text = "What is a large language model?"
	# Take the device from the loaded model instead of hard-coding "cuda:0", so
	# the input tensors always end up on the same device as the weights.
	device = model.device
	inputs = tokenizer(text, return_tensors="pt").to(device)
	outputs = model.generate(**inputs, max_new_tokens=50)
	# Only one prompt was submitted, so the first sequence is the whole result.
	print(tokenizer.decode(outputs[0], skip_special_tokens=True))