Spaces:

sc2582
/

decision_oaif_space2

Sleeping

App Files Files Community

decision_oaif_space2 / app.py

sc2582

Update app.py

5433af7 verified 5 months ago

raw

history blame contribute delete

2.11 kB

	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import gradio as gr

	# Hugging Face Hub ID of the checkpoint served by this app; change it to
	# serve a different model.
	model_id = "decision-oaif/Meta-Llama-3.1-8B-Instruct-sft-intercode-bash-iter1"

	# Load the weights in bfloat16; device_map="auto" delegates device placement
	# to the library instead of pinning the model to one device.
	model = AutoModelForCausalLM.from_pretrained(
	    model_id,
	    torch_dtype=torch.bfloat16,
	    device_map="auto",
	)

	# `padding` / `truncation` are options of the tokenizer *call*, not of
	# `from_pretrained`, so they are not passed here; only the sides are
	# configured on the tokenizer object itself.
	tokenizer = AutoTokenizer.from_pretrained(model_id)
	# Truncate from the left so the most recent part of the prompt is kept, and
	# pad on the left so the prompt ends right where generation starts.
	tokenizer.truncation_side = "left"
	tokenizer.padding_side = "left"

	# Padding requires a pad token; fall back to EOS when the checkpoint does not
	# define one.
	if tokenizer.pad_token is None:
	    tokenizer.pad_token = tokenizer.eos_token

	def generate_response(messages):
	    """Generate the next assistant reply for a chat conversation.

	    Args:
	        messages: Conversation history as a list of
	            ``{"role": "user"/"assistant", "content": "<text>"}`` dicts, as
	            accepted by ``tokenizer.apply_chat_template``.

	    Returns:
	        The decoded text of the newly generated tokens only: the prompt is
	        sliced off and special tokens are skipped.
	    """
	    # Render the conversation with the chat template and append the
	    # generation prompt so the model answers as the next turn.
	    prompt = tokenizer.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=True
	    )

	    # The rendered template already carries its special tokens, so the
	    # tokenizer must not add them again (this would duplicate BOS).
	    # Prompts longer than 512 tokens are truncated.
	    tokenized_inputs = tokenizer(
	        prompt,
	        return_tensors="pt",
	        padding=True,
	        truncation=True,
	        max_length=512,
	        add_special_tokens=False,
	    ).to(model.device)
	    prompt_length = tokenized_inputs["input_ids"].shape[-1]

	    # Stop on either the regular EOS token or the end-of-turn marker.
	    stop_token_ids = [
	        tokenizer.eos_token_id,
	        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
	    ]

	    outputs = model.generate(
	        tokenized_inputs["input_ids"],
	        attention_mask=tokenized_inputs["attention_mask"],
	        max_new_tokens=256,
	        # `temperature` only applies when sampling is enabled.
	        do_sample=True,
	        temperature=0.3,
	        eos_token_id=stop_token_ids,
	        pad_token_id=tokenizer.eos_token_id,
	    )

	    # Only one prompt is submitted, so take the first sequence and decode
	    # just the tokens that follow the prompt.
	    generated_tokens = outputs[0][prompt_length:]
	    return tokenizer.decode(generated_tokens, skip_special_tokens=True)


	# Wire generate_response into a Gradio UI: the input component takes the
	# chat history as JSON (a list of message dicts) and the reply is shown as
	# plain text.
	iface = gr.Interface(
	    title="Meta-Llama-3-8B-Instruct",
	    fn=generate_response,
	    inputs="json",
	    outputs="text",
	)

	# Start serving the app.
	iface.launch()