import os

import gradio as gr
import torch
from huggingface_hub import login
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# Login to the Hugging Face Hub
access_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
login(token=access_token)
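# Note: on a Hugging Face Space, HUGGING_FACE_HUB_TOKEN is typically set as a
# repository secret, which the platform exposes to the app as an environment variable.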

# Define model details
peft_model_id = "kuyesu22/sunbird-ug-lang-v1.0-llama-2-7b-hf-lora"  # Fine-tuned Llama 2 LoRA adapter
config = PeftConfig.from_pretrained(peft_model_id)

# Load base model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16,   # Half-precision weights for faster inference
    device_map="auto",           # Automatically allocate to available devices
    offload_folder="./offload",  # Directory for offloading layers if needed
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Set the tokenizer's padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Use EOS as the padding token if none is defined
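# Note: Llama 2 tokenizers ship without a dedicated pad token; reusing EOS is
# safe for inference because the attention mask already marks padded positions.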

# Load the LoRA fine-tuned adapter on top of the base model
model = PeftModel.from_pretrained(model, peft_model_id)

# Set model to evaluation mode
model.eval()
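# eval() disables train-time behaviour such as dropout; gradient tracking is
# turned off separately with torch.no_grad() in the inference function below.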

# Define the inference function for translation
def make_inference(english_text):
    # Format the prompt for the English -> Runyankole language pair
    prompt = (
        "You are an English to Runyankole translator. Runyankole is a language "
        "spoken by Bantu people in western Uganda. Translate the following "
        "sentence accurately so that it reads naturally. "
        f"### English:\n{english_text}\n\n### Runyankole:"
    )
    batch = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=256).to(model.device)

    # Generate the translation
    with torch.no_grad():
        with torch.cuda.amp.autocast():  # Mixed-precision inference for speed
            output_tokens = model.generate(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                max_new_tokens=100,
                do_sample=True,
                temperature=0.7,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,
            )

    # Decode only the newly generated tokens: generate() returns the prompt
    # followed by the continuation, so slice the prompt tokens off first
    new_tokens = output_tokens[0][batch["input_ids"].shape[1]:]
    translated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return translated_text

# Gradio interface
def launch_gradio_interface():
    inputs = gr.Textbox(lines=2, label="English Text")        # Input text in English
    outputs = gr.Textbox(label="Translated Runyankole Text")  # Output in Runyankole

    # Launch the Gradio app with a public sharing link enabled
    gr.Interface(
        fn=make_inference,
        inputs=inputs,
        outputs=outputs,
        title="Dialogue of Delivery Translator",
        description="Translate English to Runyankole using a Llama 2 model fine-tuned with LoRA.",
    ).launch(share=True)  # share=True creates a public link

# Entry point to run the Gradio app
if __name__ == "__main__":
    launch_gradio_interface()
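
# Example usage (hypothetical sentence): the inference function can also be
# called directly, without the Gradio UI, e.g. from a notebook or test script:
#   print(make_inference("Good morning, how are you?"))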