import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
import os
import gradio as gr

# Log in to the Hugging Face Hub (token read from the environment)
access_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
if access_token:
    login(token=access_token)


# Define model details
peft_model_id = "kuyesu22/sunbird-ug-lang-v1.0-llama-2-7b-hf-lora"  # Fine-tuned English → Runyankole LoRA adapter
config = PeftConfig.from_pretrained(peft_model_id)

# Load base model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16,    # Mixed precision for faster inference
    device_map="auto",            # Automatically allocate to available devices
    offload_folder="./offload"    # Directory for offloading layers if needed
)
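# A possible alternative for low-memory GPUs (a sketch, assuming bitsandbytes
# is installed; not part of the original setup):
# from transformers import BitsAndBytesConfig
# model = AutoModelForCausalLM.from_pretrained(
#     config.base_model_name_or_path,
#     quantization_config=BitsAndBytesConfig(load_in_8bit=True),
#     device_map="auto",
# )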
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Set the tokenizer's padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Set EOS token as padding if not already defined
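
# Note: decoder-only models generally expect left padding for batched
# generation; single-prompt calls (as below) are unaffected. If batching,
# one could set: tokenizer.padding_side = "left"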

# Load the LoRA fine-tuned model
model = PeftModel.from_pretrained(model, peft_model_id)
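# Optional: merge the LoRA weights into the base model for slightly faster
# inference (a sketch; assumes the adapter will not be swapped later):
# model = model.merge_and_unload()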

# Set model to evaluation mode
model.eval()

# Define the inference function for translation
def make_inference(english_text):
    # Format the prompt for the English → Runyankole translation task
    prompt = (
        "You are an English to Runyankole language translator. Runyankole is a "
        "language spoken by the Bantu-speaking people of western Uganda. "
        "Translate the user's sentence appropriately so that it makes sense."
        f" ### English:\n{english_text}\n\n### Runyankole:"
    )
    batch = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=256).to(model.device)

    # Generate the translation
    with torch.no_grad():
        with torch.autocast(device_type="cuda", dtype=torch.float16):  # Mixed-precision inference for speed
            output_tokens = model.generate(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                max_new_tokens=100,
                do_sample=True,
                temperature=0.7,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id
            )

    # Decode only the newly generated tokens (generate() output includes the prompt)
    generated_tokens = output_tokens[0][batch["input_ids"].shape[1]:]
    translated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return translated_text.strip()
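
# Example call (illustrative only; actual output depends on the model):
#   make_inference("Good morning, how are you?")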

# Gradio Interface
def launch_gradio_interface():
    inputs = gr.Textbox(lines=2, label="English Text")  # Input text in English
    outputs = gr.Textbox(label="Translated Runyankole Text")  # Output in Runyankole

    # Launch Gradio app with public sharing link enabled
    gr.Interface(
        fn=make_inference,
        inputs=inputs,
        outputs=outputs,
        title="Dialogue of Delivery Translator",
        description="Translate English to Runyankole using Llama 2 model fine-tuned with LoRA.",
    ).launch(share=True)  # Set `share=True` to create a public link

# Entry point to run the Gradio app
if __name__ == "__main__":
    launch_gradio_interface()