Update app.py
--- a/app.py
+++ b/app.py
@@ -5,7 +5,8 @@ import gradio as gr
 # Load Llama 3.2 model
 model_name = "meta-llama/Llama-3.2-3B-Instruct" # Replace with the exact model path
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
+#model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map=None, torch_dtype=torch.float32)
 
 # Helper function to process long contexts
 MAX_TOKENS = 100000 # Replace with the max token limit of the Llama model
@@ -131,7 +132,7 @@ def chat_with_model(user_input, chat_history=[]):
     print("prompt: ------------------------------------- \n"+prompt)
     input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=4096).to("cuda")
     tokenizer.pad_token = tokenizer.eos_token
-    attention_mask = torch.ones_like(input_ids).to("cuda")
+    attention_mask = torch.ones_like(input_ids).to("cpu")
     outputs = model.generate(input_ids, attention_mask=attention_mask,
                              max_new_tokens=1200, do_sample=True,
                              top_k=50, temperature=0.7)
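Note that after this change the model is loaded on CPU in float32, while the prompt tensor is still moved to "cuda" and the attention mask to "cpu", so the model and its inputs end up on different devices. For reference, a minimal device-consistent sketch of the same load-and-generate flow, keeping the model name and sampling settings from app.py (the device selection and the placeholder prompt are illustrative, not part of the original file):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-3.2-3B-Instruct"

# Illustrative: pick one device and a dtype it supports, then keep
# the model and every input tensor on that same device.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # set once, before encoding

model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype).to(device)

prompt = "Hello, how are you?"  # placeholder; app.py builds this from the chat history
input_ids = tokenizer.encode(prompt, return_tensors="pt",
                             truncation=True, max_length=4096).to(device)
attention_mask = torch.ones_like(input_ids)  # inherits the device of input_ids

outputs = model.generate(input_ids, attention_mask=attention_mask,
                         max_new_tokens=1200, do_sample=True,
                         top_k=50, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Deriving the mask with torch.ones_like (rather than calling .to(...) on it separately) removes the chance of a device mismatch between input_ids and attention_mask.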