Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -29,10 +29,8 @@ model_id = "llava-hf/llava-1.5-7b-hf"
|
|
29 |
pipe = pipeline("image-to-text", model=model_id)
|
30 |
|
31 |
|
32 |
-
# Load the Whisper model
|
33 |
-
|
34 |
-
model = whisper.load_model("medium", device=DEVICE)
|
35 |
-
|
36 |
|
37 |
# Initialize conversation history
|
38 |
conversation_history = []
|
@@ -54,7 +52,7 @@ def img2txt(input_text, input_image):
|
|
54 |
writehistory(f"Input text: {input_text}")
|
55 |
prompt = "USER: <image>\n" + input_text + "\nASSISTANT:"
|
56 |
while True:
|
57 |
-
outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
|
58 |
|
59 |
if outputs and outputs[0]["generated_text"]:
|
60 |
match = re.search(r'ASSISTANT:\s*(.*)', outputs[0]["generated_text"])
|
@@ -80,16 +78,12 @@ def vid2txt(input_text, input_video):
|
|
80 |
return str(e)
|
81 |
|
82 |
def transcribe(audio_path):
|
83 |
-
"""Transcribe audio to text using Whisper."""
|
84 |
if not audio_path:
|
85 |
return ''
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
mel = whisper.log_mel_spectrogram(audio).to(model.device)
|
90 |
-
options = whisper.DecodingOptions()
|
91 |
-
result = whisper.decode(model, mel, options)
|
92 |
-
return result.text
|
93 |
|
94 |
def text_to_speech(text, file_path):
|
95 |
"""Convert text to speech and save to file."""
|
|
|
29 |
pipe = pipeline("image-to-text", model=model_id)
|
30 |
|
31 |
|
32 |
+
# Load the Whisper model using pipeline
|
33 |
+
pipe_audio = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
|
|
|
|
|
34 |
|
35 |
# Initialize conversation history
|
36 |
conversation_history = []
|
|
|
52 |
writehistory(f"Input text: {input_text}")
|
53 |
prompt = "USER: <image>\n" + input_text + "\nASSISTANT:"
|
54 |
while True:
|
55 |
+
outputs = pipe_image(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
|
56 |
|
57 |
if outputs and outputs[0]["generated_text"]:
|
58 |
match = re.search(r'ASSISTANT:\s*(.*)', outputs[0]["generated_text"])
|
|
|
78 |
return str(e)
|
79 |
|
80 |
def transcribe(audio_path):
|
81 |
+
"""Transcribe audio to text using Whisper pipeline."""
|
82 |
if not audio_path:
|
83 |
return ''
|
84 |
|
85 |
+
result = pipe_audio(audio_path)
|
86 |
+
return result["text"]
|
|
|
|
|
|
|
|
|
87 |
|
88 |
def text_to_speech(text, file_path):
|
89 |
"""Convert text to speech and save to file."""
|