hivecorp committed on
Commit
454da09
·
verified ·
1 Parent(s): 05a4865

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -21
app.py CHANGED
@@ -1,31 +1,58 @@
 
 
 
 
1
  import gradio as gr
2
- from transformers import pipeline
3
 
4
- # Define a list of available speaker models
5
- SPEAKER_MODELS = {
6
- "Default": "onnx-community/Kokoro-82M-ONNX",
7
- "Speaker 1": "onnx-community/Kokoro-82M-ONNX",
8
- "Speaker 2": "onnx-community/Kokoro-82M-ONNX"
 
 
 
 
9
  }
10
 
11
- def kokoro_tts(text, speaker):
12
- # Initialize the transformers pipeline for text-to-speech with the selected speaker model
13
- tts_pipeline = pipeline("text-to-speech", model=SPEAKER_MODELS[speaker])
14
- # Generate speech from text
15
- speech = tts_pipeline(text)
16
- return speech["audio"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- # Create a Gradio interface
19
  iface = gr.Interface(
20
- fn=kokoro_tts,
21
  inputs=[
22
- gr.Textbox(lines=2, placeholder="Enter text here..."),
23
- gr.Dropdown(choices=list(SPEAKER_MODELS.keys()), label="Select Speaker")
24
  ],
25
- outputs=gr.Audio(label="Generated Speech"),
26
- title="Kokoro Text-to-Speech",
27
- description="A Text-to-Speech app powered by Hugging Face Transformers.js with multiple speaker options"
28
  )
29
 
30
- if __name__ == "__main__":
31
- iface.launch()
 
1
+ import numpy as np
2
+ import onnxruntime as ort
3
+ import torch
4
+ import scipy.io.wavfile as wav
5
  import gradio as gr
 
6
 
7
# Location of the exported Kokoro ONNX graph, relative to the app's working dir.
model_path = "Kokoro-82M-ONNX/model.onnx"

# One shared inference session, created once at import time and reused
# by every request (sessions are expensive to construct).
ort_session = ort.InferenceSession(model_path)

# Placeholder speaker table: display name -> embedding identifier.
# NOTE(review): the values are placeholder strings — replace them with
# real speaker embeddings before inference will work.
speaker_options = {f"Speaker {i}": f"spk_{i}_embedding" for i in range(1, 4)}
17
 
18
# Function to generate speech
def generate_speech(text, speaker, sample_rate=22050):
    """Synthesize *text* with the selected speaker and return a WAV file path.

    Parameters
    ----------
    text : str
        The text to synthesize.
    speaker : str
        A key of ``speaker_options`` selecting the speaker embedding.
    sample_rate : int, optional
        Sample rate (Hz) written into the WAV header. Defaults to 22050,
        matching the previous hard-coded value.

    Returns
    -------
    str
        Path of the generated WAV file ("output.wav").

    Raises
    ------
    TypeError
        If the configured speaker embedding is still a placeholder string.
    KeyError
        If *speaker* is not a key of ``speaker_options``.
    """
    embedding = speaker_options[speaker]
    # The shipped speaker_options values are placeholder *strings*;
    # np.array(<str>, dtype=np.float32) would raise an opaque ValueError
    # deep inside numpy. Fail fast with an actionable message instead.
    if isinstance(embedding, str):
        raise TypeError(
            f"speaker_options[{speaker!r}] is a placeholder string; "
            "replace it with a numeric embedding vector before running TTS."
        )

    # Preprocess the input text and speaker embedding.
    input_text = np.array([text], dtype=np.str_)
    speaker_embedding = np.array([embedding], dtype=np.float32)

    # Run the ONNX model.
    # NOTE(review): "text" / "speaker_embedding" are assumed to be the
    # graph's input names — confirm against the exported model.
    ort_inputs = {
        "text": input_text,
        "speaker_embedding": speaker_embedding,
    }
    ort_outputs = ort_session.run(None, ort_inputs)

    # Postprocess the output (assuming the first output is the waveform).
    waveform = ort_outputs[0].squeeze()

    # Persist as WAV so Gradio can serve it via type="filepath".
    output_file = "output.wav"
    wav.write(output_file, sample_rate, waveform)

    return output_file
39
+
40
# Gradio callback: delegate synthesis and hand back the WAV file path.
def tts_app(text, speaker):
    """Return the path of the WAV file synthesized for *text* with *speaker*."""
    return generate_speech(text, speaker)
44
 
45
# Create the Gradio app: a text box plus a speaker picker feeding the
# TTS callback; the audio component serves the returned file path.
text_input = gr.Textbox(label="Input Text")
speaker_input = gr.Dropdown(choices=list(speaker_options.keys()), label="Speaker")

iface = gr.Interface(
    fn=tts_app,
    inputs=[text_input, speaker_input],
    outputs=gr.Audio(label="Generated Speech", type="filepath"),
    title="Text-to-Speech with Kokoro-82M-ONNX",
    description="Generate speech from text using the Kokoro-82M-ONNX model with multiple speaker options.",
)

# Launch only when executed as a script (the previous revision of this
# file had this guard; launching at import time breaks importers/tests).
if __name__ == "__main__":
    iface.launch()