Initial commit
- .gitignore +171 -0
- README.md +91 -11
- app.py +95 -0
- assets/edu_note.wav +0 -0
- assets/fun_fact.wav +0 -0
- assets/thanks.wav +0 -0
- example.ipynb +0 -0
- gradio_demo.png +0 -0
- inference.py +25 -0
- models/__init__.py +2 -0
- models/kokoro.py +125 -0
- models/tokenizer.py +238 -0
- requirements.txt +5 -0
- voices/af.pt +3 -0
- voices/af_bella.pt +3 -0
- voices/af_nicole.pt +3 -0
- voices/af_sarah.pt +3 -0
- voices/af_sky.pt +3 -0
- voices/am_adam.pt +3 -0
- voices/am_michael.pt +3 -0
- voices/bf_emma.pt +3 -0
- voices/bf_isabella.pt +3 -0
- voices/bm_george.pt +3 -0
- voices/bm_lewis.pt +3 -0
- weights/.gitkeep +0 -0
- weights/kokoro-v0_19.onnx +3 -0
.gitignore
ADDED
@@ -0,0 +1,171 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# PyPI configuration file
+.pypirc
README.md
CHANGED
@@ -1,14 +1,94 @@
+# Kokoro-82M ONNX Runtime Inference
+
+![Downloads](https://img.shields.io/github/downloads/yakhyo/kokoro-82m-onnx/total)
+[![GitHub Repo stars](https://img.shields.io/github/stars/yakhyo/kokoro-82m-onnx)](https://github.com/yakhyo/kokoro-82m-onnx/stargazers)
+[![GitHub Repository](https://img.shields.io/badge/GitHub-Repository-blue?logo=github)](https://github.com/yakhyo/kokoro-82m-onnx)
+
+This repository contains minimal code and resources for inference with the **Kokoro-82M** model using **ONNX Runtime**.
+
+<table>
+  <tr>
+    <td>Machine learning models rely on large datasets and complex algorithms to identify patterns and make predictions.</td>
+    <td>Did you know that honey never spoils? Archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still edible!</td>
+  </tr>
+  <tr>
+    <td align="center">
+      <video controls autoplay loop src="https://github.com/user-attachments/assets/a8e9bfb7-777a-4b44-901c-c79c39c02c6f"></video>
+    </td>
+    <td align="center">
+      <video controls autoplay loop src="https://github.com/user-attachments/assets/358723ad-c0ab-44a3-90cc-64d89c042c9a"></video>
+    </td>
+  </tr>
+</table>
+
+## Features
+
+- **ONNX Runtime Inference**: minimal ONNX Runtime inference code for Kokoro-82M (v0_19). Supports `en-us` and `en-gb`.
+
 ---
-
-
-
-
-
-
-
-
-
-
+
+## Installation
+
+1. Clone the repository:
+
+```bash
+git clone https://github.com/yakhyo/kokoro-82m.git
+cd kokoro-82m
+```
+
+2. Install dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+3. Install `espeak` for text-to-speech functionality. On Linux:
+
+```bash
+apt-get install espeak -y
+```
+
 ---
 
-
+## Usage
+
+### Download ONNX Model
+
+[Click to download](https://github.com/yakhyo/kokoro-82m/releases/download/v0.0.1/kokoro-v0_19.onnx)
+
+### Jupyter Notebook Inference Example
+
+Run inference using the Jupyter notebook:
+
+[example.ipynb](example.ipynb)
+
+### CLI Inference
+
+Specify the input text and model weights in `inference.py`, then run:
+
+```bash
+python inference.py
+```
+
+### Gradio App
+
+Run the command below to start the Gradio app:
+```bash
+python app.py
+```
+<div>
+  <img src="gradio_demo.png" width="100%">
+</div>
+
+---
+
+## License
+
+This project is licensed under the [MIT License](LICENSE).
+The model weights are licensed under [Apache 2.0](#license).
+
+---
+
+## Acknowledgments
+
+- https://huggingface.co/hexgrad/Kokoro-82M
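If you prefer to script the model download instead of clicking the release link, a minimal Python sketch (not part of this commit; the URL and target path come from the README and repo layout above):

```python
# Hypothetical download helper -- the release URL is taken from the README.
import urllib.request

url = ("https://github.com/yakhyo/kokoro-82m/releases/download/"
       "v0.0.1/kokoro-v0_19.onnx")
urllib.request.urlretrieve(url, "weights/kokoro-v0_19.onnx")  # ~345 MB
```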
app.py
ADDED
@@ -0,0 +1,95 @@
+import os
+import tempfile
+
+import gradio as gr
+import soundfile as sf
+
+from models import Tokenizer, Kokoro
+
+
+# Fetch available style vectors dynamically
+def get_style_vector_choices(directory="voices"):
+    return [file for file in os.listdir(directory) if file.endswith(".pt")]
+
+
+# Perform TTS using the local ONNX model
+def local_tts(
+    text: str,
+    model_path: str,
+    style_vector: str,
+    output_file_format: str = "wav",
+    speed: float = 1.0
+):
+    if len(text) > 0:
+        try:
+            tokenizer = Tokenizer()
+            style_vector_path = os.path.join("voices", style_vector)
+            inference = Kokoro(model_path, style_vector_path, tokenizer=tokenizer, lang='en-us')
+
+            audio, sample_rate = inference.generate_audio(text, speed=speed)
+
+            with tempfile.NamedTemporaryFile(suffix=f".{output_file_format}", delete=False) as temp_file:
+                sf.write(temp_file.name, audio, sample_rate)
+                temp_file_path = temp_file.name
+
+            return temp_file_path
+
+        except Exception as e:
+            raise gr.Error(f"An error occurred during TTS inference: {str(e)}")
+    else:
+        raise gr.Error("Input text cannot be empty.")
+
+
+# Get the list of available style vectors
+style_vector_choices = get_style_vector_choices()
+
+# Sample texts and their corresponding audio
+sample_outputs = [
+    ("Educational Note", "Machine learning models rely on large datasets and complex algorithms to identify patterns and make predictions.", "assets/edu_note.wav"),
+    ("Fun Fact", "Did you know that honey never spoils? Archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still edible!", "assets/fun_fact.wav"),
+    ("Thanks", "Thank you for listening to this audio. It was generated by the Kokoro TTS model.", "assets/thanks.wav")
+]
+
+example_texts = [
+    ["Machine learning models rely on large datasets and complex algorithms to identify patterns and make predictions."],
+    ["Did you know that honey never spoils? Archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still edible!"],
+    ["Thank you for listening to this audio. It was generated by the Kokoro TTS model."]
+]
+
+# Gradio Interface
+with gr.Blocks() as demo:
+    gr.Markdown("# <center> Kokoro-82m Text-to-Speech with Gradio </center>")
+
+    # Model-specific inputs
+    with gr.Row(variant="panel"):
+        model_path = gr.Textbox(label="Model Path", value="weights/kokoro-v0_19.onnx", interactive=False)
+        style_vector = gr.Dropdown(choices=style_vector_choices, label="Style Vector", value=style_vector_choices[0])
+        output_file_format = gr.Dropdown(choices=["wav", "mp3"], label="Output Format", value="wav")
+        speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed")
+
+    # Text input and output
+    text = gr.Textbox(
+        label="Input Text",
+        placeholder="Enter text to convert to speech."
+    )
+    btn = gr.Button("Generate Speech")
+    output_audio = gr.Audio(label="Generated Audio", type="filepath")
+
+    # Link inputs and outputs
+    btn.click(
+        fn=local_tts,
+        inputs=[text, model_path, style_vector, output_file_format, speed],
+        outputs=output_audio
+    )
+
+    # Add example texts
+    gr.Examples(
+        examples=example_texts,
+        inputs=[text],
+        label="Click an example to populate the input text"
+    )
+
+    # Add example texts and audios
+    gr.Markdown("### Sample Texts and Audio")
+    for topic, sample_text, sample_audio in sample_outputs:
+        with gr.Row():
+            gr.Textbox(value=sample_text, label=topic, interactive=False)
+            gr.Audio(value=sample_audio, label="Example Audio", type="filepath", interactive=False)
+
+
+demo.launch(server_name="127.0.0.1")
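As committed, the app is only reachable from the local machine (`server_name="127.0.0.1"`). If you want it visible on the network or behind a temporary public link, Gradio's `launch` accepts a few extra options; a sketch, not part of the committed file:

```python
# Bind on all interfaces, pin the port, and request a temporary gradio.live link.
demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
```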
assets/edu_note.wav
ADDED
Binary file (416 kB).
assets/fun_fact.wav
ADDED
Binary file (497 kB).
assets/thanks.wav
ADDED
Binary file (288 kB).
example.ipynb
ADDED
The diff for this file is too large to render.
gradio_demo.png
ADDED
inference.py
ADDED
@@ -0,0 +1,25 @@
+import soundfile as sf
+
+from models import Tokenizer, Kokoro
+
+
+def main():
+    model_path = "weights/kokoro-v0_19.onnx"
+    style_vector_path = "voices/af.pt"
+    output_filename = "test_out.wav"
+    tokenizer = Tokenizer()
+
+    text = (
+        "This approach ensures the entire text is processed without exceeding the token limit and outputs seamless audio for the full input. Let me know if you need further assistance!"
+    )
+
+    inference = Kokoro(model_path, style_vector_path, tokenizer=tokenizer, lang='en-us')
+    audio, sample_rate = inference.generate_audio(text, speed=1.0)
+
+    # Save the audio to a file
+    sf.write(output_filename, audio, sample_rate)
+    print(f"Audio saved to {output_filename}")
+
+
+if __name__ == "__main__":
+    main()
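The same pipeline works with any of the committed voices. A small variation on `inference.py` using one of the British English voices, assuming the model and voice files are in place (the output filename here is just an example):

```python
import soundfile as sf
from models import Tokenizer, Kokoro

# Swap in a British English voice and slow the speech down slightly.
inference = Kokoro("weights/kokoro-v0_19.onnx", "voices/bm_george.pt",
                   tokenizer=Tokenizer(), lang='en-gb')
audio, sample_rate = inference.generate_audio("Good evening from London.", speed=0.9)
sf.write("bm_george_out.wav", audio, sample_rate)
```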
models/__init__.py
ADDED
@@ -0,0 +1,2 @@
+from .kokoro import Kokoro
+from .tokenizer import Tokenizer
models/kokoro.py
ADDED
@@ -0,0 +1,125 @@
+import torch
+import numpy as np
+import onnxruntime as ort
+
+TOKEN_LIMIT = 510
+SAMPLE_RATE = 24_000
+
+
+class Kokoro:
+    def __init__(self, model_path: str, style_vector_path: str, tokenizer, lang: str = 'en-us') -> None:
+        """
+        Initializes the Kokoro ONNX inference class.
+
+        Args:
+            model_path (str): Path to the ONNX model file.
+            style_vector_path (str): Path to the style vector file.
+            tokenizer: Tokenizer instance used to phonemize and tokenize text.
+            lang (str): Language code for the tokenizer.
+        """
+        self.sess = ort.InferenceSession(model_path)
+        self.style_vector_path = style_vector_path
+        self.tokenizer = tokenizer
+        self.lang = lang
+
+    def preprocess(self, text):
+        """
+        Converts input text to tokenized numerical IDs and loads the style vector.
+
+        Args:
+            text (str): Input text to preprocess.
+
+        Returns:
+            tuple: Tokenized input and corresponding style vector(s).
+        """
+        # Convert text to phonemes and tokenize
+        phonemes = self.tokenizer.phonemize(text, lang=self.lang)
+        tokenized_phonemes = self.tokenizer.tokenize(phonemes)
+
+        if not tokenized_phonemes:
+            raise ValueError("No tokens found after tokenization")
+
+        style_vector = torch.load(self.style_vector_path, weights_only=True)
+
+        if len(tokenized_phonemes) > TOKEN_LIMIT:
+            token_chunks = self.split_into_chunks(tokenized_phonemes)
+
+            tokens_list = []
+            styles_list = []
+
+            for chunk in token_chunks:
+                # Pad each chunk with the boundary token (index 0) and select the
+                # style vector row matching the chunk length
+                token_chunk = [[0, *chunk, 0]]
+                style_chunk = style_vector[len(chunk)].numpy()
+
+                tokens_list.append(token_chunk)
+                styles_list.append(style_chunk)
+
+            return tokens_list, styles_list
+
+        style_vector = style_vector[len(tokenized_phonemes)].numpy()
+        tokenized_phonemes = [[0, *tokenized_phonemes, 0]]
+
+        return tokenized_phonemes, style_vector
+
+    @staticmethod
+    def split_into_chunks(tokens):
+        """
+        Splits a list of tokens into chunks of at most TOKEN_LIMIT.
+
+        Args:
+            tokens (list): List of tokens to split.
+
+        Returns:
+            list: List of token chunks.
+        """
+        tokens_chunks = []
+        for i in range(0, len(tokens), TOKEN_LIMIT):
+            tokens_chunks.append(tokens[i:i + TOKEN_LIMIT])
+        return tokens_chunks
+
+    def infer(self, tokens, style_vector, speed=1.0):
+        """
+        Runs inference using the ONNX model.
+
+        Args:
+            tokens (list): Tokenized input for the model.
+            style_vector (numpy.ndarray): Style vector for the model.
+            speed (float): Speed parameter for inference.
+
+        Returns:
+            numpy.ndarray: Generated audio data.
+        """
+        # Perform inference
+        audio = self.sess.run(
+            None,
+            {
+                'tokens': tokens,
+                'style': style_vector,
+                'speed': np.array([speed], dtype=np.float32),
+            }
+        )[0]
+        return audio
+
+    def generate_audio(self, text, speed=1.0):
+        """
+        Full pipeline: preprocess, infer, and return the generated audio.
+
+        Args:
+            text (str): Input text to generate audio from.
+            speed (float): Speed parameter for inference.
+
+        Returns:
+            tuple: Generated audio array and the sample rate.
+        """
+        # Preprocess text
+        tokenized_data, styles_data = self.preprocess(text)
+
+        audio_segments = []
+        if len(tokenized_data) > 1:  # list of token chunks
+            for token_chunk, style_chunk in zip(tokenized_data, styles_data):
+                audio = self.infer(token_chunk, style_chunk, speed=speed)
+                audio_segments.append(audio)
+        else:  # single chunk below the token limit
+            # Run inference
+            audio = self.infer(tokenized_data, styles_data, speed=speed)
+            audio_segments.append(audio)
+
+        full_audio = np.concatenate(audio_segments)
+
+        return full_audio, SAMPLE_RATE
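Because `preprocess` splits anything longer than `TOKEN_LIMIT` into chunks and `generate_audio` concatenates the per-chunk output, long inputs stay within the model's 510-token limit. A quick check of the chunking helper, assuming the repository's dependencies are installed so `models` imports cleanly:

```python
from models.kokoro import Kokoro, TOKEN_LIMIT

fake_tokens = list(range(1200))  # stand-in token IDs for a long input
chunks = Kokoro.split_into_chunks(fake_tokens)
print([len(c) for c in chunks])  # [510, 510, 180]
assert all(len(c) <= TOKEN_LIMIT for c in chunks)
```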
models/tokenizer.py
ADDED
@@ -0,0 +1,238 @@
+import re
+from typing import List
+
+from phonemizer import backend
+
+
+class Tokenizer:
+    def __init__(self):
+        self.VOCAB = self._get_vocab()
+        self.phonemizers = {
+            'en-us': backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True),
+            'en-gb': backend.EspeakBackend(language='en-gb', preserve_punctuation=True, with_stress=True),
+        }
+
+    @staticmethod
+    def _get_vocab():
+        """
+        Generates a mapping of symbols to integer indices for tokenization.
+
+        Returns:
+            dict: A dictionary where keys are symbols and values are unique integer indices.
+        """
+        # Define the symbols
+        _pad = "$"
+        _punctuation = ';:,.!?¡¿—…"«»“” '
+        _letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
+        _letters_ipa = (
+            "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
+        )
+        symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
+
+        # Create a dictionary mapping each symbol to its index
+        return {symbol: index for index, symbol in enumerate(symbols)}
+
+    @staticmethod
+    def split_num(num: re.Match) -> str:
+        """
+        Processes numeric strings, formatting them as time, years, or other representations.
+
+        Args:
+            num (re.Match): A regex match object representing the numeric string.
+
+        Returns:
+            str: A formatted string based on the numeric input.
+        """
+        num = num.group()
+
+        # Leave decimal numbers unchanged; they are handled later by point_num
+        if '.' in num:
+            return num
+
+        # Handle time (e.g., "12:30")
+        if ':' in num:
+            hours, minutes = map(int, num.split(':'))
+            if minutes == 0:
+                return f"{hours} o'clock"
+            elif minutes < 10:
+                return f'{hours} oh {minutes}'
+            return f'{hours} {minutes}'
+
+        # Handle years or general numeric cases
+        year = int(num[:4])
+        if year < 1100 or year % 1000 < 10:
+            return num
+
+        left, right = num[:2], int(num[2:4])
+        suffix = 's' if num.endswith('s') else ''
+
+        # Format years
+        if 100 <= year % 1000 <= 999:
+            if right == 0:
+                return f'{left} hundred{suffix}'
+            elif right < 10:
+                return f'{left} oh {right}{suffix}'
+        return f'{left} {right}{suffix}'
+
+    @staticmethod
+    def flip_money(match: re.Match) -> str:
+        """
+        Converts monetary values to a textual representation.
+
+        Args:
+            match (re.Match): A regex match object representing the monetary value.
+
+        Returns:
+            str: A formatted string describing the monetary value.
+        """
+        m = match.group()
+        currency = 'dollar' if m[0] == '$' else 'pound'
+
+        # Handle whole amounts (e.g., "$10", "£20")
+        if '.' not in m:
+            singular = '' if m[1:] == '1' else 's'
+            return f'{m[1:]} {currency}{singular}'
+
+        # Handle amounts with decimals (e.g., "$10.50", "£5.25")
+        whole, cents = m[1:].split('.')
+        singular = '' if whole == '1' else 's'
+        cents = int(cents.ljust(2, '0'))  # Ensure 2 decimal places
+        coins = f"cent{'' if cents == 1 else 's'}" if m[0] == '$' else ('penny' if cents == 1 else 'pence')
+        return f'{whole} {currency}{singular} and {cents} {coins}'
+
+    @staticmethod
+    def point_num(match):
+        # Read decimals digit by digit: "3.14" -> "3 point 1 4"
+        whole, fractional = match.group().split('.')
+        return ' point '.join([whole, ' '.join(fractional)])
+
+    def normalize_text(self, text: str) -> str:
+        """
+        Normalizes input text by replacing special characters, punctuation, and applying custom transformations.
+
+        Args:
+            text (str): Input text to normalize.
+
+        Returns:
+            str: Normalized text.
+        """
+        # Replace specific characters with standardized versions
+        replacements = {
+            chr(8216): "'",  # Left single quotation mark
+            chr(8217): "'",  # Right single quotation mark
+            '«': chr(8220),  # Left double angle quotation mark to left double quotation mark
+            '»': chr(8221),  # Right double angle quotation mark to right double quotation mark
+            chr(8220): '"',  # Left double quotation mark
+            chr(8221): '"',  # Right double quotation mark
+            '(': '«',  # Replace parentheses with angle quotation marks
+            ')': '»'
+        }
+        for old, new in replacements.items():
+            text = text.replace(old, new)
+
+        # Replace punctuation and add spaces
+        punctuation_replacements = {
+            '、': ',',
+            '。': '.',
+            '!': '!',
+            ',': ',',
+            ':': ':',
+            ';': ';',
+            '?': '?',
+        }
+        for old, new in punctuation_replacements.items():
+            text = text.replace(old, new + ' ')
+
+        # Apply regex-based replacements
+        text = re.sub(r'[^\S\n]', ' ', text)
+        text = re.sub(r' +', ' ', text)
+        text = re.sub(r'(?<=\n) +(?=\n)', '', text)
+
+        # Expand abbreviations and handle special cases
+        abbreviation_patterns = [
+            (r'\bD[Rr]\.(?= [A-Z])', 'Doctor'),
+            (r'\b(?:Mr\.|MR\.(?= [A-Z]))', 'Mister'),
+            (r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss'),
+            (r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs'),
+            (r'\betc\.(?! [A-Z])', 'etc'),
+            (r'(?i)\b(y)eah?\b', r"\1e'a"),
+        ]
+        for pattern, replacement in abbreviation_patterns:
+            text = re.sub(pattern, replacement, text)
+
+        # Handle numbers and monetary values
+        text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', self.split_num, text)
+        text = re.sub(r'(?<=\d),(?=\d)', '', text)  # Remove commas from numbers
+        text = re.sub(
+            r'(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b',
+            self.flip_money,
+            text
+        )
+        text = re.sub(r'\d*\.\d+', self.point_num, text)
+        text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text)
+
+        # Handle possessives and specific letter cases
+        text = re.sub(r'(?<=\d)S', ' S', text)
+        text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
+        text = re.sub(r"(?<=X')S\b", 's', text)
+
+        # Handle abbreviations with dots
+        text = re.sub(r'(?:[A-Za-z]\.){2,} [a-z]', lambda m: m.group().replace('.', '-'), text)
+        text = re.sub(r'(?i)(?<=[A-Z])\.(?=[A-Z])', '-', text)
+
+        return text.strip()
+
+    def tokenize(self, phonemes: str) -> List[int]:
+        """
+        Tokenizes a given string into a list of indices based on VOCAB.
+
+        Args:
+            phonemes (str): Input string to tokenize.
+
+        Returns:
+            list: A list of integer indices corresponding to the characters in the input string.
+        """
+        return [self.VOCAB[x] for x in phonemes if x in self.VOCAB]
+
+    def phonemize(self, text: str, lang: str = 'en-us', normalize: bool = True) -> str:
+        """
+        Converts text to phonemes using the specified language phonemizer and applies normalization.
+
+        Args:
+            text (str): Input text to be phonemized.
+            lang (str): Language identifier ('en-us' or 'en-gb') for selecting the phonemizer.
+            normalize (bool): Whether to normalize the text before phonemization.
+
+        Returns:
+            str: A processed string of phonemes.
+        """
+        # Normalize text if required
+        if normalize:
+            text = self.normalize_text(text)
+
+        # Generate phonemes using the specified phonemizer
+        if lang not in self.phonemizers:
+            print(f"Language '{lang}' not supported. Defaulting to 'en-us'.")
+            lang = 'en-us'
+
+        phonemes = self.phonemizers[lang].phonemize([text])
+        phonemes = phonemes[0] if phonemes else ''
+
+        # Apply custom phoneme replacements
+        replacements = {
+            'kəkˈoːɹoʊ': 'kˈoʊkəɹoʊ',
+            'kəkˈɔːɹəʊ': 'kˈəʊkəɹəʊ',
+            'ʲ': 'j',
+            'r': 'ɹ',
+            'x': 'k',
+            'ɬ': 'l',
+        }
+        for old, new in replacements.items():
+            phonemes = phonemes.replace(old, new)
+
+        # Apply regex-based replacements
+        phonemes = re.sub(r'(?<=[a-zɹː])(?=hˈʌndɹɪd)', ' ', phonemes)
+        phonemes = re.sub(r' z(?=[;:,.!?¡¿—…"«»“” ]|$)', 'z', phonemes)
+
+        # Additional rule for American English
+        if lang == 'en-us':
+            phonemes = re.sub(r'(?<=nˈaɪn)ti(?!ː)', 'di', phonemes)
+
+        # Filter out characters not in VOCAB
+        phonemes = ''.join(filter(lambda p: p in self.VOCAB, phonemes))
+
+        return phonemes.strip()
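A short sketch of the text → phonemes → token IDs pipeline, assuming `espeak` is installed as described in the README (the expected output below relies on the decimal check in `split_num` above):

```python
from models import Tokenizer

tok = Tokenizer()

# normalize_text expands times and money amounts before phonemization
print(tok.normalize_text("The price was $4.50 at 12:30."))
# -> "The price was 4 dollars and 50 cents at 12 30."

phonemes = tok.phonemize("Hello world!", lang='en-us')
token_ids = tok.tokenize(phonemes)
print(phonemes, token_ids[:8])
```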
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+gradio
+torch
+onnxruntime
+phonemizer
+soundfile
voices/af.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fad4192fd8a840f925b0e3fc2be54e20531f91a9ac816a485b7992ca0bd83ebf
+size 524355
voices/af_bella.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2828c6c2f94275ef3441a2edfcf48293298ee0f9b56ce70fb2e344345487b922
+size 524449
voices/af_nicole.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9401802fb0b7080c324dec1a75d60f31d977ced600a99160e095dbc5a1172692
+size 524454
voices/af_sarah.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba7918c4ace6ace4221e7e01eb3a6d16596cba9729850551c758cd2ad3a4cd08
+size 524449
voices/af_sky.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f16f1bb778de36a177ae4b0b6f1e59783d5f4d3bcecf752c3e1ee98299b335e
+size 524375
voices/am_adam.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1921528b400a553f66528c27899d95780918fe33b1ac7e2a871f6a0de475f176
+size 524444
voices/am_michael.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a255c9562c363103adc56c09b7daf837139d3bdaa8bd4dd74847ab1e3e8c28be
+size 524459
voices/bf_emma.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:992e6d8491b8926ef4a16205250e51a21d9924405a5d37e2db6e94adfd965c3b
+size 524365
voices/bf_isabella.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0865a03931230100167f7a81d394b143c072efe2d7e4c4a87b5c54d6283f580
+size 524365
voices/bm_george.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d763dfe13e934357f4d8322b718787d79e32f2181e29ca0cf6aa637d8092b96
+size 524464
voices/bm_lewis.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f70d9ea4d65f522f224628f06d86ea74279faae23bd7e765848a374aba916b76
+size 524449
weights/.gitkeep
ADDED
File without changes
weights/kokoro-v0_19.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebef42457f7efee9b60b4f1d5aec7692f2925923948a0d7a2a49d2c9edf57e49
+size 345554732