diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000000000000000000000000000000000000..ed12c60a8b10c2b682985843bbe58333def3a36d --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,13 @@ +# These are supported funding model platforms + +github: # +patreon: # Replace with a single Patreon username +open_collective: # Replace with a single Open Collective username +ko_fi: iahispano +tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +liberapay: # Replace with a single Liberapay username +issuehunt: # Replace with a single IssueHunt username +otechie: # Replace with a single Otechie username +lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry +custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000000000000000000000000000000000000..05a9b3e07f1be01a432c3092ae0a8a05c8bb7a11 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,31 @@ +--- +name: Bug report +about: Create a report to help us improve +title: "[BUG]" +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Assets** +If applicable, add screenshots/videos to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. Windows 11] + - Browser [e.g. chrome, safari] + +**Additional context** +Add any other context about the problem here. 
diff --git a/.github/workflows/code_formatter.yml b/.github/workflows/code_formatter.yml new file mode 100644 index 0000000000000000000000000000000000000000..93555538e8dcf9c3968ff33e5e8b689badbceac6 --- /dev/null +++ b/.github/workflows/code_formatter.yml @@ -0,0 +1,51 @@ +name: Code Formatter + +on: + push: + branches: + - main + +jobs: + push_format: + runs-on: ubuntu-latest + + permissions: + contents: write + pull-requests: write + + steps: + - uses: actions/checkout@v3 + with: + ref: ${{github.ref_name}} + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Black + run: pip install "black[jupyter]" + + - name: Run Black + # run: black $(git ls-files '*.py') + run: black . + + - name: Commit Back + continue-on-error: true + id: commitback + run: | + git config --local user.email "github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + git add --all + git commit -m "chore(format): run black on ${{github.ref_name}}" + + - name: Create Pull Request + if: steps.commitback.outcome == 'success' + continue-on-error: true + uses: peter-evans/create-pull-request@v5 + with: + delete-branch: true + body: "Automatically apply code formatter change" + title: "chore(format): run black on ${{github.ref_name}}" + commit-message: "chore(format): run black on ${{github.ref_name}}" + branch: formatter-${{github.ref_name}} diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml new file mode 100644 index 0000000000000000000000000000000000000000..7af2d7e37e5aaf171d9ff0f275fdcdaf59a03407 --- /dev/null +++ b/.github/workflows/unittest.yml @@ -0,0 +1,36 @@ +name: Unit Test +on: [ push, pull_request ] +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: ["3.9", "3.10"] + os: [ubuntu-latest] + fail-fast: true + + steps: + - uses: actions/checkout@master + - name: Set up Python ${{ 
matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + sudo apt update + sudo apt -y install ffmpeg + sudo apt -y install -qq aria2 + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co./lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d ./ -o hubert_base.pt + python -m pip install --upgrade pip + python -m pip install --upgrade setuptools + python -m pip install --upgrade wheel + pip install torch torchvision torchaudio + pip install -r requirements.txt + - name: Test step 1 & 2 + run: | + mkdir -p logs/mi-test + touch logs/mi-test/preprocess.log + python rvc/train/preprocess/preprocess.py logs/mi-test logs/mute/0_gt_wavs 48000 8 3.7 + touch logs/mi-test/extract_f0_feature.log + python rvc/train/extract/extract_f0_print.py logs/mi-test pm 64 + python rvc/train/extract/extract_feature_print.py cpu 1 0 0 logs/mi-test v1 True diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..299d36e44d9f3ba488097a9d8997e82c02978b6c --- /dev/null +++ b/.gitignore @@ -0,0 +1,129 @@ +# Applio +logs +*.exe +*.pt +*.pth +*.index +*.wav + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# 
Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..c60d425dfc2aa27bf543d15c48cfc92eafad3d85 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +# syntax=docker/dockerfile:1 + +FROM python:3.10-bullseye + +EXPOSE 6969 + +WORKDIR /app + +RUN apt update && apt install -y -qq ffmpeg aria2 && apt clean + +# Copy the project first so requirements.txt exists in the image when pip runs +COPY . . + +RUN pip3 install --no-cache-dir -r requirements.txt + +VOLUME [ "/app/logs/weights", "/app/opt" ] + +ENTRYPOINT [ "python3" ] + +CMD ["app.py"] \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..671962a61672d9a0494135260190c24a857fe7b8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,26 @@ +MIT License (Non-Commercial) + +Copyright (c) 2023 AI Hispano + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to use, +copy, modify, merge, publish and/or distribute Applio-RVC-Fork, subject to the following conditions: + +1. The software and its derivatives may only be used for non-commercial + purposes. + +2. Any commercial use, sale, or distribution of the software or its derivatives + is strictly prohibited. + +3. 
The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +Please note that under this license, the software and its derivatives can only be used for non-commercial purposes, and any commercial use, sale, or distribution is prohibited. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..3d6b0212b2333ca0260938c87858ff6fbaa77b17 --- /dev/null +++ b/Makefile @@ -0,0 +1,24 @@ +.PHONY: +.ONESHELL: + +# Show help message +help: + @grep -hE '^[A-Za-z0-9_ \-]*?:.*##.*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +# Install dependencies +run-install: + apt-get -y install build-essential python3-dev ffmpeg + pip install --upgrade setuptools wheel + pip install --upgrade pip + pip install faiss-gpu fairseq gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.1 + pip install -r requirements.txt + pip install --upgrade lxml + apt-get update + +# Run Applio +run-applio: + python app.py + +# Run Tensorboard +run-tensorboard: + python core.py tensorboard diff --git a/README.md b/README.md index 03cca18999aa1a2f32e0bc8eaddf04f344cf2d7a..06bc9a6910e683c398c23df9b94e732e00012dbd 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,112 @@ ---- -title: Applio V3 HF -emoji: 🐨 -colorFrom: pink -colorTo: yellow -sdk: gradio -sdk_version: 4.16.0 -app_file: app.py -pinned: false -license: openrail ---- - 
-Check out the configuration reference at https://huggingface.co./docs/hub/spaces-config-reference +# Applio + +Welcome to **Applio**, the ultimate voice cloning tool meticulously optimized for unrivaled power, modularity, and a user-friendly experience. + +[![Precompiled Versions](https://img.shields.io/badge/Precompiled%20Versions-ffffff?style=flat-square&logo=&link=https://huggingface.co./IAHispano/applio/tree/main/Applio%20V3%20Precompiled)](https://huggingface.co./IAHispano/applio/tree/main/Applio%20V3%20Precompiled) +![GitHub Release](https://img.shields.io/github/v/release/iahispano/applio-rvc-fork?style=flat-square) +![GitHub Repo stars](https://img.shields.io/github/stars/iahispano/applio-rvc-fork?style=flat-square) +![GitHub forks](https://img.shields.io/github/forks/iahispano/applio-rvc-fork?style=flat-square) +[![Support Discord](https://img.shields.io/discord/1096877223765606521?style=flat-square)](https://discord.gg/iahispano) +[![Issues](https://img.shields.io/github/issues/iahispano/applio-rvc-fork?style=flat-square)](https://github.com/IAHispano/Applio-RVC-Fork/issues) +[![Open In Colab](https://img.shields.io/badge/google_colab-F9AB00?style=flat-square&logo=googlecolab&logoColor=white)](https://colab.research.google.com/github/iahispano/applio/blob/master/assets/Applio.ipynb) + +## Table of Contents +- [**Installation**](#installation) + - [Windows](#windows) + - [Linux](#linux) + - [Using Makefile](#using-makefile-for-platforms-such-as-paperspace) +- [**Usage**](#usage) + - [Windows](#windows-1) + - [Linux](#linux-1) + - [Using Makefile](#using-makefile-for-platforms-such-as-paperspace-1) +- [**Repository Enhancements**](#repository-enhancements) +- [**Credits**](#credits) + - [Contributors](#contributors) + +## Installation +Download the latest version from [GitHub Releases](https://github.com/IAHispano/Applio-RVC-Fork/releases) or use [Precompiled Versions](https://huggingface.co./IAHispano/applio/tree/main/Applio%20V3%20Precompiled). 
+ +### Windows +```bash +./run-install.bat +``` + +### Linux +```bash +chmod +x run-install.sh +./run-install.sh +``` + +### Using Makefile (for platforms such as [Paperspace](https://www.paperspace.com/)) +``` +make run-install +``` + +## Usage +Visit [Applio Documentation](https://docs.applio.org/) for a detailed UI usage explanation. + +### Windows +```bash +./run-applio.bat +``` + +### Linux +```bash +chmod +x run-applio.sh +./run-applio.sh +``` + +### Using Makefile (for platforms such as [Paperspace](https://www.paperspace.com/)) +``` +make run-applio +``` + +## Repository Enhancements + +This repository has undergone significant improvements to enhance its functionality and maintainability: + +- **Code Modularization:** The codebase has been restructured to follow a modular approach. This ensures better organization, readability, and ease of maintenance. +- **Hop Length Implementation:** Special thanks to [@Mangio621](https://github.com/Mangio621/Mangio-RVC-Fork) for introducing hop length implementation. This enhancement enhances the efficiency and performance on Crepe (previously known as Mangio-Crepe). +- **Translations to +30 Languages:** The repository now supports translations in over 30 languages, making it more accessible to a global audience. +- **Cross-Platform Compatibility:** With multiplatform compatibility, this repository can seamlessly operate across various platforms, providing a consistent experience to users. +- **Optimized Requirements:** The project's requirements have been fine-tuned for improved performance and resource utilization. +- **Simple Installation:** The installation process has been streamlined, ensuring a straightforward and user-friendly experience for setup. + +These enhancements contribute to a more robust and scalable codebase, making the repository more accessible for contributors and users alike. 
+ +## Contributions +- **Backend Contributions:** If you want to contribute to the backend, make your pull requests [here](https://github.com/blaise-tk/RVC_CLI). +- **Frontend Contributions:** For interface or script-related contributions, feel free to contribute to this repository. + +We appreciate all contributions ❤️ + +## Planned Features +- Implement: Support for Apple Devices ([Issue Link](https://github.com/pytorch/pytorch/issues/77764)) +- Implement: rmvpe_gpu +- Implement: Theme selector, RPC toggle & version checker +- Implement: Overtraining detector +- Implement: Autotune +- Implement: Training stop +- Fix: Model fusion +- Fix: Harvest & Crepe + +## Credits +- [VITS](https://github.com/jaywalnut310/vits) by jaywalnut310 +- [Retrieval-based-Voice-Conversion-WebUI](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) by RVC-Project +- [Mangio-RVC-Fork](https://github.com/Mangio621/Mangio-RVC-Fork) by Mangio621 +- [Mangio-RVC-Tweaks](https://github.com/alexlnkp/Mangio-RVC-Tweaks) by alexlnkp +- [RVG_tts](https://github.com/Foxify52/RVG_tts) by Foxify52 +- [RMVPE](https://github.com/Dream-High/RMVPE) by Dream-High +- [ContentVec](https://github.com/auspicious3000/contentvec/) by auspicious3000 +- [HIFIGAN](https://github.com/jik876/hifi-gan) by jik876 +- [Gradio](https://github.com/gradio-app/gradio) by gradio-app +- [FFmpeg](https://github.com/FFmpeg/FFmpeg) by FFmpeg +- [audio-slicer](https://github.com/openvpi/audio-slicer) by openvpi +- [Ilaria-Audio-Analyzer](https://github.com/TheStingerX/Ilaria-Audio-Analyzer) by TheStingerX +- [gradio-screen-recorder](https://huggingface.co./spaces/gstaff/gradio-screen-recorder) by gstaff +- [RVC_CLI](https://github.com/blaise-tk/RVC_CLI) by blaise-tk + +### Contributors + + + diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..70bcb174cb45dd487d9f9141d2884419b08fb32e --- /dev/null +++ b/app.py @@ -0,0 +1,58 @@ +import gradio as gr +import sys +import 
os + +now_dir = os.getcwd() +sys.path.append(now_dir) + +from assets.i18n.i18n import I18nAuto + +i18n = I18nAuto() + +from tabs.inference.inference import inference_tab +from tabs.train.train import train_tab +from tabs.extra.extra import extra_tab +from tabs.report.report import report_tab +from tabs.download.download import download_tab +from tabs.tts.tts import tts_tab +from assets.discord_presence import rich_presence + +rich_presence() + +with gr.Blocks(theme="ParityError/Interstellar", title="Applio") as Applio: + gr.Markdown("# Applio") + gr.Markdown( + i18n( + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience." + ) + ) + gr.Markdown( + i18n( + "[Support](https://discord.gg/IAHispano) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)" + ) + ) + with gr.Tab(i18n("Inference")): + inference_tab() + + with gr.Tab(i18n("Train")): + train_tab() + + with gr.Tab(i18n("TTS")): + tts_tab() + + with gr.Tab(i18n("Extra")): + extra_tab() + + with gr.Tab(i18n("Download")): + download_tab() + + with gr.Tab(i18n("Report a Bug")): + report_tab() + +if __name__ == "__main__": + Applio.launch( + favicon_path="assets/ICON.ico", + share="--share" in sys.argv, + inbrowser="--open" in sys.argv, + server_port=6969, + ) diff --git a/assets/Applio.ipynb b/assets/Applio.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1f4cad9c6af73f8a8226acf6bd1dffb3fc066e57 --- /dev/null +++ b/assets/Applio.ipynb @@ -0,0 +1,85 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "vtON700qokuQ" + }, + "outputs": [], + "source": [ + "# @title **Install Applio**\n", + "\n", + "import codecs\n", + "import time\n", + "\n", + "orig_name_of_program = 
codecs.decode(\"Nccyvb\", \"rot_13\")\n", + "new_name_of_program = codecs.decode(\"cebtenz\", \"rot_13\")\n", + "uioawhd = codecs.decode(\"uggcf://tvguho.pbz/VNUvfcnab/Nccyvb.tvg\", \"rot_13\")\n", + "uyadwa = codecs.decode(\"ncc.cl\", \"rot_13\")\n", + "\n", + "from IPython.display import clear_output, Javascript\n", + "\n", + "!git clone --depth 1 $uioawhd\n", + "!mv $orig_name_of_program $new_name_of_program\n", + "%cd $new_name_of_program/\n", + "\n", + "clear_output()\n", + "file_path = \"requirements.txt\"\n", + "!pip install -r \"requirements.txt\" --quiet\n", + "\n", + "clear_output()\n", + "print(\"Finished installing requirements!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-7cQtXouqpQi" + }, + "outputs": [], + "source": [ + "# @title **Start Applio**\n", + "import codecs\n", + "\n", + "uyadwa = codecs.decode(\"ncc.cl\", \"rot_13\")\n", + "\n", + "%load_ext tensorboard\n", + "%reload_ext tensorboard\n", + "%tensorboard --logdir logs --bind_all\n", + "\n", + "!python $uyadwa --share" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ymhGfgFSR17k" + }, + "source": [ + "## **Credits**\n", + "- Special thanks to [Hina](https://github.com/hinabl) 💗\n", + "- [Blaise](https://github.com/blaise-tk) and [Applio Team](https://github.com/IAHispano)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/assets/ICON.ico b/assets/ICON.ico new file mode 100644 index 0000000000000000000000000000000000000000..340358a598d8a110c798431c8ca99bd580099b02 Binary files /dev/null and b/assets/ICON.ico differ diff --git a/assets/audios/audio-others/.gitignore b/assets/audios/audio-others/.gitignore new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/assets/discord_presence.py b/assets/discord_presence.py new file mode 100644 index 0000000000000000000000000000000000000000..a89e42d21859e69b2a5e95b7e6743784f991430e --- /dev/null +++ b/assets/discord_presence.py @@ -0,0 +1,38 @@ +from pypresence import Presence +import datetime as dt +import time + + +def rich_presence(): + client_id = "1144714449563955302" + RPC = Presence(client_id) + try: + RPC.connect() + RPC.update( + state="applio.org", + details="Ultimate voice cloning tool.", + buttons=[ + {"label": "Home", "url": "https://applio.org"}, + {"label": "Download", "url": "https://applio.org/download"}, + ], + large_image="logo", + large_text="experimenting with applio", + start=dt.datetime.now().timestamp(), + ) + return RPC + except Exception as e: + print(f"An error occurred: {e}") + return None + + +if __name__ == "__main__": + rpc = rich_presence() + + if rpc: + try: + while True: + time.sleep(15) + except KeyboardInterrupt: + rpc.close() + else: + print("Failed to initialize Rich Presence.") diff --git a/assets/i18n/i18n.py b/assets/i18n/i18n.py new file mode 100644 index 0000000000000000000000000000000000000000..f09c2b647fac112d58c454a59c3f6e807bfeb4c6 --- /dev/null +++ b/assets/i18n/i18n.py @@ -0,0 +1,39 @@ +import json +from pathlib import Path +from locale import getdefaultlocale + + +class I18nAuto: + LANGUAGE_PATH = "./assets/i18n/languages/" + + def __init__(self, language=None): + language = language or getdefaultlocale()[0] + + lang_prefix = language[:2] if language is not None else "en" + available_languages = self._get_available_languages() + matching_languages = [ + lang for lang in available_languages if lang.startswith(lang_prefix) + ] + + self.language = matching_languages[0] if matching_languages else "en_US" + self.language_map = self._load_language_list() + + def _load_language_list(self): + try: + file_path = Path(self.LANGUAGE_PATH) / 
f"{self.language}.json" + with open(file_path, "r", encoding="utf-8") as f: + return json.load(f) + except FileNotFoundError: + raise FileNotFoundError( + f"Failed to load language file for {self.language}. Check if the correct .json file exists." + ) + + def _get_available_languages(self): + language_files = [path.stem for path in Path(self.LANGUAGE_PATH).glob("*.json")] + return language_files + + def _language_exists(self, language): + return (Path(self.LANGUAGE_PATH) / f"{language}.json").exists() + + def __call__(self, key): + return self.language_map.get(key, key) diff --git a/assets/i18n/languages/ar_AR.json b/assets/i18n/languages/ar_AR.json new file mode 100644 index 0000000000000000000000000000000000000000..1e3dd69983866695ed218fee28eff0e7179d9fe4 --- /dev/null +++ b/assets/i18n/languages/ar_AR.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "أداة استنساخ الصوت النهائية ، محسنة بدقة للحصول على قوة لا مثيل لها ، ونمطية ، وتجربة سهلة الاستخدام.", + "This section contains some extra utilities that often may be in experimental phases.": "يحتوي هذا القسم على بعض الأدوات المساعدة الإضافية التي قد تكون غالبا في المراحل التجريبية.", + "Output Information": "معلومات الإخراج", + "Inference": "استدلال", + "Train": "قطار", + "Extra": "اضافيه", + "Merge Audios": "دمج الصوتيات", + "Processing": "تجهيز", + "Audio Analyzer": "محلل الصوت", + "Model Information": "معلومات النموذج", + "Download": "تحميل", + "Report a Bug": "الإبلاغ عن خطأ", + "Preprocess": "المعالجة المسبقة", + "Model Name": "اسم الموديل", + "Enter model name": "أدخل اسم الطراز", + "Dataset Path": "مسار مجموعة البيانات", + "Enter dataset path": "إدخال مسار مجموعة البيانات", + "Sampling Rate": "معدل أخذ العينات", + "RVC Version": "نسخة RVC", + "Preprocess Dataset": "مجموعة بيانات ما قبل المعالجة", + "Extract": "استخرج", + "Hop Length": "طول القفزة", + "Batch Size": "حجم الدفعة", + "Save Every Epoch": "حفظ كل 
حقبة", + "Total Epoch": "إجمالي العصر", + "Pretrained": "التدريب المسبق", + "Save Only Latest": "حفظ الأحدث فقط", + "Save Every Weights": "حفظ كل الأوزان", + "Custom Pretrained": "تدريب مسبق مخصص", + "Upload Pretrained Model": "تحميل نموذج تم تدريبه مسبقا", + "Pretrained Custom Settings": "الإعدادات المخصصة المدربة مسبقا", + "The file you dropped is not a valid pretrained file. Please try again.": "الملف الذي أسقطته ليس ملفا صالحا تم تدريبه مسبقا. يرجى المحاولة مرة أخرى.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "انقر فوق زر التحديث لرؤية الملف الذي تم اختباره مسبقا في القائمة المنسدلة.", + "Pretrained G Path": "مخصص مسبقا G", + "Pretrained D Path": "مخصص مسبق التدريب D", + "GPU Settings": "إعدادات وحدة معالجة الرسومات", + "GPU Custom Settings": "الإعدادات المخصصة لوحدة معالجة الرسومات", + "GPU Number": "رقم وحدة معالجة الرسومات", + "0 to ∞ separated by -": "0 إلى ∞ مفصولة ب -", + "GPU Information": "معلومات وحدة معالجة الرسومات", + "Pitch Guidance": "توجيه الملعب", + "Extract Features": "استخراج الميزات", + "Start Training": "ابدأ التدريب", + "Generate Index": "إنشاء فهرس", + "Voice Model": "نموذج الصوت", + "Index File": "ملف الفهرس", + "Refresh": "تحديث", + "Unload Voice": "تفريغ الصوت", + "Single": "واحد", + "Upload Audio": "تحميل الصوت", + "Select Audio": "حدد الصوت", + "Advanced Settings": "الإعدادات المتقدمة", + "Clear Outputs (Deletes all audios in assets/audios)": "مخرجات واضحة (يحذف جميع الصوتيات في الأصول / الصوتيات)", + "Custom Output Path": "مسار الإخراج المخصص", + "Output Path": "مسار الإخراج", + "Pitch": "زفت", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "إذا كان > = 3: تطبيق التصفية المتوسطة على نتائج الملعب المحصودة. 
تمثل القيمة نصف قطر المرشح ويمكن أن تقلل من التنفس", + "Search Feature Ratio": "نسبة ميزة البحث", + "Pitch extraction algorithm": "خوارزمية استخراج الملعب", + "Convert": "حول", + "Export Audio": "تصدير الصوت", + "Batch": "الدفعه", + "Input Folder": "مجلد الإدخال", + "Enter input path": "أدخل مسار الإدخال", + "Output Folder": "مجلد الإخراج", + "Enter output path": "أدخل مسار الإخراج", + "Get information about the audio": "الحصول على معلومات حول الصوت", + "Information about the audio file": "معلومات حول الملف الصوتي", + "Waiting for information...": "في انتظار المعلومات...", + "Model fusion": "نموذج الانصهار", + "Weight for Model A": "وزن الموديل أ", + "Whether the model has pitch guidance": "ما إذا كان النموذج يحتوي على إرشادات الملعب", + "Model architecture version": "إصدار بنية النموذج", + "Path to Model A": "الطريق إلى النموذج أ", + "Path to Model B": "الطريق إلى النموذج ب", + "Path to model": "الطريق إلى النموذج", + "Model information to be placed": "معلومات النموذج المراد وضعها", + "Fusion": "اندماج", + "Modify model information": "تعديل معلومات النموذج", + "Path to Model": "الطريق إلى النموذج", + "Model information to be modified": "معلومات النموذج المراد تعديلها", + "Save file name": "حفظ اسم الملف", + "Modify": "حور", + "View model information": "عرض معلومات النموذج", + "View": "منظر", + "Model extraction": "استخراج النموذج", + "Model conversion": "تحويل النموذج", + "Pth file": "ملف Pth", + "Output of the pth file": "إخراج ملف pth", + "# How to Report an Issue on GitHub": "# كيفية الإبلاغ عن مشكلة على GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. انقر فوق الزر \"شاشة التسجيل\" أدناه لبدء تسجيل المشكلة التي تواجهها.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. 
بمجرد الانتهاء من تسجيل المشكلة ، انقر فوق الزر \"إيقاف التسجيل\" (نفس الزر ، لكن التسمية تتغير اعتمادا على ما إذا كنت تقوم بالتسجيل بنشاط أم لا).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. انتقل إلى [مشكلات GitHub](https://github.com/IAHispano/Applio/issues) وانقر على زر \"إصدار جديد\".", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. أكمل نموذج المشكلة المقدم ، مع التأكد من تضمين التفاصيل حسب الحاجة ، واستخدم قسم الأصول لتحميل الملف المسجل من الخطوة السابقة.", + "Record Screen": "شاشة التسجيل", + "Record": "سجل", + "Stop Recording": "إيقاف التسجيل", + "Introduce the model .pth path": "تقديم نموذج مسار .pth", + "See Model Information": "انظر معلومات النموذج", + "## Download Model": "## تحميل الموديل", + "Model Link": "رابط النموذج", + "Introduce the model link": "تقديم رابط النموذج", + "Download Model": "تحميل الموديل", + "## Drop files": "## إسقاط الملفات", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "اسحب ملف .pth وملف .index إلى هذه المساحة. 
اسحب أحدهما ثم الآخر.", + "TTS Voices": "أصوات تحويل النص إلى كلام", + "Text to Synthesize": "النص المراد توليفه", + "Enter text to synthesize": "أدخل نصا لتوليفه", + "Output Path for TTS Audio": "مسار الإخراج لصوت TTS", + "Output Path for RVC Audio": "مسار الإخراج لصوت RVC" +} \ No newline at end of file diff --git a/assets/i18n/languages/bn_BN.json b/assets/i18n/languages/bn_BN.json new file mode 100644 index 0000000000000000000000000000000000000000..84267c53c36cc4e7ce511fcfdc6ba9291896094e --- /dev/null +++ b/assets/i18n/languages/bn_BN.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "আলটিমেট ভয়েস ক্লোনিং টুল, অতুলনীয় শক্তি, মডুলারিটি এবং ব্যবহারকারী-বান্ধব অভিজ্ঞতার জন্য নিখুঁতভাবে অপ্টিমাইজ করা।", + "This section contains some extra utilities that often may be in experimental phases.": "এই বিভাগে কিছু অতিরিক্ত ইউটিলিটি রয়েছে যা প্রায়শই পরীক্ষামূলক পর্যায়ে থাকতে পারে।", + "Output Information": "আউটপুট তথ্য", + "Inference": "অনুমান", + "Train": "ট্রেন", + "Extra": "অতিরিক্ত", + "Merge Audios": "অডিওগুলি মার্জ করুন", + "Processing": "প্রক্রিয়াকরণ", + "Audio Analyzer": "অডিও বিশ্লেষক", + "Model Information": "মডেল তথ্য", + "Download": "ডাউনলোড", + "Report a Bug": "একটি বাগ রিপোর্ট করুন", + "Preprocess": "প্রিপ্রসেস", + "Model Name": "মডেলের নাম", + "Enter model name": "মডেলের নাম লিখুন", + "Dataset Path": "ডেটাসেট পাথ", + "Enter dataset path": "ডেটাসেটের পথ লিখুন", + "Sampling Rate": "নমুনা হার", + "RVC Version": "আরভিসি সংস্করণ", + "Preprocess Dataset": "প্রিপ্রসেস ডেটাসেট", + "Extract": "নিষ্কাশন", + "Hop Length": "হপ দৈর্ঘ্য", + "Batch Size": "ব্যাচের আকার", + "Save Every Epoch": "প্রতিটি যুগ সংরক্ষণ করুন", + "Total Epoch": "মোট যুগ", + "Pretrained": "পূর্বনির্ধারিত", + "Save Only Latest": "শুধুমাত্র সর্বশেষ সংরক্ষণ করুন", + "Save Every Weights": "প্রতিটি ওজন সংরক্ষণ করুন", + "Custom Pretrained": "কাস্টম প্রিট্রেইনড", + "Upload Pretrained Model": "প্রিট্রেনড 
মডেল আপলোড করুন", + "Pretrained Custom Settings": "পূর্বনির্ধারিত কাস্টম সেটিংস", + "The file you dropped is not a valid pretrained file. Please try again.": "আপনার ফেলে দেওয়া ফাইলটি একটি বৈধ পূর্বপ্রশিক্ষিত ফাইল নয়. অনুগ্রহ করে আবার চেষ্টা করুন।", + "Click the refresh button to see the pretrained file in the dropdown menu.": "ড্রপডাউন মেনুতে প্রিট্রেনড ফাইলটি দেখতে রিফ্রেশ বোতামটি ক্লিক করুন।", + "Pretrained G Path": "কাস্টম প্রিট্রেনড জি", + "Pretrained D Path": "কাস্টম প্রিট্রেনড ডি", + "GPU Settings": "জিপিইউ সেটিংস", + "GPU Custom Settings": "GPU কাস্টম সেটিংস", + "GPU Number": "জিপিইউ নম্বর", + "0 to ∞ separated by -": "0 থেকে ∞ দ্বারা পৃথক করা হয় -", + "GPU Information": "জিপিইউ তথ্য", + "Pitch Guidance": "পিচ গাইডেন্স", + "Extract Features": "এক্সট্রাক্ট বৈশিষ্ট্য", + "Start Training": "প্রশিক্ষণ শুরু করুন", + "Generate Index": "সূচী তৈরি করুন", + "Voice Model": "ভয়েস মডেল", + "Index File": "সূচী ফাইল", + "Refresh": "সতেজ", + "Unload Voice": "ভয়েস আনলোড করুন", + "Single": "একক", + "Upload Audio": "অডিও আপলোড করুন", + "Select Audio": "অডিও নির্বাচন করুন", + "Advanced Settings": "উন্নত সেটিংস", + "Clear Outputs (Deletes all audios in assets/audios)": "আউটপুট সাফ করুন (সম্পদ / অডিওতে সমস্ত অডিও মুছে ফেলে)", + "Custom Output Path": "কাস্টম আউটপুট পাথ", + "Output Path": "আউটপুট পাথ", + "Pitch": "পিচ", + "If >=3: apply median filtering to the harvested pitch results. 
The value represents the filter radius and can reduce breathiness": "যদি > = 3: ফসল কাটা পিচ ফলাফলগুলিতে মধ্যমা ফিল্টারিং প্রয়োগ করুন। মানটি ফিল্টার ব্যাসার্ধের প্রতিনিধিত্ব করে এবং শ্বাসকষ্ট হ্রাস করতে পারে", + "Search Feature Ratio": "অনুসন্ধান বৈশিষ্ট্য অনুপাত", + "Pitch extraction algorithm": "পিচ নিষ্কাশন অ্যালগরিদম", + "Convert": "রূপান্তর", + "Export Audio": "অডিও রপ্তানি করুন", + "Batch": "ব্যাচ", + "Input Folder": "ইনপুট ফোল্ডার", + "Enter input path": "ইনপুট পথ লিখুন", + "Output Folder": "আউটপুট ফোল্ডার", + "Enter output path": "আউটপুট পথ লিখুন", + "Get information about the audio": "অডিও সম্পর্কে তথ্য পান", + "Information about the audio file": "অডিও ফাইল সম্পর্কে তথ্য", + "Waiting for information...": "তথ্যের অপেক্ষায়...", + "Model fusion": "মডেল ফিউশন", + "Weight for Model A": "মডেল এ জন্য ওজন", + "Whether the model has pitch guidance": "মডেলের পিচ গাইডেন্স আছে কিনা", + "Model architecture version": "মডেল আর্কিটেকচার সংস্করণ", + "Path to Model A": "মডেল এ পাথ", + "Path to Model B": "মডেল বি এর পথ", + "Path to model": "মডেলের পথ", + "Model information to be placed": "মডেল তথ্য স্থাপন করা হবে", + "Fusion": "ফিউশন", + "Modify model information": "মডেল তথ্য পরিবর্তন করুন", + "Path to Model": "মডেলের পথ", + "Model information to be modified": "মডেল তথ্য পরিবর্তন করা হবে", + "Save file name": "ফাইল নাম সংরক্ষণ করুন", + "Modify": "পরিবর্তন", + "View model information": "মডেল তথ্য দেখুন", + "View": "দর্শন", + "Model extraction": "মডেল নিষ্কাশন", + "Model conversion": "মডেল রূপান্তর", + "Pth file": "Pth ফাইল", + "Output of the pth file": "পিটিএইচ ফাইলের আউটপুট", + "# How to Report an Issue on GitHub": "# গিটহাবে একটি সমস্যা কিভাবে রিপোর্ট করবেন", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. আপনি যে সমস্যার সম্মুখীন হচ্ছেন তা রেকর্ড করা শুরু করতে নীচের 'রেকর্ড স্ক্রিন' বোতামে ক্লিক করুন।", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. একবার আপনি সমস্যাটি রেকর্ড করা শেষ করার পরে, 'রেকর্ডিং বন্ধ করুন' বোতামে ক্লিক করুন (একই বোতাম, তবে আপনি সক্রিয়ভাবে রেকর্ড করছেন কিনা তার উপর নির্ভর করে লেবেলটি পরিবর্তিত হয়)।", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) এ যান এবং 'New Issue' বাটনে ক্লিক করুন।", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. প্রদত্ত ইস্যু টেমপ্লেটটি সম্পূর্ণ করুন, প্রয়োজন অনুসারে বিশদ অন্তর্ভুক্ত করা নিশ্চিত করুন এবং পূর্ববর্তী পদক্ষেপ থেকে রেকর্ড করা ফাইলটি আপলোড করতে সম্পদ বিভাগটি ব্যবহার করুন।", + "Record Screen": "রেকর্ড স্ক্রিন", + "Record": "রেকর্ড", + "Stop Recording": "রেকর্ডিং বন্ধ করুন", + "Introduce the model .pth path": "মডেল .pth পাথ পরিচয় করিয়ে দিন", + "See Model Information": "মডেল তথ্য দেখুন", + "## Download Model": "## মডেল ডাউনলোড করুন", + "Model Link": "মডেল লিংক", + "Introduce the model link": "মডেল লিঙ্কটি পরিচয় করিয়ে দিন", + "Download Model": "মডেল ডাউনলোড করুন", + "## Drop files": "## ফাইল ড্রপ করুন", + "Drag your .pth file and .index file into this space. 
Drag one and then the other.": "আপনার .pth ফাইল এবং .index ফাইলটি এই স্পেসে টেনে আনুন। একটা টেনে আনুন, তারপর অন্যটা।", + "TTS Voices": "টিটিএস ভয়েসেস", + "Text to Synthesize": "সংশ্লেষণ করার জন্য পাঠ্য", + "Enter text to synthesize": "সংশ্লেষ করতে পাঠ্য লিখুন", + "Output Path for TTS Audio": "TTS অডিওর জন্য আউটপুট পাথ", + "Output Path for RVC Audio": "আরভিসি অডিওর জন্য আউটপুট পাথ" +} \ No newline at end of file diff --git a/assets/i18n/languages/de_DE.json b/assets/i18n/languages/de_DE.json new file mode 100644 index 0000000000000000000000000000000000000000..deb5b6198dc870f4801791d7538c39b827c49e7d --- /dev/null +++ b/assets/i18n/languages/de_DE.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Ultimatives Tool zum Klonen von Stimmen, das sorgfältig für unübertroffene Leistung, Modularität und Benutzerfreundlichkeit optimiert wurde.", + "This section contains some extra utilities that often may be in experimental phases.": "Dieser Abschnitt enthält einige zusätzliche Dienstprogramme, die sich häufig in experimentellen Phasen befinden.", + "Output Information": "Informationen zur Ausgabe", + "Inference": "Schlussfolgerung", + "Train": "Zug", + "Extra": "Extra", + "Merge Audios": "Audios zusammenführen", + "Processing": "Verarbeitung", + "Audio Analyzer": "Audio-Analysator", + "Model Information": "Modell-Informationen", + "Download": "Herunterladen", + "Report a Bug": "Einen Fehler melden", + "Preprocess": "Vorverarbeiten", + "Model Name": "Modellbezeichnung", + "Enter model name": "Modellnamen eingeben", + "Dataset Path": "Datensatz-Pfad", + "Enter dataset path": "Datensatzpfad eingeben", + "Sampling Rate": "Samplingrate", + "RVC Version": "RVC-Ausführung", + "Preprocess Dataset": "Datensatz vorverarbeiten", + "Extract": "Auszug", + "Hop Length": "Hopfen-Länge", + "Batch Size": "Losgröße", + "Save Every Epoch": "Rette jede Epoche", + "Total Epoch": "Epoche insgesamt", 
+ "Pretrained": "Vortrainiert", + "Save Only Latest": "Nur die neuesten speichern", + "Save Every Weights": "Speichern Sie alle Gewichte", + "Custom Pretrained": "Benutzerdefiniert vortrainiert", + "Upload Pretrained Model": "Vortrainiertes Modell hochladen", + "Pretrained Custom Settings": "Vortrainierte benutzerdefinierte Einstellungen", + "The file you dropped is not a valid pretrained file. Please try again.": "Die Datei, die Sie abgelegt haben, ist keine gültige vortrainierte Datei. Bitte versuchen Sie es erneut.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Klicken Sie auf die Schaltfläche \"Aktualisieren\", um die vortrainierte Datei im Dropdown-Menü anzuzeigen.", + "Pretrained G Path": "Benutzerdefiniertes vortrainiertes G", + "Pretrained D Path": "Benutzerdefiniertes vortrainiertes D", + "GPU Settings": "GPU-Einstellungen", + "GPU Custom Settings": "Benutzerdefinierte GPU-Einstellungen", + "GPU Number": "GPU-Nummer", + "0 to ∞ separated by -": "0 bis ∞ getrennt durch -", + "GPU Information": "GPU-Informationen", + "Pitch Guidance": "Tonhöhenführung", + "Extract Features": "Extrahieren von Features", + "Start Training": "Training starten", + "Generate Index": "Index generieren", + "Voice Model": "Voice-Modell", + "Index File": "Index-Datei", + "Refresh": "Aktualisieren", + "Unload Voice": "Stimme entladen", + "Single": "Einzeln", + "Upload Audio": "Audio hochladen", + "Select Audio": "Wählen Sie Audio", + "Advanced Settings": "Erweiterte Einstellungen", + "Clear Outputs (Deletes all audios in assets/audios)": "Ausgänge löschen (Löscht alle Audios in assets/audios)", + "Custom Output Path": "Benutzerdefinierter Ausgabepfad", + "Output Path": "Ausgabepfad", + "Pitch": "Tonhöhe", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Wenn >=3: Wenden Sie die Medianfilterung auf die geernteten Tonhöhenergebnisse an. 
Der Wert stellt den Filterradius dar und kann die Atmung reduzieren", + "Search Feature Ratio": "Such-Feature-Verhältnis", + "Pitch extraction algorithm": "Algorithmus zur Tonhöhenextraktion", + "Convert": "Umwandeln", + "Export Audio": "Audio exportieren", + "Batch": "Stapel", + "Input Folder": "Eingabe-Ordner", + "Enter input path": "Eingabepfad eingeben", + "Output Folder": "Ausgabe-Ordner", + "Enter output path": "Ausgabepfad eingeben", + "Get information about the audio": "Abrufen von Informationen zum Audio", + "Information about the audio file": "Informationen zur Audiodatei", + "Waiting for information...": "Warten auf Informationen...", + "Model fusion": "Modell-Fusion", + "Weight for Model A": "Gewicht für Modell A", + "Whether the model has pitch guidance": "Ob das Modell über eine Tonhöhenführung verfügt", + "Model architecture version": "Version der Modellarchitektur", + "Path to Model A": "Pfad zu Modell A", + "Path to Model B": "Pfad zu Modell B", + "Path to model": "Pfad zum Modell", + "Model information to be placed": "Zu platzierende Modellinformationen", + "Fusion": "Verschmelzung", + "Modify model information": "Ändern von Modellinformationen", + "Path to Model": "Pfad zum Modell", + "Model information to be modified": "Zu ändernde Modellinformationen", + "Save file name": "Dateinamen speichern", + "Modify": "Modifizieren", + "View model information": "Anzeigen von Modellinformationen", + "View": "Ansehen", + "Model extraction": "Modell-Extraktion", + "Model conversion": "Modell-Konvertierung", + "Pth file": "Pth-Datei", + "Output of the pth file": "Ausgabe der pth-Datei", + "# How to Report an Issue on GitHub": "# So melden Sie ein Problem auf GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Klicken Sie unten auf die Schaltfläche \"Bildschirm aufzeichnen\", um mit der Aufzeichnung des aufgetretenen Problems zu beginnen.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Wenn Sie mit der Aufnahme des Problems fertig sind, klicken Sie auf die Schaltfläche \"Aufzeichnung beenden\" (dieselbe Schaltfläche, aber die Beschriftung ändert sich, je nachdem, ob Sie aktiv aufnehmen oder nicht).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Gehen Sie zu [GitHub Issues](https://github.com/IAHispano/Applio/issues) und klicken Sie auf die Schaltfläche \"New Issue\".", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Füllen Sie die bereitgestellte Problemvorlage aus, stellen Sie sicher, dass Sie die erforderlichen Details angeben, und verwenden Sie den Abschnitt \"Assets\", um die aufgezeichnete Datei aus dem vorherigen Schritt hochzuladen.", + "Record Screen": "Bildschirm aufzeichnen", + "Record": "Aufzeichnung", + "Stop Recording": "Aufzeichnung beenden", + "Introduce the model .pth path": "Geben Sie den .pth-Pfad des Modells ein", + "See Model Information": "Modellinformationen anzeigen", + "## Download Model": "## Modell herunterladen", + "Model Link": "Modell-Link", + "Introduce the model link": "Geben Sie den Modell-Link ein", + "Download Model": "Modell herunterladen", + "## Drop files": "## Dateien ablegen", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Ziehen Sie Ihre .pth- und .index-Datei in diesen Bereich. 
Ziehen Sie das eine und dann das andere.", + "TTS Voices": "TTS-Stimmen", + "Text to Synthesize": "Zu synthetisierender Text", + "Enter text to synthesize": "Geben Sie den zu synthetisierenden Text ein", + "Output Path for TTS Audio": "Ausgabepfad für TTS-Audio", + "Output Path for RVC Audio": "Ausgabepfad für RVC-Audio" +} \ No newline at end of file diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json new file mode 100644 index 0000000000000000000000000000000000000000..735e47a65a741734762d0a244ea53e12a20d6aae --- /dev/null +++ b/assets/i18n/languages/en_US.json @@ -0,0 +1,130 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.", + "This section contains some extra utilities that often may be in experimental phases.": "This section contains some extra utilities that often may be in experimental phases.", + "Output Information": "Output Information", + + "Inference": "Inference", + "Train": "Train", + "Extra": "Extra", + "Merge Audios": "Merge Audios", + "Processing": "Processing", + "Audio Analyzer": "Audio Analyzer", + "Model Information": "Model Information", + "Download": "Download", + "Report a Bug": "Report a Bug", + + "Preprocess": "Preprocess", + "Model Name": "Model Name", + "Enter model name": "Enter model name", + "Dataset Path": "Dataset Path", + "The audio file has been successfully added to the dataset. Please click the preprocess button.": "The audio file has been successfully added to the dataset. 
Please click the preprocess button.", + "Enter dataset path": "Enter dataset path", + "Sampling Rate": "Sampling Rate", + "RVC Version": "RVC Version", + "Preprocess Dataset": "Preprocess Dataset", + + "Extract": "Extract", + "Hop Length": "Hop Length", + "Batch Size": "Batch Size", + "Save Every Epoch": "Save Every Epoch", + "Total Epoch": "Total Epoch", + "Pretrained": "Pretrained", + "Save Only Latest": "Save Only Latest", + "Save Every Weights": "Save Every Weights", + "Custom Pretrained": "Custom Pretrained", + "Upload Pretrained Model": "Upload Pretrained Model", + "Pretrained Custom Settings": "Pretrained Custom Settings", + "The file you dropped is not a valid pretrained file. Please try again.": "The file you dropped is not a valid pretrained file. Please try again.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Click the refresh button to see the pretrained file in the dropdown menu.", + "Pretrained G Path": "Custom Pretrained G", + "Pretrained D Path": "Custom Pretrained D", + "GPU Settings": "GPU Settings", + "GPU Custom Settings": "GPU Custom Settings", + "GPU Number": "GPU Number", + "0 to ∞ separated by -": "0 to ∞ separated by -", + "GPU Information": "GPU Information", + "Pitch Guidance": "Pitch Guidance", + "Extract Features": "Extract Features", + + "Start Training": "Start Training", + "Generate Index": "Generate Index", + + "Voice Model": "Voice Model", + "Index File": "Index File", + "Refresh": "Refresh", + "Unload Voice": "Unload Voice", + + "Single": "Single", + "Upload Audio": "Upload Audio", + "Select Audio": "Select Audio", + "Advanced Settings": "Advanced Settings", + "Clear Outputs (Deletes all audios in assets/audios)": "Clear Outputs (Deletes all audios in assets/audios)", + "Custom Output Path": "Custom Output Path", + "Output Path": "Output Path", + "Split Audio": "Split Audio", + "Pitch": "Pitch", + "If >=3: apply median filtering to the harvested pitch results. 
The value represents the filter radius and can reduce breathiness": "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness", + "Search Feature Ratio": "Search Feature Ratio", + "Pitch extraction algorithm": "Pitch extraction algorithm", + "Convert": "Convert", + "Export Audio": "Export Audio", + + "Batch": "Batch", + "Input Folder": "Input Folder", + "Enter input path": "Enter input path", + "Output Folder": "Output Folder", + "Enter output path": "Enter output path", + + "Get information about the audio": "Get information about the audio", + "Information about the audio file": "Information about the audio file", + "Waiting for information...": "Waiting for information...", + + "Model fusion": "Model fusion", + "Weight for Model A": "Weight for Model A", + "Whether the model has pitch guidance": "Whether the model has pitch guidance", + "Model architecture version": "Model architecture version", + "Path to Model A": "Path to Model A", + "Path to Model B": "Path to Model B", + "Path to model": "Path to model", + "Model information to be placed": "Model information to be placed", + "Fusion": "Fusion", + + "Modify model information": "Modify model information", + "Path to Model": "Path to Model", + "Model information to be modified": "Model information to be modified", + "Save file name": "Save file name", + "Modify": "Modify", + + "View model information": "View model information", + "View": "View", + "Model extraction": "Model extraction", + "Model conversion": "Model conversion", + "Pth file": "Pth file", + "Output of the pth file": "Output of the pth file", + + "# How to Report an Issue on GitHub": "# How to Report an Issue on GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.", + + "Record Screen": "Record Screen", + "Record": "Record", + "Stop Recording": "Stop Recording", + + "Introduce the model .pth path": "Introduce the model .pth path", + "See Model Information": "See Model Information", + + "## Download Model": "## Download Model", + "Model Link": "Model Link", + "Introduce the model link": "Introduce the model link", + "Download Model": "Download Model", + "## Drop files": "## Drop files", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Drag your .pth file and .index file into this space. 
Drag one and then the other.", + + "TTS Voices": "TTS Voices", + "Text to Synthesize": "Text to Synthesize", + "Enter text to synthesize": "Enter text to synthesize", + "Output Path for TTS Audio": "Output Path for TTS Audio", + "Output Path for RVC Audio": "Output Path for RVC Audio" +} \ No newline at end of file diff --git a/assets/i18n/languages/es_ES.json b/assets/i18n/languages/es_ES.json new file mode 100644 index 0000000000000000000000000000000000000000..1c886127aaa2c38add6e12b2d309f6ee51fa022a --- /dev/null +++ b/assets/i18n/languages/es_ES.json @@ -0,0 +1,113 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "La herramienta de clonación de voz definitiva, meticulosamente optimizada para tener potencia, modularidad y experiencia de uso inigualable.", + "This section contains some extra utilities that often may be in experimental phases.": "Esta sección contiene algunas utilidades adicionales que a menudo pueden estar en fases experimentales.", + "Output Information": "Información", + "Inference": "Inferencia", + "Train": "Entrenamiento", + "Extra": "Extra", + "Merge Audios": "Fusionar audios", + "Processing": "Procesamiento", + "Audio Analyzer": "Analizador de audio", + "Model Information": "Información del modelo", + "Download": "Descargar", + "Report a Bug": "Reportar un fallo", + "Preprocess": "Preprocesar", + "Model Name": "Nombre del modelo", + "Enter model name": "Introduzca el nombre del modelo", + "Dataset Path": "Ruta del dataset", + "Enter dataset path": "Introduzca la ruta del dataset", + "Sampling Rate": "Frecuencia de muestreo", + "RVC Version": "Versión RVC", + "Preprocess Dataset": "Procesar dataset", + "Extract": "Extraer", + "Hop Length": "Longitud del salto", + "Batch Size": "Batch Size", + "Save Every Epoch": "Frecuencia de guardado", + "Total Epoch": "Epochs totales", + "Pretrained": "Preentrenado", + "Save Only Latest": "Guardar solo lo último", + "Save Every 
Weights": "Guardar en cada punto de guardado", + "Custom Pretrained": "Preentrenado personalizado", + "Upload Pretrained Model": "Cargar modelo previamente entrenado", + "Pretrained Custom Settings": "Configuración personalizada previamente entrenada", + "The file you dropped is not a valid pretrained file. Please try again.": "El archivo que ha soltado no es un archivo preentrenado válido. Por favor, inténtelo de nuevo.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Haga clic en el botón de actualización para ver el archivo previamente entrenado en el menú desplegable.", + "Pretrained G Path": "G preentrenado personalizado", + "Pretrained D Path": "D preentrenado personalizado", + "GPU Settings": "Configuración de GPU", + "GPU Custom Settings": "Configuración personalizada de la GPU", + "GPU Number": "Número de GPU", + "0 to ∞ separated by -": "0 a ∞ separados por -", + "GPU Information": "Información de GPU", + "Pitch Guidance": "Guía de tono", + "Extract Features": "Extraer características", + "Start Training": "Empezar a entrenar", + "Generate Index": "Generar índice", + "Voice Model": "Modelo de voz", + "Index File": "Archivo de índice", + "Refresh": "Actualizar", + "Unload Voice": "Eliminar voz", + "Single": "Individual", + "Upload Audio": "Subir audio", + "Select Audio": "Seleccionar audio", + "Advanced Settings": "Ajustes avanzados", + "Clear Outputs (Deletes all audios in assets/audios)": "Borrar resultados (elimina todos los audios de assets/audios)", + "Custom Output Path": "Ruta de salida personalizada", + "Output Path": "Ruta de salida", + "Split Audio": "Dividir audio", + "Pitch": "Tono", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Si >=3: aplique el filtrado de la mediana a los resultados de tono obtenidos. 
El valor representa el radio del filtro y puede reducir la respiración", + "Search Feature Ratio": "Relación de características de búsqueda", + "Pitch extraction algorithm": "Algoritmo de extracción de tono", + "Convert": "Convertir", + "Export Audio": "Exportar audio", + "Batch": "Lote", + "Input Folder": "Carpeta de entrada", + "Enter input path": "Introduzca la ruta de entrada", + "Output Folder": "Carpeta de salida", + "Enter output path": "Introduzca la ruta de salida", + "Get information about the audio": "Obtener información sobre el audio", + "Information about the audio file": "Información sobre el archivo de audio", + "Waiting for information...": "A la espera de la información...", + "Model fusion": "Fusión de modelos", + "Weight for Model A": "Peso del modelo A", + "Whether the model has pitch guidance": "Si el modelo tiene un guía de tono", + "Model architecture version": "Versión de la arquitectura del modelo", + "Path to Model A": "Ruta al modelo A", + "Path to Model B": "Ruta al modelo B", + "Path to model": "Ruta de acceso al modelo", + "Model information to be placed": "Información del modelo que se va a colocar", + "Fusion": "Fusión", + "Modify model information": "Modificar la información del modelo", + "Path to Model": "Ruta al modelo", + "Model information to be modified": "Información del modelo que se va a modificar", + "Save file name": "Guardar el nombre del archivo", + "Modify": "Modificar", + "View model information": "Ver información del modelo", + "View": "Ver", + "Model extraction": "Extracción de modelos", + "Model conversion": "Conversión de modelos", + "Pth file": "Archivo pth", + "Output of the pth file": "Salida del archivo pth", + "# How to Report an Issue on GitHub": "# Como reportar un fallo en GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Haga clic en el botón 'Grabar pantalla' a continuación para comenzar a grabar el problema que está experimentando.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Una vez que haya terminado de grabar el problema, haga clic en el botón 'Detener grabación' (el mismo botón, pero la etiqueta cambia dependiendo de si está grabando activamente o no).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Vaya a [Problemas de GitHub](https://github.com/IAHispano/Applio/issues) y haga clic en el botón 'Nuevo problema'.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Complete la plantilla de problema proporcionada, asegurándose de incluir los detalles según sea necesario, y utilice la sección de activos para cargar el archivo grabado del paso anterior.", + "Record Screen": "Grabar pantalla", + "Record": "Grabar", + "Stop Recording": "Detener grabación", + "Introduce the model .pth path": "Introduzca la ruta del .pth del modelo", + "See Model Information": "Ver información del modelo", + "## Download Model": "## Descargar modelo", + "Model Link": "Enlace de modelo", + "Introduce the model link": "Introduzca el enlace del modelo", + "Download Model": "Descargar modelo", + "## Drop files": "## Soltar archivos", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Arrastre el archivo .pth y el archivo .index a este espacio. 
Arrastre uno y luego el otro.", + "TTS Voices": "Voces TTS", + "Text to Synthesize": "Texto a sintetizar", + "Enter text to synthesize": "Introduzca el texto a sintetizar", + "Output Path for TTS Audio": "Ruta de salida para el audio TTS", + "Output Path for RVC Audio": "Ruta de salida para el audio RVC" +} \ No newline at end of file diff --git a/assets/i18n/languages/fa_FA.json b/assets/i18n/languages/fa_FA.json new file mode 100644 index 0000000000000000000000000000000000000000..869a97828f7336305f0014b9b55eb4e614cef8fb --- /dev/null +++ b/assets/i18n/languages/fa_FA.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "ابزار شبیه سازی صدا نهایی، با دقت برای قدرت بی نظیر، مدولار بودن و تجربه کاربر پسند بهینه شده است.", + "This section contains some extra utilities that often may be in experimental phases.": "این بخش شامل برخی از ابزارهای اضافی است که اغلب ممکن است در مراحل ازمایشی باشد.", + "Output Information": "اطلاعات خروجی", + "Inference": "استنباط", + "Train": "قطار", + "Extra": "اضافی", + "Merge Audios": "ادغام Audios", + "Processing": "پردازش", + "Audio Analyzer": "انالایزر صوتی", + "Model Information": "اطلاعات مدل", + "Download": "دانلود", + "Report a Bug": "گزارش یک باگ", + "Preprocess": "پیش پردازش", + "Model Name": "نام مدل", + "Enter model name": "نام مدل را وارد کنید", + "Dataset Path": "مسیر مجموعه داده", + "Enter dataset path": "وارد کردن مسیر مجموعه داده ها", + "Sampling Rate": "نرخ نمونه برداری", + "RVC Version": "نسخه RVC", + "Preprocess Dataset": "مجموعه داده پیش پردازش", + "Extract": "عصاره", + "Hop Length": "طول هاپ", + "Batch Size": "اندازه دسته", + "Save Every Epoch": "ذخیره هر عصر", + "Total Epoch": "کل اپک", + "Pretrained": "پیش اموزش دیده", + "Save Only Latest": "ذخیره فقط اخرین", + "Save Every Weights": "صرفه جویی در هر وزن", + "Custom Pretrained": "سفارشی پیش اموزش دیده", + "Upload Pretrained Model": "اپلود مدل از پیش اموزش دیده", + 
"Pretrained Custom Settings": "تنظیمات سفارشی از پیش اموزش داده شده", + "The file you dropped is not a valid pretrained file. Please try again.": "پرونده ای که حذف کرده اید یک پرونده از پیش اموزش داده شده معتبر نیست. لطفا دوباره تلاش کنید.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "با کلیک بر روی دکمه تازه کردن برای دیدن فایل از پیش اموزش دیده در منوی کشویی.", + "Pretrained G Path": "سفارشی پیش اموزش G", + "Pretrained D Path": "سفارشی از پیش اموزش دیده D", + "GPU Settings": "تنظیمات GPU", + "GPU Custom Settings": "تنظیمات سفارشی GPU", + "GPU Number": "شماره GPU", + "0 to ∞ separated by -": "0 به ∞ جدا شده توسط -", + "GPU Information": "اطلاعات GPU", + "Pitch Guidance": "راهنمای زمین", + "Extract Features": "استخراج ویژگی ها", + "Start Training": "شروع اموزش", + "Generate Index": "ایجاد نمایه", + "Voice Model": "مدل صوتی", + "Index File": "پروندۀ نمایه", + "Refresh": "نوسازی", + "Unload Voice": "بارگیری صدا", + "Single": "تک", + "Upload Audio": "بارگذاری صدا", + "Select Audio": "انتخاب صدا", + "Advanced Settings": "تنظیمات پیشرفته", + "Clear Outputs (Deletes all audios in assets/audios)": "پاک کردن خروجی ها (حذف تمام فایل های صوتی در دارایی ها / audios)", + "Custom Output Path": "مسیر خروجی سفارشی", + "Output Path": "مسیر خروجی", + "Pitch": "زمین", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "اگر > = 3: اعمال فیلتر متوسط به نتایج زمین برداشت. 
مقدار نشان دهنده شعاع فیلتر است و می تواند تنفس را کاهش دهد", + "Search Feature Ratio": "نسبت ویژگی جستجو", + "Pitch extraction algorithm": "الگوریتم استخراج زمین", + "Convert": "تبدیل", + "Export Audio": "صادرات صدا", + "Batch": "دسته", + "Input Folder": "پوشه ورودی", + "Enter input path": "وارد کردن مسیر ورودی", + "Output Folder": "پوشۀ خروجی", + "Enter output path": "وارد کردن مسیر خروجی", + "Get information about the audio": "دریافت اطلاعات در مورد صدا", + "Information about the audio file": "اطلاعات مربوط به فایل صوتی", + "Waiting for information...": "در انتظار اطلاعات...", + "Model fusion": "فیوژن مدل", + "Weight for Model A": "وزن مدل A", + "Whether the model has pitch guidance": "اگر مدل دارای راهنمایی است", + "Model architecture version": "نسخه معماری مدل", + "Path to Model A": "مسیر مدل A", + "Path to Model B": "مسیر مدل B", + "Path to model": "مسیر مدل", + "Model information to be placed": "اطلاعات مدل قرار داده می شود", + "Fusion": "همجوشی", + "Modify model information": "تغییر اطلاعات مدل", + "Path to Model": "مسیر به مدل", + "Model information to be modified": "اطلاعات مدل باید اصلاح شود", + "Save file name": "ذخیرۀ نام پرونده", + "Modify": "تغییر", + "View model information": "مشاهده اطلاعات مدل", + "View": "مشاهده", + "Model extraction": "استخراج مدل", + "Model conversion": "تبدیل مدل", + "Pth file": "پرونده Pth", + "Output of the pth file": "خروجی پروندۀ pth", + "# How to Report an Issue on GitHub": "# چگونه یک مشکل را در GitHub گزارش دهیم", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. با کلیک بر روی 'ضبط صفحه نمایش' دکمه زیر برای شروع ضبط مسئله شما در حال تجربه.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. 
هنگامی که ضبط مسئله را به پایان رساندید، بر روی دکمه \"توقف ضبط\" کلیک کنید (همان دکمه، اما برچسب بسته به اینکه ایا شما به طور فعال ضبط می کنید یا نه تغییر می کند).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. به [GitHub Issues](https://github.com/IAHispano/Applio/issues) بروید و بر روی دکمه \"New Issue\" کلیک کنید.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. تکمیل قالب موضوع ارائه شده، اطمینان حاصل شود که شامل جزئیات در صورت نیاز، و استفاده از بخش دارایی برای اپلود فایل ضبط شده از مرحله قبلی.", + "Record Screen": "ضبط صفحه نمایش", + "Record": "ضبط", + "Stop Recording": "توقف ضبط", + "Introduce the model .pth path": "معرفی مسیر .pth مدل", + "See Model Information": "مشاهده اطلاعات مدل", + "## Download Model": "## دانلود مدل", + "Model Link": "پیوند مدل", + "Introduce the model link": "معرفی لینک مدل", + "Download Model": "دانلود مدل", + "## Drop files": "## رها کردن پرونده‌ها", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "فایل .pth و .index خود را به این فضا بکشید. 
یکی را بکشید و سپس دیگری را بکشید.", + "TTS Voices": "TTS صداها", + "Text to Synthesize": "متن برای سنتز", + "Enter text to synthesize": "متن را برای سنتز وارد کنید", + "Output Path for TTS Audio": "مسیر خروجی برای صدای TTS", + "Output Path for RVC Audio": "مسیر خروجی برای RVC Audio" +} \ No newline at end of file diff --git a/assets/i18n/languages/fr_FR.json b/assets/i18n/languages/fr_FR.json new file mode 100644 index 0000000000000000000000000000000000000000..281df63717fbda8b226818c1fdad9b74de7de750 --- /dev/null +++ b/assets/i18n/languages/fr_FR.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Outil ultime de clonage vocal, méticuleusement optimisé pour une puissance, une modularité et une expérience utilisateur inégalées.", + "This section contains some extra utilities that often may be in experimental phases.": "Cette section contient des utilitaires supplémentaires qui sont souvent en phase expérimentale.", + "Output Information": "Informations de sortie", + "Inference": "Inférence", + "Train": "Entraîner", + "Extra": "Extras", + "Merge Audios": "Fusionner les audios", + "Processing": "Traitement", + "Audio Analyzer": "Analyseur audio", + "Model Information": "Informations sur le modèle", + "Download": "Télécharger", + "Report a Bug": "Signaler un bug", + "Preprocess": "Prétraitement", + "Model Name": "Nom du modèle", + "Enter model name": "Entrez le nom du modèle", + "Dataset Path": "Chemin du jeu de données", + "Enter dataset path": "Entrez le chemin du jeu de données", + "Sampling Rate": "Fréquence d'échantillonnage", + "RVC Version": "Version RVC", + "Preprocess Dataset": "Prétraiter le jeu de données", + "Extract": "Extraire", + "Hop Length": "Longueur de saut (hop)", + "Batch Size": "Taille du lot (batch)", + "Save Every Epoch": "Sauvegarder à chaque epoch", + "Total Epoch": "Nombre total d'epochs", + "Pretrained": "Pré-entraîné", + "Save Only Latest": 
"Sauvegarder uniquement le dernier", + "Save Every Weights": "Sauvegarder tous les poids", + "Custom Pretrained": "Pré-entraîné personnalisé", + "Upload Pretrained Model": "Télécharger un modèle pré-entraîné", + "Pretrained Custom Settings": "Paramètres personnalisés de pré-entraînement", + "The file you dropped is not a valid pretrained file. Please try again.": "Le fichier que vous avez déposé n'est pas un fichier pré-entraîné valide. Veuillez réessayer.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Cliquez sur le bouton d'actualisation pour voir le fichier pré-entraîné dans le menu déroulant.", + "Pretrained G Path": "Chemin G pré-entraîné", + "Pretrained D Path": "Chemin D pré-entraîné", + "GPU Settings": "Paramètres du GPU", + "GPU Custom Settings": "Paramètres personnalisés du GPU", + "GPU Number": "Numéro de GPU", + "0 to ∞ separated by -": "De 0 à ∞ séparés par -", + "GPU Information": "Informations GPU", + "Pitch Guidance": "Guidage de hauteur", + "Extract Features": "Extraire les caractéristiques", + "Start Training": "Démarrer l'entraînement", + "Generate Index": "Générer un index", + "Voice Model": "Modèle vocal", + "Index File": "Fichier d'index", + "Refresh": "Rafraîchir", + "Unload Voice": "Décharger la voix", + "Single": "Fichier individuel", + "Upload Audio": "Télécharger l'audio", + "Select Audio": "Sélectionner l'audio", + "Advanced Settings": "Paramètres avancés", + "Clear Outputs (Deletes all audios in assets/audios)": "Effacer les sorties (supprime tous les audios dans assets/audios)", + "Custom Output Path": "Chemin de sortie personnalisé", + "Output Path": "Chemin de sortie", + "Pitch": "Hauteur", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Si >=3 : appliquer un filtrage médian aux résultats de hauteur récoltés. 
La valeur représente le rayon du filtre et peut réduire le souffle", + "Search Feature Ratio": "Rapport de Caractéristiques de Recherche", + "Pitch extraction algorithm": "Algorithme d'extraction de hauteur", + "Convert": "Convertir", + "Export Audio": "Exporter l'audio", + "Batch": "Traitement par lots (batch)", + "Input Folder": "Dossier d'entrée", + "Enter input path": "Entrez le chemin d'entrée", + "Output Folder": "Dossier de sortie", + "Enter output path": "Entrez le chemin de sortie", + "Get information about the audio": "Obtenir des informations sur l'audio", + "Information about the audio file": "Informations sur le fichier audio", + "Waiting for information...": "En attente d'informations...", + "Model fusion": "Fusion de modèles", + "Weight for Model A": "Poids pour le modèle A", + "Whether the model has pitch guidance": "Si le modèle a un guidage de hauteur", + "Model architecture version": "Version de l'architecture du modèle", + "Path to Model A": "Chemin vers le modèle A", + "Path to Model B": "Chemin vers le modèle B", + "Path to model": "Chemin vers le modèle", + "Model information to be placed": "Informations sur le modèle à placer", + "Fusion": "Fusion", + "Modify model information": "Modifier les informations du modèle", + "Path to Model": "Chemin vers le modèle", + "Model information to be modified": "Informations sur le modèle à modifier", + "Save file name": "Nom de fichier à sauvegarder", + "Modify": "Modifier", + "View model information": "Voir les informations du modèle", + "View": "Voir", + "Model extraction": "Extraction de modèle", + "Model conversion": "Conversion de modèle", + "Pth file": "Fichier Pth", + "Output of the pth file": "Sortie du fichier Pth", + "# How to Report an Issue on GitHub": "# Comment signaler un problème sur GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. 
Cliquez sur le bouton 'Enregistrer l'écran' ci-dessous pour commencer à enregistrer le problème rencontré.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Une fois l'enregistrement du problème terminé, cliquez sur le bouton 'Arrêter l'enregistrement' (le même bouton, mais son libellé change selon que vous êtes en train d'enregistrer ou non).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Rendez-vous sur [GitHub Issues](https://github.com/IAHispano/Applio/issues) et cliquez sur le bouton 'New Issue'.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Remplissez le modèle de problème fourni, en veillant à inclure les détails nécessaires et utilisez la section des ressources pour téléverser le fichier enregistré à l'étape précédente.", + "Record Screen": "Enregistrer l'écran", + "Record": "Enregistrer", + "Stop Recording": "Arrêter l'enregistrement", + "Introduce the model .pth path": "Saisissez le chemin du fichier .pth du modèle", + "See Model Information": "Voir les informations du modèle", + "## Download Model": "## Télécharger le modèle", + "Model Link": "Lien du modèle", + "Introduce the model link": "Saisissez le lien du modèle", + "Download Model": "Télécharger le modèle", + "## Drop files": "## Déposer des fichiers", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Glissez votre fichier .pth et votre fichier .index dans cet espace. 
Glissez l'un puis l'autre.", + "TTS Voices": "Voix TTS", + "Text to Synthesize": "Texte à synthétiser", + "Enter text to synthesize": "Saisissez le texte à synthétiser", + "Output Path for TTS Audio": "Chemin de sortie pour l'audio TTS", + "Output Path for RVC Audio": "Chemin de sortie pour l'audio RVC" +} \ No newline at end of file diff --git a/assets/i18n/languages/gu_GU.json b/assets/i18n/languages/gu_GU.json new file mode 100644 index 0000000000000000000000000000000000000000..80447b8f144730f41ae73864be652ecb25d64c40 --- /dev/null +++ b/assets/i18n/languages/gu_GU.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "અંતિમ વોઇસ ક્લોનિંગ ટૂલ, જે અજોડ શક્તિ, મોડ્યુલારિટી અને વપરાશકર્તા-મૈત્રીપૂર્ણ અનુભવ માટે સાવચેતીપૂર્વક ઓપ્ટિમાઇઝ કરવામાં આવ્યું છે.", + "This section contains some extra utilities that often may be in experimental phases.": "આ વિભાગમાં કેટલીક વધારાની ઉપયોગિતાઓ છે જે ઘણીવાર પ્રાયોગિક તબક્કામાં હોઈ શકે છે.", + "Output Information": "આઉટપુટ જાણકારી", + "Inference": "અનુમાન", + "Train": "તાલીમ", + "Extra": "વધારાનું", + "Merge Audios": "ઓડિયો ભેગા કરો", + "Processing": "પ્રક્રિયા કરી રહ્યા છીએ", + "Audio Analyzer": "ઓડિયો વિશ્લેષક", + "Model Information": "મોડેલ જાણકારી", + "Download": "ડાઉનલોડ", + "Report a Bug": "ભૂલનો અહેવાલ આપો", + "Preprocess": "પ્રીપ્રોસેસ", + "Model Name": "મોડેલ નામ", + "Enter model name": "મોડેલ નામ દાખલ કરો", + "Dataset Path": "ડેટાસેટ પાથ", + "Enter dataset path": "ડેટાસેટ પાથને દાખલ કરો", + "Sampling Rate": "નમૂનાનો દર", + "RVC Version": "RVC આવૃત્તિ", + "Preprocess Dataset": "પ્રીપ્રોસેસ ડેટાસેટ", + "Extract": "અર્ક કાઢો", + "Hop Length": "હોપ લંબાઈ", + "Batch Size": "બેચ માપ", + "Save Every Epoch": "દરેક ઈપોકનો સંગ્રહ કરો", + "Total Epoch": "કુલ ઈપોક", + "Pretrained": "પૂર્વપ્રશિક્ષિત", + "Save Only Latest": "ફક્ત તાજેતરનાં ને સંગ્રહો", + "Save Every Weights": "દરેક વજનોને બચાવો", + "Custom Pretrained": "વૈવિધ્યપૂર્ણ 
પૂર્વટ્રેઈન થયેલ", + "Upload Pretrained Model": "પહેલેથી પ્રશિક્ષિત મોડેલ અપલોડ કરો", + "Pretrained Custom Settings": "પહેલેથી પ્રશિક્ષિત વૈવિધ્યપૂર્ણ સુયોજનો", + "The file you dropped is not a valid pretrained file. Please try again.": "તમે મૂકેલી ફાઇલ એ યોગ્ય પૂર્વતાલીમવાળી ફાઇલ નથી. કૃપા કરીને ફરી પ્રયાસ કરો.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "ડ્રોપડાઉન મેનુમાં પહેલેથી તાલીમ લીધેલ ફાઈલ જોવા માટે રિફ્રેશ બટન પર ક્લિક કરો.", + "Pretrained G Path": "વૈવિધ્યપૂર્ણ પૂર્વપ્રશિક્ષિત G", + "Pretrained D Path": "વૈવિધ્યપૂર્ણ પૂર્વપ્રશિક્ષિત D", + "GPU Settings": "GPU સુયોજનો", + "GPU Custom Settings": "GPU કસ્ટમ સુયોજનો", + "GPU Number": "GPU નંબર", + "0 to ∞ separated by -": "0 થી ∞ આના દ્વારા અલગ પાડવામાં આવે છે -", + "GPU Information": "GPU જાણકારી", + "Pitch Guidance": "પિચ માર્ગદર્શન", + "Extract Features": "લક્ષણોનો અર્ક કાઢો", + "Start Training": "તાલીમ શરૂ કરો", + "Generate Index": "અનુક્રમણિકા બનાવો", + "Voice Model": "અવાજ મોડેલ", + "Index File": "અનુક્રમણિકા ફાઇલ", + "Refresh": "પુનઃતાજું કરો", + "Unload Voice": "અવાજ અનલોડ કરો", + "Single": "એકલું", + "Upload Audio": "ઓડિયો અપલોડ કરો", + "Select Audio": "ઓડિયો પસંદ કરો", + "Advanced Settings": "અદ્યતન સુયોજનો", + "Clear Outputs (Deletes all audios in assets/audios)": "આઉટપુટ સાફ કરો (સંપત્તિઓ/ઓડિયોમાં બધા ઓડિયો કાઢી નાંખે છે)", + "Custom Output Path": "કસ્ટમ આઉટપુટ પાથ", + "Output Path": "આઉટપુટ પાથ", + "Pitch": "પિચ", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "જા >=૩ હોય તોઃ કાપણી કરેલી પિચના પરિણામો પર મધ્યક ફિલ્ટરિંગ લગાવો. 
મૂલ્ય ફિલ્ટર ત્રિજ્યાને રજૂ કરે છે અને શ્વાસ રુંધાઇ શકે છે", + "Search Feature Ratio": "શોધ લક્ષણ ગુણોત્તર", + "Pitch extraction algorithm": "પિચ નિષ્કર્ષણ અલગોરિધમ", + "Convert": "રૂપાંતર કરો", + "Export Audio": "ઓડિયો નિકાસ કરો", + "Batch": "બેચ", + "Input Folder": "ઇનપુટ ફોલ્ડર", + "Enter input path": "ઇનપુટ પાથને દાખલ કરો", + "Output Folder": "આઉટપુટ ફોલ્ડર", + "Enter output path": "આઉટપુટ પાથ દાખલ કરો", + "Get information about the audio": "ઓડિયો વિશે જાણકારી મેળવો", + "Information about the audio file": "ઓડિયો ફાઈલ વિશેની માહિતી", + "Waiting for information...": "જાણકારી માટે રાહ જોઇ રહ્યા છીએ...", + "Model fusion": "મોડેલ ફ્યુઝન", + "Weight for Model A": "મોડેલ A માટે વજન", + "Whether the model has pitch guidance": "શું મોડેલને પિચ માર્ગદર્શન છે", + "Model architecture version": "મોડેલ આર્કિટેક્ચર આવૃત્તિ", + "Path to Model A": "મોડેલ A નો પાથ", + "Path to Model B": "પાથ ટુ મોડેલ B", + "Path to model": "મોડેલનો પાથ", + "Model information to be placed": "મૂકવાની મોડેલ માહિતી", + "Fusion": "ફ્યુઝન", + "Modify model information": "મોડેલ જાણકારીમાં ફેરફાર કરો", + "Path to Model": "મોડેલનો પાથ", + "Model information to be modified": "બદલવા માટેના મોડેલ જાણકારી", + "Save file name": "ફાઇલ નામને સંગ્રહો", + "Modify": "બદલો", + "View model information": "મોડેલ જાણકારી જુઓ", + "View": "જુઓ", + "Model extraction": "મોડેલ નિષ્કર્ષણ", + "Model conversion": "મોડેલ રૂપાંતરણ", + "Pth file": "Pth ફાઈલ", + "Output of the pth file": "pth ફાઇલનું આઉટપુટ", + "# How to Report an Issue on GitHub": "# GitHub પર કોઈ સમસ્યાની જાણ કેવી રીતે કરવી", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. તમે જે સમસ્યાનો સામનો કરી રહ્યા છો તેને રેકોર્ડ કરવાનું શરૂ કરવા માટે નીચે આપેલા 'રેકોર્ડ સ્ક્રીન' બટન પર ક્લિક કરો.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. 
એક વખત તમે સમસ્યાનું રેકોર્ડિંગ પૂરું કરી લો એટલે 'સ્ટોપ રેકોર્ડિંગ' બટન પર ક્લિક કરો (આ જ બટન, પરંતુ તમે સક્રિયપણે રેકોર્ડિંગ કરી રહ્યા છો કે નહીં તેના આધારે લેબલ બદલાય છે).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) પર જાઓ અને 'ન્યૂ ઇશ્યૂ' બટન પર ક્લિક કરો.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. પૂરી પાડવામાં આવેલ ઇશ્યૂ ટેમ્પલેટ પૂર્ણ કરો, જરૂરિયાત મુજબ વિગતોનો સમાવેશ કરવાની ખાતરી કરો અને અગાઉના પગલામાંથી રેકોર્ડ કરેલી ફાઇલને અપલોડ કરવા માટે અસ્કયામતોના વિભાગનો ઉપયોગ કરો.", + "Record Screen": "રેકોર્ડ સ્ક્રીન", + "Record": "રેકોર્ડ", + "Stop Recording": "રેકોર્ડ કરવાનું બંધ કરો", + "Introduce the model .pth path": "મોડલ .pth પાથને રજૂ કરો", + "See Model Information": "મોડેલ જાણકારી જુઓ", + "## Download Model": "## ડાઉનલોડ મોડેલ", + "Model Link": "મોડેલ કડી", + "Introduce the model link": "મોડેલ કડીનો પરિચય આપો", + "Download Model": "મોડેલ ડાઉનલોડ કરો", + "## Drop files": "## ફાઇલો મૂકો", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "આ જગ્યામાં તમારી .pth ફાઇલ અને .index ફાઇલને ખેંચો. 
એકને ખેંચો અને પછી બીજું.", + "TTS Voices": "TTS અવાજો", + "Text to Synthesize": "સંશ્લેષણ કરવા માટેનું લખાણ", + "Enter text to synthesize": "સંશ્લેષણ કરવા માટે લખાણ દાખલ કરો", + "Output Path for TTS Audio": "TTS ઓડિયો માટે આઉટપુટ પાથ", + "Output Path for RVC Audio": "RVC ઓડિયો માટે આઉટપુટ પાથ" +} \ No newline at end of file diff --git a/assets/i18n/languages/hi_HI.json b/assets/i18n/languages/hi_HI.json new file mode 100644 index 0000000000000000000000000000000000000000..08dafe7e89f129af8af306401df5aa02cbe97f16 --- /dev/null +++ b/assets/i18n/languages/hi_HI.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "अंतिम आवाज क्लोनिंग उपकरण, बेजोड़ शक्ति, प्रतिरूपकता और उपयोगकर्ता के अनुकूल अनुभव के लिए सावधानीपूर्वक अनुकूलित।", + "This section contains some extra utilities that often may be in experimental phases.": "इस खंड में कुछ अतिरिक्त उपयोगिताओं हैं जो अक्सर प्रयोगात्मक चरणों में हो सकती हैं।", + "Output Information": "आउटपुट जानकारी", + "Inference": "अनुमान", + "Train": "प्रशिक्षण", + "Extra": "अतिरिक्त", + "Merge Audios": "ऑडियो मर्ज करें", + "Processing": "संसाधन", + "Audio Analyzer": "ऑडियो विश्लेषक", + "Model Information": "मॉडल जानकारी", + "Download": "डाउनलोड", + "Report a Bug": "बग की रिपोर्ट करें", + "Preprocess": "प्रीप्रोसेस", + "Model Name": "मॉडल का नाम", + "Enter model name": "मॉडल का नाम दर्ज करें", + "Dataset Path": "डेटासेट पथ", + "Enter dataset path": "डेटासेट पथ दर्ज करें", + "Sampling Rate": "नमूनाकरण दर", + "RVC Version": "RVC संस्करण", + "Preprocess Dataset": "प्रीप्रोसेस डेटासेट", + "Extract": "निकालना", + "Hop Length": "हॉप लंबाई", + "Batch Size": "बैच का आकार", + "Save Every Epoch": "हर युग को बचाओ", + "Total Epoch": "कुल युग", + "Pretrained": "पूर्व-प्रशिक्षित", + "Save Only Latest": "केवल नवीनतम सहेजें", + "Save Every Weights": "हर वजन बचाओ", + "Custom Pretrained": "कस्टम पूर्व-प्रशिक्षित", + "Upload Pretrained Model": "पूर्व-प्रशिक्षित मॉडल अपलोड 
करें", + "Pretrained Custom Settings": "पूर्व-प्रशिक्षित कस्टम सेटिंग्स", + "The file you dropped is not a valid pretrained file. Please try again.": "आपके द्वारा छोड़ी गई फ़ाइल कोई मान्य पूर्व-प्रशिक्षित फ़ाइल नहीं है. कृपया पुनः प्रयास करें.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "ड्रॉपडाउन मेनू में पूर्व-प्रशिक्षित फ़ाइल देखने के लिए ताज़ा करें बटन पर क्लिक करें।", + "Pretrained G Path": "कस्टम पूर्व-प्रशिक्षित जी", + "Pretrained D Path": "कस्टम पूर्वप्रशिक्षित डी", + "GPU Settings": "GPU सेटिंग्स", + "GPU Custom Settings": "GPU कस्टम सेटिंग्स", + "GPU Number": "GPU नंबर", + "0 to ∞ separated by -": "0 से ∞ द्वारा अलग किया गया -", + "GPU Information": "GPU सूचना", + "Pitch Guidance": "पिच मार्गदर्शन", + "Extract Features": "एक्सट्रैक्ट फीचर्स", + "Start Training": "प्रशिक्षण शुरू करें", + "Generate Index": "इंडेक्स जनरेट करें", + "Voice Model": "आवाज मॉडल", + "Index File": "अनुक्रमणिका फ़ाइल", + "Refresh": "आराम देना", + "Unload Voice": "आवाज उतारना", + "Single": "अकेला", + "Upload Audio": "ऑडियो अपलोड करें", + "Select Audio": "ऑडियो का चयन करें", + "Advanced Settings": "उन्नत सेटिंग्स", + "Clear Outputs (Deletes all audios in assets/audios)": "आउटपुट साफ़ करें (संपत्ति/ऑडियो में सभी ऑडियो हटाता है)", + "Custom Output Path": "कस्टम आउटपुट पथ", + "Output Path": "आउटपुट पथ", + "Pitch": "फेंकना", + "If >=3: apply median filtering to the harvested pitch results. 
The value represents the filter radius and can reduce breathiness": "यदि > = 3: कटाई पिच परिणामों के लिए माध्यिका फ़िल्टरिंग लागू करें। मान फिल्टर त्रिज्या का प्रतिनिधित्व करता है और सांस लेने को कम कर सकता है", + "Search Feature Ratio": "खोज सुविधा अनुपात", + "Pitch extraction algorithm": "पिच निष्कर्षण एल्गोरिथ्म", + "Convert": "बदलना", + "Export Audio": "ऑडियो निर्यात करें", + "Batch": "जत्था", + "Input Folder": "इनपुट फ़ोल्डर", + "Enter input path": "इनपुट पथ दर्ज करें", + "Output Folder": "आउटपुट फ़ोल्डर", + "Enter output path": "आउटपुट पथ दर्ज करें", + "Get information about the audio": "ऑडियो के बारे में जानकारी प्राप्त करें", + "Information about the audio file": "ऑडियो फ़ाइल के बारे में जानकारी", + "Waiting for information...": "जानकारी का इंतजार...", + "Model fusion": "मॉडल फ्यूजन", + "Weight for Model A": "मॉडल ए के लिए वजन", + "Whether the model has pitch guidance": "क्या मॉडल में पिच मार्गदर्शन है", + "Model architecture version": "मॉडल वास्तुकला संस्करण", + "Path to Model A": "मॉडल ए के लिए पथ", + "Path to Model B": "मॉडल बी का रास्ता", + "Path to model": "मॉडल का मार्ग", + "Model information to be placed": "मॉडल जानकारी रखी जाएगी", + "Fusion": "परमाणु-संलयन", + "Modify model information": "मॉडल जानकारी संशोधित करें", + "Path to Model": "मॉडल का मार्ग", + "Model information to be modified": "संशोधित की जाने वाली मॉडल जानकारी", + "Save file name": "फ़ाइल नाम सहेजें", + "Modify": "सुधारना", + "View model information": "मॉडल की जानकारी देखें", + "View": "देखना", + "Model extraction": "मॉडल निष्कर्षण", + "Model conversion": "मॉडल रूपांतरण", + "Pth file": "Pth फ़ाइल", + "Output of the pth file": "pth फ़ाइल का आउटपुट", + "# How to Report an Issue on GitHub": "# GitHub पर किसी समस्या की रिपोर्ट कैसे करें", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. आप जिस समस्या का अनुभव कर रहे हैं उसे रिकॉर्ड करना शुरू करने के लिए नीचे दिए गए 'रिकॉर्ड स्क्रीन' बटन पर क्लिक करें।", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. एक बार जब आप समस्या को रिकॉर्ड करना समाप्त कर लेते हैं, तो 'स्टॉप रिकॉर्डिंग' बटन पर क्लिक करें (वही बटन, लेकिन लेबल इस बात पर निर्भर करता है कि आप सक्रिय रूप से रिकॉर्डिंग कर रहे हैं या नहीं)।", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) पर जाएं और 'नया मुद्दा' बटन पर क्लिक करें।", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. प्रदान किए गए मुद्दे टेम्पलेट को पूरा करें, आवश्यकतानुसार विवरण शामिल करना सुनिश्चित करें, और पिछले चरण से रिकॉर्ड की गई फ़ाइल को अपलोड करने के लिए संपत्ति अनुभाग का उपयोग करें।", + "Record Screen": "रिकॉर्ड स्क्रीन", + "Record": "रिकॉर्ड", + "Stop Recording": "रिकॉर्डिंग बंद करो", + "Introduce the model .pth path": "मॉडल .pth पथ का परिचय दें", + "See Model Information": "मॉडल जानकारी देखें", + "## Download Model": "## मॉडल डाउनलोड करें", + "Model Link": "मॉडल लिंक", + "Introduce the model link": "मॉडल लिंक का परिचय दें", + "Download Model": "मॉडल डाउनलोड करें", + "## Drop files": "## फ़ाइलें ड्रॉप करें", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "अपनी .pth फ़ाइल और .index फ़ाइल को इस स्थान में खींचें. 
एक को खींचें और फिर दूसरे को।", + "TTS Voices": "टीटीएस आवाज़ें", + "Text to Synthesize": "संश्लेषित करने के लिए पाठ", + "Enter text to synthesize": "संश्लेषित करने के लिए पाठ दर्ज करें", + "Output Path for TTS Audio": "TTS ऑडियो के लिए आउटपुट पथ", + "Output Path for RVC Audio": "RVC ऑडियो के लिए आउटपुट पथ" +} \ No newline at end of file diff --git a/assets/i18n/languages/hu_HU.json b/assets/i18n/languages/hu_HU.json new file mode 100644 index 0000000000000000000000000000000000000000..4ea978b9a85674e20af95abf757294125cd83d5c --- /dev/null +++ b/assets/i18n/languages/hu_HU.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "A legjobb hangklónozó eszköz, aprólékosan optimalizálva a páratlan teljesítmény, a modularitás és a felhasználóbarát élmény érdekében.", + "This section contains some extra utilities that often may be in experimental phases.": "Ez a szakasz néhány további segédprogramot tartalmaz, amelyek gyakran kísérleti fázisban vannak.", + "Output Information": "Kimeneti információk", + "Inference": "Következtetés", + "Train": "Betanítás", + "Extra": "Többlet", + "Merge Audios": "Hangok egyesítése", + "Processing": "Feldolgozás", + "Audio Analyzer": "Hangelemző", + "Model Information": "Modell információk", + "Download": "Letöltés", + "Report a Bug": "Hiba jelentése", + "Preprocess": "Előfeldolgozás", + "Model Name": "Modell neve", + "Enter model name": "Adja meg a modell nevét", + "Dataset Path": "Adatkészlet elérési útja", + "Enter dataset path": "Adja meg az adatkészlet elérési útját", + "Sampling Rate": "Mintavételi arány", + "RVC Version": "RVC verzió", + "Preprocess Dataset": "Adatkészlet előfeldolgozása", + "Extract": "Kinyerés", + "Hop Length": "Hop hossza", + "Batch Size": "Tétel mérete", + "Save Every Epoch": "Mentsd meg minden korszakot", + "Total Epoch": "Teljes korszak", + "Pretrained": "Előre betanított", + "Save Only Latest": "Csak a legújabbak mentése", 
+ "Save Every Weights": "Takarítson meg minden súlyt", + "Custom Pretrained": "Egyéni előképzett", + "Upload Pretrained Model": "Előre betanított modell feltöltése", + "Pretrained Custom Settings": "Előre betanított egyéni beállítások", + "The file you dropped is not a valid pretrained file. Please try again.": "Az eldobott fájl nem érvényes előre betanított fájl. Kérjük, próbálja újra.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Kattintson a frissítés gombra az előre betanított fájl megjelenítéséhez a legördülő menüben.", + "Pretrained G Path": "Egyéni előképzett G", + "Pretrained D Path": "Egyéni előképzett D", + "GPU Settings": "GPU-beállítások", + "GPU Custom Settings": "GPU egyéni beállítások", + "GPU Number": "GPU-szám", + "0 to ∞ separated by -": "0-tól ∞-ig - választja el", + "GPU Information": "GPU-információk", + "Pitch Guidance": "Pitch útmutatás", + "Extract Features": "Jellemzők kivonása", + "Start Training": "Kezdje el az edzést", + "Generate Index": "Index létrehozása", + "Voice Model": "Hangmodell", + "Index File": "Index fájl", + "Refresh": "Felfrissít", + "Unload Voice": "Hang eltávolítása", + "Single": "Nőtlen", + "Upload Audio": "Hang feltöltése", + "Select Audio": "Válassza az Audio lehetőséget", + "Advanced Settings": "Speciális beállítások", + "Clear Outputs (Deletes all audios in assets/audios)": "Kimenetek törlése (Törli az összes hangot az eszközökből/hangokból)", + "Custom Output Path": "Egyéni kimeneti útvonal", + "Output Path": "Kimeneti útvonal", + "Pitch": "Hangmagasság", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Ha >=3: alkalmazzon medián szűrést a betakarított hangmagasság eredményeire. 
Az érték a szűrő sugarát jelöli, és csökkentheti a légszomjat", + "Search Feature Ratio": "Keresési funkciók aránya", + "Pitch extraction algorithm": "Pitch extrakciós algoritmus", + "Convert": "Megtérít", + "Export Audio": "Hang exportálása", + "Batch": "Halom", + "Input Folder": "Bemeneti mappa", + "Enter input path": "Adja meg a bemeneti útvonalat", + "Output Folder": "Kimeneti mappa", + "Enter output path": "Adja meg a kimeneti útvonalat", + "Get information about the audio": "Információk lekérése a hangról", + "Information about the audio file": "Információ a hangfájlról", + "Waiting for information...": "Információra várva...", + "Model fusion": "Modellfúzió", + "Weight for Model A": "Az A modell súlya", + "Whether the model has pitch guidance": "A modell rendelkezik-e hangmagasság-útmutatással", + "Model architecture version": "Modellarchitektúra verziója", + "Path to Model A": "Az A modell elérési útja", + "Path to Model B": "A B modell elérési útja", + "Path to model": "A modellhez vezető út", + "Model information to be placed": "Az elhelyezendő modellinformációk", + "Fusion": "Fúzió", + "Modify model information": "Modelladatok módosítása", + "Path to Model": "A modell elérési útja", + "Model information to be modified": "Módosítandó modellinformációk", + "Save file name": "Fájlnév mentése", + "Modify": "Módosít", + "View model information": "Modellinformációk megtekintése", + "View": "Nézet", + "Model extraction": "Modell kinyerése", + "Model conversion": "Modell átalakítás", + "Pth file": "Pth fájl", + "Output of the pth file": "A pth fájl kimenete", + "# How to Report an Issue on GitHub": "# Hogyan jelenthet problémát a GitHubon", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Kattintson az alábbi \"Felvétel képernyő\" gombra a tapasztalt probléma rögzítésének megkezdéséhez.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Miután befejezte a probléma rögzítését, kattintson a \"Felvétel leállítása\" gombra (ugyanaz a gomb, de a címke attól függően változik, hogy aktívan rögzít-e vagy sem).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Lépjen a [GitHub-problémák](https://github.com/IAHispano/Applio/issues) oldalra, és kattintson az \"Új probléma\" gombra.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Töltse ki a megadott problémasablont, ügyelve arra, hogy szükség szerint tartalmazza a részleteket, és használja az eszközök szakaszt az előző lépésből rögzített fájl feltöltéséhez.", + "Record Screen": "Felvétel képernyő", + "Record": "Felvétel", + "Stop Recording": "Felvétel leállítása", + "Introduce the model .pth path": "A modell .pth elérési útjának bemutatása", + "See Model Information": "Modellinformációk megtekintése", + "## Download Model": "## Modell letöltése", + "Model Link": "Modell link", + "Introduce the model link": "A modellhivatkozás bemutatása", + "Download Model": "Modell letöltése", + "## Drop files": "## Dobja el a fájlokat", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Húzza a .pth és .index fájlt erre a helyre. 
Húzza az egyiket, majd a másikat.", + "TTS Voices": "TTS-hangok", + "Text to Synthesize": "Szintetizálandó szöveg", + "Enter text to synthesize": "Írja be a szintetizálni kívánt szöveget", + "Output Path for TTS Audio": "A TTS Audio kimeneti útvonala", + "Output Path for RVC Audio": "Az RVC Audio kimeneti útvonala" +} \ No newline at end of file diff --git a/assets/i18n/languages/it_IT.json b/assets/i18n/languages/it_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..ebd74d3c0ca87bc8237affd569d8182db737a498 --- /dev/null +++ b/assets/i18n/languages/it_IT.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Strumento di clonazione vocale definitivo, meticolosamente ottimizzato per una potenza, una modularità e un'esperienza user-friendly senza rivali. /n Tradotto in italiano da Ilaria.", + "This section contains some extra utilities that often may be in experimental phases.": "Questa sezione contiene alcune utilità aggiuntive che spesso possono essere in fase sperimentale.", + "Output Information": "Informazioni sull'output", + "Inference": "Inferenza", + "Train": "Allena", + "Extra": "Altro", + "Merge Audios": "Unisci audio", + "Processing": "Elaborazione", + "Audio Analyzer": "Analizzatore audio", + "Model Information": "Informazioni sul modello", + "Download": "Scaricare", + "Report a Bug": "Segnala un bug", + "Preprocess": "Pre-processa", + "Model Name": "Nome del modello", + "Enter model name": "Inserisci il nome del modello", + "Dataset Path": "Percorso del dataset", + "Enter dataset path": "Inserisci il percorso del dataset", + "Sampling Rate": "Frequenza di campionamento", + "RVC Version": "Versione RVC", + "Preprocess Dataset": "Set di dati di pre-elaborazione", + "Extract": "Estrarre", + "Hop Length": "Hop Length", + "Batch Size": "Batch Size", + "Save Every Epoch": "Salva a ogni epoch", + "Total Epoch": "Epochs totali", + "Pretrained": 
"Pre-allenato", + "Save Only Latest": "Salva solo gli ultimi file", + "Save Every Weights": "Salva ogni Weight", + "Custom Pretrained": "Pre-allenamento personalizzato", + "Upload Pretrained Model": "Carica modello pre-allenato", + "Pretrained Custom Settings": "Impostazioni personalizzate modelli pre-allenati", + "The file you dropped is not a valid pretrained file. Please try again.": "Il file inserito non è un file pre-allenato valido. Si prega di riprovare.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Fare clic sul pulsante Aggiorna per visualizzare il file pre-allenato nel menù a discesa.", + "Pretrained G Path": "G pre-allenato personalizzato", + "Pretrained D Path": "D pre-allenato personalizzato", + "GPU Settings": "Impostazioni GPU", + "GPU Custom Settings": "Impostazioni personalizzate GPU", + "GPU Number": "Numero GPU", + "0 to ∞ separated by -": "Da 0 a ∞ separati da -", + "GPU Information": "Informazioni sulla GPU", + "Pitch Guidance": "Guida al pitch", + "Extract Features": "Estrai feature", + "Start Training": "Inizia l'allenamento", + "Generate Index": "Genera index", + "Voice Model": "Modello vocale", + "Index File": "File index", + "Refresh": "Aggiorna", + "Unload Voice": "Rimuovi modelli vocali", + "Single": "Singolo", + "Upload Audio": "Carica audio", + "Select Audio": "Seleziona Audio", + "Advanced Settings": "Impostazioni avanzate", + "Clear Outputs (Deletes all audios in assets/audios)": "Cancella output (elimina tutti gli audio in assets/audios)", + "Custom Output Path": "Percorso output personalizzato", + "Output Path": "Percorso output", + "Pitch": "Pitch", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Se >=3: applicare il filtro mediano ai risultati dell'altezza raccolta. 
Il valore rappresenta il raggio del filtro e può ridurre la respirazione", + "Search Feature Ratio": "Rapporto di ricerca feature nell'index", + "Pitch extraction algorithm": "Algoritmo di estrazione del pitch", + "Convert": "Converti", + "Export Audio": "Esporta audio", + "Batch": "Batch", + "Input Folder": "Cartella degli input", + "Enter input path": "Immettere il percorso dell'input", + "Output Folder": "Cartella per gli output", + "Enter output path": "Immettere il percorso di output", + "Get information about the audio": "Ottieni informazioni sull'audio", + "Information about the audio file": "Informazioni sul file audio", + "Waiting for information...": "In attesa di informazioni...", + "Model fusion": "Fusione di modelli", + "Weight for Model A": "Peso per il modello A", + "Whether the model has pitch guidance": "Se il modello dispone di una guida per l'intonazione", + "Model architecture version": "Versione dell'architettura del modello", + "Path to Model A": "Percorso verso il modello A", + "Path to Model B": "Percorso verso il modello B", + "Path to model": "Percorso verso il modello", + "Model information to be placed": "Informazioni sul modello da posizionare", + "Fusion": "Fusione", + "Modify model information": "Modificare le informazioni sul modello", + "Path to Model": "Percorso verso il modello", + "Model information to be modified": "Informazioni sul modello da modificare", + "Save file name": "Salva nome file", + "Modify": "Modificare", + "View model information": "Visualizzare le informazioni sul modello", + "View": "Vista", + "Model extraction": "Estrazione del modello", + "Model conversion": "Conversione del modello", + "Pth file": "File Pth", + "Output of the pth file": "Output del file pth", + "# How to Report an Issue on GitHub": "# Come segnalare un problema su GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. 
Fare click sul pulsante \"Registra schermo\" in basso per avviare la registrazione del problema riscontrato.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Una volta terminata la registrazione del problema, fai clic sul pulsante \"Interrompi registrazione\" (lo stesso pulsante, ma l'etichetta cambia a seconda che tu stia registrando attivamente o meno).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Vai su [GitHub Issues](https://github.com/IAHispano/Applio/issues) e fai clic sul pulsante \"Nuovo problema\".", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Completa il modello di problema fornito, assicurandoti di includere i dettagli necessari e utilizza la sezione delle risorse per caricare il file registrato dal passaggio precedente.", + "Record Screen": "Registra schermo", + "Record": "Registrazione", + "Stop Recording": "Interrompi registrazione", + "Introduce the model .pth path": "Inserisci il percorso .pth del modello", + "See Model Information": "Controlla le informazioni sul modello", + "## Download Model": "## Scarica il modello", + "Model Link": "Link del modello", + "Introduce the model link": "Inserisci il link modello", + "Download Model": "Scarica Modello", + "## Drop files": "## Trascina i file", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Trascina il file .pth e il file .index in questo spazio. 
Trascina uno e poi l'altro.", + "TTS Voices": "Voci TTS", + "Text to Synthesize": "Testo da sintetizzare", + "Enter text to synthesize": "Inserisci il testo da sintetizzare", + "Output Path for TTS Audio": "Percorso di uscita per l'audio TTS", + "Output Path for RVC Audio": "Percorso di uscita per l'audio RVC" +} diff --git a/assets/i18n/languages/ja_JA.json b/assets/i18n/languages/ja_JA.json new file mode 100644 index 0000000000000000000000000000000000000000..d0bc8433a4c9709c6021da9ea8f45d7244660e4f --- /dev/null +++ b/assets/i18n/languages/ja_JA.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "比類のないパワー、モジュール性、ユーザーフレンドリーなエクスペリエンスのために細心の注意を払って最適化された究極の音声クローニングツール。", + "This section contains some extra utilities that often may be in experimental phases.": "このセクションには、多くの場合、実験段階にある可能性のあるいくつかの追加のユーティリティが含まれています。", + "Output Information": "出力情報", + "Inference": "推論", + "Train": "列車", + "Extra": "余分", + "Merge Audios": "オーディオをマージ (Merge Audios)", + "Processing": "加工", + "Audio Analyzer": "オーディオアナライザ", + "Model Information": "モデル情報", + "Download": "ダウンロード", + "Report a Bug": "バグを報告する", + "Preprocess": "前処理", + "Model Name": "モデル名", + "Enter model name": "モデル名を入力", + "Dataset Path": "データセット パス", + "Enter dataset path": "データセットのパスを入力", + "Sampling Rate": "サンプリングレート", + "RVC Version": "RVC バージョン", + "Preprocess Dataset": "データセットの前処理", + "Extract": "抜く", + "Hop Length": "ホップ長(Hop Length)", + "Batch Size": "バッチサイズ", + "Save Every Epoch": "すべてのエポックを保存", + "Total Epoch": "トータルエポック", + "Pretrained": "事前トレーニング済み", + "Save Only Latest": "最新のみ保存", + "Save Every Weights": "すべてのウェイトを保存(Save Every Weights)", + "Custom Pretrained": "カスタム事前トレーニング済み", + "Upload Pretrained Model": "事前トレーニング済みモデルのアップロード", + "Pretrained Custom Settings": "事前トレーニング済みのカスタム設定", + "The file you dropped is not a valid pretrained file. 
Please try again.": "ドロップしたファイルは有効な事前学習済みファイルではありません。もう一度やり直してください。", + "Click the refresh button to see the pretrained file in the dropdown menu.": "更新ボタンをクリックすると、ドロップダウンメニューに事前トレーニング済みファイルが表示されます。", + "Pretrained G Path": "カスタム事前学習済み G", + "Pretrained D Path": "カスタム事前学習済み D", + "GPU Settings": "GPU 設定", + "GPU Custom Settings": "GPU カスタム設定", + "GPU Number": "GPU番号", + "0 to ∞ separated by -": "0 から ∞ で区切られます。", + "GPU Information": "GPU 情報", + "Pitch Guidance": "ピッチガイダンス", + "Extract Features": "フィーチャの抽出", + "Start Training": "トレーニングを開始", + "Generate Index": "インデックスの生成", + "Voice Model": "音声モデル", + "Index File": "インデックス ファイル", + "Refresh": "リフレッシュ", + "Unload Voice": "音声のアンロード", + "Single": "単", + "Upload Audio": "オーディオのアップロード", + "Select Audio": "オーディオを選択", + "Advanced Settings": "詳細設定", + "Clear Outputs (Deletes all audios in assets/audios)": "Clear Outputs (アセット/オーディオ内のすべてのオーディオを削除します)", + "Custom Output Path": "カスタム出力パス", + "Output Path": "出力パス", + "Pitch": "ピッチ", + "If >=3: apply median filtering to the harvested pitch results. 
The value represents the filter radius and can reduce breathiness": ">=3の場合:収集したピッチ結果に中央値フィルタリングを適用します。この値はフィルターの半径を表し、息苦しさを軽減することができます", + "Search Feature Ratio": "検索機能比率", + "Pitch extraction algorithm": "ピッチ抽出アルゴリズム", + "Convert": "コンバート", + "Export Audio": "オーディオのエクスポート", + "Batch": "バッチ", + "Input Folder": "入力フォルダ", + "Enter input path": "入力パスを入力", + "Output Folder": "出力フォルダ", + "Enter output path": "出力パスを入力", + "Get information about the audio": "オーディオに関する情報を取得する", + "Information about the audio file": "オーディオファイルに関する情報", + "Waiting for information...": "情報を待っています...", + "Model fusion": "モデル融合", + "Weight for Model A": "モデルAの重量", + "Whether the model has pitch guidance": "モデルにピッチガイダンスがあるかどうか", + "Model architecture version": "モデル アーキテクチャのバージョン", + "Path to Model A": "モデルAへのパス", + "Path to Model B": "モデルBへのパス", + "Path to model": "モデルへのパス", + "Model information to be placed": "配置するモデル情報", + "Fusion": "融合", + "Modify model information": "モデル情報の変更", + "Path to Model": "モデルへのパス", + "Model information to be modified": "修正するモデル情報", + "Save file name": "保存ファイル名", + "Modify": "修飾する", + "View model information": "モデル情報の表示", + "View": "眺める", + "Model extraction": "モデルの抽出", + "Model conversion": "モデル変換", + "Pth file": "Pth ファイル", + "Output of the pth file": "p番目のファイルの出力", + "# How to Report an Issue on GitHub": "# GitHub で問題を報告する方法", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1.下の[画面の記録]ボタンをクリックして、発生している問題の記録を開始します。", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2.問題の記録が終了したら、[記録の停止]ボタンをクリックします(同じボタンですが、アクティブに記録しているかどうかによってラベルが変わります)。", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues)に移動し、[New Issue]ボタンをクリックします。", + "4. 
Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 提供された課題テンプレートに記入し、必要に応じて詳細を含め、アセット セクションを使用して前の手順で記録したファイルをアップロードします。", + "Record Screen": "録画画面", + "Record": "記録", + "Stop Recording": "記録の停止", + "Introduce the model .pth path": "モデルの .pth パスを導入する", + "See Model Information": "「モデル情報」を参照", + "## Download Model": "## モデルのダウンロード", + "Model Link": "モデルリンク", + "Introduce the model link": "モデルリンクの紹介", + "Download Model": "モデルのダウンロード", + "## Drop files": "## ファイルのドロップ", + "Drag your .pth file and .index file into this space. Drag one and then the other.": ".pth ファイルと .index ファイルをこのスペースにドラッグします。一方をドラッグしてから、もう一方をドラッグします。", + "TTS Voices": "TTS ボイス", + "Text to Synthesize": "合成するテキスト", + "Enter text to synthesize": "合成するテキストを入力する", + "Output Path for TTS Audio": "TTSオーディオの出力パス", + "Output Path for RVC Audio": "RVCオーディオの出力パス" +} \ No newline at end of file diff --git a/assets/i18n/languages/jv_JV.json b/assets/i18n/languages/jv_JV.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/assets/i18n/languages/jv_JV.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/assets/i18n/languages/ko_KO.json b/assets/i18n/languages/ko_KO.json new file mode 100644 index 0000000000000000000000000000000000000000..d9ce7975a17583f0c300c9b641fb573c9265bbe9 --- /dev/null +++ b/assets/i18n/languages/ko_KO.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "타의 추종을 불허하는 성능, 모듈성 및 사용자 친화적인 경험을 위해 세심하게 최적화된 궁극의 음성 복제 도구입니다.", + "This section contains some extra utilities that often may be in experimental phases.": "이 섹션에는 종종 실험 단계에 있을 수 있는 몇 가지 추가 유틸리티가 포함되어 있습니다.", + "Output Information": "출력 정보", + "Inference": "추론", + "Train": "기차", + "Extra": "여분의", + "Merge Audios": "오디오 병합", +
"Processing": "가공", + "Audio Analyzer": "오디오 분석기", + "Model Information": "모델 정보", + "Download": "다운로드", + "Report a Bug": "버그 신고", + "Preprocess": "전처리", + "Model Name": "모델명", + "Enter model name": "모델명 입력", + "Dataset Path": "데이터 세트 경로", + "Enter dataset path": "데이터 세트 경로 입력", + "Sampling Rate": "샘플링 레이트", + "RVC Version": "RVC 버전", + "Preprocess Dataset": "데이터 세트 전처리", + "Extract": "추출물", + "Hop Length": "홉 길이", + "Batch Size": "배치 크기(Batch Size)", + "Save Every Epoch": "모든 Epoch를 저장하십시오", + "Total Epoch": "Total Epoch(총 에포크)", + "Pretrained": "사전 학습된", + "Save Only Latest": "최신순만 저장(Save Only Latest)", + "Save Every Weights": "모든 가중치 저장", + "Custom Pretrained": "사용자 지정 사전 학습", + "Upload Pretrained Model": "사전 훈련된 모델 업로드하기", + "Pretrained Custom Settings": "사전 훈련된 사용자 지정 설정", + "The file you dropped is not a valid pretrained file. Please try again.": "드롭한 파일이 유효한 사전 학습된 파일이 아닙니다. 다시 시도하십시오.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "새로 고침 버튼을 클릭하면 드롭다운 메뉴에서 사전 학습된 파일을 볼 수 있습니다.", + "Pretrained G Path": "사용자 지정 사전 훈련된 G", + "Pretrained D Path": "사용자 지정 사전 훈련된 D", + "GPU Settings": "GPU 설정", + "GPU Custom Settings": "GPU 사용자 지정 설정", + "GPU Number": "GPU 번호", + "0 to ∞ separated by -": "0 - ∞ - 로 구분", + "GPU Information": "GPU 정보", + "Pitch Guidance": "피치 안내", + "Extract Features": "피처 추출", + "Start Training": "트레이닝 시작하기", + "Generate Index": "색인 생성", + "Voice Model": "음성 모델", + "Index File": "색인 파일", + "Refresh": "리프레쉬", + "Unload Voice": "음성 언로드", + "Single": "싱글", + "Upload Audio": "오디오 업로드", + "Select Audio": "오디오 선택", + "Advanced Settings": "Advanced Settings(고급 설정)", + "Clear Outputs (Deletes all audios in assets/audios)": "출력 지우기(자산/오디오의 모든 오디오 삭제)", + "Custom Output Path": "사용자 지정 출력 경로", + "Output Path": "출력 경로", + "Pitch": "음조", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": ">=3인 경우: 수확된 피치 결과에 중앙값 필터링을 적용합니다. 
이 값은 필터 반경을 나타내며 호흡을 줄일 수 있습니다", + "Search Feature Ratio": "검색 기능 비율(Search Feature Ratio)", + "Pitch extraction algorithm": "피치 추출 알고리즘", + "Convert": "변환", + "Export Audio": "오디오 내보내기", + "Batch": "일괄", + "Input Folder": "입력 폴더", + "Enter input path": "입력 경로 입력", + "Output Folder": "출력 폴더", + "Enter output path": "출력 경로 입력", + "Get information about the audio": "오디오에 대한 정보 가져오기", + "Information about the audio file": "오디오 파일에 대한 정보", + "Waiting for information...": "정보를 기다리는 중...", + "Model fusion": "모델 융합", + "Weight for Model A": "모델 A의 무게", + "Whether the model has pitch guidance": "모델에 피치 안내가 있는지 여부", + "Model architecture version": "모델 아키텍처 버전", + "Path to Model A": "모델 A로 가는 길", + "Path to Model B": "모델 B로 가는 길", + "Path to model": "모델 경로", + "Model information to be placed": "배치할 모델 정보", + "Fusion": "융해", + "Modify model information": "모델 정보 수정", + "Path to Model": "모델 경로", + "Model information to be modified": "수정할 모델 정보", + "Save file name": "파일 이름 저장", + "Modify": "수정하다", + "View model information": "모델 정보 보기", + "View": "보기", + "Model extraction": "모델 추출", + "Model conversion": "모델 변환", + "Pth file": "Pth 파일", + "Output of the pth file": "pth 파일의 출력", + "# How to Report an Issue on GitHub": "# GitHub에서 문제를 보고하는 방법", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. 아래의 '녹화 화면' 버튼을 클릭하여 발생한 문제의 녹화를 시작합니다.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. 문제 기록이 끝나면 '기록 중지' 버튼(동일한 버튼이지만 현재 기록 중인지 여부에 따라 레이블이 변경됨)을 클릭합니다.", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub 문제](https://github.com/IAHispano/Applio/issues)로 이동하여 '새 문제' 버튼을 클릭합니다.", + "4. 
Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 제공된 문제 템플릿을 완료하고 필요에 따라 세부 정보를 포함하고 자산 섹션을 활용하여 이전 단계에서 기록된 파일을 업로드합니다.", + "Record Screen": "녹화 화면", + "Record": "기록", + "Stop Recording": "기록 중지", + "Introduce the model .pth path": "모델 .pth 경로 소개", + "See Model Information": "모델 정보 보기", + "## Download Model": "## 모델 다운로드", + "Model Link": "모델 링크", + "Introduce the model link": "모델 소개 링크", + "Download Model": "모델 다운로드", + "## Drop files": "## 파일 드롭", + "Drag your .pth file and .index file into this space. Drag one and then the other.": ".pth 파일과 .index 파일을 이 공간으로 드래그합니다. 하나를 드래그한 다음 다른 하나를 드래그합니다.", + "TTS Voices": "TTS 음성", + "Text to Synthesize": "합성할 텍스트(Text to Synthesize)", + "Enter text to synthesize": "합성할 텍스트 입력", + "Output Path for TTS Audio": "TTS 오디오의 출력 경로", + "Output Path for RVC Audio": "RVC 오디오의 출력 경로" +} \ No newline at end of file diff --git a/assets/i18n/languages/mr_MR.json b/assets/i18n/languages/mr_MR.json new file mode 100644 index 0000000000000000000000000000000000000000..a9ae585fad38120294c687a8345710a9cfd5252e --- /dev/null +++ b/assets/i18n/languages/mr_MR.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "अल्टिमेट व्हॉइस क्लोनिंग टूल, अप्रतिम शक्ती, मॉड्युलरिटी आणि वापरकर्ता-अनुकूल अनुभवासाठी काळजीपूर्वक ऑप्टिमाइझ केलेले.", + "This section contains some extra utilities that often may be in experimental phases.": "या विभागात काही अतिरिक्त उपयोगिता आहेत ज्या बर्याचदा प्रायोगिक टप्प्यात असू शकतात.", + "Output Information": "आउटपुट माहिती", + "Inference": "निष्कर्ष", + "Train": "आगगाडी", + "Extra": "अतिरिक्त", + "Merge Audios": "ऑडिओ विलीन करा", + "Processing": "प्रोसेसिंग", + "Audio Analyzer": "ऑडिओ विश्लेषक", + "Model Information": "मॉडेल माहिती", + "Download": "डाउनलोड करा", + "Report a Bug": "बग ची नोंद करा", +
"Preprocess": "पूर्वप्रक्रिया", + "Model Name": "मॉडेलचे नाव", + "Enter model name": "मॉडेल नाव प्रविष्ट करा", + "Dataset Path": "डेटासेट पथ", + "Enter dataset path": "डेटासेट मार्ग प्रविष्ट करा", + "Sampling Rate": "नमुना दर", + "RVC Version": "आरव्हीसी आवृत्ती", + "Preprocess Dataset": "Preprocess Dataset", + "Extract": "अर्क", + "Hop Length": "हॉप लांबी", + "Batch Size": "बॅच आकार", + "Save Every Epoch": "प्रत्येक युग वाचवा", + "Total Epoch": "एकूण युग", + "Pretrained": "पूर्वप्रशिक्षित", + "Save Only Latest": "फक्त लेटेस्ट सेव्ह करा", + "Save Every Weights": "प्रत्येक वजन वाचवा", + "Custom Pretrained": "सानुकूल पूर्वप्रशिक्षित", + "Upload Pretrained Model": "पूर्वप्रशिक्षित मॉडेल अपलोड करा", + "Pretrained Custom Settings": "पूर्वप्रशिक्षित सानुकूल सेटिंग्ज", + "The file you dropped is not a valid pretrained file. Please try again.": "आपण टाकलेली फाईल वैध पूर्वप्रशिक्षित फाइल नाही. कृपया पुन्हा प्रयत्न करा.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "ड्रॉपडाउन मेनूमध्ये पूर्वप्रशिक्षित फाइल पाहण्यासाठी रिफ्रेश बटणावर क्लिक करा.", + "Pretrained G Path": "सानुकूल पूर्वप्रशिक्षित जी", + "Pretrained D Path": "सानुकूल पूर्वप्रशिक्षित डी", + "GPU Settings": "जीपीयू सेटिंग्स", + "GPU Custom Settings": "जीपीयू सानुकूल सेटिंग्स", + "GPU Number": "जीपीयू नंबर", + "0 to ∞ separated by -": "0 ते ∞ वेगळे केले जातात -", + "GPU Information": "जीपीयू माहिती", + "Pitch Guidance": "खेळपट्टी मार्गदर्शन", + "Extract Features": "अर्क वैशिष्ट्ये", + "Start Training": "प्रशिक्षण सुरू करा", + "Generate Index": "इंडेक्स तयार करा", + "Voice Model": "व्हॉइस मॉडेल", + "Index File": "अनुक्रमणिका फाइल", + "Refresh": "टवटवी आणणे", + "Unload Voice": "आवाज अनलोड करा", + "Single": "अविवाहित", + "Upload Audio": "ऑडिओ अपलोड करा", + "Select Audio": "ऑडिओ निवडा", + "Advanced Settings": "प्रगत सेटिंग्ज", + "Clear Outputs (Deletes all audios in assets/audios)": "स्पष्ट आउटपुट (मालमत्ता / ऑडिओमधील सर्व ऑडिओ हटवतात)", + "Custom Output Path": "सानुकूल आउटपुट पथ", + 
"Output Path": "आउटपुट पथ", + "Pitch": "खेळपट्टी", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "जर >= 3: कापणी केलेल्या खेळपट्टीच्या परिणामांवर मध्यम फिल्टरिंग लागू करा. मूल्य फिल्टर त्रिज्याचे प्रतिनिधित्व करते आणि श्वासोच्छवास कमी करू शकते", + "Search Feature Ratio": "शोध वैशिष्ट्य गुणोत्तर", + "Pitch extraction algorithm": "पिच निष्कर्षण अल्गोरिदम", + "Convert": "धर्मांतर करा", + "Export Audio": "निर्यात ऑडिओ", + "Batch": "तुकडी", + "Input Folder": "इनपुट फोल्डर", + "Enter input path": "इनपुट पथ प्रविष्ट करा", + "Output Folder": "आउटपुट फोल्डर", + "Enter output path": "आउटपुट पथ प्रविष्ट करा", + "Get information about the audio": "ऑडिओबद्दल माहिती मिळवा", + "Information about the audio file": "ऑडिओ फाईलची माहिती", + "Waiting for information...": "माहितीच्या प्रतीक्षेत...", + "Model fusion": "मॉडेल फ्यूजन", + "Weight for Model A": "मॉडेल ए साठी वजन", + "Whether the model has pitch guidance": "मॉडेलमध्ये खेळपट्टीमार्गदर्शन आहे की नाही", + "Model architecture version": "मॉडेल आर्किटेक्चर आवृत्ती", + "Path to Model A": "मॉडेल ए चा मार्ग", + "Path to Model B": "मॉडेल बी चा मार्ग", + "Path to model": "मॉडेलचा मार्ग", + "Model information to be placed": "मॉडेल माहिती ठेवली जाईल", + "Fusion": "फ्यूजन", + "Modify model information": "मॉडेल माहिती सुधारित करा", + "Path to Model": "मॉडेल चा मार्ग", + "Model information to be modified": "मॉडेल माहिती मध्ये बदल होणार", + "Save file name": "फाईलचे नाव सेव्ह करा", + "Modify": "सुधारित करा", + "View model information": "मॉडेल माहिती पहा", + "View": "पहा", + "Model extraction": "मॉडेल निष्कर्षण", + "Model conversion": "मॉडेल रूपांतरण", + "Pth file": "पीटीएच फाईल", + "Output of the pth file": "पीटीएच फाईलचे आउटपुट", + "# How to Report an Issue on GitHub": "# गिटहबवर एखाद्या समस्येची नोंद कशी करावी", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. 
आपण अनुभवत असलेल्या समस्येचे रेकॉर्डिंग सुरू करण्यासाठी खालील 'रेकॉर्ड स्क्रीन' बटणावर क्लिक करा.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. अंक रेकॉर्डिंग पूर्ण झाल्यावर 'स्टॉप रेकॉर्डिंग' बटणावर क्लिक करा (तेच बटण, पण तुम्ही सक्रियपणे रेकॉर्डिंग करत आहात की नाही यावर अवलंबून लेबल बदलते).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [गिटहब इश्यूज] (https://github.com/IAHispano/Applio/issues) वर जा आणि 'न्यू इश्यू' बटणावर क्लिक करा.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. प्रदान केलेले इश्यू टेम्पलेट पूर्ण करा, आवश्यकतेनुसार तपशील समाविष्ट करण्याची खात्री करा आणि मागील चरणातून रेकॉर्ड केलेली फाइल अपलोड करण्यासाठी मालमत्ता विभागाचा वापर करा.", + "Record Screen": "रेकॉर्ड स्क्रीन", + "Record": "नोंदणे", + "Stop Recording": "रेकॉर्डिंग थांबवा", + "Introduce the model .pth path": "मॉडेल .पीटीएच पथ सादर करा", + "See Model Information": "मॉडेल माहिती पहा", + "## Download Model": "## मॉडेल डाऊनलोड करा", + "Model Link": "मॉडेल लिंक", + "Introduce the model link": "मॉडेल लिंक ची ओळख करून द्या", + "Download Model": "मॉडेल डाऊनलोड करा", + "## Drop files": "## फाइल्स ड्रॉप करा", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "आपली .pth फाइल आणि .अनुक्रमणिका फाईल या जागेत खेचून घ्या. 
एकाला ओढून घ्या आणि मग दुसरं.", + "TTS Voices": "टीटीएस आवाज", + "Text to Synthesize": "संश्लेषित करण्यासाठी मजकूर", + "Enter text to synthesize": "संश्लेषित करण्यासाठी मजकूर प्रविष्ट करा", + "Output Path for TTS Audio": "टीटीएस ऑडिओसाठी आउटपुट पथ", + "Output Path for RVC Audio": "आरव्हीसी ऑडिओसाठी आउटपुट पथ" +} \ No newline at end of file diff --git a/assets/i18n/languages/ms_MS.json b/assets/i18n/languages/ms_MS.json new file mode 100644 index 0000000000000000000000000000000000000000..f1b313e3d998af3ed8d9a87e76577c568901c39d --- /dev/null +++ b/assets/i18n/languages/ms_MS.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Alat pengklonan suara muktamad, dioptimumkan dengan teliti untuk kuasa yang tiada tandingan, modulariti, dan pengalaman mesra pengguna.", + "This section contains some extra utilities that often may be in experimental phases.": "Bahagian ini mengandungi beberapa utiliti tambahan yang selalunya berada dalam fasa percubaan.", + "Output Information": "Maklumat Output", + "Inference": "Inferens", + "Train": "Kereta api", + "Extra": "Tambahan", + "Merge Audios": "Mencantumkan Audio", + "Processing": "Pemprosesan", + "Audio Analyzer": "Penganalisis Audio", + "Model Information": "Maklumat Model", + "Download": "Muat turun", + "Report a Bug": "Laporkan pepijat", + "Preprocess": "Praproses", + "Model Name": "Nama Model", + "Enter model name": "Masukkan nama model", + "Dataset Path": "Laluan Set Data", + "Enter dataset path": "Memasukkan laluan set data", + "Sampling Rate": "Kadar Persampelan", + "RVC Version": "Versi RVC", + "Preprocess Dataset": "Set Data Praproses", + "Extract": "Cabutan", + "Hop Length": "Panjang Hop", + "Batch Size": "Saiz kelompok", + "Save Every Epoch": "Simpan Setiap Zaman", + "Total Epoch": "Jumlah Zaman", + "Pretrained": "Dipralatih", + "Save Only Latest": "Simpan Terkini Sahaja", + "Save Every Weights": "Jimat Setiap Berat", + 
"Custom Pretrained": "Pralatih Tersuai", + "Upload Pretrained Model": "Muat naik Model Pralatih", + "Pretrained Custom Settings": "Seting Tersuai Pralatih", + "The file you dropped is not a valid pretrained file. Please try again.": "Fail yang anda gugurkan bukan fail pralatih yang sah. Sila cuba lagi.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Klik butang segar semula untuk melihat fail pralatih dalam menu juntai bawah.", + "Pretrained G Path": "Custom Pretrained G", + "Pretrained D Path": "Custom Pretrained D", + "GPU Settings": "Seting GPU", + "GPU Custom Settings": "Seting Tersuai GPU", + "GPU Number": "Nombor GPU", + "0 to ∞ separated by -": "0 hingga ∞ dipisahkan oleh -", + "GPU Information": "Maklumat GPU", + "Pitch Guidance": "Panduan Padang", + "Extract Features": "Ciri Ekstrak", + "Start Training": "Mulakan Latihan", + "Generate Index": "Menjana Indeks", + "Voice Model": "Model Suara", + "Index File": "Fail Indeks", + "Refresh": "Refresh", + "Unload Voice": "Memunggah Suara", + "Single": "Tunggal", + "Upload Audio": "Muat naik Audio", + "Select Audio": "Pilih Audio", + "Advanced Settings": "Seting Lanjutan", + "Clear Outputs (Deletes all audios in assets/audios)": "Kosongkan Output (Memadamkan semua audio dalam aset/audio)", + "Custom Output Path": "Laluan Output Tersuai", + "Output Path": "Laluan Output", + "Pitch": "Pitch", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Jika >=3: gunakan penapisan median pada hasil padang yang dituai. 
Nilai mewakili jejari penapis dan boleh mengurangkan sesak nafas", + "Search Feature Ratio": "Nisbah Ciri Carian", + "Pitch extraction algorithm": "Algoritma pengekstrakan padang", + "Convert": "Menukar", + "Export Audio": "Eksport Audio", + "Batch": "Kelompok", + "Input Folder": "Input Folder", + "Enter input path": "Masukkan laluan input", + "Output Folder": "Output Folder", + "Enter output path": "Masukkan laluan output", + "Get information about the audio": "Mendapatkan maklumat tentang audio", + "Information about the audio file": "Maklumat mengenai fail audio", + "Waiting for information...": "Menunggu maklumat...", + "Model fusion": "Gabungan model", + "Weight for Model A": "Berat untuk Model A", + "Whether the model has pitch guidance": "Sama ada model itu mempunyai panduan padang", + "Model architecture version": "Versi seni bina model", + "Path to Model A": "Laluan ke Model A", + "Path to Model B": "Laluan ke Model B", + "Path to model": "Laluan ke model", + "Model information to be placed": "Maklumat model yang akan diletakkan", + "Fusion": "Gabungan", + "Modify model information": "Ubah suai maklumat model", + "Path to Model": "Laluan ke Model", + "Model information to be modified": "Maklumat model untuk diubah suai", + "Save file name": "Simpan nama fail", + "Modify": "Mengubah suai", + "View model information": "Lihat maklumat model", + "View": "Lihat", + "Model extraction": "Pengekstrakan model", + "Model conversion": "Penukaran model", + "Pth file": "Pth fail", + "Output of the pth file": "Output fail pth", + "# How to Report an Issue on GitHub": "# Cara Melaporkan Isu di GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Klik pada butang 'Skrin Rekod' di bawah untuk mula merakam isu yang anda alami.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Setelah anda selesai merakam isu ini, klik pada butang 'Hentikan Rakaman' (butang yang sama, tetapi label berubah bergantung pada sama ada anda merakam secara aktif atau tidak).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Pergi ke [Isu GitHub](https://github.com/IAHispano/Applio/issues) dan klik pada butang 'Isu Baru'.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Lengkapkan templat isu yang disediakan, pastikan untuk memasukkan butiran mengikut keperluan, dan gunakan bahagian aset untuk memuat naik fail yang dirakam dari langkah sebelumnya.", + "Record Screen": "Skrin Rakam", + "Record": "Rekod", + "Stop Recording": "Hentikan Rakaman", + "Introduce the model .pth path": "Memperkenalkan model laluan .pth", + "See Model Information": "Lihat Maklumat Model", + "## Download Model": "## Muat Turun Model", + "Model Link": "Pautan Model", + "Introduce the model link": "Memperkenalkan pautan model", + "Download Model": "Muat Turun Model", + "## Drop files": "## Jatuhkan fail", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Seret fail .pth dan fail .index anda ke dalam ruang ini. 
Seret satu dan kemudian yang lain.", + "TTS Voices": "Suara TTS", + "Text to Synthesize": "Teks untuk Mensintesis", + "Enter text to synthesize": "Masukkan teks untuk mensintesis", + "Output Path for TTS Audio": "Laluan output untuk TTS Audio", + "Output Path for RVC Audio": "Laluan Output untuk Audio RVC" +} \ No newline at end of file diff --git a/assets/i18n/languages/nl_NL.json b/assets/i18n/languages/nl_NL.json new file mode 100644 index 0000000000000000000000000000000000000000..df098a61a7927d270911573c8b451e4cab450034 --- /dev/null +++ b/assets/i18n/languages/nl_NL.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Ultieme tool voor het klonen van stemmen, zorgvuldig geoptimaliseerd voor ongeëvenaarde kracht, modulariteit en gebruiksvriendelijke ervaring.", + "This section contains some extra utilities that often may be in experimental phases.": "Deze sectie bevat enkele extra hulpprogramma's die zich vaak in experimentele fasen bevinden.", + "Output Information": "Output Informatie", + "Inference": "Conclusie", + "Train": "Trein", + "Extra": "Extra", + "Merge Audios": "Audio samenvoegen", + "Processing": "Verwerking", + "Audio Analyzer": "Audio Analyzer", + "Model Information": "Modelinformatie", + "Download": "Downloaden", + "Report a Bug": "Een bug melden", + "Preprocess": "Voorbewerking", + "Model Name": "Modelnaam", + "Enter model name": "Voer de modelnaam in", + "Dataset Path": "Pad naar gegevensset", + "Enter dataset path": "Pad naar gegevensset invoeren", + "Sampling Rate": "Bemonsteringsfrequentie", + "RVC Version": "RVC-versie", + "Preprocess Dataset": "Gegevensset voor het proces", + "Extract": "Extract", + "Hop Length": "Hop Lengte", + "Batch Size": "Batchgrootte", + "Save Every Epoch": "Red elk tijdperk", + "Total Epoch": "Totale tijdvak", + "Pretrained": "Voorgetraind", + "Save Only Latest": "Alleen de nieuwste opslaan", + "Save Every Weights": 
"Sla alle gewichten op", + "Custom Pretrained": "Aangepaste voorgetrainde", + "Upload Pretrained Model": "Vooraf getraind model uploaden", + "Pretrained Custom Settings": "Vooraf getrainde aangepaste instellingen", + "The file you dropped is not a valid pretrained file. Please try again.": "Het bestand dat u hebt neergezet, is geen geldig vooraf getraind bestand. Probeer het opnieuw.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Klik op de knop Vernieuwen om het vooraf getrainde bestand in het vervolgkeuzemenu te zien.", + "Pretrained G Path": "Aangepaste voorgetrainde G", + "Pretrained D Path": "Aangepaste voorgetrainde D", + "GPU Settings": "GPU-instellingen", + "GPU Custom Settings": "Aangepaste GPU-instellingen", + "GPU Number": "GPU-nummer", + "0 to ∞ separated by -": "0 tot ∞ gescheiden door -", + "GPU Information": "GPU-informatie", + "Pitch Guidance": "Begeleiding bij het veld", + "Extract Features": "Extraheer functies", + "Start Training": "Begin met trainen", + "Generate Index": "Index genereren", + "Voice Model": "Stem Model", + "Index File": "Index-bestand", + "Refresh": "Opfrissen", + "Unload Voice": "Stem lossen", + "Single": "Ongetrouwd", + "Upload Audio": "Audio uploaden", + "Select Audio": "Selecteer Audio", + "Advanced Settings": "Geavanceerde instellingen", + "Clear Outputs (Deletes all audios in assets/audios)": "Uitvoer wissen (verwijdert alle audio in assets/audio)", + "Custom Output Path": "Aangepast uitvoerpad", + "Output Path": "Uitgang Pad", + "Pitch": "Toonhoogte", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Indien >=3: mediaanfiltering toepassen op de resultaten van de geoogste toonhoogte. 
De waarde vertegenwoordigt de filterstraal en kan de ademhaling verminderen", + "Search Feature Ratio": "Verhouding zoekfunctie", + "Pitch extraction algorithm": "Algoritme voor het extraheren van toonhoogte", + "Convert": "Omzetten", + "Export Audio": "Audio exporteren", + "Batch": "Batch", + "Input Folder": "Invoermap", + "Enter input path": "Voer het invoerpad in", + "Output Folder": "Uitvoer map", + "Enter output path": "Voer het uitvoerpad in", + "Get information about the audio": "Informatie over de audio opvragen", + "Information about the audio file": "Informatie over het audiobestand", + "Waiting for information...": "Wachten op informatie...", + "Model fusion": "Fusie van modellen", + "Weight for Model A": "Gewicht voor Model A", + "Whether the model has pitch guidance": "Of het model pitchbegeleiding heeft", + "Model architecture version": "Versie van de modelarchitectuur", + "Path to Model A": "Pad naar Model A", + "Path to Model B": "Pad naar Model B", + "Path to model": "Pad naar model", + "Model information to be placed": "Te plaatsen modelinformatie", + "Fusion": "Samensmelting", + "Modify model information": "Modelgegevens wijzigen", + "Path to Model": "Pad naar model", + "Model information to be modified": "Modelinformatie die moet worden gewijzigd", + "Save file name": "Bestandsnaam opslaan", + "Modify": "Modificeren", + "View model information": "Modelinformatie weergeven", + "View": "Bekijken", + "Model extraction": "Extractie van modellen", + "Model conversion": "Model conversie", + "Pth file": "Pth-bestand", + "Output of the pth file": "Uitvoer van het pth-bestand", + "# How to Report an Issue on GitHub": "# Een probleem melden op GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Klik op de knop 'Opnamescherm' hieronder om te beginnen met het opnemen van het probleem dat u ondervindt.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Als u klaar bent met het opnemen van het probleem, klikt u op de knop 'Opname stoppen' (dezelfde knop, maar het label verandert afhankelijk van of u actief aan het opnemen bent of niet).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Ga naar [GitHub Issues](https://github.com/IAHispano/Applio/issues) en klik op de knop 'New Issue'.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Vul het meegeleverde uitgiftesjabloon in, zorg ervoor dat u indien nodig details opneemt en gebruik het gedeelte Activa om het opgenomen bestand uit de vorige stap te uploaden.", + "Record Screen": "Scherm opnemen", + "Record": "Opnemen", + "Stop Recording": "Opname stoppen", + "Introduce the model .pth path": "Introduceer het model .pth-pad", + "See Model Information": "Modelinformatie bekijken", + "## Download Model": "## Model downloaden", + "Model Link": "Koppeling naar het model", + "Introduce the model link": "Introduceer de modellink", + "Download Model": "Model downloaden", + "## Drop files": "## Bestanden neerzetten", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Sleep uw .pth-bestand en .index-bestand naar deze ruimte. 
Sleep de ene en dan de andere.", + "TTS Voices": "TTS-stemmen", + "Text to Synthesize": "Tekst om te synthetiseren", + "Enter text to synthesize": "Voer tekst in om te synthetiseren", + "Output Path for TTS Audio": "Uitvoerpad voor TTS-audio", + "Output Path for RVC Audio": "Uitvoerpad voor RVC-audio" +} \ No newline at end of file diff --git a/assets/i18n/languages/pa_PA.json b/assets/i18n/languages/pa_PA.json new file mode 100644 index 0000000000000000000000000000000000000000..b462ca624106e4cc58a57d682a21a43343090ce5 --- /dev/null +++ b/assets/i18n/languages/pa_PA.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "ਅੰਤਮ ਵੌਇਸ ਕਲੋਨਿੰਗ ਟੂਲ, ਬੇਮਿਸਾਲ ਸ਼ਕਤੀ, ਮਾਡਿਊਲਰਿਟੀ, ਅਤੇ ਉਪਭੋਗਤਾ-ਅਨੁਕੂਲ ਅਨੁਭਵ ਲਈ ਧਿਆਨ ਨਾਲ ਅਨੁਕੂਲ ਬਣਾਇਆ ਗਿਆ ਹੈ.", + "This section contains some extra utilities that often may be in experimental phases.": "ਇਸ ਭਾਗ ਵਿੱਚ ਕੁਝ ਵਾਧੂ ਉਪਯੋਗਤਾਵਾਂ ਹਨ ਜੋ ਅਕਸਰ ਪ੍ਰਯੋਗਾਤਮਕ ਪੜਾਵਾਂ ਵਿੱਚ ਹੋ ਸਕਦੀਆਂ ਹਨ।", + "Output Information": "ਆਊਟਪੁੱਟ ਜਾਣਕਾਰੀ", + "Inference": "ਅਨੁਮਾਨ", + "Train": "ਟ੍ਰੇਨ", + "Extra": "ਵਾਧੂ", + "Merge Audios": "ਆਡੀਓ ਨੂੰ ਮਿਲਾਓ", + "Processing": "ਪ੍ਰੋਸੈਸਿੰਗ", + "Audio Analyzer": "ਆਡੀਓ ਵਿਸ਼ਲੇਸ਼ਕ", + "Model Information": "ਮਾਡਲ ਜਾਣਕਾਰੀ", + "Download": "ਡਾਊਨਲੋਡ ਕਰੋ", + "Report a Bug": "ਇੱਕ ਬਗ ਦੀ ਰਿਪੋਰਟ ਕਰੋ", + "Preprocess": "ਪ੍ਰੀਪ੍ਰੋਸੈਸ", + "Model Name": "ਮਾਡਲ ਦਾ ਨਾਮ", + "Enter model name": "ਮਾਡਲ ਨਾਮ ਦਾਖਲ ਕਰੋ", + "Dataset Path": "ਡਾਟਾਸੈਟ ਪਾਥ", + "Enter dataset path": "ਡਾਟਾਸੈਟ ਪਾਥ ਦਾਖਲ ਕਰੋ", + "Sampling Rate": "ਨਮੂਨੇ ਲੈਣ ਦੀ ਦਰ", + "RVC Version": "RVC ਸੰਸਕਰਣ", + "Preprocess Dataset": "ਪ੍ਰੀਪ੍ਰੋਸੈਸ ਡੇਟਾਸੈਟ", + "Extract": "ਐਕਸਟਰੈਕਟ", + "Hop Length": "ਹੌਪ ਲੰਬਾਈ", + "Batch Size": "ਬੈਚ ਦਾ ਆਕਾਰ", + "Save Every Epoch": "ਹਰ ਯੁੱਗ ਨੂੰ ਸੁਰੱਖਿਅਤ ਕਰੋ", + "Total Epoch": "ਕੁੱਲ ਯੁੱਗ", + "Pretrained": "ਪਹਿਲਾਂ ਤੋਂ ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ", + "Save Only Latest": "ਕੇਵਲ ਨਵੀਨਤਮ ਨੂੰ ਸੁਰੱਖਿਅਤ ਕਰੋ", + "Save Every Weights": "ਹਰ ਭਾਰ ਨੂੰ ਬਚਾਓ", + "Custom Pretrained": "ਕਸਟਮ ਪ੍ਰੀਟ੍ਰੇਨਡ", + "Upload Pretrained 
Model": "ਪਹਿਲਾਂ ਤੋਂ ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ ਮਾਡਲ ਅਪਲੋਡ ਕਰੋ", + "Pretrained Custom Settings": "ਪਹਿਲਾਂ ਤੋਂ ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ ਕਸਟਮ ਸੈਟਿੰਗਾਂ", + "The file you dropped is not a valid pretrained file. Please try again.": "ਤੁਹਾਡੇ ਵੱਲੋਂ ਛੱਡੀ ਗਈ ਫਾਇਲ ਇੱਕ ਵੈਧ ਪੂਰਵ-ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ ਫਾਇਲ ਨਹੀਂ ਹੈ। ਕਿਰਪਾ ਕਰਕੇ ਦੁਬਾਰਾ ਕੋਸ਼ਿਸ਼ ਕਰੋ।", + "Click the refresh button to see the pretrained file in the dropdown menu.": "ਡ੍ਰੌਪਡਾਊਨ ਮੀਨੂ ਵਿੱਚ ਪਹਿਲਾਂ ਤੋਂ ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ ਫਾਇਲ ਨੂੰ ਦੇਖਣ ਲਈ ਰੀਫਰੈਸ਼ ਬਟਨ 'ਤੇ ਕਲਿੱਕ ਕਰੋ।", + "Pretrained G Path": "ਕਸਟਮ ਪ੍ਰੀਟ੍ਰੇਨਡ G", + "Pretrained D Path": "ਕਸਟਮ ਪ੍ਰੀਟ੍ਰੇਨਡ ਡੀ", + "GPU Settings": "GPU ਸੈਟਿੰਗਾਂ", + "GPU Custom Settings": "GPU ਕਸਟਮ ਸੈਟਿੰਗਾਂ", + "GPU Number": "GPU ਨੰਬਰ", + "0 to ∞ separated by -": "0 ਤੋਂ ∞ ਦੁਆਰਾ ਵੱਖ ਕੀਤਾ ਜਾਂਦਾ ਹੈ -", + "GPU Information": "GPU ਜਾਣਕਾਰੀ", + "Pitch Guidance": "ਪਿਚ ਗਾਈਡੈਂਸ", + "Extract Features": "ਐਕਸਟਰੈਕਟ ਵਿਸ਼ੇਸ਼ਤਾਵਾਂ", + "Start Training": "ਸਿਖਲਾਈ ਸ਼ੁਰੂ ਕਰੋ", + "Generate Index": "ਇੰਡੈਕਸ ਜਨਰੇਟ ਕਰੋ", + "Voice Model": "ਵੌਇਸ ਮਾਡਲ", + "Index File": "ਇੰਡੈਕਸ ਫਾਇਲ", + "Refresh": "ਤਾਜ਼ਾ ਕਰੋ", + "Unload Voice": "ਆਵਾਜ਼ ਨੂੰ ਅਨਲੋਡ ਕਰੋ", + "Single": "ਸਿੰਗਲ", + "Upload Audio": "ਆਡੀਓ ਅੱਪਲੋਡ ਕਰੋ", + "Select Audio": "ਆਡੀਓ ਚੁਣੋ", + "Advanced Settings": "ਉੱਨਤ ਸੈਟਿੰਗਾਂ", + "Clear Outputs (Deletes all audios in assets/audios)": "ਸਪਸ਼ਟ ਆਊਟਪੁੱਟ (ਜਾਇਦਾਦਾਂ/ਆਡੀਓ ਵਿੱਚ ਸਾਰੇ ਆਡੀਓ ਮਿਟਾ ਦਿੰਦਾ ਹੈ)", + "Custom Output Path": "ਕਸਟਮ ਆਉਟਪੁੱਟ ਪਾਥ", + "Output Path": "ਆਊਟਪੁੱਟ ਪਾਥ", + "Pitch": "ਪਿਚ", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "ਜੇ >=3: ਕਟਾਈ ਕੀਤੀ ਪਿੱਚ ਦੇ ਨਤੀਜਿਆਂ 'ਤੇ ਮੀਡੀਅਨ ਫਿਲਟਰਿੰਗ ਲਾਗੂ ਕਰੋ. 
ਮੁੱਲ ਫਿਲਟਰ ਦੇ ਘੇਰੇ ਨੂੰ ਦਰਸਾਉਂਦਾ ਹੈ ਅਤੇ ਸਾਹ ਲੈਣ ਨੂੰ ਘਟਾ ਸਕਦਾ ਹੈ", + "Search Feature Ratio": "ਖੋਜ ਵਿਸ਼ੇਸ਼ਤਾ ਅਨੁਪਾਤ", + "Pitch extraction algorithm": "ਪਿਚ ਐਕਸਟਰੈਕਸ਼ਨ ਐਲਗੋਰਿਦਮ", + "Convert": "ਕਨਵਰਟ ਕਰੋ", + "Export Audio": "ਆਡੀਓ ਨਿਰਯਾਤ ਕਰੋ", + "Batch": "ਬੈਚ", + "Input Folder": "ਇਨਪੁੱਟ ਫੋਲਡਰ", + "Enter input path": "ਇਨਪੁੱਟ ਪਾਥ ਦਾਖਲ ਕਰੋ", + "Output Folder": "ਆਊਟਪੁੱਟ ਫੋਲਡਰ", + "Enter output path": "ਆਊਟਪੁੱਟ ਪਾਥ ਦਾਖਲ ਕਰੋ", + "Get information about the audio": "ਆਡੀਓ ਬਾਰੇ ਜਾਣਕਾਰੀ ਪ੍ਰਾਪਤ ਕਰੋ", + "Information about the audio file": "ਆਡੀਓ ਫਾਇਲ ਬਾਰੇ ਜਾਣਕਾਰੀ", + "Waiting for information...": "ਜਾਣਕਾਰੀ ਦੀ ਉਡੀਕ ਕੀਤੀ ਜਾ ਰਹੀ ਹੈ...", + "Model fusion": "ਮਾਡਲ ਫਿਊਜ਼ਨ", + "Weight for Model A": "ਮਾਡਲ A ਲਈ ਭਾਰ", + "Whether the model has pitch guidance": "ਕੀ ਮਾਡਲ ਕੋਲ ਪਿਚ ਗਾਈਡੈਂਸ ਹੈ", + "Model architecture version": "ਮਾਡਲ ਆਰਕੀਟੈਕਚਰ ਸੰਸਕਰਣ", + "Path to Model A": "ਮਾਡਲ A ਲਈ ਰਾਹ", + "Path to Model B": "ਮਾਡਲ B ਲਈ ਰਾਹ", + "Path to model": "ਮਾਡਲ ਲਈ ਰਾਹ", + "Model information to be placed": "ਮਾਡਲ ਜਾਣਕਾਰੀ ਰੱਖੀ ਜਾਣੀ ਚਾਹੀਦੀ ਹੈ", + "Fusion": "ਫਿਊਜ਼ਨ", + "Modify model information": "ਮਾਡਲ ਜਾਣਕਾਰੀ ਨੂੰ ਸੋਧੋ", + "Path to Model": "ਮਾਡਲ ਲਈ ਰਾਹ", + "Model information to be modified": "ਮਾਡਲ ਜਾਣਕਾਰੀ ਨੂੰ ਸੋਧਿਆ ਜਾਣਾ ਚਾਹੀਦਾ ਹੈ", + "Save file name": "ਫਾਇਲ ਨਾਮ ਸੁਰੱਖਿਅਤ ਕਰੋ", + "Modify": "ਸੋਧੋ", + "View model information": "ਮਾਡਲ ਜਾਣਕਾਰੀ ਦੇਖੋ", + "View": "ਦ੍ਰਿਸ਼", + "Model extraction": "ਮਾਡਲ ਕੱਢਣਾ", + "Model conversion": "ਮਾਡਲ ਪਰਿਵਰਤਨ", + "Pth file": "Pth ਫਾਇਲ", + "Output of the pth file": "pth ਫਾਇਲ ਦਾ ਆਊਟਪੁੱਟ", + "# How to Report an Issue on GitHub": "# GitHub 'ਤੇ ਕਿਸੇ ਮੁੱਦੇ ਦੀ ਰਿਪੋਰਟ ਕਿਵੇਂ ਕਰਨੀ ਹੈ", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. ਤੁਹਾਡੇ ਵੱਲੋਂ ਅਨੁਭਵ ਕੀਤੀ ਜਾ ਰਹੀ ਸਮੱਸਿਆ ਨੂੰ ਰਿਕਾਰਡ ਕਰਨਾ ਸ਼ੁਰੂ ਕਰਨ ਲਈ ਹੇਠਾਂ ਦਿੱਤੇ 'ਰਿਕਾਰਡ ਸਕ੍ਰੀਨ' ਬਟਨ 'ਤੇ ਕਲਿੱਕ ਕਰੋ।", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. 
ਇੱਕ ਵਾਰ ਜਦੋਂ ਤੁਸੀਂ ਸਮੱਸਿਆ ਨੂੰ ਰਿਕਾਰਡ ਕਰਨਾ ਪੂਰਾ ਕਰ ਲੈਂਦੇ ਹੋ, ਤਾਂ 'ਸਟਾਪ ਰਿਕਾਰਡਿੰਗ' ਬਟਨ 'ਤੇ ਕਲਿੱਕ ਕਰੋ (ਉਹੀ ਬਟਨ, ਪਰ ਲੇਬਲ ਇਸ ਗੱਲ 'ਤੇ ਨਿਰਭਰ ਕਰਦਾ ਹੈ ਕਿ ਤੁਸੀਂ ਸਰਗਰਮੀ ਨਾਲ ਰਿਕਾਰਡਿੰਗ ਕਰ ਰਹੇ ਹੋ ਜਾਂ ਨਹੀਂ)।", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub ਮੁੱਦੇ](https://github.com/IAHispano/Applio/issues) 'ਤੇ ਜਾਓ ਅਤੇ 'ਨਵਾਂ ਮੁੱਦਾ' ਬਟਨ 'ਤੇ ਕਲਿੱਕ ਕਰੋ।", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. ਪ੍ਰਦਾਨ ਕੀਤੇ ਮੁੱਦੇ ਦੇ ਟੈਂਪਲੇਟ ਨੂੰ ਪੂਰਾ ਕਰੋ, ਲੋੜ ਅਨੁਸਾਰ ਵੇਰਵੇ ਸ਼ਾਮਲ ਕਰਨਾ ਯਕੀਨੀ ਬਣਾਓ, ਅਤੇ ਪਿਛਲੇ ਕਦਮ ਤੋਂ ਰਿਕਾਰਡ ਕੀਤੀ ਫਾਈਲ ਨੂੰ ਅੱਪਲੋਡ ਕਰਨ ਲਈ ਸੰਪਤੀ ਸੈਕਸ਼ਨ ਦੀ ਵਰਤੋਂ ਕਰੋ.", + "Record Screen": "ਰਿਕਾਰਡ ਸਕ੍ਰੀਨ", + "Record": "ਰਿਕਾਰਡ", + "Stop Recording": "ਰਿਕਾਰਡਿੰਗ ਬੰਦ ਕਰੋ", + "Introduce the model .pth path": "ਮਾਡਲ .pth path ਨੂੰ ਪੇਸ਼ ਕਰੋ", + "See Model Information": "ਮਾਡਲ ਜਾਣਕਾਰੀ ਦੇਖੋ", + "## Download Model": "## ਡਾਊਨਲੋਡ ਮਾਡਲ", + "Model Link": "ਮਾਡਲ ਲਿੰਕ", + "Introduce the model link": "ਮਾਡਲ ਲਿੰਕ ਪੇਸ਼ ਕਰੋ", + "Download Model": "ਮਾਡਲ ਡਾਊਨਲੋਡ ਕਰੋ", + "## Drop files": "## ਫਾਇਲਾਂ ਛੱਡੋ", + "Drag your .pth file and .index file into this space. 
Drag one and then the other.": "ਆਪਣੀ .pth ਫਾਇਲ ਅਤੇ .index ਫਾਇਲ ਨੂੰ ਇਸ ਸਪੇਸ ਵਿੱਚ ਖਿੱਚੋ। ਇੱਕ ਨੂੰ ਖਿੱਚੋ ਅਤੇ ਫਿਰ ਦੂਜੇ ਨੂੰ।", + "TTS Voices": "TTS ਆਵਾਜ਼ਾਂ", + "Text to Synthesize": "ਸੰਸ਼ਲੇਸ਼ਣ ਕਰਨ ਲਈ ਟੈਕਸਟ", + "Enter text to synthesize": "ਸੰਸ਼ਲੇਸ਼ਿਤ ਕਰਨ ਲਈ ਟੈਕਸਟ ਦਾਖਲ ਕਰੋ", + "Output Path for TTS Audio": "TTS ਆਡੀਓ ਲਈ ਆਉਟਪੁੱਟ ਪਾਥ", + "Output Path for RVC Audio": "RVC ਆਡੀਓ ਲਈ ਆਉਟਪੁੱਟ ਪਾਥ" +} \ No newline at end of file diff --git a/assets/i18n/languages/pl_PL.json b/assets/i18n/languages/pl_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..0228f486fb8b9dfc9bbce5c8252a57b34fd8048b --- /dev/null +++ b/assets/i18n/languages/pl_PL.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Najlepsze narzędzie do klonowania głosu, skrupulatnie zoptymalizowane pod kątem niezrównanej mocy, modułowości i przyjazności dla użytkownika.", + "This section contains some extra utilities that often may be in experimental phases.": "Ta sekcja zawiera kilka dodatkowych narzędzi, które często mogą znajdować się w fazie eksperymentalnej.", + "Output Information": "Informacje wyjściowe", + "Inference": "Wnioskowanie", + "Train": "Trenuj", + "Extra": "Dodatkowych", + "Merge Audios": "Scal audio", + "Processing": "Przetwarzanie", + "Audio Analyzer": "Analizator dźwięku", + "Model Information": "Informacje o modelu", + "Download": "Pobierać", + "Report a Bug": "Zgłoś błąd", + "Preprocess": "Przetwarzanie wstępne", + "Model Name": "Nazwa modelu", + "Enter model name": "Wprowadź nazwę modelu", + "Dataset Path": "Ścieżka zestawu danych", + "Enter dataset path": "Wprowadź ścieżkę zestawu danych", + "Sampling Rate": "Częstotliwość próbkowania", + "RVC Version": "Wersja RVC", + "Preprocess Dataset": "Wstępne przetwarzanie zestawu danych", + "Extract": "Ekstrakt", + "Hop Length": "Długość skoku", + "Batch Size": "Wielkość partii", + "Save Every Epoch": "Zapisuj każdą epokę", + "Total Epoch": 
"Epoka ogółem", + "Pretrained": "Wstępnie wytrenowany", + "Save Only Latest": "Zapisz tylko najnowsze", + "Save Every Weights": "Oszczędzaj wszystkie ciężary", + "Custom Pretrained": "Niestandardowe wstępnie wytrenowane", + "Upload Pretrained Model": "Przekazywanie wstępnie wytrenowanego modelu", + "Pretrained Custom Settings": "Wstępnie wytrenowane ustawienia niestandardowe", + "The file you dropped is not a valid pretrained file. Please try again.": "Upuszczony plik nie jest prawidłowym wstępnie wytrenowanym plikiem. Spróbuj ponownie.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Kliknij przycisk odświeżania, aby wyświetlić wstępnie wytrenowany plik w menu rozwijanym.", + "Pretrained G Path": "Niestandardowe wstępnie wytrenowane G", + "Pretrained D Path": "Niestandardowy wstępnie wytrenowany D", + "GPU Settings": "Ustawienia GPU", + "GPU Custom Settings": "Niestandardowe ustawienia GPU", + "GPU Number": "Numer GPU", + "0 to ∞ separated by -": "Od 0 do ∞ oddzielone -", + "GPU Information": "Informacje o procesorze GPU", + "Pitch Guidance": "Wskazówki dotyczące wysokości dźwięku", + "Extract Features": "Wyodrębnij funkcje", + "Start Training": "Rozpocznij szkolenie", + "Generate Index": "Generuj indeks", + "Voice Model": "Model głosu", + "Index File": "Plik indeksu", + "Refresh": "Odświeżyć", + "Unload Voice": "Uwolnij głos", + "Single": "Pojedynczy", + "Upload Audio": "Prześlij dźwięk", + "Select Audio": "Wybierz Audio (Dźwięk)", + "Advanced Settings": "Ustawienia zaawansowane", + "Clear Outputs (Deletes all audios in assets/audios)": "Wyczyść wyjścia (usuwa wszystkie pliki audio w zasobach/plikach audio)", + "Custom Output Path": "Niestandardowa ścieżka wyjściowa", + "Output Path": "Ścieżka wyjściowa", + "Pitch": "Rzucać", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Jeśli >=3: zastosuj filtrowanie mediany do zebranych wyników skoku. 
Wartość reprezentuje promień filtra i może zmniejszyć oddychanie", + "Search Feature Ratio": "Współczynnik funkcji wyszukiwania", + "Pitch extraction algorithm": "Algorytm ekstrakcji wysokości dźwięku", + "Convert": "Konwertuj", + "Export Audio": "Eksportuj dźwięk", + "Batch": "Partia", + "Input Folder": "Folder wejściowy", + "Enter input path": "Wprowadź ścieżkę wejściową", + "Output Folder": "Folder wyjściowy", + "Enter output path": "Wprowadź ścieżkę wyjściową", + "Get information about the audio": "Uzyskiwanie informacji o dźwięku", + "Information about the audio file": "Informacje o pliku audio", + "Waiting for information...": "Czekam na informację...", + "Model fusion": "Fuzja modeli", + "Weight for Model A": "Waga dla modelu A", + "Whether the model has pitch guidance": "Czy model ma wskazówki dotyczące wysokości dźwięku", + "Model architecture version": "Wersja architektury modelu", + "Path to Model A": "Ścieżka do Modelu A", + "Path to Model B": "Ścieżka do Modelu B", + "Path to model": "Ścieżka do modelu", + "Model information to be placed": "Informacje o modelu, które mają zostać umieszczone", + "Fusion": "Fuzja", + "Modify model information": "Modyfikowanie informacji o modelu", + "Path to Model": "Ścieżka do modelu", + "Model information to be modified": "Informacje o modelu, które mają zostać zmodyfikowane", + "Save file name": "Zapisz nazwę pliku", + "Modify": "Modyfikować", + "View model information": "Wyświetlanie informacji o modelu", + "View": "Widok", + "Model extraction": "Wyodrębnianie modelu", + "Model conversion": "Konwersja modelu", + "Pth file": "Plik pth", + "Output of the pth file": "Wyjście pliku pth", + "# How to Report an Issue on GitHub": "# Jak zgłosić problem na GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Kliknij przycisk \"Ekran nagrywania\" poniżej, aby rozpocząć nagrywanie napotkanego problemu.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Po zakończeniu nagrywania problemu kliknij przycisk \"Zatrzymaj nagrywanie\" (ten sam przycisk, ale etykieta zmienia się w zależności od tego, czy aktywnie nagrywasz, czy nie).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Przejdź do [GitHub Issues](https://github.com/IAHispano/Applio/issues) i kliknij przycisk \"Nowe zgłoszenie\".", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Wypełnij dostarczony szablon problemu, upewniając się, że w razie potrzeby dołączyłeś szczegóły, i skorzystaj z sekcji zasobów, aby przesłać nagrany plik z poprzedniego kroku.", + "Record Screen": "Ekran nagrywania", + "Record": "Nagraj", + "Stop Recording": "Zatrzymaj nagrywanie", + "Introduce the model .pth path": "Wprowadzenie ścieżki pth modelu", + "See Model Information": "Zobacz informacje o modelu", + "## Download Model": "## Pobierz model", + "Model Link": "Link do modelu", + "Introduce the model link": "Wprowadzenie linku do modelu", + "Download Model": "Pobierz model", + "## Drop files": "## Upuść pliki", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Przeciągnij plik .pth i plik .index do tego miejsca. 
Przeciągnij jedną, a potem drugą.", + "TTS Voices": "Głosy TTS", + "Text to Synthesize": "Tekst do syntezy", + "Enter text to synthesize": "Wprowadzanie tekstu do syntezy", + "Output Path for TTS Audio": "Ścieżka wyjściowa dla TTS Audio", + "Output Path for RVC Audio": "Ścieżka wyjściowa dla dźwięku RVC" +} \ No newline at end of file diff --git a/assets/i18n/languages/pt_BR.json b/assets/i18n/languages/pt_BR.json new file mode 100644 index 0000000000000000000000000000000000000000..c0b71f448deea586569504204b5e0b8bd130ce23 --- /dev/null +++ b/assets/i18n/languages/pt_BR.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "A melhor ferramenta de clonagem de voz, meticulosamente otimizada para potência incomparável, modularidade e experiência amigável.", + "This section contains some extra utilities that often may be in experimental phases.": "Esta seção contém alguns utilitários extras que muitas vezes podem estar em fases experimentais.", + "Output Information": "Informações de saída", + "Inference": "Inferência", + "Train": "Treinar", + "Extra": "Extra", + "Merge Audios": "Mesclar áudios", + "Processing": "Processamento", + "Audio Analyzer": "Analisador de áudio", + "Model Information": "Informação do modelo", + "Download": "Baixar", + "Report a Bug": "Reportar um Bug", + "Preprocess": "Pré-processo", + "Model Name": "Nome do modelo", + "Enter model name": "Insira o nome do modelo", + "Dataset Path": "Caminho do dataset", + "Enter dataset path": "Insira o caminho do dataset", + "Sampling Rate": "Taxa de amostragem", + "RVC Version": "Versão RVC", + "Preprocess Dataset": "Pré-processar dataset", + "Extract": "Extrair", + "Hop Length": "Comprimento do Hop", + "Batch Size": "Tamanho do lote", + "Save Every Epoch": "Salve Cada Epoch", + "Total Epoch": "Epoch Total", + "Pretrained": "Pré-treinamento", + "Save Only Latest": "Salvar Apenas o último", + "Save Every Weights": 
"Salvar todos os Weights", + "Custom Pretrained": "Pretrain personalizado", + "Upload Pretrained Model": "Carregar Pretrain", + "Pretrained Custom Settings": "Configurações personalizadas do pretrain", + "The file you dropped is not a valid pretrained file. Please try again.": "O arquivo que você soltou não é um arquivo de pretrain válido. Por favor, tente novamente.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Clique no botão Atualizar para ver o arquivo pretrain no menu suspenso.", + "Pretrained G Path": "Personalizado Pré-treinado G", + "Pretrained D Path": "Personalizado Pré-treinado D", + "GPU Settings": "Configurações da GPU", + "GPU Custom Settings": "Configurações personalizadas da GPU", + "GPU Number": "Número da GPU", + "0 to ∞ separated by -": "0 a ∞ separados por -", + "GPU Information": "Informações da GPU", + "Pitch Guidance": "Orientação de Pitch", + "Extract Features": "Extrair recursos", + "Start Training": "Iniciar Treinamento", + "Generate Index": "Gerar Index", + "Voice Model": "Modelo de voz", + "Index File": "Arquivo de Index", + "Refresh": "Atualizar", + "Unload Voice": "Descarregar voz", + "Single": "Único", + "Upload Audio": "Carregar áudio", + "Select Audio": "Selecione Áudio", + "Advanced Settings": "Configurações avançadas", + "Clear Outputs (Deletes all audios in assets/audios)": "Limpar saídas (exclui todos os áudios em ativos/áudios)", + "Custom Output Path": "Caminho de saída personalizado", + "Output Path": "Caminho de saída", + "Pitch": "Pitch", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Se >=3: aplicar filtragem mediana aos resultados do pitch colhido. 
O valor representa o raio do filtro e pode reduzir a soprosidade", + "Search Feature Ratio": "Proporção de recursos de Index", + "Pitch extraction algorithm": "Algoritmo de extração de pitch", + "Convert": "Converter", + "Export Audio": "Exportar áudio", + "Batch": "Lote", + "Input Folder": "Pasta de entrada", + "Enter input path": "Insira o caminho de entrada", + "Output Folder": "Pasta de saída", + "Enter output path": "Insira o caminho de saída", + "Get information about the audio": "Obter informações sobre o áudio", + "Information about the audio file": "Informações sobre o arquivo de áudio", + "Waiting for information...": "À espera de informações...", + "Model fusion": "Fusão de modelos", + "Weight for Model A": "Peso para o modelo A", + "Whether the model has pitch guidance": "Se o modelo tem orientação de pitch", + "Model architecture version": "Versão da arquitetura do modelo", + "Path to Model A": "Caminho para o Modelo A", + "Path to Model B": "Caminho para o Modelo B", + "Path to model": "Caminho para o modelo", + "Model information to be placed": "Modelo de informação a colocar", + "Fusion": "Fusão", + "Modify model information": "Modificar informações do modelo", + "Path to Model": "Caminho para o modelo", + "Model information to be modified": "Modelo de informação a modificar", + "Save file name": "Guardar nome de ficheiro", + "Modify": "Modificar", + "View model information": "Ver informações do modelo", + "View": "View", + "Model extraction": "Extração do modelo", + "Model conversion": "Conversão de modelo", + "Pth file": "Arquivo Pth", + "Output of the pth file": "Saída do arquivo pth", + "# How to Report an Issue on GitHub": "# Como relatar um problema no GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Clique no botão 'Gravar tela' abaixo para começar a gravar o problema que você está enfrentando.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Depois de terminar de gravar o problema, clique no botão 'Parar gravação' (o mesmo botão, mas a etiqueta muda dependendo se você está gravando ativamente ou não).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Vá para [GitHub Issues](https://github.com/IAHispano/Applio/issues) e clique no botão 'New Issue'.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Preencha o modelo de problema fornecido, garantindo incluir detalhes conforme necessário, e utilize a seção de ativos para carregar o arquivo gravado da etapa anterior.", + "Record Screen": "Gravar tela", + "Record": "Gravar", + "Stop Recording": "Parar gravação", + "Introduce the model .pth path": "Introduza o caminho .pth do modelo", + "See Model Information": "Consulte as informações do modelo", + "## Download Model": "## Baixar Modelo", + "Model Link": "Link do modelo", + "Introduce the model link": "Introduza o link do modelo", + "Download Model": "Baixar Modelo", + "## Drop files": "## Soltar arquivos", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Arraste o arquivo .pth e o arquivo .index para este espaço. 
Arraste um e depois o outro.", + "TTS Voices": "Vozes TTS", + "Text to Synthesize": "Texto a sintetizar", + "Enter text to synthesize": "Insira texto para sintetizar", + "Output Path for TTS Audio": "Caminho de saída para áudio TTS", + "Output Path for RVC Audio": "Caminho de saída para áudio RVC" +} \ No newline at end of file diff --git a/assets/i18n/languages/pt_PT.json b/assets/i18n/languages/pt_PT.json new file mode 100644 index 0000000000000000000000000000000000000000..0d8a43f67577d25f0d19be8b3a1e2e608ed0eabc --- /dev/null +++ b/assets/i18n/languages/pt_PT.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "A melhor ferramenta de clonagem de voz, meticulosamente otimizada para potência, modularidade e experiência de fácil utilização incomparáveis.", + "This section contains some extra utilities that often may be in experimental phases.": "Esta seção contém alguns utilitários extras que muitas vezes podem estar em fases experimentais.", + "Output Information": "Informações de saída", + "Inference": "Inferência", + "Train": "Trem", + "Extra": "Extra", + "Merge Audios": "Mesclar áudios", + "Processing": "Processamento", + "Audio Analyzer": "Analisador de áudio", + "Model Information": "Informações do modelo", + "Download": "Baixar", + "Report a Bug": "Relatar um bug", + "Preprocess": "Pré-processar", + "Model Name": "Nome do modelo", + "Enter model name": "Digite o nome do modelo", + "Dataset Path": "Caminho do conjunto de dados", + "Enter dataset path": "Inserir caminho do conjunto de dados", + "Sampling Rate": "Taxa de amostragem", + "RVC Version": "Versão RVC", + "Preprocess Dataset": "Pré-processar conjunto de dados", + "Extract": "Extrair", + "Hop Length": "Comprimento do salto", + "Batch Size": "Tamanho do lote", + "Save Every Epoch": "Salve todas as épocas", + "Total Epoch": "Época Total", + "Pretrained": "Pré-treinado", + "Save Only Latest": "Salvar somente as 
últimas", + "Save Every Weights": "Salvar todos os pesos", + "Custom Pretrained": "Pré-treinado personalizado", + "Upload Pretrained Model": "Carregar modelo pré-treinado", + "Pretrained Custom Settings": "Configurações personalizadas pré-treinadas", + "The file you dropped is not a valid pretrained file. Please try again.": "O arquivo descartado não é um arquivo pré-treinado válido. Tente novamente.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Clique no botão Atualizar para ver o arquivo pré-treinado no menu suspenso.", + "Pretrained G Path": "G pré-treinado personalizado", + "Pretrained D Path": "D pré-treinado personalizado", + "GPU Settings": "Configurações da GPU", + "GPU Custom Settings": "Configurações personalizadas da GPU", + "GPU Number": "Número da GPU", + "0 to ∞ separated by -": "0 a ∞ separados por -", + "GPU Information": "Informações da GPU", + "Pitch Guidance": "Orientação de Pitch", + "Extract Features": "Extrair recursos", + "Start Training": "Comece a Treinar", + "Generate Index": "Gerar índice", + "Voice Model": "Modelo de Voz", + "Index File": "Arquivo de índice", + "Refresh": "Atualizar", + "Unload Voice": "Descarregar voz", + "Single": "Único", + "Upload Audio": "Carregar áudio", + "Select Audio": "Selecione Áudio", + "Advanced Settings": "Configurações avançadas", + "Clear Outputs (Deletes all audios in assets/audios)": "Limpar saídas (exclui todos os áudios em assets/audios)", + "Custom Output Path": "Caminho de saída personalizado", + "Output Path": "Caminho de saída", + "Pitch": "Pitch", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Se >=3: aplicar filtragem mediana aos resultados do pitch colhido. 
O valor representa o raio do filtro e pode reduzir a soprosidade", + "Search Feature Ratio": "Taxa de recursos de pesquisa", + "Pitch extraction algorithm": "Algoritmo de extração de pitch", + "Convert": "Converter", + "Export Audio": "Exportar áudio", + "Batch": "Lote", + "Input Folder": "Pasta de entrada", + "Enter input path": "Insira o caminho de entrada", + "Output Folder": "Pasta de saída", + "Enter output path": "Insira o caminho de saída", + "Get information about the audio": "Obter informações sobre o áudio", + "Information about the audio file": "Informações sobre o arquivo de áudio", + "Waiting for information...": "Aguardando informações...", + "Model fusion": "Fusão de modelos", + "Weight for Model A": "Peso para o Modelo A", + "Whether the model has pitch guidance": "Se o modelo tem orientação de pitch", + "Model architecture version": "Versão da arquitetura do modelo", + "Path to Model A": "Caminho para o Modelo A", + "Path to Model B": "Caminho para o Modelo B", + "Path to model": "Caminho para o modelo", + "Model information to be placed": "Informações do modelo a ser colocado", + "Fusion": "Fusão", + "Modify model information": "Modificar informações do modelo", + "Path to Model": "Caminho para o modelo", + "Model information to be modified": "Informações do modelo a serem modificadas", + "Save file name": "Salvar nome do arquivo", + "Modify": "Modificar", + "View model information": "Exibir informações do modelo", + "View": "Vista", + "Model extraction": "Extração do modelo", + "Model conversion": "Conversão de modelos", + "Pth file": "Arquivo Pth", + "Output of the pth file": "Saída do arquivo pth", + "# How to Report an Issue on GitHub": "# Como relatar um problema no GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Clique no botão 'Gravar tela' abaixo para começar a gravar o problema que você está enfrentando.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Depois de terminar de gravar o problema, clique no botão 'Parar gravação' (o mesmo botão, mas a etiqueta muda dependendo se você está gravando ativamente ou não).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Vá para [Problemas do GitHub](https://github.com/IAHispano/Applio/issues) e clique no botão 'Novo problema'.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Preencha o modelo de problema fornecido, garantindo incluir detalhes conforme necessário, e utilize a seção de ativos para carregar o arquivo gravado da etapa anterior.", + "Record Screen": "Gravar tela", + "Record": "Gravar", + "Stop Recording": "Parar gravação", + "Introduce the model .pth path": "Apresentar o caminho .pth do modelo", + "See Model Information": "Ver informações do modelo", + "## Download Model": "## Baixar Modelo", + "Model Link": "Link do modelo", + "Introduce the model link": "Apresentar o link do modelo", + "Download Model": "Baixar Modelo", + "## Drop files": "## Soltar arquivos", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Arraste o arquivo .pth e o arquivo .index para este espaço. 
Arraste um e depois o outro.", + "TTS Voices": "Vozes TTS", + "Text to Synthesize": "Texto para sintetizar", + "Enter text to synthesize": "Digite o texto para sintetizar", + "Output Path for TTS Audio": "Caminho de saída para áudio TTS", + "Output Path for RVC Audio": "Caminho de saída para áudio RVC" +} \ No newline at end of file diff --git a/assets/i18n/languages/ro_RO.json b/assets/i18n/languages/ro_RO.json new file mode 100644 index 0000000000000000000000000000000000000000..9d6854e2268214e874aab98460a0383dfbda1870 --- /dev/null +++ b/assets/i18n/languages/ro_RO.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Cel mai bun instrument de clonare a vocii, optimizat meticulos pentru putere, modularitate și experiență ușor de utilizat.", + "This section contains some extra utilities that often may be in experimental phases.": "Această secțiune conține câteva utilități suplimentare care pot fi adesea în faze experimentale.", + "Output Information": "Informații despre ieșire", + "Inference": "Deducţie", + "Train": "Tren", + "Extra": "Superfluu", + "Merge Audios": "Îmbinare audio", + "Processing": "Prelucrare", + "Audio Analyzer": "Analizor audio", + "Model Information": "Informații despre model", + "Download": "Descărca", + "Report a Bug": "Raportați o eroare", + "Preprocess": "Preproces", + "Model Name": "Numele modelului", + "Enter model name": "Introduceți numele modelului", + "Dataset Path": "Calea setului de date", + "Enter dataset path": "Introduceți calea setului de date", + "Sampling Rate": "Rata de eșantionare", + "RVC Version": "Versiunea RVC", + "Preprocess Dataset": "Set de date preproces", + "Extract": "Extract", + "Hop Length": "Lungimea hameiului", + "Batch Size": "Mărimea lotului", + "Save Every Epoch": "Salvați fiecare epocă", + "Total Epoch": "Epoca totală", + "Pretrained": "Preinstruit", + "Save Only Latest": "Salvați numai cele mai recente", + "Save 
Every Weights": "Salvați toate ponderile", + "Custom Pretrained": "Preantrenat personalizat", + "Upload Pretrained Model": "Încărcați modelul preinstruit", + "Pretrained Custom Settings": "Setări personalizate pre-instruite", + "The file you dropped is not a valid pretrained file. Please try again.": "Fișierul pe care l-ați plasat nu este un fișier preinstruit valid. Vă rugăm să încercați din nou.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Faceți clic pe butonul de reîmprospătare pentru a vedea fișierul preantrenat în meniul derulant.", + "Pretrained G Path": "Calea G preantrenat", + "Pretrained D Path": "Calea D preantrenat", + "GPU Settings": "Setări GPU", + "GPU Custom Settings": "Setări personalizate GPU", + "GPU Number": "Număr GPU", + "0 to ∞ separated by -": "0 până la ∞ separate de -", + "GPU Information": "Informații GPU", + "Pitch Guidance": "Pitch Guidance", + "Extract Features": "Extrageți caracteristicile", + "Start Training": "Începeți instruirea", + "Generate Index": "Generare index", + "Voice Model": "Model vocal", + "Index File": "Fișier index", + "Refresh": "Reîmprospătare", + "Unload Voice": "Descărcare voce", + "Single": "Singur", + "Upload Audio": "Încărcare audio", + "Select Audio": "Selectați Audio", + "Advanced Settings": "Setări avansate", + "Clear Outputs (Deletes all audios in assets/audios)": "Ștergeți ieșirile (șterge toate fișierele audio din assets/audios)", + "Custom Output Path": "Cale de ieșire personalizată", + "Output Path": "Cale de ieșire", + "Pitch": "Pitch (înălțimea tonului)", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Dacă >=3: aplicați filtrarea mediană rezultatelor pitch-ului recoltat.
Valoarea reprezintă raza filtrului și poate reduce respirația", + "Search Feature Ratio": "Raportul caracteristicilor de căutare", + "Pitch extraction algorithm": "Algoritm de extracție a pitch-ului", + "Convert": "Converti", + "Export Audio": "Exportați audio", + "Batch": "Lot", + "Input Folder": "Folder de intrare", + "Enter input path": "Introduceți calea de intrare", + "Output Folder": "Dosar de ieșire", + "Enter output path": "Introduceți calea de ieșire", + "Get information about the audio": "Obțineți informații despre audio", + "Information about the audio file": "Informații despre fișierul audio", + "Waiting for information...": "În așteptarea informațiilor...", + "Model fusion": "Fuziunea modelului", + "Weight for Model A": "Greutate pentru modelul A", + "Whether the model has pitch guidance": "Dacă modelul are ghidare pitch", + "Model architecture version": "Versiunea arhitecturii modelului", + "Path to Model A": "Calea către modelul A", + "Path to Model B": "Calea către modelul B", + "Path to model": "Calea către model", + "Model information to be placed": "Informații despre model care trebuie plasate", + "Fusion": "Fuziune", + "Modify model information": "Modificarea informațiilor despre model", + "Path to Model": "Calea către model", + "Model information to be modified": "Informații despre model care trebuie modificate", + "Save file name": "Salvați numele fișierului", + "Modify": "Modifica", + "View model information": "Vizualizarea informațiilor despre model", + "View": "Vedere", + "Model extraction": "Extragerea modelului", + "Model conversion": "Conversia modelului", + "Pth file": "Fișier Pth", + "Output of the pth file": "Ieșirea fișierului pth", + "# How to Report an Issue on GitHub": "# Cum să raportați o problemă pe GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. 
Faceți clic pe butonul \"Ecran de înregistrare\" de mai jos pentru a începe înregistrarea problemei pe care o întâmpinați.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. După ce ați terminat de înregistrat problema, faceți clic pe butonul \"Opriți înregistrarea\" (același buton, dar eticheta se schimbă în funcție de înregistrarea activă sau nu).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Accesați [Probleme GitHub](https://github.com/IAHispano/Applio/issues) și faceți clic pe butonul \"Problemă nouă\".", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Completați șablonul de problemă furnizat, asigurându-vă că includeți detalii după cum este necesar și utilizați secțiunea active pentru a încărca fișierul înregistrat din pasul anterior.", + "Record Screen": "Ecran de înregistrare", + "Record": "Înregistrare", + "Stop Recording": "Opriți înregistrarea", + "Introduce the model .pth path": "Introduceți calea .pth a modelului", + "See Model Information": "Consultați informațiile despre model", + "## Download Model": "## Descărcați modelul", + "Model Link": "Linkul modelului", + "Introduce the model link": "Introduceți linkul modelului", + "Download Model": "Descărcați modelul", + "## Drop files": "## Plasați fișierele", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Glisați fișierul .pth și fișierul .index în acest spațiu.
Trageți unul și apoi celălalt.", + "TTS Voices": "Voci TTS", + "Text to Synthesize": "Text pentru sintetizare", + "Enter text to synthesize": "Introduceți text pentru sintetizare", + "Output Path for TTS Audio": "Cale de ieșire pentru TTS Audio", + "Output Path for RVC Audio": "Cale de ieșire pentru RVC Audio" +} \ No newline at end of file diff --git a/assets/i18n/languages/ru_RU.json b/assets/i18n/languages/ru_RU.json new file mode 100644 index 0000000000000000000000000000000000000000..a41cc55749016e7e85482bd41df367712d394771 --- /dev/null +++ b/assets/i18n/languages/ru_RU.json @@ -0,0 +1,115 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Идеальный инструмент для клонирования голоса, тщательно оптимизированный для непревзойденной мощности, модульности и удобства использования.", + "This section contains some extra utilities that often may be in experimental phases.": "Этот раздел содержит некоторые дополнительные утилиты, которые часто находятся на экспериментальных стадиях.", + "Output Information": "Результат", + "Inference": "Использовать модель", + "Train": "Тренировать модель", + "Extra": "Дополнительно", + "Merge Audios": "Слияние аудио", + "Processing": "Обработка", + "Audio Analyzer": "Анализ Аудио", + "Model Information": "Информация о модели", + "Download": "Загружать", + "Report a Bug": "Сообщить об ошибке", + "Preprocess": "Предварительная обработка", + "Model Name": "Название модели", + "Enter model name": "Введите название модели", + "Dataset Path": "Путь к датасету", + "Enter dataset path": "Введите путь к датасету", + "Sampling Rate": "Частота дискретизации", + "RVC Version": "Версия RVC", + "Preprocess Dataset": "Обработать датасет", + "Extract": "Извлечь черты", + "Hop Length": "Размер шага", + "Batch Size": "Размер батча", + "Save Every Epoch": "Сохранять каждые _ эпох", + "Total Epoch": "Количество эпох", + "Pretrained": "Предварительно обученный", + "Save Only 
Latest": "Сохранить только последний CKPT", + "Save Every Weights": "Сохраняйте все веса", + "Custom Pretrained": "Кастомные претрейны", + "Upload Pretrained Model": "Загрузить претрейн", + "The file you dropped is not a valid pretrained file. Please try again.": "Файл, который вы удалили, не является допустимым предварительно обученным файлом. Повторите попытку.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Нажмите кнопку обновить, чтобы увидеть предварительно обученный файл в раскрывающемся меню.", + "Custom Pretrained G": "Кастомный претрейн G", + "Custom Pretrained D": "Кастомный претрейн D", + "GPU Settings": "Настройки графического процессора", + "GPU Custom Settings": "Пользовательские настройки графического процессора", + "GPU Number": "Номер графического процессора", + "0 to ∞ separated by -": "от 0 до ∞ разделенных -", + "GPU Information": "Информация о графическом процессоре", + "Pitch Guidance": "Модель имеет управление по высоте тона", + "Extract Features": "Извлечь черты", + "Start Training": "Начать Тренировку", + "Generate Index": "Сгенерировать индекс", + "Voice Model": "Голосовая модель", + "Index File": "Файл индекса", + "Refresh": "Обновить список", + "Unload Voice": "Выгрузить модель", + "Single": "Конвертировать одно аудио", + "Upload Audio": "Загрузить аудио", + "Select Audio": "Аудио", + "Advanced Settings": "Расширенные настройки", + "Clear Outputs (Deletes all audios in assets/audios)": "Очистить Папку с Аудио (Удаляет все аудиозаписи в assets/audios)", + "Custom Output Path": "Пользовательский выходной путь", + "Output Path": "Путь вывода", + "Pitch": "Высота тона голоса", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Если >=3: применить медианную фильтрацию к аудио. 
Значение представляет собой радиус фильтра и может уменьшить излишние вздохи и выдохи", + "Search Feature Ratio": "Влияние индекса", + "Pitch extraction algorithm": "Алгоритм извлечения высоты тона", + "Convert": "Конвертировать", + "Export Audio": "Аудио", + "Batch": "Конвертировать несколько аудио", + "Input Folder": "Папка с аудио", + "Enter input path": "Введите путь ввода", + "Output Folder": "Папка вывода аудио", + "Enter output path": "Введите выходной путь", + "Get information about the audio": "Показать информацию об аудио", + "Information about the audio file": "Информация об аудиофайле", + "Waiting for information...": "Ожидание информации...", + "Model fusion (On progress)": "Слияние моделей", + "Weight for Model A": "Вес для модели A", + "Whether the model has pitch guidance": "Имеет ли модель управление по высоте тона", + "Model architecture version": "Версия архитектуры модели", + "Path to Model A": "Путь к модели А", + "Path to Model B": "Путь к модели B", + "Path to model": "Путь к модели", + "Model information to be placed": "Информация о модели", + "Fusion": "Слияние", + "Modify model information": "Изменение информации о модели", + "Path to Model": "Путь к модели", + "Model information to be modified": "Информация о модели, подлежащая изменению", + "Save file name": "Сохранить имя файла", + "Modify": "Модифицировать", + "View model information": "Просмотр информации о модели", + "View": "Показать", + "Model extraction": "Извлечение модели", + "Model conversion": "Преобразование модели", + "Pth file": "Файл Pth", + "Output of the pth file": "Вывод файла pth", + "# How to Report an Issue on GitHub": "# Как сообщить о проблеме на GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Нажмите кнопку «Записать экран» ниже, чтобы начать запись проблемы, с которой вы столкнулись.", + "2.
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. После того, как вы закончили запись проблемы, нажмите кнопку «Остановить запись» (та же кнопка, но метка меняется в зависимости от того, ведете ли вы активную запись или нет).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Перейдите в [GitHub Issues](https://github.com/IAHispano/Applio/issues) и нажмите кнопку «Новая проблема».", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Заполните предоставленный шаблон задачи, не забудьте включить необходимые сведения и используйте раздел ресурсов для загрузки записанного файла с предыдущего шага.", + "Record Screen": "Записать экран", + "Record": "Запись", + "Stop Recording": "Остановить запись", + "Introduce the model .pth path": "Введите путь к файлу модели .pth", + "See Model Information": "Посмотреть информацию о модели", + "## Download Model": "## Скачать модель", + "Model Link": "Ссылка на модель", + "Introduce the model link": "Введите ссылку на модель", + "Download Model": "Скачать Модель", + "## Drop files": "## Загрузить файлы с компьютера", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Перетащите файлы .pth и .index в это пространство.
Перетащите один, а затем другой.", + "TTS Voices": "Голоса TTS", + "Text to Synthesize": "Текст", + "Enter text to synthesize": "Введите текст для синтеза", + "Output Path for TTS Audio": "Путь вывода для TTS аудио", + "Output Path for RVC Audio": "Путь вывода для RVC аудио", + "Split Audio": "Разделить Аудио", + "Refresh Custom Pretraineds": "Обновить список претрейнов", + "Pretrained Custom Settings": "Настройки кастомных претрейнов", + "Image": "Изображение" +} \ No newline at end of file diff --git a/assets/i18n/languages/ta_TA.json b/assets/i18n/languages/ta_TA.json new file mode 100644 index 0000000000000000000000000000000000000000..713a45d8731d77fde1e2423aa7ff76a8bc40ebbc --- /dev/null +++ b/assets/i18n/languages/ta_TA.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "இறுதி குரல் குளோனிங் கருவி, நிகரற்ற சக்தி, மாடுலாரிட்டி மற்றும் பயனர் நட்பு அனுபவத்திற்காக உன்னிப்பாக உகந்ததாக உள்ளது.", + "This section contains some extra utilities that often may be in experimental phases.": "இந்த பிரிவில் சில கூடுதல் பயன்பாடுகள் உள்ளன, அவை பெரும்பாலும் சோதனை கட்டங்களில் இருக்கலாம்.", + "Output Information": "வெளியீட்டு தகவல்", + "Inference": "அனுமானம்", + "Train": "தொடர்வண்டி", + "Extra": "கூடுதல்", + "Merge Audios": "ஆடியோக்களை ஒன்றிணைக்கவும்", + "Processing": "செயலாக்க", + "Audio Analyzer": "ஆடியோ அனலைசர்", + "Model Information": "மாதிரி தகவல்", + "Download": "பதிவிறக்க", + "Report a Bug": "பிழையைப் புகாரளி", + "Preprocess": "முன் செயல்முறை", + "Model Name": "மாடல் பெயர்", + "Enter model name": "மாடல் பெயரை உள்ளிடவும்", + "Dataset Path": "தரவுத்தொகுப்பு பாதை", + "Enter dataset path": "தரவுத்தொகுப்பு பாதையை உள்ளிடவும்", + "Sampling Rate": "மாதிரி மதிப்பீடு", + "RVC Version": "RVC பதிப்பு", + "Preprocess Dataset": "செயல்முறைக்கு முந்தைய தரவுத்தொகுப்பு", + "Extract": "பிரித்தெடுத்தல்", + "Hop Length": "ஹாப் நீளம்", + "Batch Size": "தொகுதி அளவு", + "Save Every Epoch": "ஒவ்வொரு 
சகாப்தத்தையும் காப்பாற்றுங்கள்", + "Total Epoch": "மொத்த சகாப்தம்", + "Pretrained": "முன் பயிற்சி", + "Save Only Latest": "சமீபத்தியதை மட்டும் சேமிக்கவும்", + "Save Every Weights": "ஒவ்வொரு எடைகளையும் சேமிக்கவும்", + "Custom Pretrained": "தனிப்பயன் முன்பயிற்சி", + "Upload Pretrained Model": "முன்பயிற்சி பெற்ற மாதிரியைப் பதிவேற்றவும்", + "Pretrained Custom Settings": "முன் பயிற்சி பெற்ற தனிப்பயன் அமைப்புகள்", + "The file you dropped is not a valid pretrained file. Please try again.": "நீங்கள் கைவிட்ட கோப்பு ஒரு செல்லத்தக்க முன்பயிற்சி பெற்ற கோப்பு அல்ல. மீண்டும் முயற்சிக்கவும்.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "கீழ்தோன்றும் மெனுவில் முன்பயிற்சி பெற்ற கோப்பைக் காண புதுப்பிப்பு பொத்தானைக் கிளிக் செய்யவும்.", + "Pretrained G Path": "தனிப்பயன் முன் பயிற்சி ஜி", + "Pretrained D Path": "தனிப்பயன் முன்பயிற்சி D", + "GPU Settings": "GPU அமைப்புகள்", + "GPU Custom Settings": "GPU தனிப்பயன் அமைப்புகள்", + "GPU Number": "GPU எண்", + "0 to ∞ separated by -": "0 முதல் ∞ வரை பிரிக்கப்பட்டுள்ளது -", + "GPU Information": "தகவல்", + "Pitch Guidance": "சுருதி வழிகாட்டுதல்", + "Extract Features": "பிரித்தெடுக்கும் அம்சங்கள்", + "Start Training": "பயிற்சியைத் தொடங்குங்கள்", + "Generate Index": "குறியீட்டை உருவாக்கவும்", + "Voice Model": "குரல் மாதிரி", + "Index File": "அட்டவணை கோப்பு", + "Refresh": "புதுப்பி", + "Unload Voice": "குரலை இறக்கவும்", + "Single": "ஒற்றை", + "Upload Audio": "ஆடியோவை பதிவேற்றவும்", + "Select Audio": "ஆடியோவை தேர்ந்தெடு", + "Advanced Settings": "மேம்பட்ட அமைப்புகள்", + "Clear Outputs (Deletes all audios in assets/audios)": "வெளியீடுகளை அழிக்கவும் (சொத்துக்கள் / ஆடியோக்களில் உள்ள அனைத்து ஆடியோக்களையும் நீக்குகிறது)", + "Custom Output Path": "தனிப்பயன் வெளியீட்டு பாதை", + "Output Path": "வெளியீட்டுப் பாதை", + "Pitch": "எறி", + "If >=3: apply median filtering to the harvested pitch results. 
The value represents the filter radius and can reduce breathiness": "> = 3 என்றால்: அறுவடை செய்யப்பட்ட சுருதி முடிவுகளுக்கு சராசரி வடிகட்டலைப் பயன்படுத்துங்கள். மதிப்பு வடிகட்டி ஆரம் பிரதிபலிக்கிறது மற்றும் மூச்சு குறைக்க முடியும்", + "Search Feature Ratio": "தேடல் அம்ச விகிதம்", + "Pitch extraction algorithm": "சுருதி பிரித்தெடுத்தல் அல்காரிதம்", + "Convert": "உருமாற்று", + "Export Audio": "ஆடியோவை ஏற்றுமதி செய்யவும்", + "Batch": "தொகுதி", + "Input Folder": "உள்ளீட்டு கோப்புறை", + "Enter input path": "உள்ளீட்டு பாதையை உள்ளிடவும்", + "Output Folder": "வெளியீட்டு கோப்புறை", + "Enter output path": "வெளியீட்டு பாதையை உள்ளிடவும்", + "Get information about the audio": "ஆடியோ பற்றிய தகவலைப் பெறுங்கள்", + "Information about the audio file": "ஆடியோ கோப்பு பற்றிய தகவல்", + "Waiting for information...": "தகவலுக்காக காத்திருக்கிறேன்...", + "Model fusion": "மாதிரி இணைவு", + "Weight for Model A": "மாடல் A க்கான எடை", + "Whether the model has pitch guidance": "மாடலுக்கு சுருதி வழிகாட்டுதல் உள்ளதா", + "Model architecture version": "மாதிரி கட்டிடக்கலை பதிப்பு", + "Path to Model A": "மாதிரி A க்கான பாதை", + "Path to Model B": "மாடல் B க்கான பாதை", + "Path to model": "மாதிரிக்கான பாதை", + "Model information to be placed": "வைக்கப்பட வேண்டிய மாதிரி தகவல்", + "Fusion": "இணைவு", + "Modify model information": "மாதிரி தகவலை மாற்றவும்", + "Path to Model": "மாதிரிக்கான பாதை", + "Model information to be modified": "திருத்தப்பட வேண்டிய மாதிரி தகவல்", + "Save file name": "கோப்பு பெயரை சேமி", + "Modify": "திருத்து", + "View model information": "மாதிரி தகவலைக் காண்க", + "View": "பார்வை", + "Model extraction": "மாதிரி பிரித்தெடுத்தல்", + "Model conversion": "மாதிரி மாற்றம்", + "Pth file": "Pth கோப்பு", + "Output of the pth file": "pth கோப்பின் வெளியீடு", + "# How to Report an Issue on GitHub": "# GitHub இல் ஒரு சிக்கலை எவ்வாறு புகாரளிப்பது", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. 
நீங்கள் அனுபவிக்கும் சிக்கலைப் பதிவு செய்யத் தொடங்க கீழே உள்ள 'ரெக்கார்ட் ஸ்கிரீன்' பொத்தானைக் கிளிக் செய்க.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. நீங்கள் சிக்கலைப் பதிவு செய்து முடித்ததும், 'பதிவு செய்வதை நிறுத்து' பொத்தானைக் கிளிக் செய்க (அதே பொத்தான், ஆனால் நீங்கள் தீவிரமாக பதிவு செய்கிறீர்களா இல்லையா என்பதைப் பொறுத்து லேபிள் மாறுகிறது).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) என்பதற்குச் சென்று 'புதிய சிக்கல்' பொத்தானைக் கிளிக் செய்யவும்.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. வழங்கப்பட்ட சிக்கல் வார்ப்புருவை முடிக்கவும், தேவைக்கேற்ப விவரங்களைச் சேர்ப்பதை உறுதிசெய்து, முந்தைய கட்டத்திலிருந்து பதிவுசெய்யப்பட்ட கோப்பை பதிவேற்ற சொத்துக்கள் பிரிவைப் பயன்படுத்தவும்.", + "Record Screen": "பதிவு திரை", + "Record": "பதிவேடு", + "Stop Recording": "பதிவு செய்வதை நிறுத்து", + "Introduce the model .pth path": "மாதிரியின் .pth பாதையை உள்ளிடவும்", + "See Model Information": "மாதிரி தகவலைப் பார்க்கவும்", + "## Download Model": "## மாதிரியைப் பதிவிறக்கவும்", + "Model Link": "மாதிரி இணைப்பு", + "Introduce the model link": "மாதிரி இணைப்பை உள்ளிடவும்", + "Download Model": "மாதிரியைப் பதிவிறக்கவும்", + "## Drop files": "## கோப்புகளை கைவிடுங்கள்", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "உங்கள் .pth கோப்பு மற்றும் .index கோப்பை இந்த இடத்திற்கு இழுக்கவும்.
ஒன்றை இழுத்து மற்றொன்றை இழுக்கவும்.", + "TTS Voices": "TTS குரல்கள்", + "Text to Synthesize": "தொகுக்க உரை", + "Enter text to synthesize": "ஒருங்கிணைக்க உரையை உள்ளிடவும்", + "Output Path for TTS Audio": "TTS ஆடியோவுக்கான வெளியீட்டு பாதை", + "Output Path for RVC Audio": "RVC ஆடியோவுக்கான வெளியீட்டு பாதை" +} \ No newline at end of file diff --git a/assets/i18n/languages/te_TE.json b/assets/i18n/languages/te_TE.json new file mode 100644 index 0000000000000000000000000000000000000000..476312443c55865af54415abdd8f04303ffd4dc3 --- /dev/null +++ b/assets/i18n/languages/te_TE.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "అల్టిమేట్ వాయిస్ క్లోనింగ్ టూల్, సాటిలేని శక్తి, మాడ్యులారిటీ మరియు వినియోగదారు-స్నేహపూర్వక అనుభవం కోసం జాగ్రత్తగా ఆప్టిమైజ్ చేయబడింది.", + "This section contains some extra utilities that often may be in experimental phases.": "ఈ విభాగంలో కొన్ని అదనపు ఉపయోగాలు ఉన్నాయి, అవి తరచుగా ప్రయోగాత్మక దశలలో ఉండవచ్చు.", + "Output Information": "అవుట్ పుట్ సమాచారం", + "Inference": "అనిమితి", + "Train": "రైలు", + "Extra": "అదనం", + "Merge Audios": "ఆడియోలను విలీనం చేయండి", + "Processing": "ప్రాసెసింగ్", + "Audio Analyzer": "Audio Analyzer", + "Model Information": "నమూనా సమాచారం", + "Download": "డౌన్ లోడ్ చేసుకోండి", + "Report a Bug": "బగ్ ని రిపోర్ట్ చేయండి", + "Preprocess": "ప్రీప్రాసెస్", + "Model Name": "మోడల్ పేరు", + "Enter model name": "మోడల్ పేరు నమోదు చేయండి", + "Dataset Path": "Dataset Path", + "Enter dataset path": "డేటాసెట్ మార్గాన్ని నమోదు చేయండి", + "Sampling Rate": "నమూనా రేటు", + "RVC Version": "RVC Version", + "Preprocess Dataset": "ప్రీప్రాసెస్ Dataset", + "Extract": "ఉద్ధరించు", + "Hop Length": "హాప్ పొడవు", + "Batch Size": "బ్యాచ్ పరిమాణం", + "Save Every Epoch": "ప్రతి యుగాన్ని కాపాడండి", + "Total Epoch": "మొత్తం యుగం", + "Pretrained": "ప్రీ ట్రైనింగ్ చేయబడింది", + "Save Only Latest": "సేవ్ ఓన్లీ లేటెస్ట్", + "Save Every Weights": "ప్రతి బరువులను ఆదా 
చేయండి", + "Custom Pretrained": "Custom Pretrained", + "Upload Pretrained Model": "ప్రీ ట్రైన్డ్ మోడల్ అప్ లోడ్ చేయండి", + "Pretrained Custom Settings": "ముందస్తుగా శిక్షణ పొందిన కస్టమ్ సెట్టింగ్ లు", + "The file you dropped is not a valid pretrained file. Please try again.": "మీరు డ్రాప్ చేసిన ఫైల్ చెల్లుబాటు అయ్యే ముందస్తు శిక్షణ పొందిన ఫైల్ కాదు. దయచేసి మళ్లీ ప్రయత్నించండి.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "డ్రాప్ డౌన్ మెనూలో ముందుగా శిక్షణ పొందిన ఫైల్ ను చూడటం కొరకు రిఫ్రెష్ బటన్ మీద క్లిక్ చేయండి.", + "Pretrained G Path": "Custom Pretrained G", + "Pretrained D Path": "Custom Pretrained D", + "GPU Settings": "GPU సెట్టింగ్ లు", + "GPU Custom Settings": "GPU కస్టమ్ సెట్టింగ్ లు", + "GPU Number": "జిపియు నెంబరు", + "0 to ∞ separated by -": "0 నుండి ∞ వరకు దీని ద్వారా వేరు చేయబడతాయి -", + "GPU Information": "GPU సమాచారం", + "Pitch Guidance": "పిచ్ మార్గదర్శకత్వం", + "Extract Features": "ఎక్స్ ట్రాక్ట్ ఫీచర్లు", + "Start Training": "శిక్షణ ప్రారంభించండి", + "Generate Index": "ఇండెక్స్ జనరేట్ చేయండి", + "Voice Model": "వాయిస్ మోడల్", + "Index File": "ఇండెక్స్ ఫైల్", + "Refresh": "రిఫ్రెష్", + "Unload Voice": "వాయిస్ ను అన్ లోడ్ చేయండి", + "Single": "ఏక", + "Upload Audio": "ఆడియో అప్ లోడ్ చేయండి", + "Select Audio": "ఆడియోను ఎంచుకోండి", + "Advanced Settings": "అధునాతన సెట్టింగ్ లు", + "Clear Outputs (Deletes all audios in assets/audios)": "క్లియర్ అవుట్ పుట్స్ (అసెట్స్/ఆడియోల్లోని అన్ని ఆడియోలను తొలగిస్తుంది)", + "Custom Output Path": "కస్టమ్ అవుట్ పుట్ మార్గం", + "Output Path": "అవుట్ పుట్ మార్గం", + "Pitch": "గొంతు", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "ఒకవేళ >=3 అయితే: పండించిన పిచ్ ఫలితాలకు మధ్యస్థ వడపోతను వర్తింపజేయండి. 
విలువ ఫిల్టర్ వ్యాసార్థాన్ని సూచిస్తుంది మరియు శ్వాసను తగ్గిస్తుంది", + "Search Feature Ratio": "శోధన ఫీచర్ నిష్పత్తి", + "Pitch extraction algorithm": "పిచ్ వెలికితీత అల్గోరిథం", + "Convert": "మార్చండి", + "Export Audio": "Export Audio", + "Batch": "దొంతర", + "Input Folder": "ఇన్ పుట్ ఫోల్డర్", + "Enter input path": "ఇన్ పుట్ మార్గాన్ని నమోదు చేయండి", + "Output Folder": "అవుట్ పుట్ ఫోల్డర్", + "Enter output path": "అవుట్ పుట్ మార్గాన్ని నమోదు చేయండి", + "Get information about the audio": "ఆడియో గురించి సమాచారం పొందండి", + "Information about the audio file": "ఆడియో ఫైలు గురించి సమాచారం", + "Waiting for information...": "సమాచారం కోసం ఎదురుచూస్తూ...", + "Model fusion": "మోడల్ ఫ్యూజన్", + "Weight for Model A": "మోడల్ A కొరకు బరువు", + "Whether the model has pitch guidance": "మోడల్ కు పిచ్ గైడెన్స్ ఉందా", + "Model architecture version": "మోడల్ ఆర్కిటెక్చర్ వెర్షన్", + "Path to Model A": "మోడల్ ఎ కు మార్గం", + "Path to Model B": "మోడల్ బి కి మార్గం", + "Path to model": "మోడల్ కు మార్గం[మార్చు]", + "Model information to be placed": "మోడల్ సమాచారం ఉంచాలి", + "Fusion": "ఫ్యూజన్", + "Modify model information": "మోడల్ సమాచారాన్ని సవరించండి", + "Path to Model": "మోడల్ కు మార్గం[మార్చు]", + "Model information to be modified": "మోడల్ సమాచారాన్ని సవరించాలి", + "Save file name": "ఫైలు పేరును సేవ్ చేయండి", + "Modify": "సవరించండి", + "View model information": "నమూనా సమాచారాన్ని వీక్షించండి", + "View": "దృశ్యం", + "Model extraction": "నమూనా వెలికితీత", + "Model conversion": "నమూనా మార్పిడి", + "Pth file": "PTH ఫైల్", + "Output of the pth file": "పిటిహెచ్ ఫైల్ యొక్క అవుట్ పుట్", + "# How to Report an Issue on GitHub": "# గిట్హబ్లో సమస్యను ఎలా రిపోర్ట్ చేయాలి", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. మీరు ఎదుర్కొంటున్న సమస్యను రికార్డ్ చేయడం ప్రారంభించడానికి దిగువ 'రికార్డ్ స్క్రీన్' బటన్పై క్లిక్ చేయండి.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. మీరు సమస్యను రికార్డ్ చేయడం పూర్తి చేసిన తర్వాత, 'స్టాప్ రికార్డింగ్' బటన్పై క్లిక్ చేయండి (అదే బటన్, కానీ మీరు చురుకుగా రికార్డ్ చేస్తున్నారా లేదా అనే దానిపై ఆధారపడి లేబుల్ మారుతుంది).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [గిట్హబ్ ఇష్యూస్](https://github.com/IAHispano/Applio/issues)లోకి వెళ్లి 'న్యూ ఇష్యూ' బటన్పై క్లిక్ చేయండి.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. ఇచ్చిన ఇష్యూ టెంప్లేట్ ను పూర్తి చేసి, అవసరమైన విధంగా వివరాలను పొందుపర్చేలా చూసుకోవాలి మరియు మునుపటి దశ నుండి రికార్డ్ చేయబడిన ఫైల్ ను అప్ లోడ్ చేయడానికి ఆస్తుల విభాగాన్ని ఉపయోగించండి.", + "Record Screen": "స్క్రీన్ రికార్డ్ చేయండి", + "Record": "నమోదు", + "Stop Recording": "రికార్డింగ్ ఆపండి", + "Introduce the model .pth path": "మోడల్ .pth మార్గాన్ని నమోదు చేయండి", + "See Model Information": "మోడల్ సమాచారం చూడండి", + "## Download Model": "## డౌన్ లోడ్ మోడల్", + "Model Link": "మోడల్ లింక్", + "Introduce the model link": "మోడల్ లింక్ ను నమోదు చేయండి", + "Download Model": "మోడల్ డౌన్ లోడ్ చేయండి", + "## Drop files": "## డ్రాప్ ఫైళ్లు", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "మీ .pth ఫైల్ మరియు .index ఫైల్ ని ఈ స్పేస్ లోకి లాగండి.
ఒకదాన్ని లాగి, తర్వాత మరొకటి లాగండి.", + "TTS Voices": "టిటిఎస్ వాయిస్", + "Text to Synthesize": "సంశ్లేషణ చేయడానికి టెక్స్ట్", + "Enter text to synthesize": "సంశ్లేషణ చేయడానికి టెక్స్ట్ ను నమోదు చేయండి", + "Output Path for TTS Audio": "TTS ఆడియో కొరకు అవుట్ పుట్ మార్గం", + "Output Path for RVC Audio": "RVC ఆడియో కొరకు అవుట్ పుట్ మార్గం" +} \ No newline at end of file diff --git a/assets/i18n/languages/th_TH.json b/assets/i18n/languages/th_TH.json new file mode 100644 index 0000000000000000000000000000000000000000..e887fcd871205455de8a591c81e3b7e63315f0cc --- /dev/null +++ b/assets/i18n/languages/th_TH.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "สุดยอดเครื่องมือเลียนแบบเสียงที่ถูกปรับแต่งเพื่อพลังที่ไม่สามารถเทียบได้, แบ่งเป็นออกเป็นส่วน, และประสบการณ์ผู้ใช้ที่เยี่ยมยอด", + "This section contains some extra utilities that often may be in experimental phases.": "ส่วนนี้ประกอบด้วยอุปกรณ์อำนวยความสะดวกเพื่มเติมที่มักจะอยู่ในขั้นตอนการทดลอง", + "Output Information": "เอาท์พุท", + "Inference": "Inference", + "Train": "Train", + "Extra": "เพิ่มเติม", + "Merge Audios": "รวมไฟล์เสียง", + "Processing": "ประมวลผล", + "Audio Analyzer": "เครื่องมือวิเคราะห์เสียง", + "Model Information": "ข้อมูลโมเดล", + "Download": "ดาวน์โหลด", + "Report a Bug": "รายงานข้อบกพร่อง", + "Preprocess": "Preprocess", + "Model Name": "ชื่อโมเดล", + "Enter model name": "ใส่ชื่อโมเดล", + "Dataset Path": "ที่อยู่ Dataset", + "Enter dataset path": "ป้อนที่อยู่ Dataset", + "Sampling Rate": "Sampling Rate", + "RVC Version": "รุ่น RVC", + "Preprocess Dataset": "Preprocess Dataset", + "Extract": "แยกออกมา", + "Hop Length": "Hop Length", + "Batch Size": "Batch Size", + "Save Every Epoch": "บันทึก Epoch ทั้งหมด", + "Total Epoch": "Epoch ทั้งหมด", + "Pretrained": "Pretrained", + "Save Only Latest": "บันทึกเฉพาะล่าสุด", + "Save Every Weights": "บันทึก Weight ทุกอัน", + "Custom Pretrained": "Pretrained 
กําหนดเอง", + "Upload Pretrained Model": "อัปโหลดโมเดล Pretrained", + "Pretrained Custom Settings": "การตั้งค่า Pretrained เอง", + "The file you dropped is not a valid pretrained file. Please try again.": "ไฟล์ที่คุณให้ไม่ใช่ไฟล์ Pretrained ที่ถูกต้อง โปรดลองอีกครั้ง", + "Click the refresh button to see the pretrained file in the dropdown menu.": "กดปุ่มรีเฟรชเพื่อดู Pretrained ในเมนู", + "Pretrained G Path": "ที่อยู่ Pretrained G", + "Pretrained D Path": "ที่อยู่ Pretrained D", + "GPU Settings": "การตั้งค่า GPU", + "GPU Custom Settings": "การตั้งค่า GPU เอง", + "GPU Number": "เลขที่ GPU", + "0 to ∞ separated by -": "0 ถึง ∞ คั่นด้วย -", + "GPU Information": "ข้อมูล GPU", + "Pitch Guidance": "Pitch Guidance", + "Extract Features": "Extract Features", + "Start Training": "เริ่มการ Train", + "Generate Index": "สร้าง Index", + "Voice Model": "โมเดลเสียง", + "Index File": "ไฟล์ Index", + "Refresh": "รีเฟรช", + "Unload Voice": "ยกเลิกการโหลดเสียง", + "Single": "เดี่ยว", + "Upload Audio": "อัปโหลดเสียง", + "Select Audio": "เลือกเสียง", + "Advanced Settings": "การตั้งค่าขั้นสูง", + "Clear Outputs (Deletes all audios in assets/audios)": "เคลียร์เอาท์พุท (ลบไฟล์เสียงทั้งหมดใน assets/audios)", + "Custom Output Path": "กําหนดที่อยู่เอาท์พุทเอง", + "Output Path": "ที่อยู่เอาท์พุท", + "Pitch": "Pitch", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "If >=3: apply median filtering to the harvested pitch results.
The value represents the filter radius and can reduce breathiness", + "Search Feature Ratio": "Search Feature Ratio", + "Pitch extraction algorithm": "Pitch extraction algorithm", + "Convert": "แปลง", + "Export Audio": "ส่งออกเสียง", + "Batch": "Batch", + "Input Folder": "โฟลเดอร์อินพุต", + "Enter input path": "ป้อนที่อยู่อินพุต", + "Output Folder": "โฟลเดอร์เอาท์พุท", + "Enter output path": "ป้อนที่อยู่เอาท์พุท", + "Get information about the audio": "ดูข้อมูลเกี่ยวกับเสียง", + "Information about the audio file": "ข้อมูลเกี่ยวกับไฟล์เสียง", + "Waiting for information...": "รอข้อมูล...", + "Model fusion": "รวมโมเดล", + "Weight for Model A": "Weight สําหรับรุ่น A", + "Whether the model has pitch guidance": "โมเดลมี Pitch Guidance หรือไม่", + "Model architecture version": "เวอร์ชันสถาปัตยกรรมโมเดล", + "Path to Model A": "ที่อยู่โมเดล A", + "Path to Model B": "ที่อยู่โมเดล B", + "Path to model": "ที่อยู่โมเดล", + "Model information to be placed": "ข้อมูลโมเดลที่จะวาง", + "Fusion": "รวม", + "Modify model information": "ปรับเปลี่ยนข้อมูลโมเดล", + "Path to Model": "ที่อยู่โมเดล", + "Model information to be modified": "ข้อมูลโมเดลที่จะแก้ไข", + "Save file name": "บันทึกชื่อไฟล์", + "Modify": "ดัดแปลง", + "View model information": "ดูข้อมูลโมเดล", + "View": "ดู", + "Model extraction": "Model extraction", + "Model conversion": "Model conversion", + "Pth file": "ไฟล์ Pth", + "Output of the pth file": "เอาต์พุตของไฟล์ pth", + "# How to Report an Issue on GitHub": "# วิธีรายงานปัญหาใน GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. คลิกที่ปุ่ม 'บันทึกหน้าจอ' ด้านล่างเพื่อเริ่มบันทึกปัญหาที่พบ", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. 
เมื่อคุณบันทึกปัญหาเสร็จแล้ว ให้คลิกที่ปุ่ม 'หยุดการบันทึก' (ปุ่มเดิม แต่ป้ายกํากับจะเปลี่ยนขึ้นอยู่กับว่าคุณกําลังอยู่บันทึกอยู่หรือไม่)", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. ไปที่ [GitHub Issues](https://github.com/IAHispano/Applio/issues) และคลิกที่ปุ่ม 'New Issue'", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. กรอกแบบรายงานปัญหาที่ให้มา, ให้แน่ใจว่ามีรายละเอียดตามต้องการ, และอัปโหลดไฟล์ที่บันทึกไว้จากขั้นตอนก่อนหน้า", + "Record Screen": "บันทึกหน้าจอ", + "Record": "บันทึก", + "Stop Recording": "หยุดการบันทึก", + "Introduce the model .pth path": "ใส่ที่อยู่โมเดล .pth", + "See Model Information": "ดูข้อมูลโมเดล", + "## Download Model": "## ดาวน์โหลดโมเดล", + "Model Link": "ลิงค์โมเดล", + "Introduce the model link": "ใส่ลิงค์โมเดล", + "Download Model": "ดาวน์โหลดโมเดล", + "## Drop files": "## วางไฟล์", + "Drag your .pth file and .index file into this space. 
Drag one and then the other.": "ลากไฟล์ .pth และไฟล์ .index ลงในช่องนี้ ลากอันหนึ่งแล้วลากอีกอันหนึ่งตาม", + "TTS Voices": "เสียง TTS", + "Text to Synthesize": "ข้อความที่จะสังเคราะห์", + "Enter text to synthesize": "กรอกข้อความที่จะสังเคราะห์", + "Output Path for TTS Audio": "ที่อยู่เอาต์พุตสําหรับเสียง TTS", + "Output Path for RVC Audio": "ที่อยู่เอาต์พุตสําหรับเสียง RVC" +} diff --git a/assets/i18n/languages/tr_TR.json b/assets/i18n/languages/tr_TR.json new file mode 100644 index 0000000000000000000000000000000000000000..05236a34059a1dde6ecbd4434a37a2a5f4c7e554 --- /dev/null +++ b/assets/i18n/languages/tr_TR.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Rakipsiz güç, modülerlik ve kullanıcı dostu deneyim için titizlikle optimize edilmiş nihai ses klonlama aracı.", + "This section contains some extra utilities that often may be in experimental phases.": "Bu bölüm, genellikle deneysel aşamalarda olabilecek bazı ek yardımcı programlar içerir.", + "Output Information": "Çıktı Bilgileri", + "Inference": "Kesmesi", + "Train": "Tren", + "Extra": "Fazladan", + "Merge Audios": "Sesleri Birleştir", + "Processing": "Işleme", + "Audio Analyzer": "Ses Analizörü", + "Model Information": "Model Bilgileri", + "Download": "İndirmek", + "Report a Bug": "Hata bildir", + "Preprocess": "Preprocess", + "Model Name": "Model Adı", + "Enter model name": "Model adını girin", + "Dataset Path": "Veri Kümesi Yolu", + "Enter dataset path": "Veri kümesi yolunu girme", + "Sampling Rate": "Örnekleme Oranı", + "RVC Version": "RVC Sürümü", + "Preprocess Dataset": "Ön İşlem Veri Kümesi", + "Extract": "Hulâsa", + "Hop Length": "Şerbetçiotu Uzunluğu", + "Batch Size": "Toplu İş Büyüklüğü", + "Save Every Epoch": "Her Çağı Kurtarın", + "Total Epoch": "Toplam Dönem", + "Pretrained": "Önceden eğitilmiş", + "Save Only Latest": "Yalnızca En Sonuncusunu Kaydet", + "Save Every Weights": "Her Ağırlığı 
Kurtarın", + "Custom Pretrained": "Özel Önceden Eğitilmiş", + "Upload Pretrained Model": "Önceden eğitilmiş modeli karşıya yükleme", + "Pretrained Custom Settings": "Önceden Eğitilmiş Özel Ayarlar", + "The file you dropped is not a valid pretrained file. Please try again.": "Bıraktığınız dosya önceden eğitilmiş geçerli bir dosya değil. Lütfen tekrar deneyin.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Açılır menüde önceden eğitilmiş dosyayı görmek için yenile düğmesine tıklayın.", + "Pretrained G Path": "Özel Önceden Eğitilmiş G", + "Pretrained D Path": "Özel Önceden Eğitilmiş D", + "GPU Settings": "GPU Ayarları", + "GPU Custom Settings": "GPU Özel Ayarları", + "GPU Number": "GPU Numarası", + "0 to ∞ separated by -": "0 ila ∞ - ile ayrılmış", + "GPU Information": "GPU Bilgileri", + "Pitch Guidance": "Saha Rehberliği", + "Extract Features": "Özellikleri Çıkar", + "Start Training": "Eğitime Başla", + "Generate Index": "Dizin Oluştur", + "Voice Model": "Ses Modeli", + "Index File": "Dizin Dosyası", + "Refresh": "Yenilemek", + "Unload Voice": "Sesi Kaldır", + "Single": "Tek", + "Upload Audio": "Ses Yükle", + "Select Audio": "Ses Seç", + "Advanced Settings": "Gelişmiş Ayarlar", + "Clear Outputs (Deletes all audios in assets/audios)": "Çıkışları Temizle (Varlıklardaki/seslerdeki tüm sesleri siler)", + "Custom Output Path": "Özel Çıktı Yolu", + "Output Path": "Çıkış Yolu", + "Pitch": "Perde", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": ">=3 ise: hasat edilen adım sonuçlarına medyan filtreleme uygulayın. 
Değer, filtre yarıçapını temsil eder ve nefes almayı azaltabilir", + "Search Feature Ratio": "Arama Özelliği Oranı", + "Pitch extraction algorithm": "Pitch çıkarma algoritması", + "Convert": "Dönüştürmek", + "Export Audio": "Sesi Dışa Aktar", + "Batch": "Toplu iş", + "Input Folder": "Giriş Klasörü", + "Enter input path": "Giriş yolunu girin", + "Output Folder": "Çıktı Klasörü", + "Enter output path": "Çıkış yolunu girin", + "Get information about the audio": "Ses hakkında bilgi alın", + "Information about the audio file": "Ses dosyası hakkında bilgi", + "Waiting for information...": "Bilgi bekliyorum...", + "Model fusion": "Model füzyonu", + "Weight for Model A": "Model A için ağırlık", + "Whether the model has pitch guidance": "Modelin perde kılavuzuna sahip olup olmadığı", + "Model architecture version": "Model mimarisi sürümü", + "Path to Model A": "Model A'ya Giden Yol", + "Path to Model B": "Model B'ye Giden Yol", + "Path to model": "Modele giden yol", + "Model information to be placed": "Yerleştirilecek model bilgileri", + "Fusion": "Füzyon", + "Modify model information": "Model bilgilerini değiştirme", + "Path to Model": "Modele Giden Yol", + "Model information to be modified": "Değiştirilecek model bilgileri", + "Save file name": "Dosya adını kaydet", + "Modify": "Değiştirmek", + "View model information": "Model bilgilerini görüntüleme", + "View": "Görünüm", + "Model extraction": "Model ayıklama", + "Model conversion": "Model dönüştürme", + "Pth file": "Pth dosyası", + "Output of the pth file": "pth dosyasının çıktısı", + "# How to Report an Issue on GitHub": "# GitHub'da Bir Sorun Nasıl Bildirilir", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Yaşadığınız sorunu kaydetmeye başlamak için aşağıdaki 'Kayıt Ekranı' düğmesine tıklayın.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Sorunu kaydetmeyi bitirdikten sonra, 'Kaydı Durdur' düğmesine tıklayın (aynı düğme, ancak aktif olarak kayıt yapıp yapmadığınıza bağlı olarak etiket değişir).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Sorunları](https://github.com/IAHispano/Applio/issues) bölümüne gidin ve 'Yeni Sorun' düğmesine tıklayın.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Sağlanan sorun şablonunu tamamlayın, gerektiğinde ayrıntıları eklediğinizden emin olun ve önceki adımdan kaydedilen dosyayı yüklemek için varlıklar bölümünü kullanın.", + "Record Screen": "Kayıt Ekranı", + "Record": "Kayıt", + "Stop Recording": "Kaydı durdur", + "Introduce the model .pth path": "Model .pth yolunu tanıtma", + "See Model Information": "Model Bilgilerine Bakın", + "## Download Model": "## Modeli İndir", + "Model Link": "Model Bağlantısı", + "Introduce the model link": "Model bağlantısını tanıtın", + "Download Model": "Modeli İndir", + "## Drop files": "## Dosyaları bırak", + "Drag your .pth file and .index file into this space. Drag one and then the other.": ".pth dosyanızı ve .index dosyanızı bu alana sürükleyin. 
Birini ve ardından diğerini sürükleyin.", + "TTS Voices": "TTS Sesleri", + "Text to Synthesize": "Sentezlenecek Metin", + "Enter text to synthesize": "Sentezlenecek metni girin", + "Output Path for TTS Audio": "TTS Audio için Çıkış Yolu", + "Output Path for RVC Audio": "RVC Ses için Çıkış Yolu" +} \ No newline at end of file diff --git a/assets/i18n/languages/uk_UK.json b/assets/i18n/languages/uk_UK.json new file mode 100644 index 0000000000000000000000000000000000000000..e82ce6bc9c04cbe5c1a02995f51a9b52538e563b --- /dev/null +++ b/assets/i18n/languages/uk_UK.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Найкращий інструмент для клонування голосу, ретельно оптимізований для неперевершеної потужності, модульності та зручного використання.", + "This section contains some extra utilities that often may be in experimental phases.": "Цей розділ містить деякі додаткові утиліти, які часто можуть перебувати на експериментальних стадіях.", + "Output Information": "Вихідна інформація", + "Inference": "Висновок", + "Train": "Поїзд", + "Extra": "Додаткові", + "Merge Audios": "Об'єднання аудіо", + "Processing": "Обробки", + "Audio Analyzer": "Аналізатор звуку", + "Model Information": "Інформація про модель", + "Download": "Завантажити", + "Report a Bug": "Повідомити про помилку", + "Preprocess": "Попередня обробка", + "Model Name": "Назва моделі", + "Enter model name": "Введіть назву моделі", + "Dataset Path": "Шлях набору даних", + "Enter dataset path": "Введіть шлях набору даних", + "Sampling Rate": "Частота дискретизації", + "RVC Version": "Версія RVC", + "Preprocess Dataset": "Набір даних попередньої обробки", + "Extract": "Екстракт", + "Hop Length": "Довжина хмелю", + "Batch Size": "Розмір партії", + "Save Every Epoch": "Врятуйте кожну епоху", + "Total Epoch": "Ціла епоха", + "Pretrained": "Попереднє навчання", + "Save Only Latest": "Зберігайте лише останні", + "Save Every 
Weights": "Збережіть кожну вагу", + "Custom Pretrained": "Індивідуальне попереднє навчання", + "Upload Pretrained Model": "Завантажте попередньо навчену модель", + "Pretrained Custom Settings": "Попередньо навчені користувацькі налаштування", + "The file you dropped is not a valid pretrained file. Please try again.": "Файл, який ви скинули, не є дійсним попередньо навченим файлом. Будь ласка, спробуйте ще раз.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Натисніть кнопку «Оновити», щоб переглянути попередньо підготовлений файл у розкривному меню.", + "Pretrained G Path": "Спеціальна попередньо навчена G", + "Pretrained D Path": "Спеціальний попередньо навчений D", + "GPU Settings": "Налаштування графічного процесора", + "GPU Custom Settings": "Користувацькі налаштування графічного процесора", + "GPU Number": "Номер графічного процесора", + "0 to ∞ separated by -": "від 0 до ∞ розділені -", + "GPU Information": "Інформація про графічний процесор", + "Pitch Guidance": "Керівництво пітчем", + "Extract Features": "особливості витягу", + "Start Training": "Почати навчання", + "Generate Index": "Згенерувати індекс", + "Voice Model": "Голосова модель", + "Index File": "Індексний файл", + "Refresh": "Оновити", + "Unload Voice": "Вивантажити голос", + "Single": "Одного", + "Upload Audio": "Завантажити аудіо", + "Select Audio": "Виберіть Аудіо", + "Advanced Settings": "Розширені налаштування", + "Clear Outputs (Deletes all audios in assets/audios)": "Очистити виходи (видаляє всі аудіозаписи в ресурсах/аудіозаписах)", + "Custom Output Path": "Власний вихідний шлях", + "Output Path": "Вихідний шлях", + "Pitch": "Крок", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Якщо >=3: застосуйте медіанну фільтрацію до отриманих результатів смоли. 
Значення являє собою радіус фільтра і може зменшити задишку", + "Search Feature Ratio": "Співвідношення функцій пошуку", + "Pitch extraction algorithm": "Алгоритм вилучення кроку", + "Convert": "Перетворити", + "Export Audio": "Експорт аудіо", + "Batch": "Пакетний", + "Input Folder": "Папка введення", + "Enter input path": "Введіть вхідний шлях", + "Output Folder": "Вихідна папка", + "Enter output path": "Введіть вихідний шлях", + "Get information about the audio": "Отримання інформації про аудіо", + "Information about the audio file": "Інформація про аудіофайл", + "Waiting for information...": "Чекаємо на інформацію...", + "Model fusion": "Злиття моделей", + "Weight for Model A": "Вага для моделі А", + "Whether the model has pitch guidance": "Чи має модель наведення висоти тону", + "Model architecture version": "Версія архітектури моделі", + "Path to Model A": "Шлях до моделі А", + "Path to Model B": "Шлях до моделі B", + "Path to model": "Шлях до моделі", + "Model information to be placed": "Інформація про модель, яку потрібно розмістити", + "Fusion": "Злиття", + "Modify model information": "Змінення інформації про модель", + "Path to Model": "Шлях до моделі", + "Model information to be modified": "Інформація про модель, яку потрібно змінити", + "Save file name": "Зберегти ім'я файлу", + "Modify": "Змінити", + "View model information": "Перегляд інформації про модель", + "View": "Вид", + "Model extraction": "Вилучення моделі", + "Model conversion": "Перетворення моделі", + "Pth file": "Pth файл", + "Output of the pth file": "Висновок p-го файлу", + "# How to Report an Issue on GitHub": "# Як повідомити про проблему на GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Натисніть кнопку «Записати екран» нижче, щоб почати запис проблеми, з якою ви зіткнулися.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Після того, як ви закінчите запис проблеми, натисніть кнопку «Зупинити запис» (та сама кнопка, але мітка змінюється залежно від того, активно ви записуєте чи ні).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Перейдіть до [GitHub Issues](https://github.com/IAHispano/Applio/issues) і натисніть кнопку «Новий випуск».", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Заповніть наданий шаблон проблеми, обов'язково включивши деталі за потреби, і скористайтеся розділом активів, щоб завантажити записаний файл з попереднього кроку.", + "Record Screen": "Екран запису", + "Record": "Запис", + "Stop Recording": "Зупинити записування", + "Introduce the model .pth path": "Представляємо модель .pth шляху", + "See Model Information": "Переглянути інформацію про модель", + "## Download Model": "## Завантажити модель", + "Model Link": "Посилання на модель", + "Introduce the model link": "Ознайомлення з моделлю", + "Download Model": "Завантажити модель", + "## Drop files": "## Скиньте файли", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Перетягніть файл .pth і файл .index у цей простір. 
Перетягніть одну, а потім іншу.", + "TTS Voices": "Голоси TTS", + "Text to Synthesize": "Текст для синтезу", + "Enter text to synthesize": "Введіть текст для синтезу", + "Output Path for TTS Audio": "Вихідний шлях для аудіо TTS", + "Output Path for RVC Audio": "Вихідний тракт для аудіо RVC" +} \ No newline at end of file diff --git a/assets/i18n/languages/ur_UR.json b/assets/i18n/languages/ur_UR.json new file mode 100644 index 0000000000000000000000000000000000000000..acc064fa36e293b4685dafe5d09765a70dac3a9a --- /dev/null +++ b/assets/i18n/languages/ur_UR.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "حتمی صوتی کلوننگ ٹول ، بے مثال طاقت ، ماڈیولریٹی ، اور صارف دوست تجربے کے لئے احتیاط سے بہتر بنایا گیا ہے۔", + "This section contains some extra utilities that often may be in experimental phases.": "اس سیکشن میں کچھ اضافی افادیت شامل ہیں جو اکثر تجرباتی مراحل میں ہوسکتی ہیں۔", + "Output Information": "آؤٹ پٹ معلومات", + "Inference": "استدلال", + "Train": "ٹرین", + "Extra": "اضافی", + "Merge Audios": "آڈیو کو ضم کریں", + "Processing": "پروسیسنگ", + "Audio Analyzer": "Audio Analyzer", + "Model Information": "ماڈل کی معلومات", + "Download": "ڈاؤن لوڈ", + "Report a Bug": "ایک بگ کی رپورٹ کریں", + "Preprocess": "پری پروسیس", + "Model Name": "ماڈل کا نام", + "Enter model name": "ماڈل کا نام درج کریں", + "Dataset Path": "ڈیٹا سیٹ کا راستہ", + "Enter dataset path": "ڈیٹا سیٹ کا راستہ درج کریں", + "Sampling Rate": "نمونے لینے کی شرح", + "RVC Version": "RVC Version", + "Preprocess Dataset": "پری پروسیس ڈیٹا سیٹ", + "Extract": "نکالنا", + "Hop Length": "ہاپ کی لمبائی", + "Batch Size": "Batch کا سائز", + "Save Every Epoch": "ہر دور کو محفوظ کریں", + "Total Epoch": "مجموعی دور کی نوعیت", + "Pretrained": "پہلے سے تربیت یافتہ", + "Save Only Latest": "صرف تازہ ترین محفوظ کریں", + "Save Every Weights": "ہر وزن کو بچائیں", + "Custom Pretrained": "اپنی مرضی کے مطابق پہلے سے تربیت یافتہ", 
+ "Upload Pretrained Model": "پہلے سے تربیت یافتہ ماڈل اپ لوڈ کریں", + "Pretrained Custom Settings": "پہلے سے تربیت یافتہ کسٹم ترتیبات", + "The file you dropped is not a valid pretrained file. Please try again.": "آپ نے جو فائل چھوڑی ہے وہ درست پہلے سے تربیت یافتہ فائل نہیں ہے۔ براہ مہربانی دوبارہ کوشش کریں۔", + "Click the refresh button to see the pretrained file in the dropdown menu.": "ڈراپ ڈاؤن مینو میں پہلے سے تربیت یافتہ فائل دیکھنے کے لئے ریفریش بٹن پر کلک کریں۔", + "Pretrained G Path": "اپنی مرضی کے مطابق پہلے سے تربیت یافتہ G", + "Pretrained D Path": "اپنی مرضی کے مطابق پہلے سے تربیت یافتہ D", + "GPU Settings": "GPU Settings", + "GPU Custom Settings": "GPU اپنی مرضی کے مطابق ترتیبات", + "GPU Number": "GPU نمبر", + "0 to ∞ separated by -": "0 سے الگ ∞ -", + "GPU Information": "GPU Information", + "Pitch Guidance": "پچ گائیڈنس", + "Extract Features": "نکالنے کی خصوصیات", + "Start Training": "تربیت شروع کریں", + "Generate Index": "انڈیکس پیدا کریں", + "Voice Model": "صوتی ماڈل", + "Index File": "انڈیکس فائل", + "Refresh": "تازہ", + "Unload Voice": "آواز کو ان لوڈ کریں", + "Single": "تنہا", + "Upload Audio": "آڈیو اپ لوڈ کریں", + "Select Audio": "آڈیو منتخب کریں", + "Advanced Settings": "اعلی درجے کی ترتیبات", + "Clear Outputs (Deletes all audios in assets/audios)": "آؤٹ پٹ صاف کریں (اثاثوں / آڈیو میں تمام آڈیو حذف کرتا ہے)", + "Custom Output Path": "اپنی مرضی کے مطابق آؤٹ پٹ پتھ", + "Output Path": "آؤٹ پٹ پتھ", + "Pitch": "پچ", + "If >=3: apply median filtering to the harvested pitch results. 
The value represents the filter radius and can reduce breathiness": "اگر > = 3 ہے: کٹائی شدہ پچ کے نتائج پر اوسط فلٹرنگ کا اطلاق کریں۔ قیمت فلٹر کے دائرے کی نمائندگی کرتی ہے اور سانس لینے کو کم کر سکتی ہے", + "Search Feature Ratio": "تلاش کی خصوصیت کا تناسب", + "Pitch extraction algorithm": "پچ نکالنے کا الگورتھم", + "Convert": "بدلیں", + "Export Audio": "آڈیو برآمد کریں", + "Batch": "بیچ", + "Input Folder": "ان پٹ فولڈر", + "Enter input path": "ان پٹ راستہ درج کریں", + "Output Folder": "آؤٹ پٹ فولڈر", + "Enter output path": "آؤٹ پٹ کا راستہ درج کریں", + "Get information about the audio": "آڈیو کے بارے میں معلومات حاصل کریں", + "Information about the audio file": "آڈیو فائل کے بارے میں معلومات", + "Waiting for information...": "معلومات کا انتظار ہے...", + "Model fusion": "ماڈل فیوژن", + "Weight for Model A": "ماڈل اے کے لئے وزن", + "Whether the model has pitch guidance": "کیا ماڈل کے پاس پچ گائیڈنس ہے", + "Model architecture version": "ماڈل آرکیٹیکچر ورژن", + "Path to Model A": "ماڈل اے کا راستہ", + "Path to Model B": "ماڈل بی کا راستہ", + "Path to model": "ماڈل کا راستہ", + "Model information to be placed": "ماڈل کی معلومات رکھی جائے گی", + "Fusion": "فیوژن", + "Modify model information": "ماڈل کی معلومات میں ترمیم کریں", + "Path to Model": "ماڈل کا راستہ", + "Model information to be modified": "ماڈل کی معلومات میں ترمیم کی جائے گی", + "Save file name": "فائل کا نام محفوظ کریں", + "Modify": "ترمیم", + "View model information": "ماڈل کی معلومات دیکھیں", + "View": "منظر", + "Model extraction": "ماڈل نکالنے", + "Model conversion": "ماڈل کی تبدیلی", + "Pth file": "پی ٹی ایچ فائل", + "Output of the pth file": "پی ٹی ایچ فائل کی آؤٹ پٹ", + "# How to Report an Issue on GitHub": "# گیٹ ہب پر کسی مسئلے کی اطلاع کیسے دیں", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. آپ جس مسئلے کا سامنا کر رہے ہیں اسے ریکارڈ کرنا شروع کرنے کے لئے نیچے 'ریکارڈ اسکرین' بٹن پر کلک کریں۔", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. ایک بار جب آپ مسئلے کی ریکارڈنگ مکمل کرلیں تو ، 'اسٹاپ ریکارڈنگ' بٹن پر کلک کریں (وہی بٹن ، لیکن لیبل اس بات پر منحصر ہے کہ آپ فعال طور پر ریکارڈنگ کر رہے ہیں یا نہیں)۔", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [گیٹ ہب ایشوز] (https://github.com/IAHispano/Applio/issues) پر جائیں اور 'نیا مسئلہ' بٹن پر کلک کریں۔", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. فراہم کردہ ایشو ٹیمپلیٹ کو مکمل کریں ، ضرورت کے مطابق تفصیلات شامل کرنے کو یقینی بنائیں ، اور پچھلے مرحلے سے ریکارڈ شدہ فائل کو اپ لوڈ کرنے کے لئے اثاثوں کے سیکشن کا استعمال کریں۔", + "Record Screen": "ریکارڈ اسکرین", + "Record": "ریکارڈ", + "Stop Recording": "ریکارڈنگ بند کریں", + "Introduce the model .pth path": "ماڈل .pth پتھ متعارف کروائیں", + "See Model Information": "ماڈل کی معلومات دیکھیں", + "## Download Model": "## ڈاؤن لوڈ ماڈل", + "Model Link": "ماڈل لنک", + "Introduce the model link": "ماڈل کا لنک متعارف کروائیں", + "Download Model": "ڈاؤن لوڈ ماڈل", + "## Drop files": "## فائلیں چھوڑ دیں", + "Drag your .pth file and .index file into this space. 
Drag one and then the other.": "اپنی .pth فائل اور .انڈیکس فائل کو اس جگہ میں گھسیٹیں۔ ایک کو گھسیٹیں اور پھر دوسرے کو۔", + "TTS Voices": "ٹی ٹی ایس وائسز", + "Text to Synthesize": "ترکیب کرنے کے لئے متن", + "Enter text to synthesize": "ترتیب دینے کے لئے متن درج کریں", + "Output Path for TTS Audio": "ٹی ٹی ایس آڈیو کے لئے آؤٹ پٹ پتھ", + "Output Path for RVC Audio": "آر وی سی آڈیو کے لئے آؤٹ پٹ پتھ" +} \ No newline at end of file diff --git a/assets/i18n/languages/vi_VI.json b/assets/i18n/languages/vi_VI.json new file mode 100644 index 0000000000000000000000000000000000000000..fc55e0bb03462f5a5bacfcfaba37aa0d68b6e854 --- /dev/null +++ b/assets/i18n/languages/vi_VI.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Công cụ nhân bản giọng nói tối ưu, được tối ưu hóa tỉ mỉ cho sức mạnh vô song, tính mô-đun và trải nghiệm thân thiện với người dùng.", + "This section contains some extra utilities that often may be in experimental phases.": "Phần này chứa một số tiện ích bổ sung thường có thể đang trong giai đoạn thử nghiệm.", + "Output Information": "Thông tin đầu ra", + "Inference": "Suy luận", + "Train": "Xe lửa", + "Extra": "Phụ", + "Merge Audios": "Hợp nhất âm thanh", + "Processing": "Xử lý", + "Audio Analyzer": "Máy phân tích âm thanh", + "Model Information": "Thông tin mô hình", + "Download": "Tải xuống", + "Report a Bug": "Báo cáo lỗi", + "Preprocess": "Tiền xử lý", + "Model Name": "Tên Model", + "Enter model name": "Nhập tên model", + "Dataset Path": "Đường dẫn tập dữ liệu", + "Enter dataset path": "Nhập đường dẫn tập dữ liệu", + "Sampling Rate": "Tỷ lệ lấy mẫu", + "RVC Version": "Phiên bản RVC", + "Preprocess Dataset": "Tập dữ liệu tiền xử lý", + "Extract": "Trích", + "Hop Length": "Chiều dài hop", + "Batch Size": "Kích thước lô", + "Save Every Epoch": "Lưu mọi kỷ nguyên", + "Total Epoch": "Tổng kỷ nguyên", + "Pretrained": "Đào tạo trước", + "Save Only Latest": "Chỉ 
lưu mới nhất", + "Save Every Weights": "Tiết kiệm mọi trọng lượng", + "Custom Pretrained": "Đào tạo trước tùy chỉnh", + "Upload Pretrained Model": "Tải lên mô hình được đào tạo trước", + "Pretrained Custom Settings": "Cài đặt tùy chỉnh được đào tạo sẵn", + "The file you dropped is not a valid pretrained file. Please try again.": "Tệp bạn đã bỏ không phải là tệp được đào tạo trước hợp lệ. Vui lòng thử lại.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Nhấp vào nút làm mới để xem tệp được đào tạo trước trong menu thả xuống.", + "Pretrained G Path": "Tùy chỉnh được đào tạo trước G", + "Pretrained D Path": "Tùy chỉnh được đào tạo trước D", + "GPU Settings": "Cài đặt GPU", + "GPU Custom Settings": "Cài đặt tùy chỉnh GPU", + "GPU Number": "Số GPU", + "0 to ∞ separated by -": "0 đến ∞ cách nhau bởi -", + "GPU Information": "Thông tin GPU", + "Pitch Guidance": "Hướng dẫn quảng cáo chiêu hàng", + "Extract Features": "Tính năng trích xuất", + "Start Training": "Bắt đầu đào tạo", + "Generate Index": "Tạo chỉ mục", + "Voice Model": "Mô hình giọng nói", + "Index File": "Tệp chỉ mục", + "Refresh": "Làm tươi", + "Unload Voice": "Dỡ giọng nói", + "Single": "Đơn", + "Upload Audio": "Tải lên âm thanh", + "Select Audio": "Chọn Âm thanh", + "Advanced Settings": "Cài đặt nâng cao", + "Clear Outputs (Deletes all audios in assets/audios)": "Xóa đầu ra (Xóa tất cả âm thanh trong nội dung / âm thanh)", + "Custom Output Path": "Đường dẫn đầu ra tùy chỉnh", + "Output Path": "Đường dẫn đầu ra", + "Pitch": "Sân", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Nếu >=3: áp dụng lọc trung bình cho kết quả cao độ đã thu hoạch. 
Giá trị đại diện cho bán kính bộ lọc và có thể làm giảm hơi thở", + "Search Feature Ratio": "Tỷ lệ tính năng tìm kiếm", + "Pitch extraction algorithm": "Thuật toán trích xuất cao độ", + "Convert": "Convert", + "Export Audio": "Xuất âm thanh", + "Batch": "Mẻ", + "Input Folder": "Thư mục đầu vào", + "Enter input path": "Nhập đường dẫn nhập liệu", + "Output Folder": "Thư mục đầu ra", + "Enter output path": "Nhập đường dẫn đầu ra", + "Get information about the audio": "Nhận thông tin về âm thanh", + "Information about the audio file": "Thông tin về tệp âm thanh", + "Waiting for information...": "Đang chờ thông tin...", + "Model fusion": "Hợp nhất mô hình", + "Weight for Model A": "Trọng lượng cho Model A", + "Whether the model has pitch guidance": "Mô hình có hướng dẫn cao độ hay không", + "Model architecture version": "Phiên bản kiến trúc mô hình", + "Path to Model A": "Đường dẫn đến Mẫu A", + "Path to Model B": "Đường dẫn đến Mẫu B", + "Path to model": "Đường dẫn đến mô hình", + "Model information to be placed": "Thông tin mô hình sẽ được đặt", + "Fusion": "Fusion", + "Modify model information": "Sửa đổi thông tin mô hình", + "Path to Model": "Đường dẫn đến mô hình", + "Model information to be modified": "Thông tin mô hình sẽ được sửa đổi", + "Save file name": "Lưu tên tệp", + "Modify": "Thay đổi", + "View model information": "Xem thông tin mô hình", + "View": "Cảnh", + "Model extraction": "Trích xuất mô hình", + "Model conversion": "Chuyển đổi mô hình", + "Pth file": "Tệp Pth", + "Output of the pth file": "Đầu ra của tệp pth", + "# How to Report an Issue on GitHub": "# Cách báo cáo sự cố trên GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Nhấp vào nút 'Ghi lại màn hình' bên dưới để bắt đầu ghi lại sự cố bạn đang gặp phải.", + "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Khi bạn đã ghi xong sự cố, hãy nhấp vào nút 'Dừng ghi' (cùng một nút, nhưng nhãn thay đổi tùy thuộc vào việc bạn có chủ động ghi hay không).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Đi tới [Vấn đề GitHub] (https://github.com/IAHispano/Applio/issues) và nhấp vào nút 'Vấn đề mới'.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Hoàn thành mẫu vấn đề được cung cấp, đảm bảo bao gồm các chi tiết khi cần thiết và sử dụng phần tài sản để tải lên tệp đã ghi từ bước trước.", + "Record Screen": "Ghi lại màn hình", + "Record": "Ghi", + "Stop Recording": "Dừng ghi", + "Introduce the model .pth path": "Giới thiệu mô hình đường dẫn .pth", + "See Model Information": "Xem thông tin mô hình", + "## Download Model": "## Tải xuống mô hình", + "Model Link": "Liên kết mô hình", + "Introduce the model link": "Giới thiệu link mô hình", + "Download Model": "Tải xuống mô hình", + "## Drop files": "## Thả tệp", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Kéo tệp .pth và tệp .index của bạn vào không gian này. 
Kéo cái này rồi cái kia.", + "TTS Voices": "Tiếng nói TTS", + "Text to Synthesize": "Văn bản để tổng hợp", + "Enter text to synthesize": "Nhập văn bản để tổng hợp", + "Output Path for TTS Audio": "Đường dẫn đầu ra cho âm thanh TTS", + "Output Path for RVC Audio": "Đường dẫn đầu ra cho âm thanh RVC" +} \ No newline at end of file diff --git a/assets/i18n/languages/wu_WU.json b/assets/i18n/languages/wu_WU.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/assets/i18n/languages/wu_WU.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/assets/i18n/languages/zh_CN.json b/assets/i18n/languages/zh_CN.json new file mode 100644 index 0000000000000000000000000000000000000000..f755d9e4a2960e54ca7d3af566626a3b8e491ea3 --- /dev/null +++ b/assets/i18n/languages/zh_CN.json @@ -0,0 +1,112 @@ +{ + "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "终极语音克隆工具,经过精心优化,具有无与伦比的功能、模块化和用户友好的体验。", + "This section contains some extra utilities that often may be in experimental phases.": "本节包含一些额外的实用程序,这些实用程序通常处于实验阶段。", + "Output Information": "输出信息", + "Inference": "推理", + "Train": "火车", + "Extra": "额外", + "Merge Audios": "合并音频", + "Processing": "加工", + "Audio Analyzer": "音频分析仪", + "Model Information": "型号信息", + "Download": "下载", + "Report a Bug": "报告错误", + "Preprocess": "预处理", + "Model Name": "型号名称", + "Enter model name": "输入型号名称", + "Dataset Path": "数据集路径", + "Enter dataset path": "输入数据集路径", + "Sampling Rate": "采样率", + "RVC Version": "RVC 版本", + "Preprocess Dataset": "预处理数据集", + "Extract": "提取", + "Hop Length": "跳跃长度", + "Batch Size": "批量大小", + "Save Every Epoch": "保存每个纪元", + "Total Epoch": "总纪元", + "Pretrained": "预训练", + "Save Only Latest": "仅保存最新", + "Save Every Weights": "节省每一次砝码", + "Custom Pretrained": "自定义预训练", + "Upload Pretrained Model": "上传预训练模型", + "Pretrained Custom Settings": "预训练的自定义设置", + "The file you dropped is 
not a valid pretrained file. Please try again.": "您拖放的文件不是有效的预训练文件。请再试一次。", + "Click the refresh button to see the pretrained file in the dropdown menu.": "单击刷新按钮,在下拉菜单中查看预训练文件。", + "Pretrained G Path": "自定义预训练 G", + "Pretrained D Path": "自定义预训练 D", + "GPU Settings": "GPU 设置", + "GPU Custom Settings": "GPU 自定义设置", + "GPU Number": "GPU 数量", + "0 to ∞ separated by -": "0 到 ∞,用 - 分隔", + "GPU Information": "GPU 信息", + "Pitch Guidance": "音高指导", + "Extract Features": "提取特征", + "Start Training": "开始训练", + "Generate Index": "生成索引", + "Voice Model": "语音模型", + "Index File": "Index 文件", + "Refresh": "刷新", + "Unload Voice": "卸载语音", + "Single": "单", + "Upload Audio": "上传音频", + "Select Audio": "选择音频", + "Advanced Settings": "高级设置", + "Clear Outputs (Deletes all audios in assets/audios)": "清除输出(删除 assets/audios 中的所有音频)", + "Custom Output Path": "自定义输出路径", + "Output Path": "输出路径", + "Pitch": "音高", + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "如果 >=3:对收获的音高结果应用中值滤波。该值表示过滤器半径,可以减少气声", + "Search Feature Ratio": "搜索特征比率", + "Pitch extraction algorithm": "音高提取算法", + "Convert": "转换", + "Export Audio": "导出音频", + "Batch": "批", + "Input Folder": "输入文件夹", + "Enter input path": "输入输入路径", + "Output Folder": "输出文件夹", + "Enter output path": "输入输出路径", + "Get information about the audio": "获取有关音频的信息", + "Information about the audio file": "有关音频文件的信息", + "Waiting for information...": "等待信息...", + "Model fusion": "模型融合", + "Weight for Model A": "模型 A 的权重", + "Whether the model has pitch guidance": "模型是否具有音高指导", + "Model architecture version": "模型架构版本", + "Path to Model A": "模型 A 的路径", + "Path to Model B": "模型 B 的路径", + "Path to model": "模型路径", + "Model information to be placed": "要放置的模型信息", + "Fusion": "融合", + "Modify model information": "修改模型信息", + "Path to Model": "模型路径", + "Model information to be modified": "要修改的模型信息", + "Save file name": "保存文件名", + "Modify": "修改", + "View model information": "查看模型信息", + "View": "视图", +
"Model extraction": "模型提取", + "Model conversion": "模型转换", + "Pth file": "Pth 文件", + "Output of the pth file": "pth 文件的输出", + "# How to Report an Issue on GitHub": "# 如何在 GitHub 上报告问题", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. 单击下面的“录制屏幕”按钮开始记录您遇到的问题。", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. 录制完问题后,单击“停止录制”按钮(相同的按钮,但标签会根据您是否正在录制而变化)。", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. 转到 [GitHub 问题](https://github.com/IAHispano/Applio/issues),然后单击“新问题”按钮。", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 填写提供的问题模板,确保根据需要包含详细信息,并利用资产部分上传上一步的记录文件。", + "Record Screen": "录制屏幕", + "Record": "记录", + "Stop Recording": "停止录制", + "Introduce the model .pth path": "输入模型 .pth 路径", + "See Model Information": "查看模型信息", + "## Download Model": "## 下载模型", + "Model Link": "模型链接", + "Introduce the model link": "输入模型链接", + "Download Model": "下载模型", + "## Drop files": "## 拖放文件", + "Drag your .pth file and .index file into this space.
Drag one and then the other.": "将 .pth 文件和 .index 文件拖到此空间中。拖动一个,然后拖动另一个。", + "TTS Voices": "TTS语音", + "Text to Synthesize": "要合成的文本", + "Enter text to synthesize": "输入要合成的文本", + "Output Path for TTS Audio": "TTS 音频的输出路径", + "Output Path for RVC Audio": "RVC 音频的输出路径" +} \ No newline at end of file diff --git a/assets/i18n/scan.py b/assets/i18n/scan.py new file mode 100644 index 0000000000000000000000000000000000000000..e14b41e54774b2ba13dcc193cfb850bc6d840633 --- /dev/null +++ b/assets/i18n/scan.py @@ -0,0 +1,71 @@ +import ast +import json +from pathlib import Path +from collections import OrderedDict + + +def extract_i18n_strings(node): + i18n_strings = [] + + if ( + isinstance(node, ast.Call) + and isinstance(node.func, ast.Name) + and node.func.id == "i18n" + ): + for arg in node.args: + if isinstance(arg, ast.Constant) and isinstance(arg.value, str): + i18n_strings.append(arg.value) + + for child_node in ast.iter_child_nodes(node): + i18n_strings.extend(extract_i18n_strings(child_node)) + + return i18n_strings + + +def process_file(file_path): + with open(file_path, "r", encoding="utf-8") as f: + code = f.read() + if "I18nAuto" in code: + tree = ast.parse(code) + i18n_strings = extract_i18n_strings(tree) + print(file_path, len(i18n_strings)) + return i18n_strings + return [] + + +# Use pathlib for file handling +py_files = Path(".").rglob("*.py") + +# Use a set to store unique strings +code_keys = set() + +for py_file in py_files: + strings = process_file(py_file) + code_keys.update(strings) + +print() +print("Total unique:", len(code_keys)) + +standard_file = "languages/en_US.json" +with open(standard_file, "r", encoding="utf-8") as f: + standard_data = json.load(f, object_pairs_hook=OrderedDict) +standard_keys = set(standard_data.keys()) + +# Combine unused and missing keys sections +unused_keys = standard_keys - code_keys +missing_keys = code_keys - standard_keys + +print("Unused keys:", len(unused_keys)) +for unused_key in unused_keys: + print("\t", unused_key) + +print("Missing keys:", len(missing_keys)) +for
missing_key in missing_keys: + print("\t", missing_key) + +code_keys_dict = OrderedDict((s, s) for s in code_keys) + +# Use context manager for writing back to the file +with open(standard_file, "w", encoding="utf-8") as f: + json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True) + f.write("\n") diff --git a/core.py b/core.py new file mode 100644 index 0000000000000000000000000000000000000000..7acc25ab7b670bc6912e338b1dfbf3ecca47c080 --- /dev/null +++ b/core.py @@ -0,0 +1,764 @@ +import os +import sys +import argparse +import subprocess + +now_dir = os.getcwd() +sys.path.append(now_dir) + +from rvc.configs.config import Config +from rvc.lib.tools.validators import ( + validate_sampling_rate, + validate_f0up_key, + validate_f0method, + validate_true_false, + validate_tts_voices, +) + +from rvc.train.extract.preparing_files import generate_config, generate_filelist +from rvc.lib.tools.pretrained_selector import pretrained_selector + +from rvc.lib.process.model_fusion import model_fusion +from rvc.lib.process.model_information import model_information + +config = Config() +current_script_directory = os.path.dirname(os.path.realpath(__file__)) +logs_path = os.path.join(current_script_directory, "logs") +subprocess.run( + ["python", os.path.join("rvc", "lib", "tools", "prerequisites_download.py")] +) + + +# Infer +def run_infer_script( + f0up_key, + filter_radius, + index_rate, + hop_length, + f0method, + input_path, + output_path, + pth_file, + index_path, + split_audio, +): + infer_script_path = os.path.join("rvc", "infer", "infer.py") + command = [ + "python", + infer_script_path, + str(f0up_key), + str(filter_radius), + str(index_rate), + str(hop_length), + f0method, + input_path, + output_path, + pth_file, + index_path, + str(split_audio), + ] + subprocess.run(command) + return f"File {input_path} inferred successfully.", output_path + + +# Batch infer +def run_batch_infer_script( + f0up_key, + filter_radius, + index_rate, + hop_length, + 
f0method, + input_folder, + output_folder, + pth_file, + index_path, +): + infer_script_path = os.path.join("rvc", "infer", "infer.py") + + audio_files = [ + f for f in os.listdir(input_folder) if f.endswith((".mp3", ".wav", ".flac")) + ] + print(f"Detected {len(audio_files)} audio files for inference.") + + for audio_file in audio_files: + if "_output" in audio_file: + pass + else: + input_path = os.path.join(input_folder, audio_file) + output_file_name = os.path.splitext(os.path.basename(audio_file))[0] + output_path = os.path.join( + output_folder, + f"{output_file_name}_output{os.path.splitext(audio_file)[1]}", + ) + print(f"Inferring {input_path}...") + + command = [ + "python", + infer_script_path, + str(f0up_key), + str(filter_radius), + str(index_rate), + str(hop_length), + f0method, + input_path, + output_path, + pth_file, + index_path, + ] + subprocess.run(command) + + return f"Files from {input_folder} inferred successfully." + + +# TTS +def run_tts_script( + tts_text, + tts_voice, + f0up_key, + filter_radius, + index_rate, + hop_length, + f0method, + output_tts_path, + output_rvc_path, + pth_file, + index_path, +): + tts_script_path = os.path.join("rvc", "lib", "tools", "tts.py") + infer_script_path = os.path.join("rvc", "infer", "infer.py") + + if os.path.exists(output_tts_path): + os.remove(output_tts_path) + + command_tts = [ + "python", + tts_script_path, + tts_text, + tts_voice, + output_tts_path, + ] + + command_infer = [ + "python", + infer_script_path, + str(f0up_key), + str(filter_radius), + str(index_rate), + str(hop_length), + f0method, + output_tts_path, + output_rvc_path, + pth_file, + index_path, + ] + subprocess.run(command_tts) + subprocess.run(command_infer) + return f"Text {tts_text} synthesized successfully.", output_rvc_path + + +# Preprocess +def run_preprocess_script(model_name, dataset_path, sampling_rate): + per = 3.0 if config.is_half else 3.7 + preprocess_script_path = os.path.join("rvc", "train", "preprocess", "preprocess.py") 
+ command = [ + "python", + preprocess_script_path, + os.path.join(logs_path, str(model_name)), + dataset_path, + str(sampling_rate), + str(per), + ] + + os.makedirs(os.path.join(logs_path, str(model_name)), exist_ok=True) + subprocess.run(command) + return f"Model {model_name} preprocessed successfully." + + +# Extract +def run_extract_script(model_name, rvc_version, f0method, hop_length, sampling_rate): + model_path = os.path.join(logs_path, str(model_name)) + extract_f0_script_path = os.path.join( + "rvc", "train", "extract", "extract_f0_print.py" + ) + extract_feature_script_path = os.path.join( + "rvc", "train", "extract", "extract_feature_print.py" + ) + + command_1 = [ + "python", + extract_f0_script_path, + model_path, + f0method, + str(hop_length), + ] + command_2 = [ + "python", + extract_feature_script_path, + config.device, + "1", + "0", + "0", + model_path, + rvc_version, + "True", + ] + subprocess.run(command_1) + subprocess.run(command_2) + + generate_config(rvc_version, sampling_rate, model_path) + generate_filelist(f0method, model_path, rvc_version, sampling_rate) + return f"Model {model_name} extracted successfully." + + +# Train +def run_train_script( + model_name, + rvc_version, + save_every_epoch, + save_only_latest, + save_every_weights, + total_epoch, + sampling_rate, + batch_size, + gpu, + pitch_guidance, + pretrained, + custom_pretrained, + g_pretrained_path=None, + d_pretrained_path=None, +): + f0 = 1 if pitch_guidance == "True" else 0 + latest = 1 if save_only_latest == "True" else 0 + save_every = 1 if save_every_weights == "True" else 0 + + if pretrained == "True": + if custom_pretrained == "False": + pg, pd = pretrained_selector(f0)[rvc_version][sampling_rate] + else: + if g_pretrained_path is None or d_pretrained_path is None: + raise ValueError( + "Please provide the path to the pretrained G and D models." 
+ ) + pg, pd = g_pretrained_path, d_pretrained_path + else: + pg, pd = "", "" + + train_script_path = os.path.join("rvc", "train", "train.py") + command = [ + "python", + train_script_path, + "-se", + str(save_every_epoch), + "-te", + str(total_epoch), + "-pg", + pg, + "-pd", + pd, + "-sr", + str(sampling_rate), + "-bs", + str(batch_size), + "-g", + gpu, + "-e", + os.path.join(logs_path, str(model_name)), + "-v", + rvc_version, + "-l", + str(latest), + "-c", + "0", + "-sw", + str(save_every), + "-f0", + str(f0), + ] + + subprocess.run(command) + run_index_script(model_name, rvc_version) + return f"Model {model_name} trained successfully." + + +# Index +def run_index_script(model_name, rvc_version): + index_script_path = os.path.join("rvc", "train", "index_generator.py") + command = [ + "python", + index_script_path, + os.path.join(logs_path, str(model_name)), + rvc_version, + ] + + subprocess.run(command) + return f"Index file for {model_name} generated successfully." + + +# Model information +def run_model_information_script(pth_path): + print(model_information(pth_path)) + + +# Model fusion +def run_model_fusion_script(model_name, pth_path_1, pth_path_2): + model_fusion(model_name, pth_path_1, pth_path_2) + + +# Tensorboard +def run_tensorboard_script(): + tensorboard_script_path = os.path.join( + "rvc", "lib", "tools", "launch_tensorboard.py" + ) + command = [ + "python", + tensorboard_script_path, + ] + subprocess.run(command) + + +# Download +def run_download_script(model_link): + download_script_path = os.path.join("rvc", "lib", "tools", "model_download.py") + command = [ + "python", + download_script_path, + model_link, + ] + subprocess.run(command) + return f"Model downloaded successfully." + + +# Parse arguments +def parse_arguments(): + parser = argparse.ArgumentParser( + description="Run the main.py script with specific parameters." 
+ ) + subparsers = parser.add_subparsers( + title="subcommands", dest="mode", help="Choose a mode" + ) + + # Parser for 'infer' mode + infer_parser = subparsers.add_parser("infer", help="Run inference") + infer_parser.add_argument( + "f0up_key", + type=validate_f0up_key, + help="Value for f0up_key (-24 to +24)", + ) + infer_parser.add_argument( + "filter_radius", + type=str, + help="Value for filter_radius (0 to 10)", + ) + infer_parser.add_argument( + "index_rate", + type=str, + help="Value for index_rate (0.0 to 1)", + ) + infer_parser.add_argument( + "hop_length", + type=str, + help="Value for hop_length (1 to 512)", + ) + infer_parser.add_argument( + "f0method", + type=validate_f0method, + help="Value for f0method (pm, dio, crepe, crepe-tiny, harvest, rmvpe)", + ) + infer_parser.add_argument( + "input_path", type=str, help="Input path (enclose in double quotes)" + ) + infer_parser.add_argument( + "output_path", type=str, help="Output path (enclose in double quotes)" + ) + infer_parser.add_argument( + "pth_file", type=str, help="Path to the .pth file (enclose in double quotes)" + ) + infer_parser.add_argument( + "index_path", + type=str, + help="Path to the .index file (enclose in double quotes)", + ) + infer_parser.add_argument( + "split_audio", + type=str, + help="Enable split audio ( better results )", + ) + + # Parser for 'batch_infer' mode + batch_infer_parser = subparsers.add_parser( + "batch_infer", help="Run batch inference" + ) + batch_infer_parser.add_argument( + "f0up_key", + type=validate_f0up_key, + help="Value for f0up_key (-24 to +24)", + ) + batch_infer_parser.add_argument( + "filter_radius", + type=str, + help="Value for filter_radius (0 to 10)", + ) + batch_infer_parser.add_argument( + "index_rate", + type=str, + help="Value for index_rate (0.0 to 1)", + ) + batch_infer_parser.add_argument( + "hop_length", + type=str, + help="Value for hop_length (1 to 512)", + ) + batch_infer_parser.add_argument( + "f0method", + type=validate_f0method, + 
help="Value for f0method (pm, dio, crepe, crepe-tiny, harvest, rmvpe)", + ) + batch_infer_parser.add_argument( + "input_folder", type=str, help="Input folder (enclose in double quotes)" + ) + batch_infer_parser.add_argument( + "output_folder", type=str, help="Output folder (enclose in double quotes)" + ) + batch_infer_parser.add_argument( + "pth_file", type=str, help="Path to the .pth file (enclose in double quotes)" + ) + batch_infer_parser.add_argument( + "index_path", + type=str, + help="Path to the .index file (enclose in double quotes)", + ) + + # Parser for 'tts' mode + tts_parser = subparsers.add_parser("tts", help="Run TTS") + tts_parser.add_argument( + "tts_text", + type=str, + help="Text to be synthesized (enclose in double quotes)", + ) + tts_parser.add_argument( + "tts_voice", + type=validate_tts_voices, + help="Voice to be used (enclose in double quotes)", + ) + tts_parser.add_argument( + "f0up_key", + type=validate_f0up_key, + help="Value for f0up_key (-24 to +24)", + ) + tts_parser.add_argument( + "filter_radius", + type=str, + help="Value for filter_radius (0 to 10)", + ) + tts_parser.add_argument( + "index_rate", + type=str, + help="Value for index_rate (0.0 to 1)", + ) + tts_parser.add_argument( + "hop_length", + type=str, + help="Value for hop_length (1 to 512)", + ) + tts_parser.add_argument( + "f0method", + type=validate_f0method, + help="Value for f0method (pm, dio, crepe, crepe-tiny, harvest, rmvpe)", + ) + tts_parser.add_argument( + "output_tts_path", type=str, help="Output tts path (enclose in double quotes)" + ) + tts_parser.add_argument( + "output_rvc_path", type=str, help="Output rvc path (enclose in double quotes)" + ) + tts_parser.add_argument( + "pth_file", type=str, help="Path to the .pth file (enclose in double quotes)" + ) + tts_parser.add_argument( + "index_path", + type=str, + help="Path to the .index file (enclose in double quotes)", + ) + + # Parser for 'preprocess' mode + preprocess_parser = subparsers.add_parser("preprocess", 
help="Run preprocessing") + preprocess_parser.add_argument( + "model_name", type=str, help="Name of the model (enclose in double quotes)" + ) + preprocess_parser.add_argument( + "dataset_path", + type=str, + help="Path to the dataset (enclose in double quotes)", + ) + preprocess_parser.add_argument( + "sampling_rate", + type=validate_sampling_rate, + help="Sampling rate (32000, 40000 or 48000)", + ) + + # Parser for 'extract' mode + extract_parser = subparsers.add_parser("extract", help="Run extract") + extract_parser.add_argument( + "model_name", + type=str, + help="Name of the model (enclose in double quotes)", + ) + extract_parser.add_argument( + "rvc_version", + type=str, + help="Version of the model (v1 or v2)", + ) + extract_parser.add_argument( + "f0method", + type=validate_f0method, + help="Value for f0method (pm, dio, crepe, crepe-tiny, mangio-crepe, mangio-crepe-tiny, harvest, rmvpe)", + ) + extract_parser.add_argument( + "hop_length", + type=str, + help="Value for hop_length (1 to 512)", + ) + extract_parser.add_argument( + "sampling_rate", + type=validate_sampling_rate, + help="Sampling rate (32000, 40000 or 48000)", + ) + + # Parser for 'train' mode + train_parser = subparsers.add_parser("train", help="Run training") + train_parser.add_argument( + "model_name", + type=str, + help="Name of the model (enclose in double quotes)", + ) + train_parser.add_argument( + "rvc_version", + type=str, + help="Version of the model (v1 or v2)", + ) + train_parser.add_argument( + "save_every_epoch", + type=str, + help="Save every epoch", + ) + train_parser.add_argument( + "save_only_latest", + type=str, + help="Save weight only at last epoch", + ) + train_parser.add_argument( + "save_every_weights", + type=str, + help="Save weight every epoch", + ) + train_parser.add_argument( + "total_epoch", + type=str, + help="Total epoch", + ) + train_parser.add_argument( + "sampling_rate", + type=validate_sampling_rate, + help="Sampling rate (32000, 40000, or 48000)", + ) + 
train_parser.add_argument( + "batch_size", + type=str, + help="Batch size", + ) + train_parser.add_argument( + "gpu", + type=str, + help="GPU number (0 to 10 separated by -)", + ) + train_parser.add_argument( + "pitch_guidance", + type=validate_true_false, + help="Pitch guidance (True or False)", + ) + train_parser.add_argument( + "pretrained", + type=validate_true_false, + help="Pretrained (True or False)", + ) + train_parser.add_argument( + "custom_pretrained", + type=validate_true_false, + help="Custom pretrained (True or False)", + ) + train_parser.add_argument( + "g_pretrained_path", + type=str, + nargs="?", + default=None, + help="Path to the pretrained G file (enclose in double quotes)", + ) + train_parser.add_argument( + "d_pretrained_path", + type=str, + nargs="?", + default=None, + help="Path to the pretrained D file (enclose in double quotes)", + ) + + # Parser for 'index' mode + index_parser = subparsers.add_parser("index", help="Generate index file") + index_parser.add_argument( + "model_name", + type=str, + help="Name of the model (enclose in double quotes)", + ) + index_parser.add_argument( + "rvc_version", + type=str, + help="Version of the model (v1 or v2)", + ) + + # Parser for 'model_information' mode + model_information_parser = subparsers.add_parser( + "model_information", help="Print model information" + ) + model_information_parser.add_argument( + "pth_path", + type=str, + help="Path to the .pth file (enclose in double quotes)", + ) + + # Parser for 'model_fusion' mode + model_fusion_parser = subparsers.add_parser("model_fusion", help="Fuse two models") + model_fusion_parser.add_argument( + "model_name", + type=str, + help="Name of the model (enclose in double quotes)", + ) + model_fusion_parser.add_argument( + "pth_path_1", + type=str, + help="Path to the first .pth file (enclose in double quotes)", + ) + model_fusion_parser.add_argument( + "pth_path_2", + type=str, + help="Path to the second .pth file (enclose in double quotes)", + ) + + # 
Parser for 'tensorboard' mode + subparsers.add_parser("tensorboard", help="Run tensorboard") + + # Parser for 'download' mode + download_parser = subparsers.add_parser("download", help="Download models") + download_parser.add_argument( + "model_link", + type=str, + help="Link of the model (enclose in double quotes)", + ) + + return parser.parse_args() + + +def main(): + if len(sys.argv) == 1: + print("Please run the script with '-h' for more information.") + sys.exit(1) + + args = parse_arguments() + + try: + if args.mode == "infer": + run_infer_script( + args.f0up_key, + args.filter_radius, + args.index_rate, + args.hop_length, + args.f0method, + args.input_path, + args.output_path, + args.pth_file, + args.index_path, + args.split_audio, + ) + elif args.mode == "batch_infer": + run_batch_infer_script( + args.f0up_key, + args.filter_radius, + args.index_rate, + args.hop_length, + args.f0method, + args.input_folder, + args.output_folder, + args.pth_file, + args.index_path, + ) + elif args.mode == "tts": + run_tts_script( + args.tts_text, + args.tts_voice, + args.f0up_key, + args.filter_radius, + args.index_rate, + args.hop_length, + args.f0method, + args.output_tts_path, + args.output_rvc_path, + args.pth_file, + args.index_path, + ) + elif args.mode == "preprocess": + run_preprocess_script( + args.model_name, + args.dataset_path, + str(args.sampling_rate), + ) + + elif args.mode == "extract": + run_extract_script( + args.model_name, + args.rvc_version, + args.f0method, + args.hop_length, + args.sampling_rate, + ) + elif args.mode == "train": + run_train_script( + args.model_name, + args.rvc_version, + args.save_every_epoch, + args.save_only_latest, + args.save_every_weights, + args.total_epoch, + args.sampling_rate, + args.batch_size, + args.gpu, + args.pitch_guidance, + args.pretrained, + args.custom_pretrained, + args.g_pretrained_path, + args.d_pretrained_path, + ) + elif args.mode == "index": + run_index_script( + args.model_name, + args.rvc_version, + ) + elif 
args.mode == "model_information": + run_model_information_script( + args.pth_path, + ) + elif args.mode == "model_fusion": + run_model_fusion_script( + args.model_name, + args.pth_path_1, + args.pth_path_2, + ) + elif args.mode == "tensorboard": + run_tensorboard_script() + elif args.mode == "download": + run_download_script( + args.model_link, + ) + except Exception as error: + print(f"Error: {error}") + + +if __name__ == "__main__": + main() diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c40724ead38d497a48b6f05bf591e0f72b7f2d8f --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,16 @@ +version: '1' + +services: + applio: + build: + context: ./ + dockerfile: Dockerfile + ports: + - "6969" + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] \ No newline at end of file diff --git a/logs/mute/0_gt_wavs/mute32000.wav b/logs/mute/0_gt_wavs/mute32000.wav new file mode 100644 index 0000000000000000000000000000000000000000..b4b5029205bf72dee5856bbe0c65c34337dc8dd4 Binary files /dev/null and b/logs/mute/0_gt_wavs/mute32000.wav differ diff --git a/logs/mute/0_gt_wavs/mute40000.wav b/logs/mute/0_gt_wavs/mute40000.wav new file mode 100644 index 0000000000000000000000000000000000000000..fcf1281d4d1b01417a4d6738022f4402594a6802 Binary files /dev/null and b/logs/mute/0_gt_wavs/mute40000.wav differ diff --git a/logs/mute/0_gt_wavs/mute48000.wav b/logs/mute/0_gt_wavs/mute48000.wav new file mode 100644 index 0000000000000000000000000000000000000000..72822a01251e77d7d2a4a7da9d94805426829083 Binary files /dev/null and b/logs/mute/0_gt_wavs/mute48000.wav differ diff --git a/logs/mute/1_16k_wavs/mute.wav b/logs/mute/1_16k_wavs/mute.wav new file mode 100644 index 0000000000000000000000000000000000000000..27a7d638558539c521aacf8c0f34bd0d4816aa9d Binary files /dev/null and b/logs/mute/1_16k_wavs/mute.wav differ diff --git a/logs/mute/2a_f0/mute.wav.npy 
b/logs/mute/2a_f0/mute.wav.npy new file mode 100644 index 0000000000000000000000000000000000000000..dd7e9afd2e7f2aefaa30bcd4541a23ce96a9e150 --- /dev/null +++ b/logs/mute/2a_f0/mute.wav.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9acf9ab7facdb032e1d687fe35182670b0b94566c4b209ae48c239d19956a6 +size 1332 diff --git a/logs/mute/2b-f0nsf/mute.wav.npy b/logs/mute/2b-f0nsf/mute.wav.npy new file mode 100644 index 0000000000000000000000000000000000000000..7644e325ddd34bd186153ecf7461aa1593a054f3 --- /dev/null +++ b/logs/mute/2b-f0nsf/mute.wav.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30792849c8e72d67e6691754077f2888b101cb741e9c7f193c91dd9692870c87 +size 2536 diff --git a/logs/mute/3_feature256/mute.npy b/logs/mute/3_feature256/mute.npy new file mode 100644 index 0000000000000000000000000000000000000000..c57ae95d19d969788ef186a81cdc2f4b462ed6df --- /dev/null +++ b/logs/mute/3_feature256/mute.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d5abbac078e19a3f649c0d78a02cb33a71407ded3ddf2db78e6b803d0c0126 +size 152704 diff --git a/logs/mute/3_feature768/mute.npy b/logs/mute/3_feature768/mute.npy new file mode 100644 index 0000000000000000000000000000000000000000..ea5f9dddca08ff210791b27e3db3fc5676eabc90 --- /dev/null +++ b/logs/mute/3_feature768/mute.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ef62b957887ac9f0913aa5158f18983afff1ef5a3e4c5fd067ac20fc380d54 +size 457856 diff --git a/logs/zips/.gitignore b/logs/zips/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..de8d1c12ff699834a50ded5f74e97ed1a275fba0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,36 @@ +# General dependencies +ffmpeg-python>=0.2.0 +numpy==1.23.5 +requests +tqdm +wget + +# Audio processing 
+faiss-cpu==1.7.3 +librosa==0.9.1 +pyworld==0.3.4 +scipy==1.11.1 +soundfile==0.12.1 +praat-parselmouth + +# Machine learning +git+https://github.com/IAHispano/fairseq; sys_platform == 'linux' +fairseq==0.12.2; sys_platform == 'win32' +numba; sys_platform == 'linux' +numba==0.56.4; sys_platform == 'win32' +torch==2.1.1 +torchcrepe==0.0.21 +torchvision==0.16.1 + +# Visualization +matplotlib==3.7.2 +tensorboard +gradio==4.14.0 + +# Miscellaneous +ffmpy==0.3.1 +tensorboardX +requests==2.31.0 +edge-tts==6.1.9 +pypresence +beautifulsoup4 diff --git a/run-applio.bat b/run-applio.bat new file mode 100644 index 0000000000000000000000000000000000000000..c2d8789aba4964c11c57372397c54151ac242734 --- /dev/null +++ b/run-applio.bat @@ -0,0 +1,12 @@ +@echo off +setlocal +title Applio + +if not exist env ( + echo Please run 'run-install.bat' first to set up the environment. + pause + exit /b 1 +) + +env\python.exe app.py --open +pause diff --git a/run-applio.sh b/run-applio.sh new file mode 100644 index 0000000000000000000000000000000000000000..1aa8a57f3f1f79a5493449710fcb8c96fbd1cb65 --- /dev/null +++ b/run-applio.sh @@ -0,0 +1,6 @@ +#!/bin/sh +printf "\033]0;Applio\007" +. 
.venv/bin/activate + + clear +python app.py --open \ No newline at end of file diff --git a/run-install.bat b/run-install.bat new file mode 100644 index 0000000000000000000000000000000000000000..bcb9c91f46727b5e4863c0daa14b72f995b0ad49 --- /dev/null +++ b/run-install.bat @@ -0,0 +1,73 @@ +@echo off +setlocal +title Installer + +set "principal=%cd%" +set "URL_EXTRA=https://huggingface.co./IAHispano/applio/resolve/main" +set "CONDA_ROOT_PREFIX=%UserProfile%\Miniconda3" +set "INSTALL_ENV_DIR=%principal%\env" +set "MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py39_23.9.0-0-Windows-x86_64.exe" +set "CONDA_EXECUTABLE=%CONDA_ROOT_PREFIX%\Scripts\conda.exe" + +del Makefile +del Dockerfile +del docker-compose.yaml +del /q *.sh + +if not exist "%cd%\env.zip" ( + echo Downloading the fairseq build... + curl -s -LJO %URL_EXTRA%/env.zip -o env.zip +) + +if not exist "%cd%\env.zip" ( + echo Download failed, trying with the powershell method + powershell -Command "& {Invoke-WebRequest -Uri '%URL_EXTRA%/env.zip' -OutFile 'env.zip'}" +) + +if not exist "%cd%\env" ( + echo Extracting the file... + powershell -command "& { Add-Type -AssemblyName System.IO.Compression.FileSystem ; [System.IO.Compression.ZipFile]::ExtractToDirectory('%cd%\env.zip', '%cd%') }" +) + +if not exist "%cd%\env" ( + echo Extracting failed trying with the tar method... + tar -xf "%cd%\env.zip" +) + +if exist "%cd%\env" ( + del env.zip +) else ( + echo Theres a problem extracting the file please download the file and extract it manually. + echo https://huggingface.co./IAHispano/applio/resolve/main/env.zip + pause + exit +) + +if not exist "%CONDA_EXECUTABLE%" ( + echo Downloading Miniconda from %MINICONDA_DOWNLOAD_URL%... + curl %MINICONDA_DOWNLOAD_URL% -o miniconda.exe + + if not exist "%principal%\miniconda.exe" ( + echo Download failed trying with the powershell method.
+ powershell -Command "& {Invoke-WebRequest -Uri '%MINICONDA_DOWNLOAD_URL%' -OutFile 'miniconda.exe'}" + ) + + echo Installing Miniconda to %CONDA_ROOT_PREFIX%... + start /wait "" miniconda.exe /InstallationType=JustMe /RegisterPython=0 /S /D=%CONDA_ROOT_PREFIX% + del miniconda.exe +) + +call "%CONDA_ROOT_PREFIX%\_conda.exe" create --no-shortcuts -y -k --prefix "%INSTALL_ENV_DIR%" python=3.9 + +echo Installing the dependencies... +call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" +pip install --upgrade setuptools +pip install -r "%principal%\requirements.txt" +pip uninstall torch torchvision torchaudio -y +pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu121 +call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" deactivate +echo. + +echo Applio has been installed successfully, run 'run-applio.bat' to start it! +pause +cls diff --git a/run-install.sh b/run-install.sh new file mode 100644 index 0000000000000000000000000000000000000000..344a095134b3ca1746df6a28b71040f3385f9d6d --- /dev/null +++ b/run-install.sh @@ -0,0 +1,87 @@ +#!/bin/sh +printf "\033]0;Installer\007" +clear +rm *.bat + +# Function to create or activate a virtual environment +prepare_install() { + if [ -d ".venv" ]; then + echo "Venv found. This implies Applio has been already installed or this is a broken install" + printf "Do you want to execute run-applio.sh? (Y/N): " >&2 + read -r r + r=$(echo "$r" | tr '[:upper:]' '[:lower:]') + if [ "$r" = "y" ]; then + ./run-applio.sh && exit 1 + else + echo "Ok! The installation will continue. Good luck!" + fi + . .venv/bin/activate + else + echo "Creating venv..." + requirements_file="requirements.txt" + echo "Checking if python exists" + if command -v python3 > /dev/null 2>&1; then + py=$(which python3) + echo "Using python3" + else + if python --version | grep -q 3.; then + py=$(which python) + echo "Using python" + else + echo "Please install Python3 or 3.11 manually." 
+ exit 1 + fi + fi + + $py -m venv .venv + . .venv/bin/activate + python -m ensurepip + # Update pip within the virtual environment + pip3 install --upgrade pip + echo + echo "Installing Applio dependencies..." + python -m pip install -r requirements.txt + python -m pip uninstall torch torchvision torchaudio -y + python -m pip install torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu117 + finish + fi +} + +# Function to finish installation (this should install missing dependencies) +finish() { + # Check if required packages are installed and install them if not + if [ -f "${requirements_file}" ]; then + installed_packages=$(python -m pip freeze) + while IFS= read -r package; do + expr "${package}" : "^#.*" > /dev/null && continue + package_name=$(echo "${package}" | sed 's/[<>=!].*//') + if ! echo "${installed_packages}" | grep -q "${package_name}"; then + echo "${package_name} not found. Attempting to install..." + python -m pip install --upgrade "${package}" + fi + done < "${requirements_file}" + else + echo "${requirements_file} not found. Please ensure the requirements file with required packages exists." + exit 1 + fi + clear + echo "Applio has been successfully downloaded. Run the file run-applio.sh to run the web interface!" + exit 0 +} + +# Loop to the main menu +if [ "$(uname)" = "Darwin" ]; then + if ! command -v brew >/dev/null 2>&1; then + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" + else + brew install python + export PYTORCH_ENABLE_MPS_FALLBACK=1 + export PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 + fi +elif [ "$(uname)" != "Linux" ]; then + echo "Unsupported operating system. Are you using Windows...?" + echo "If yes, use the batch (.bat) file instead of this one!" 
+ exit 1 +fi + +prepare_install \ No newline at end of file diff --git a/run-tensorboard.bat b/run-tensorboard.bat new file mode 100644 index 0000000000000000000000000000000000000000..1817dc561d8eff3e20136bbfb1bd6e32e82b00ef --- /dev/null +++ b/run-tensorboard.bat @@ -0,0 +1,6 @@ +@echo off +setlocal +title Tensorboard + +env\python.exe core.py tensorboard +pause \ No newline at end of file diff --git a/run-tensorboard.sh b/run-tensorboard.sh new file mode 100644 index 0000000000000000000000000000000000000000..26e98b58beed3b1272b0313455d75785f83fa0d0 --- /dev/null +++ b/run-tensorboard.sh @@ -0,0 +1,6 @@ +#!/bin/sh +printf "\033]0;Tensorboard\007" +. .venv/bin/activate + +clear +python core.py tensorboard \ No newline at end of file diff --git a/rvc/configs/config.py b/rvc/configs/config.py new file mode 100644 index 0000000000000000000000000000000000000000..d4bd0d4d605ac388387336d4235a1a70ea674bdc --- /dev/null +++ b/rvc/configs/config.py @@ -0,0 +1,165 @@ +import argparse +import os +import sys +import json +from multiprocessing import cpu_count + +import torch + +version_config_list = [ + "v1/32000.json", + "v1/40000.json", + "v1/48000.json", + "v2/48000.json", + "v2/32000.json", +] + + +def singleton_variable(func): + def wrapper(*args, **kwargs): + if not wrapper.instance: + wrapper.instance = func(*args, **kwargs) + return wrapper.instance + + wrapper.instance = None + return wrapper + + +@singleton_variable +class Config: + def __init__(self): + self.device = "cuda:0" + self.is_half = True + self.use_jit = False + self.n_cpu = 0 + self.gpu_name = None + self.json_config = self.load_config_json() + self.gpu_mem = None + self.instead = "" + self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() + + @staticmethod + def load_config_json() -> dict: + d = {} + for config_file in version_config_list: + with open(f"rvc/configs/{config_file}", "r") as f: + d[config_file] = json.load(f) + return d + + @staticmethod + def has_mps() -> bool: + if not 
torch.backends.mps.is_available(): + return False + try: + torch.zeros(1).to(torch.device("mps")) + return True + except Exception: + return False + + @staticmethod + def has_xpu() -> bool: + if hasattr(torch, "xpu") and torch.xpu.is_available(): + return True + else: + return False + + def use_fp32_config(self): + for config_file in version_config_list: + self.json_config[config_file]["train"]["fp16_run"] = False + with open(f"rvc/configs/{config_file}", "r") as f: + strr = f.read().replace("true", "false") + with open(f"rvc/configs/{config_file}", "w") as f: + f.write(strr) + with open("rvc/train/preprocess/preprocess.py", "r") as f: + strr = f.read().replace("3.7", "3.0") + with open("rvc/train/preprocess/preprocess.py", "w") as f: + f.write(strr) + + def device_config(self) -> tuple: + if torch.cuda.is_available(): + if self.has_xpu(): + self.device = self.instead = "xpu:0" + self.is_half = True + i_device = int(self.device.split(":")[-1]) + self.gpu_name = torch.cuda.get_device_name(i_device) + if ( + ("16" in self.gpu_name and "V100" not in self.gpu_name.upper()) + or "P40" in self.gpu_name.upper() + or "P10" in self.gpu_name.upper() + or "1060" in self.gpu_name + or "1070" in self.gpu_name + or "1080" in self.gpu_name + ): + self.is_half = False + self.use_fp32_config() + self.gpu_mem = int( + torch.cuda.get_device_properties(i_device).total_memory + / 1024 + / 1024 + / 1024 + + 0.4 + ) + if self.gpu_mem <= 4: + with open("rvc/train/preprocess/preprocess.py", "r") as f: + strr = f.read().replace("3.7", "3.0") + with open("rvc/train/preprocess/preprocess.py", "w") as f: + f.write(strr) + elif self.has_mps(): + print("No supported Nvidia GPU found") + self.device = self.instead = "mps" + self.is_half = False + self.use_fp32_config() + else: + print("No supported Nvidia GPU found") + self.device = self.instead = "cpu" + self.is_half = False + self.use_fp32_config() + + if self.n_cpu == 0: + self.n_cpu = cpu_count() + + if self.is_half: + x_pad = 3 + x_query = 
def max_vram_gpu(gpu):
    """Return the total memory of CUDA device ``gpu`` in whole gigabytes.

    Args:
        gpu: Device identifier forwarded to
            ``torch.cuda.get_device_properties``.

    Returns:
        int: Total device memory (bytes / 1024**3) rounded to the nearest
        integer, or ``0`` when CUDA is not available.
    """
    if not torch.cuda.is_available():
        # Return an int here as well. The previous string "0" gave callers a
        # different type depending on the hardware they ran on.
        return 0
    gpu_properties = torch.cuda.get_device_properties(gpu)
    return round(gpu_properties.total_memory / 1024 / 1024 / 1024)
[[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,4,2,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16,16,4,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/rvc/configs/v1/40000.json b/rvc/configs/v1/40000.json new file mode 100644 index 0000000000000000000000000000000000000000..4ffc87b9e9725fcd59d81a68d41a61962213b777 --- /dev/null +++ b/rvc/configs/v1/40000.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 12800, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 40000, + "filter_length": 2048, + "hop_length": 400, + "win_length": 2048, + "n_mel_channels": 125, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,10,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16,16,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/rvc/configs/v1/48000.json b/rvc/configs/v1/48000.json new file mode 100644 index 0000000000000000000000000000000000000000..2d0e05beb794f6f61b769b48c7ae728bf59e6335 --- /dev/null +++ b/rvc/configs/v1/48000.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 11520, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + 
"max_wav_value": 32768.0, + "sampling_rate": 48000, + "filter_length": 2048, + "hop_length": 480, + "win_length": 2048, + "n_mel_channels": 128, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,6,2,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16,16,4,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/rvc/configs/v2/32000.json b/rvc/configs/v2/32000.json new file mode 100644 index 0000000000000000000000000000000000000000..70e534f4c641a5a2c8e5c1e172f61398ee97e6e0 --- /dev/null +++ b/rvc/configs/v2/32000.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 12800, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 32000, + "filter_length": 1024, + "hop_length": 320, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,8,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [20,16,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/rvc/configs/v2/48000.json b/rvc/configs/v2/48000.json new file mode 100644 index 
def load_hubert():
    """Load ``hubert_base.pt`` into the module-level ``hubert_model``.

    The first model of the loaded ensemble is moved to ``config.device``,
    cast to half or single precision according to ``config.is_half``, and
    switched to eval mode.
    """
    global hubert_model
    checkpoint_files = ["hubert_base.pt"]
    models, _saved_cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        checkpoint_files,
        suffix="",
    )
    hubert_model = models[0]
    hubert_model = hubert_model.to(config.device)
    # Pick the precision cast once, then apply it.
    cast_precision = hubert_model.half if config.is_half else hubert_model.float
    hubert_model = cast_precision()
    hubert_model.eval()
def _build_synthesizer(checkpoint):
    """Instantiate the synthesizer class selected by a checkpoint's version and f0 flag."""
    model_version = checkpoint.get("version", "v1")
    uses_f0 = checkpoint.get("f0", 1) == 1
    if model_version == "v1":
        f0_cls, plain_cls = SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
    elif model_version == "v2":
        f0_cls, plain_cls = SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono
    else:
        # Previously an unknown version fell through both branches and the
        # stale global ``net_g`` (or a NameError) was used instead.
        raise ValueError(f"Unsupported model version: {model_version!r}")
    if uses_f0:
        return f0_cls(*checkpoint["config"], is_half=config.is_half)
    return plain_cls(*checkpoint["config"])


def get_vc(weight_root, sid):
    """Load the voice model at ``weight_root`` into the module-level globals.

    Sets ``cpt``, ``tgt_sr``, ``version``, ``net_g``, ``vc`` and ``n_spk``.
    When ``sid`` is ``""`` or ``[]`` the previously loaded objects (including
    ``hubert_model``) are released first and the CUDA cache is emptied.

    Args:
        weight_root: Path of the checkpoint file passed to ``torch.load``.
        sid: Speaker selection; only the empty values ``""`` / ``[]`` are
            inspected here, to trigger the cleanup step.

    Raises:
        ValueError: If the checkpoint declares a version other than
            ``"v1"`` or ``"v2"``.
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version, hubert_model
    if sid == "" or sid == []:
        if hubert_model is not None:
            print("clean_empty_cache")
        # Drop every reference by rebinding instead of ``del``: deleting
        # globals that were never assigned raised NameError, and rebuilding a
        # model from the old checkpoint just to delete it did no useful work.
        net_g = n_spk = vc = hubert_model = tgt_sr = cpt = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # NOTE: torch.load unpickles arbitrary objects; only load checkpoints
    # from sources you trust.
    cpt = torch.load(weight_root, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    # Size the speaker embedding from the stored weights.
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]

    net_g = _build_synthesizer(cpt)
    version = cpt.get("version", "v1")
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    net_g = net_g.half() if config.is_half else net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
def change_rms(data1, sr1, data2, sr2, rate):
    """Scale ``data2`` in place by a mix of both signals' RMS envelopes.

    Each sample of ``data2`` is multiplied by
    ``rms1 ** (1 - rate) * rms2 ** (rate - 1)``, where both envelopes are
    linearly interpolated to ``data2.shape[0]`` points and ``rms2`` is floored
    at ``1e-6``. With ``rate == 1`` the gain is 1.

    Args:
        data1: Signal supplying the first RMS envelope.
        sr1: Sets the RMS frame and hop length used for ``data1``.
        data2: Signal that is rescaled; modified in place.
        sr2: Sets the RMS frame and hop length used for ``data2``.
        rate: Mixing exponent.

    Returns:
        ``data2`` after in-place scaling.
    """
    n_out = data2.shape[0]

    def _envelope(samples, sample_rate):
        # One RMS value per half of ``sample_rate`` samples, stretched to n_out.
        half = sample_rate // 2
        frames = librosa.feature.rms(y=samples, frame_length=half * 2, hop_length=half)
        stretched = F.interpolate(
            torch.from_numpy(frames).unsqueeze(0), size=n_out, mode="linear"
        )
        return stretched.squeeze()

    source_env = _envelope(data1, sr1)
    target_env = _envelope(data2, sr2)
    # Floor the envelope so the negative power below cannot divide by zero.
    target_env = torch.max(target_env, torch.zeros_like(target_env) + 1e-6)
    gain = torch.pow(source_env, torch.tensor(1 - rate)) * torch.pow(
        target_env, torch.tensor(rate - 1)
    )
    data2 *= gain.numpy()
    return data2
self.sr = 16000 + self.window = 160 + self.t_pad = self.sr * self.x_pad + self.t_pad_tgt = tgt_sr * self.x_pad + self.t_pad2 = self.t_pad * 2 + self.t_query = self.sr * self.x_query + self.t_center = self.sr * self.x_center + self.t_max = self.sr * self.x_max + self.device = config.device + + def get_optimal_torch_device(self, index: int = 0) -> torch.device: + if torch.cuda.is_available(): + return torch.device(f"cuda:{index % torch.cuda.device_count()}") + elif torch.backends.mps.is_available(): + return torch.device("mps") + return torch.device("cpu") + + def get_f0_crepe_computation( + self, + x, + f0_min, + f0_max, + p_len, + hop_length=120, + model="full", + ): + x = x.astype(np.float32) + x /= np.quantile(np.abs(x), 0.999) + torch_device = self.get_optimal_torch_device() + audio = torch.from_numpy(x).to(torch_device, copy=True) + audio = torch.unsqueeze(audio, dim=0) + if audio.ndim == 2 and audio.shape[0] > 1: + audio = torch.mean(audio, dim=0, keepdim=True).detach() + audio = audio.detach() + print("Initiating prediction with a hop_length of: " + str(hop_length)) + pitch: Tensor = torchcrepe.predict( + audio, + self.sr, + hop_length, + f0_min, + f0_max, + model, + batch_size=hop_length * 2, + device=torch_device, + pad=True, + ) + p_len = p_len or x.shape[0] // hop_length + source = np.array(pitch.squeeze(0).cpu().float().numpy()) + source[source < 0.001] = np.nan + target = np.interp( + np.arange(0, len(source) * p_len, len(source)) / p_len, + np.arange(0, len(source)), + source, + ) + f0 = np.nan_to_num(target) + return f0 + + def get_f0_official_crepe_computation( + self, + x, + f0_min, + f0_max, + model="full", + ): + batch_size = 512 + audio = torch.tensor(np.copy(x))[None].float() + f0, pd = torchcrepe.predict( + audio, + self.sr, + self.window, + f0_min, + f0_max, + model, + batch_size=batch_size, + device=self.device, + return_periodicity=True, + ) + pd = torchcrepe.filter.median(pd, 3) + f0 = torchcrepe.filter.mean(f0, 3) + f0[pd < 0.1] = 0 + f0 = 
f0[0].cpu().numpy() + return f0 + + def get_f0( + self, + input_audio_path, + x, + p_len, + f0_up_key, + f0_method, + filter_radius, + hop_length, + inp_f0=None, + ): + global input_audio_path2wav + time_step = self.window / self.sr * 1000 + f0_min = 50 + f0_max = 1100 + f0_mel_min = 1127 * np.log(1 + f0_min / 700) + f0_mel_max = 1127 * np.log(1 + f0_max / 700) + if f0_method == "pm": + f0 = ( + parselmouth.Sound(x, self.sr) + .to_pitch_ac( + time_step=time_step / 1000, + voicing_threshold=0.6, + pitch_floor=f0_min, + pitch_ceiling=f0_max, + ) + .selected_array["frequency"] + ) + pad_size = (p_len - len(f0) + 1) // 2 + if pad_size > 0 or p_len - len(f0) - pad_size > 0: + f0 = np.pad( + f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" + ) + elif f0_method == "harvest": + input_audio_path2wav[input_audio_path] = x.astype(np.double) + f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10) + if int(filter_radius) > 2: + f0 = signal.medfilt(f0, 3) + elif f0_method == "dio": + f0, t = pyworld.dio( + x.astype(np.double), + fs=self.sr, + f0_ceil=f0_max, + f0_floor=f0_min, + frame_period=10, + ) + f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr) + f0 = signal.medfilt(f0, 3) + elif f0_method == "crepe": + f0 = self.get_f0_crepe_computation( + x, f0_min, f0_max, p_len, int(hop_length) + ) + elif f0_method == "crepe-tiny": + f0 = self.get_f0_crepe_computation( + x, f0_min, f0_max, p_len, int(hop_length), "tiny" + ) + elif f0_method == "rmvpe": + if hasattr(self, "model_rmvpe") == False: + from rvc.lib.rmvpe import RMVPE + + self.model_rmvpe = RMVPE( + "rmvpe.pt", is_half=self.is_half, device=self.device + ) + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + + f0 *= pow(2, f0_up_key / 12) + tf0 = self.sr // self.window + if inp_f0 is not None: + delta_t = np.round( + (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1 + ).astype("int16") + replace_f0 = np.interp( + list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1] + ) + shape = 
f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)].shape[0] + f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)] = replace_f0[ + :shape + ] + f0bak = f0.copy() + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / ( + f0_mel_max - f0_mel_min + ) + 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > 255] = 255 + f0_coarse = np.rint(f0_mel).astype(np.int) + + return f0_coarse, f0bak + + def vc( + self, + model, + net_g, + sid, + audio0, + pitch, + pitchf, + index, + big_npy, + index_rate, + version, + protect, + ): + feats = torch.from_numpy(audio0) + if self.is_half: + feats = feats.half() + else: + feats = feats.float() + if feats.dim() == 2: + feats = feats.mean(-1) + assert feats.dim() == 1, feats.dim() + feats = feats.view(1, -1) + padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False) + + inputs = { + "source": feats.to(self.device), + "padding_mask": padding_mask, + "output_layer": 9 if version == "v1" else 12, + } + t0 = ttime() + with torch.no_grad(): + logits = model.extract_features(**inputs) + feats = model.final_proj(logits[0]) if version == "v1" else logits[0] + if protect < 0.5 and pitch != None and pitchf != None: + feats0 = feats.clone() + if ( + isinstance(index, type(None)) == False + and isinstance(big_npy, type(None)) == False + and index_rate != 0 + ): + npy = feats[0].cpu().numpy() + if self.is_half: + npy = npy.astype("float32") + + score, ix = index.search(npy, k=8) + weight = np.square(1 / score) + weight /= weight.sum(axis=1, keepdims=True) + npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1) + + if self.is_half: + npy = npy.astype("float16") + feats = ( + torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate + + (1 - index_rate) * feats + ) + + feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) + if protect < 0.5 and pitch != None and pitchf != None: + feats0 = F.interpolate(feats0.permute(0, 2, 1), 
scale_factor=2).permute( + 0, 2, 1 + ) + t1 = ttime() + p_len = audio0.shape[0] // self.window + if feats.shape[1] < p_len: + p_len = feats.shape[1] + if pitch != None and pitchf != None: + pitch = pitch[:, :p_len] + pitchf = pitchf[:, :p_len] + + if protect < 0.5 and pitch != None and pitchf != None: + pitchff = pitchf.clone() + pitchff[pitchf > 0] = 1 + pitchff[pitchf < 1] = protect + pitchff = pitchff.unsqueeze(-1) + feats = feats * pitchff + feats0 * (1 - pitchff) + feats = feats.to(feats0.dtype) + p_len = torch.tensor([p_len], device=self.device).long() + with torch.no_grad(): + if pitch != None and pitchf != None: + audio1 = ( + (net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]) + .data.cpu() + .float() + .numpy() + ) + else: + audio1 = ( + (net_g.infer(feats, p_len, sid)[0][0, 0]).data.cpu().float().numpy() + ) + del feats, p_len, padding_mask + if torch.cuda.is_available(): + torch.cuda.empty_cache() + t2 = ttime() + return audio1 + + def pipeline( + self, + model, + net_g, + sid, + audio, + input_audio_path, + f0_up_key, + f0_method, + file_index, + index_rate, + if_f0, + filter_radius, + tgt_sr, + resample_sr, + rms_mix_rate, + version, + protect, + hop_length, + f0_file=None, + ): + if file_index != "" and os.path.exists(file_index) == True and index_rate != 0: + try: + index = faiss.read_index(file_index) + big_npy = index.reconstruct_n(0, index.ntotal) + except Exception as error: + print(error) + index = big_npy = None + else: + index = big_npy = None + audio = signal.filtfilt(bh, ah, audio) + audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect") + opt_ts = [] + if audio_pad.shape[0] > self.t_max: + audio_sum = np.zeros_like(audio) + for i in range(self.window): + audio_sum += audio_pad[i : i - self.window] + for t in range(self.t_center, audio.shape[0], self.t_center): + opt_ts.append( + t + - self.t_query + + np.where( + np.abs(audio_sum[t - self.t_query : t + self.t_query]) + == np.abs(audio_sum[t - self.t_query : t 
+ self.t_query]).min() + )[0][0] + ) + s = 0 + audio_opt = [] + t = None + t1 = ttime() + audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect") + p_len = audio_pad.shape[0] // self.window + inp_f0 = None + if hasattr(f0_file, "name") == True: + try: + with open(f0_file.name, "r") as f: + lines = f.read().strip("\n").split("\n") + inp_f0 = [] + for line in lines: + inp_f0.append([float(i) for i in line.split(",")]) + inp_f0 = np.array(inp_f0, dtype="float32") + except Exception as error: + print(error) + sid = torch.tensor(sid, device=self.device).unsqueeze(0).long() + pitch, pitchf = None, None + if if_f0 == 1: + pitch, pitchf = self.get_f0( + input_audio_path, + audio_pad, + p_len, + f0_up_key, + f0_method, + filter_radius, + hop_length, + inp_f0, + ) + pitch = pitch[:p_len] + pitchf = pitchf[:p_len] + if self.device == "mps": + pitchf = pitchf.astype(np.float32) + pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long() + pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float() + t2 = ttime() + for t in opt_ts: + t = t // self.window * self.window + if if_f0 == 1: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[s : t + self.t_pad2 + self.window], + pitch[:, s // self.window : (t + self.t_pad2) // self.window], + pitchf[:, s // self.window : (t + self.t_pad2) // self.window], + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + else: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[s : t + self.t_pad2 + self.window], + None, + None, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + s = t + if if_f0 == 1: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[t:], + pitch[:, t // self.window :] if t is not None else pitch, + pitchf[:, t // self.window :] if t is not None else pitchf, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) 
+ else: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[t:], + None, + None, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + audio_opt = np.concatenate(audio_opt) + if rms_mix_rate != 1: + audio_opt = change_rms(audio, 16000, audio_opt, tgt_sr, rms_mix_rate) + if resample_sr >= 16000 and tgt_sr != resample_sr: + audio_opt = librosa.resample( + audio_opt, orig_sr=tgt_sr, target_sr=resample_sr + ) + audio_max = np.abs(audio_opt).max() / 0.99 + max_int16 = 32768 + if audio_max > 1: + max_int16 /= audio_max + audio_opt = (audio_opt * max_int16).astype(np.int16) + del pitch, pitchf, sid + if torch.cuda.is_available(): + torch.cuda.empty_cache() + return audio_opt diff --git a/rvc/lib/infer_pack/__init__.py b/rvc/lib/infer_pack/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/rvc/lib/infer_pack/attentions.py b/rvc/lib/infer_pack/attentions.py new file mode 100644 index 0000000000000000000000000000000000000000..ea728e241c5c50fd516af78d0e7a059b5a6f6991 --- /dev/null +++ b/rvc/lib/infer_pack/attentions.py @@ -0,0 +1,398 @@ +import math +import torch +from torch import nn +from torch.nn import functional as F + +from . 
class Encoder(nn.Module):
    """Stack of self-attention and feed-forward layers with residual + norm.

    Each of the ``n_layers`` layers applies, in order: windowed
    ``MultiHeadAttention`` -> dropout -> residual add -> ``LayerNorm`` ->
    ``FFN`` -> dropout -> residual add -> ``LayerNorm``.

    Args:
        hidden_channels: channel width of the input, output and every layer.
        filter_channels: inner width of each ``FFN``.
        n_heads: number of attention heads.
        n_layers: number of attention/FFN layers.
        kernel_size: convolution kernel size used by each ``FFN``.
        p_dropout: dropout probability (shared by all layers).
        window_size: relative-attention window passed to ``MultiHeadAttention``.
        **kwargs: accepted and ignored.
    """

    def __init__(
        self,
        hidden_channels,
        filter_channels,
        n_heads,
        n_layers,
        kernel_size=1,
        p_dropout=0.0,
        window_size=10,
        **kwargs
    ):
        super().__init__()
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.window_size = window_size

        self.drop = nn.Dropout(p_dropout)
        self.attn_layers = nn.ModuleList()
        self.norm_layers_1 = nn.ModuleList()
        self.ffn_layers = nn.ModuleList()
        self.norm_layers_2 = nn.ModuleList()

        # Sub-modules are built in the order attention, norm, FFN, norm for
        # every layer so that random parameter initialisation is consumed in
        # the same sequence as before.
        for _ in range(n_layers):
            attention = MultiHeadAttention(
                hidden_channels,
                hidden_channels,
                n_heads,
                p_dropout=p_dropout,
                window_size=window_size,
            )
            self.attn_layers.append(attention)
            self.norm_layers_1.append(LayerNorm(hidden_channels))

            feed_forward = FFN(
                hidden_channels,
                hidden_channels,
                filter_channels,
                kernel_size,
                p_dropout=p_dropout,
            )
            self.ffn_layers.append(feed_forward)
            self.norm_layers_2.append(LayerNorm(hidden_channels))

    def forward(self, x, x_mask):
        """Encode ``x`` under ``x_mask``.

        ``x`` is multiplied by ``x_mask`` on entry and on exit; the pairwise
        attention mask is the outer product of ``x_mask`` with itself.
        Presumably ``x`` is [b, h, t] and ``x_mask`` is [b, 1, t] -- TODO
        confirm against the caller.
        """
        pair_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
        hidden = x * x_mask
        for attention, norm_attn, feed_forward, norm_ffn in zip(
            self.attn_layers, self.norm_layers_1, self.ffn_layers, self.norm_layers_2
        ):
            attended = self.drop(attention(hidden, hidden, pair_mask))
            hidden = norm_attn(hidden + attended)

            transformed = self.drop(feed_forward(hidden, x_mask))
            hidden = norm_ffn(hidden + transformed)
        return hidden * x_mask
class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention over 1-D feature maps.

    Query, key, value and output projections are 1x1 ``Conv1d`` layers.
    Three optional extras are controlled by constructor arguments:

    * ``window_size``: adds learned relative-position embeddings for keys and
      values (self-attention only).
    * ``proximal_bias``: adds ``-log(1 + |i - j|)`` to the attention logits
      (self-attention only).
    * ``block_length``: masks logits further than ``block_length`` positions
      from the diagonal (self-attention only, applied when a mask is given).

    Args:
        channels: input channel count; must be divisible by ``n_heads``.
        out_channels: channel count produced by the output projection.
        n_heads: number of attention heads.
        p_dropout: dropout probability applied to the attention weights.
        window_size: relative-position window, or ``None`` to disable.
        heads_share: if true, one relative embedding table is shared by all
            heads; otherwise one table per head is created.
        block_length: local-attention band half-width, or ``None``.
        proximal_bias: enable the proximal logit bias.
        proximal_init: initialise the key projection as a copy of the query
            projection.
    """

    def __init__(
        self,
        channels,
        out_channels,
        n_heads,
        p_dropout=0.0,
        window_size=None,
        heads_share=True,
        block_length=None,
        proximal_bias=False,
        proximal_init=False,
    ):
        super().__init__()
        assert channels % n_heads == 0

        self.channels = channels
        self.out_channels = out_channels
        self.n_heads = n_heads
        self.p_dropout = p_dropout
        self.window_size = window_size
        self.heads_share = heads_share
        self.block_length = block_length
        self.proximal_bias = proximal_bias
        self.proximal_init = proximal_init
        # Attention weights of the most recent forward() call.
        self.attn = None

        # Per-head channel width.
        self.k_channels = channels // n_heads
        self.conv_q = nn.Conv1d(channels, channels, 1)
        self.conv_k = nn.Conv1d(channels, channels, 1)
        self.conv_v = nn.Conv1d(channels, channels, 1)
        self.conv_o = nn.Conv1d(channels, out_channels, 1)
        self.drop = nn.Dropout(p_dropout)

        if window_size is not None:
            # Relative embedding tables cover offsets -window_size..+window_size,
            # i.e. 2 * window_size + 1 rows, scaled by k_channels ** -0.5.
            n_heads_rel = 1 if heads_share else n_heads
            rel_stddev = self.k_channels**-0.5
            self.emb_rel_k = nn.Parameter(
                torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels)
                * rel_stddev
            )
            self.emb_rel_v = nn.Parameter(
                torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels)
                * rel_stddev
            )

        # Only q/k/v weights are Xavier-initialised; conv_o keeps its default.
        nn.init.xavier_uniform_(self.conv_q.weight)
        nn.init.xavier_uniform_(self.conv_k.weight)
        nn.init.xavier_uniform_(self.conv_v.weight)
        if proximal_init:
            with torch.no_grad():
                self.conv_k.weight.copy_(self.conv_q.weight)
                self.conv_k.bias.copy_(self.conv_q.bias)

    def forward(self, x, c, attn_mask=None):
        """Attend from ``x`` (queries) to ``c`` (keys/values).

        Stores the attention weights in ``self.attn`` and returns the output
        of the final 1x1 projection.
        """
        q = self.conv_q(x)
        k = self.conv_k(c)
        v = self.conv_v(c)

        x, self.attn = self.attention(q, k, v, mask=attn_mask)

        x = self.conv_o(x)
        return x

    def attention(self, query, key, value, mask=None):
        """Compute attention output and weights.

        ``key`` is unpacked as [b, d, t_s]; ``t_t`` is ``query.size(2)``.
        Returns ``(output, p_attn)`` where ``output`` is viewed back to
        [b, d, t_t] and ``p_attn`` holds the (post-dropout) attention weights.
        """
        # reshape [b, d, t] -> [b, n_h, t, d_k]
        b, d, t_s, t_t = (*key.size(), query.size(2))
        query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3)
        key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
        value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)

        # Scaled dot-product logits: [b, n_h, t_t, t_s].
        scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1))
        if self.window_size is not None:
            assert (
                t_s == t_t
            ), "Relative attention is only available for self-attention."
            # Add the relative-position contribution to the logits.
            key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s)
            rel_logits = self._matmul_with_relative_keys(
                query / math.sqrt(self.k_channels), key_relative_embeddings
            )
            scores_local = self._relative_position_to_absolute_position(rel_logits)
            scores = scores + scores_local
        if self.proximal_bias:
            assert t_s == t_t, "Proximal bias is only available for self-attention."
            scores = scores + self._attention_bias_proximal(t_s).to(
                device=scores.device, dtype=scores.dtype
            )
        if mask is not None:
            # Masked positions get a large negative logit rather than -inf.
            scores = scores.masked_fill(mask == 0, -1e4)
            if self.block_length is not None:
                assert (
                    t_s == t_t
                ), "Local attention is only available for self-attention."
                # Band mask: keep only entries within block_length of the diagonal.
                block_mask = (
                    torch.ones_like(scores)
                    .triu(-self.block_length)
                    .tril(self.block_length)
                )
                scores = scores.masked_fill(block_mask == 0, -1e4)
        p_attn = F.softmax(scores, dim=-1)  # [b, n_h, t_t, t_s]
        p_attn = self.drop(p_attn)
        output = torch.matmul(p_attn, value)
        if self.window_size is not None:
            # Add the relative-position contribution to the values.
            relative_weights = self._absolute_position_to_relative_position(p_attn)
            value_relative_embeddings = self._get_relative_embeddings(
                self.emb_rel_v, t_s
            )
            output = output + self._matmul_with_relative_values(
                relative_weights, value_relative_embeddings
            )
        # [b, n_h, t_t, d_k] -> [b, d, t_t]
        output = output.transpose(2, 3).contiguous().view(b, d, t_t)
        return output, p_attn

    def _matmul_with_relative_values(self, x, y):
        """
        Multiply relative attention weights by relative value embeddings.

        x: [b, h, l, m]
        y: [h or 1, m, d]
        ret: [b, h, l, d]
        """
        ret = torch.matmul(x, y.unsqueeze(0))
        return ret

    def _matmul_with_relative_keys(self, x, y):
        """
        Multiply queries by (transposed) relative key embeddings.

        x: [b, h, l, d]
        y: [h or 1, m, d]
        ret: [b, h, l, m]
        """
        ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1))
        return ret

    def _get_relative_embeddings(self, relative_embeddings, length):
        """Return the ``2 * length - 1`` embedding rows used for ``length``.

        The table has ``2 * window_size + 1`` rows. When ``length`` exceeds
        ``window_size + 1`` the table is zero-padded on both sides along the
        position axis; otherwise a centred slice is taken.
        """
        pad_length = max(length - (self.window_size + 1), 0)
        slice_start_position = max((self.window_size + 1) - length, 0)
        slice_end_position = slice_start_position + 2 * length - 1
        if pad_length > 0:
            padded_relative_embeddings = F.pad(
                relative_embeddings,
                commons.convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]),
            )
        else:
            padded_relative_embeddings = relative_embeddings
        used_relative_embeddings = padded_relative_embeddings[
            :, slice_start_position:slice_end_position
        ]
        return used_relative_embeddings

    def _relative_position_to_absolute_position(self, x):
        """
        Re-index relative-position logits to absolute positions via pad + view.

        x: [b, h, l, 2*l-1]
        ret: [b, h, l, l]
        """
        batch, heads, length, _ = x.size()

        # Append one column so each row has 2*l entries.
        x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]]))
        # Flatten the last two axes, then pad so the buffer can be viewed as
        # (l + 1) rows of 2*l - 1 entries.
        x_flat = x.view([batch, heads, length * 2 * length])
        x_flat = F.pad(
            x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [0, length - 1]])
        )

        # Reshape and keep the first l rows and the last l columns.
        x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[
            :, :, :length, length - 1 :
        ]
        return x_final

    def _absolute_position_to_relative_position(self, x):
        """
        Re-index absolute-position weights to relative positions via pad + view.

        x: [b, h, l, l]
        ret: [b, h, l, 2*l-1]
        """
        batch, heads, length, _ = x.size()
        # Pad l - 1 columns on the right so each row has 2*l - 1 entries.
        x = F.pad(
            x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]])
        )
        x_flat = x.view([batch, heads, length**2 + length * (length - 1)])
        # Prepend l zeros so the buffer can be viewed as l rows of 2*l entries.
        x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [length, 0]]))
        # Drop the first column of the reshaped buffer.
        x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:]
        return x_final

    def _attention_bias_proximal(self, length):
        """Return ``-log(1 + |i - j|)`` as a [1, 1, length, length] float32 tensor.

        The tensor is created on the default device; the caller moves it to the
        device and dtype of the logits.
        """
        r = torch.arange(length, dtype=torch.float32)
        diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1)
        return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0)
def convert_pad_shape(pad_shape):
    """Flatten per-dimension ``[before, after]`` pairs, last dimension first.

    ``pad_shape`` lists one pair per dimension in natural (first-to-last)
    order. The result is a single flat list with the pairs emitted in reverse
    dimension order, which is the form this code base passes to ``F.pad``.
    The input is not modified.
    """
    flattened = []
    for pair in reversed(pad_shape):
        flattened.extend(pair)
    return flattened
def get_timing_signal_1d(length, channels, min_timescale=1.0, max_timescale=1.0e4):
    """Build a sinusoidal timing signal of shape ``[1, channels, length]``.

    ``channels // 2`` geometrically spaced timescales between
    ``min_timescale`` and ``max_timescale`` are used; the first half of the
    channels holds sines, the second half cosines, and an odd ``channels``
    gets one trailing all-zero channel.

    Args:
        length: number of time steps.
        channels: number of output channels.
        min_timescale: shortest timescale.
        max_timescale: longest timescale.

    Returns:
        A float tensor of shape ``[1, channels, length]`` on the default device.
    """
    position = torch.arange(length, dtype=torch.float)
    num_timescales = channels // 2
    # With fewer than two timescales there is no spacing to compute; clamp the
    # denominator so channels in {1, 2, 3} no longer raise ZeroDivisionError.
    log_timescale_increment = math.log(
        float(max_timescale) / float(min_timescale)
    ) / max(num_timescales - 1, 1)
    inv_timescales = min_timescale * torch.exp(
        torch.arange(num_timescales, dtype=torch.float) * -log_timescale_increment
    )
    # [num_timescales, length]
    scaled_time = position.unsqueeze(0) * inv_timescales.unsqueeze(1)
    signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 0)
    # Add a zero row when channels is odd so the view below is valid.
    signal = F.pad(signal, [0, 0, 0, channels % 2])
    signal = signal.view(1, channels, length)
    return signal
def clip_grad_value_(parameters, clip_value, norm_type=2):
    """Clamp gradients element-wise in place and return their pre-clip norm.

    Args:
        parameters: a single tensor or an iterable of tensors; entries whose
            ``.grad`` is ``None`` are ignored.
        clip_value: gradients are clamped to ``[-clip_value, clip_value]``;
            ``None`` disables clamping (only the norm is computed).
        norm_type: order of the norm reported. ``float("inf")`` yields the
            largest absolute gradient entry.

    Returns:
        The total ``norm_type``-norm of all gradients, measured before each
        gradient is clamped, as a Python float (``0.0`` when no parameter has
        a gradient).
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    grads = [p.grad for p in parameters if p.grad is not None]
    norm_type = float(norm_type)
    if clip_value is not None:
        clip_value = float(clip_value)

    # The generic sum-of-powers formula degenerates for the infinity norm
    # (x ** inf, then total ** 0), so that case is accumulated as a maximum.
    use_max = norm_type == math.inf
    total_norm = 0.0
    for grad in grads:
        param_norm = grad.data.norm(norm_type).item()
        if use_max:
            total_norm = max(total_norm, param_norm)
        else:
            total_norm += param_norm**norm_type
        if clip_value is not None:
            grad.data.clamp_(min=-clip_value, max=clip_value)
    if not use_max:
        total_norm = total_norm ** (1.0 / norm_type)
    return total_norm
class TextEncoder768(nn.Module):
    """Prior encoder for 768-dimensional phone features.

    Phone features are projected with ``nn.Linear(768, hidden_channels)``,
    optionally summed with a 256-entry pitch embedding, passed through an
    ``attentions.Encoder`` and projected to ``2 * out_channels`` statistics.

    Args:
        out_channels: size of each of the two returned statistics (m, logs).
        hidden_channels: internal channel width.
        filter_channels: FFN width inside the attention encoder.
        n_heads: number of attention heads.
        n_layers: number of encoder layers.
        kernel_size: FFN kernel size inside the attention encoder.
        p_dropout: dropout probability (converted with ``float``).
        f0: when true, a pitch embedding table is created.
    """

    def __init__(
        self,
        out_channels,
        hidden_channels,
        filter_channels,
        n_heads,
        n_layers,
        kernel_size,
        p_dropout,
        f0=True,
    ):
        super(TextEncoder768, self).__init__()
        self.out_channels = out_channels
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = float(p_dropout)
        self.emb_phone = nn.Linear(768, hidden_channels)
        self.lrelu = nn.LeakyReLU(0.1, inplace=True)
        # Kept as an equality test (not truthiness) to match TextEncoder256.
        if f0 == True:
            self.emb_pitch = nn.Embedding(256, hidden_channels)  # pitch 256
        self.encoder = attentions.Encoder(
            hidden_channels,
            filter_channels,
            n_heads,
            n_layers,
            kernel_size,
            float(p_dropout),
        )
        self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)

    def forward(
        self, phone: torch.Tensor, pitch: Optional[torch.Tensor], lengths: torch.Tensor
    ):
        """Encode phone (and optional pitch) features.

        Args:
            phone: phone features whose last dimension is 768.
            pitch: integer pitch indices for the 256-entry embedding, or
                ``None`` to encode phone features only. Annotated as
                ``Optional`` because the body explicitly handles ``None``
                (same contract as ``TextEncoder256.forward``).
            lengths: valid length of each sequence, used to build the mask.

        Returns:
            ``(m, logs, x_mask)``: the two halves of the projected statistics
            split along the channel axis, and the sequence mask.
        """
        if pitch is None:
            x = self.emb_phone(phone)
        else:
            x = self.emb_phone(phone) + self.emb_pitch(pitch)
        x = x * math.sqrt(self.hidden_channels)  # [b, t, h]
        x = self.lrelu(x)
        x = torch.transpose(x, 1, -1)  # [b, h, t]
        x_mask = torch.unsqueeze(commons.sequence_mask(lengths, x.size(2)), 1).to(
            x.dtype
        )
        x = self.encoder(x * x_mask, x_mask)
        stats = self.proj(x) * x_mask

        m, logs = torch.split(stats, self.out_channels, dim=1)
        return m, logs, x_mask
class PosteriorEncoder(nn.Module):
    """Encode a feature sequence into a sampled latent plus its statistics.

    Pipeline: 1x1 ``Conv1d`` -> ``modules.WN`` -> 1x1 ``Conv1d`` producing
    ``2 * out_channels`` channels that are split into ``m`` and ``logs``.

    Args:
        in_channels: channels of the input sequence.
        out_channels: channels of the latent (and of ``m`` / ``logs``).
        hidden_channels: internal width of the WN stack.
        kernel_size: WN kernel size.
        dilation_rate: WN dilation rate.
        n_layers: number of WN layers.
        gin_channels: conditioning channels forwarded to WN.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        hidden_channels,
        kernel_size,
        dilation_rate,
        n_layers,
        gin_channels=0,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.n_layers = n_layers
        self.gin_channels = gin_channels

        self.pre = nn.Conv1d(in_channels, hidden_channels, 1)
        self.enc = modules.WN(
            hidden_channels,
            kernel_size,
            dilation_rate,
            n_layers,
            gin_channels=gin_channels,
        )
        self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)

    def forward(
        self, x: torch.Tensor, x_lengths: torch.Tensor, g: Optional[torch.Tensor] = None
    ):
        """Return ``(z, m, logs, x_mask)``.

        ``z`` is drawn as ``m + noise * exp(logs)`` with standard normal
        ``noise`` and is multiplied by the sequence mask built from
        ``x_lengths``.
        """
        valid = commons.sequence_mask(x_lengths, x.size(2))
        x_mask = torch.unsqueeze(valid, 1).to(x.dtype)

        hidden = self.pre(x) * x_mask
        hidden = self.enc(hidden, x_mask, g=g)
        stats = self.proj(hidden) * x_mask

        m, logs = torch.split(stats, self.out_channels, dim=1)
        noise = torch.randn_like(m)
        z = (m + noise * torch.exp(logs)) * x_mask
        return z, m, logs, x_mask

    def remove_weight_norm(self):
        """Delegate weight-norm removal to the WN stack."""
        self.enc.remove_weight_norm()

    def __prepare_scriptable__(self):
        """Strip legacy weight-norm hooks found on ``self.enc``; return self."""
        for hook in self.enc._forward_pre_hooks.values():
            from_weight_norm = hook.__module__ == "torch.nn.utils.weight_norm"
            named_weight_norm = hook.__class__.__name__ == "WeightNorm"
            if from_weight_norm and named_weight_norm:
                torch.nn.utils.remove_weight_norm(self.enc)
        return self
class SineGen(torch.nn.Module):
    """Sine-wave excitation generator driven by an F0 contour.

    SineGen(samp_rate, harmonic_num = 0,
            sine_amp = 0.1, noise_std = 0.003,
            voiced_threshold = 0,
            flag_for_pulse=False)
    samp_rate: sampling rate in Hz
    harmonic_num: number of harmonic overtones (default 0)
    sine_amp: amplitude of the sine waveform (default 0.1)
    noise_std: std of Gaussian noise added in voiced regions (default 0.003)
    voiced_threshold: F0 threshold for U/V classification (default 0)
    flag_for_pulse: accepted for signature compatibility; it is neither
        stored nor read anywhere in this class.
    """

    def __init__(
        self,
        samp_rate,
        harmonic_num=0,
        sine_amp=0.1,
        noise_std=0.003,
        voiced_threshold=0,
        flag_for_pulse=False,
    ):
        super(SineGen, self).__init__()
        self.sine_amp = sine_amp
        self.noise_std = noise_std
        self.harmonic_num = harmonic_num
        # One channel for the fundamental plus one per overtone.
        self.dim = self.harmonic_num + 1
        self.sampling_rate = samp_rate
        self.voiced_threshold = voiced_threshold

    def _f02uv(self, f0):
        """Return a voiced/unvoiced mask: 1 where ``f0 > voiced_threshold``, else 0."""
        # generate uv signal
        uv = torch.ones_like(f0)
        uv = uv * (f0 > self.voiced_threshold)
        if uv.device.type == "privateuseone":  # for DirectML
            uv = uv.float()
        return uv

    def forward(self, f0: torch.Tensor, upp: int):
        """sine_waves, uv, noise = forward(f0, upp)

        input f0: frame-level F0; the indexing below implies a 2-D
            [batch, length] tensor (a trailing axis is added here).
            f0 for unvoiced steps should be 0
        input upp: integer upsampling factor from frames to samples
        output sine_waves: tensor(batch, length * upp, dim), already mixed
            with the voiced mask and noise
        output uv: tensor(batch, length * upp, 1)
        output noise: tensor with the same shape as sine_waves

        The whole computation runs under ``torch.no_grad()``.
        """
        with torch.no_grad():
            # [batch, length] -> [batch, length, 1]
            f0 = f0[:, None].transpose(1, 2)
            f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, device=f0.device)
            # fundamental component
            f0_buf[:, :, 0] = f0[:, :, 0]
            for idx in range(self.harmonic_num):
                f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * (
                    idx + 2
                )  # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic
            # Per-frame phase increment in cycles, wrapped to [0, 1).
            rad_values = (f0_buf / float(self.sampling_rate)) % 1
            # Random initial phase per harmonic; the fundamental starts at 0.
            rand_ini = torch.rand(
                f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device
            )
            rand_ini[:, 0] = 0
            rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
            # Frame-level cumulative phase, scaled by upp and linearly
            # interpolated to sample rate; used only to detect wrap-arounds.
            tmp_over_one = torch.cumsum(rad_values, 1)
            tmp_over_one *= upp
            tmp_over_one = F.interpolate(
                tmp_over_one.transpose(2, 1),
                scale_factor=float(upp),
                mode="linear",
                align_corners=True,
            ).transpose(2, 1)
            # Sample-level phase increments (nearest-neighbour repeat).
            rad_values = F.interpolate(
                rad_values.transpose(2, 1), scale_factor=float(upp), mode="nearest"
            ).transpose(
                2, 1
            )
            tmp_over_one %= 1
            # True where the wrapped phase decreases, i.e. a cycle boundary.
            tmp_over_one_idx = (tmp_over_one[:, 1:, :] - tmp_over_one[:, :-1, :]) < 0
            # Subtract one full cycle at each boundary so the running sum below
            # stays small.
            cumsum_shift = torch.zeros_like(rad_values)
            cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0
            sine_waves = torch.sin(
                torch.cumsum(rad_values + cumsum_shift, dim=1) * 2 * torch.pi
            )
            sine_waves = sine_waves * self.sine_amp
            uv = self._f02uv(f0)
            uv = F.interpolate(
                uv.transpose(2, 1), scale_factor=float(upp), mode="nearest"
            ).transpose(2, 1)
            # Voiced: noise std is noise_std; unvoiced: sine_amp / 3.
            noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3
            noise = noise_amp * torch.randn_like(sine_waves)
            # Unvoiced regions carry noise only.
            sine_waves = sine_waves * uv + noise
        return sine_waves, uv, noise
class GeneratorNSF(torch.nn.Module):
    """Upsampling generator with a sine-based harmonic source (NSF).

    The latent is upsampled by a stack of weight-normalised transposed
    convolutions. After every upsampling stage a strided convolution of the
    harmonic source signal is added, and the outputs of ``num_kernels``
    residual blocks are averaged.

    Args:
        initial_channel: channels of the input latent.
        resblock: ``"1"`` selects ``modules.ResBlock1``; anything else selects
            ``modules.ResBlock2``.
        resblock_kernel_sizes: kernel size of each residual block per stage.
        resblock_dilation_sizes: dilations of each residual block per stage.
        upsample_rates: stride of each upsampling stage.
        upsample_initial_channel: channels after ``conv_pre``; halved at each
            stage.
        upsample_kernel_sizes: kernel size of each upsampling stage.
        gin_channels: conditioning channels; ``0`` disables conditioning.
        sr: sampling rate passed to the harmonic source module.
        is_half: forwarded to the harmonic source module.
    """

    def __init__(
        self,
        initial_channel,
        resblock,
        resblock_kernel_sizes,
        resblock_dilation_sizes,
        upsample_rates,
        upsample_initial_channel,
        upsample_kernel_sizes,
        gin_channels,
        sr,
        is_half=False,
    ):
        super(GeneratorNSF, self).__init__()
        self.num_kernels = len(resblock_kernel_sizes)
        self.num_upsamples = len(upsample_rates)

        self.f0_upsamp = torch.nn.Upsample(scale_factor=math.prod(upsample_rates))
        self.m_source = SourceModuleHnNSF(
            sampling_rate=sr, harmonic_num=0, is_half=is_half
        )
        self.noise_convs = nn.ModuleList()
        self.conv_pre = Conv1d(
            initial_channel, upsample_initial_channel, 7, 1, padding=3
        )
        resblock = modules.ResBlock1 if resblock == "1" else modules.ResBlock2

        self.ups = nn.ModuleList()
        for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
            c_cur = upsample_initial_channel // (2 ** (i + 1))
            self.ups.append(
                weight_norm(
                    ConvTranspose1d(
                        upsample_initial_channel // (2**i),
                        upsample_initial_channel // (2 ** (i + 1)),
                        k,
                        u,
                        padding=(k - u) // 2,
                    )
                )
            )
            if i + 1 < len(upsample_rates):
                # Downsample the sample-rate source by the product of the
                # remaining upsampling rates so it lines up with this stage.
                stride_f0 = math.prod(upsample_rates[i + 1 :])
                self.noise_convs.append(
                    Conv1d(
                        1,
                        c_cur,
                        kernel_size=stride_f0 * 2,
                        stride=stride_f0,
                        padding=stride_f0 // 2,
                    )
                )
            else:
                # Last stage already runs at the source rate.
                self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))

        self.resblocks = nn.ModuleList()
        for i in range(len(self.ups)):
            ch = upsample_initial_channel // (2 ** (i + 1))
            for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):
                self.resblocks.append(resblock(ch, k, d))

        self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
        self.ups.apply(init_weights)

        if gin_channels != 0:
            self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1)

        # Total upsampling factor from frames to samples.
        self.upp = math.prod(upsample_rates)

        self.lrelu_slope = modules.LRELU_SLOPE

    def forward(self, x, f0, g: Optional[torch.Tensor] = None):
        """Synthesise a waveform from latent ``x`` and pitch contour ``f0``.

        Args:
            x: input latent with ``initial_channel`` channels.
            f0: pitch contour handed to the harmonic source module.
            g: optional conditioning tensor, added after ``conv_pre``.

        Returns:
            The ``tanh``-bounded single-channel output of ``conv_post``.
        """
        har_source, noi_source, uv = self.m_source(f0, self.upp)
        har_source = har_source.transpose(1, 2)
        x = self.conv_pre(x)
        if g is not None:
            x = x + self.cond(g)
        # torch.jit.script() does not support direct indexing of torch modules,
        # so the module lists are iterated and filtered by index instead.
        for i, (ups, noise_convs) in enumerate(zip(self.ups, self.noise_convs)):
            if i < self.num_upsamples:
                x = F.leaky_relu(x, self.lrelu_slope)
                x = ups(x)
                x_source = noise_convs(har_source)
                x = x + x_source
                xs: Optional[torch.Tensor] = None
                # Indices of the residual blocks that belong to stage i.
                block_ids = [i * self.num_kernels + j for j in range(self.num_kernels)]
                for j, resblock in enumerate(self.resblocks):
                    if j in block_ids:
                        if xs is None:
                            xs = resblock(x)
                        else:
                            xs += resblock(x)
                # This assertion cannot be ignored!
                # If ignored, it will cause torch.jit.script() compilation errors
                assert isinstance(xs, torch.Tensor)
                x = xs / self.num_kernels
        x = F.leaky_relu(x)
        x = self.conv_post(x)
        x = torch.tanh(x)
        return x

    def remove_weight_norm(self):
        """Remove weight normalisation from all upsampling and residual layers.

        The upsampling layers are wrapped with
        ``torch.nn.utils.parametrizations.weight_norm``; the legacy
        ``torch.nn.utils.remove_weight_norm`` only recognises hook-based weight
        norm and raises ``ValueError`` on such layers, so parametrized layers
        are unwrapped through ``torch.nn.utils.parametrize`` instead. The
        legacy path is kept for layers that still carry the old hook.
        """
        from torch.nn.utils import parametrize

        for layer in self.ups:
            if parametrize.is_parametrized(layer, "weight"):
                parametrize.remove_parametrizations(
                    layer, "weight", leave_parametrized=True
                )
            else:
                remove_weight_norm(layer)
        for block in self.resblocks:
            block.remove_weight_norm()

    def __prepare_scriptable__(self):
        """Strip legacy weight-norm hooks before ``torch.jit.script``; return self."""
        for layer in self.ups:
            # Snapshot the hooks: removing weight norm deletes entries from
            # the dict being inspected.
            for hook in list(layer._forward_pre_hooks.values()):
                # The hook we want to remove is an instance of WeightNorm class, so
                # normally we would do `if isinstance(...)` but this class is not accessible
                # because of shadowing, so we check the module name directly.
                # https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
                if (
                    hook.__module__ == "torch.nn.utils.weight_norm"
                    and hook.__class__.__name__ == "WeightNorm"
                ):
                    torch.nn.utils.remove_weight_norm(layer)
                    break
        for layer in self.resblocks:
            # Inspect each block's own hooks (not the hooks of the enclosing
            # ModuleList), mirroring the loop over self.ups above.
            for hook in list(layer._forward_pre_hooks.values()):
                if (
                    hook.__module__ == "torch.nn.utils.weight_norm"
                    and hook.__class__.__name__ == "WeightNorm"
                ):
                    torch.nn.utils.remove_weight_norm(layer)
                    break
        return self
resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + gin_channels=gin_channels, + sr=sr, + is_half=kwargs["is_half"], + ) + self.enc_q = PosteriorEncoder( + spec_channels, + inter_channels, + hidden_channels, + 5, + 1, + 16, + gin_channels=gin_channels, + ) + self.flow = ResidualCouplingBlock( + inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels + ) + self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels) + + def remove_weight_norm(self): + self.dec.remove_weight_norm() + self.flow.remove_weight_norm() + self.enc_q.remove_weight_norm() + + def __prepare_scriptable__(self): + for hook in self.dec._forward_pre_hooks.values(): + # The hook we want to remove is an instance of WeightNorm class, so + # normally we would do `if isinstance(...)` but this class is not accessible + # because of shadowing, so we check the module name directly. + # https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3 + if ( + hook.__module__ == "torch.nn.utils.weight_norm" + and hook.__class__.__name__ == "WeightNorm" + ): + torch.nn.utils.remove_weight_norm(self.dec) + for hook in self.flow._forward_pre_hooks.values(): + if ( + hook.__module__ == "torch.nn.utils.weight_norm" + and hook.__class__.__name__ == "WeightNorm" + ): + torch.nn.utils.remove_weight_norm(self.flow) + if hasattr(self, "enc_q"): + for hook in self.enc_q._forward_pre_hooks.values(): + if ( + hook.__module__ == "torch.nn.utils.weight_norm" + and hook.__class__.__name__ == "WeightNorm" + ): + torch.nn.utils.remove_weight_norm(self.enc_q) + return self + + @torch.jit.ignore + def forward( + self, + phone: torch.Tensor, + phone_lengths: torch.Tensor, + pitch: torch.Tensor, + pitchf: torch.Tensor, + y: torch.Tensor, + y_lengths: torch.Tensor, + ds: Optional[torch.Tensor] = None, + ): # 这里ds是id,[bs,1] + # print(1,pitch.shape)#[bs,t] + g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 
class SynthesizerTrnMs768NSFsid(nn.Module):
    """Synthesizer built from ``TextEncoder768``, a flow, a posterior encoder
    and an NSF generator, conditioned on a speaker embedding.

    ``sr`` may be an integer sampling rate or one of the string keys of
    ``sr2sr`` (e.g. ``"40k"``), which is translated to its integer value.
    ``kwargs`` must contain ``is_half``; it is forwarded to ``GeneratorNSF``.
    """

    def __init__(
        self,
        spec_channels,
        segment_size,
        inter_channels,
        hidden_channels,
        filter_channels,
        n_heads,
        n_layers,
        kernel_size,
        p_dropout,
        resblock,
        resblock_kernel_sizes,
        resblock_dilation_sizes,
        upsample_rates,
        upsample_initial_channel,
        upsample_kernel_sizes,
        spk_embed_dim,
        gin_channels,
        sr,
        **kwargs
    ):
        super(SynthesizerTrnMs768NSFsid, self).__init__()
        if isinstance(sr, str):
            # Translate names such as "40k" to Hz, as the 256 variant does.
            # The original assigned ``sr = sr`` here, leaving a string that was
            # then handed to the generator as its sampling rate.
            sr = sr2sr[sr]
        self.spec_channels = spec_channels
        self.inter_channels = inter_channels
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = float(p_dropout)
        self.resblock = resblock
        self.resblock_kernel_sizes = resblock_kernel_sizes
        self.resblock_dilation_sizes = resblock_dilation_sizes
        self.upsample_rates = upsample_rates
        self.upsample_initial_channel = upsample_initial_channel
        self.upsample_kernel_sizes = upsample_kernel_sizes
        self.segment_size = segment_size
        self.gin_channels = gin_channels
        self.spk_embed_dim = spk_embed_dim
        self.enc_p = TextEncoder768(
            inter_channels,
            hidden_channels,
            filter_channels,
            n_heads,
            n_layers,
            kernel_size,
            float(p_dropout),
        )
        self.dec = GeneratorNSF(
            inter_channels,
            resblock,
            resblock_kernel_sizes,
            resblock_dilation_sizes,
            upsample_rates,
            upsample_initial_channel,
            upsample_kernel_sizes,
            gin_channels=gin_channels,
            sr=sr,
            is_half=kwargs["is_half"],
        )
        self.enc_q = PosteriorEncoder(
            spec_channels,
            inter_channels,
            hidden_channels,
            5,
            1,
            16,
            gin_channels=gin_channels,
        )
        self.flow = ResidualCouplingBlock(
            inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
        )
        self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)

    def remove_weight_norm(self):
        """Strip weight normalisation from the decoder, flow and posterior encoder."""
        self.dec.remove_weight_norm()
        self.flow.remove_weight_norm()
        self.enc_q.remove_weight_norm()

    def __prepare_scriptable__(self):
        """Remove legacy weight-norm hooks found directly on ``dec``, ``flow``
        and (if present) ``enc_q`` before TorchScript compilation.

        Returns ``self``.
        """
        for hook in self.dec._forward_pre_hooks.values():
            # The hook we want to remove is an instance of the WeightNorm
            # class.  Normally we would use isinstance(), but the class is not
            # reachable because of shadowing, so compare the names directly.
            # https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
            if (
                hook.__module__ == "torch.nn.utils.weight_norm"
                and hook.__class__.__name__ == "WeightNorm"
            ):
                torch.nn.utils.remove_weight_norm(self.dec)
        for hook in self.flow._forward_pre_hooks.values():
            if (
                hook.__module__ == "torch.nn.utils.weight_norm"
                and hook.__class__.__name__ == "WeightNorm"
            ):
                torch.nn.utils.remove_weight_norm(self.flow)
        if hasattr(self, "enc_q"):
            for hook in self.enc_q._forward_pre_hooks.values():
                if (
                    hook.__module__ == "torch.nn.utils.weight_norm"
                    and hook.__class__.__name__ == "WeightNorm"
                ):
                    torch.nn.utils.remove_weight_norm(self.enc_q)
        return self

    @torch.jit.ignore
    def forward(
        self, phone, phone_lengths, pitch, pitchf, y, y_lengths, ds
    ):  # original author's note: ds is the speaker id, [bs, 1]
        """Training pass.

        Returns ``(o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q,
        logs_q))`` where ``o`` is the decoder output for a random segment of
        length ``segment_size`` taken from the posterior latent ``z``.
        """
        # Speaker embedding with a trailing singleton axis; the original note
        # says that axis is time and is broadcast.
        g = self.emb_g(ds).unsqueeze(-1)
        m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
        z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
        z_p = self.flow(z, y_mask, g=g)
        z_slice, ids_slice = commons.rand_slice_segments(
            z, y_lengths, self.segment_size
        )
        # Cut the same segment out of the pitch curve that was cut out of z.
        pitchf = commons.slice_segments2(pitchf, ids_slice, self.segment_size)
        o = self.dec(z_slice, pitchf, g=g)
        return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)

    @torch.jit.export
    def infer(
        self,
        phone: torch.Tensor,
        phone_lengths: torch.Tensor,
        pitch: torch.Tensor,
        nsff0: torch.Tensor,
        sid: torch.Tensor,
        rate: Optional[torch.Tensor] = None,
    ):
        """Inference pass.

        Samples ``z_p`` from the prior (noise scaled by 0.66666), optionally
        drops the leading ``1 - rate`` fraction of frames, inverts the flow and
        decodes.  Returns ``(o, x_mask, (z, z_p, m_p, logs_p))``.
        """
        g = self.emb_g(sid).unsqueeze(-1)
        m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
        z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
        if rate is not None:
            # Keep only the trailing ``rate`` fraction of the frames.
            head = int(z_p.shape[2] * (1.0 - rate.item()))
            z_p = z_p[:, :, head:]
            x_mask = x_mask[:, :, head:]
            nsff0 = nsff0[:, head:]
        z = self.flow(z_p, x_mask, g=g, reverse=True)
        o = self.dec(z * x_mask, nsff0, g=g)
        return o, x_mask, (z, z_p, m_p, logs_p)
nn.Embedding(self.spk_embed_dim, gin_channels) + + def remove_weight_norm(self): + self.dec.remove_weight_norm() + self.flow.remove_weight_norm() + self.enc_q.remove_weight_norm() + + def __prepare_scriptable__(self): + for hook in self.dec._forward_pre_hooks.values(): + # The hook we want to remove is an instance of WeightNorm class, so + # normally we would do `if isinstance(...)` but this class is not accessible + # because of shadowing, so we check the module name directly. + # https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3 + if ( + hook.__module__ == "torch.nn.utils.weight_norm" + and hook.__class__.__name__ == "WeightNorm" + ): + torch.nn.utils.remove_weight_norm(self.dec) + for hook in self.flow._forward_pre_hooks.values(): + if ( + hook.__module__ == "torch.nn.utils.weight_norm" + and hook.__class__.__name__ == "WeightNorm" + ): + torch.nn.utils.remove_weight_norm(self.flow) + if hasattr(self, "enc_q"): + for hook in self.enc_q._forward_pre_hooks.values(): + if ( + hook.__module__ == "torch.nn.utils.weight_norm" + and hook.__class__.__name__ == "WeightNorm" + ): + torch.nn.utils.remove_weight_norm(self.enc_q) + return self + + @torch.jit.ignore + def forward(self, phone, phone_lengths, y, y_lengths, ds): # 这里ds是id,[bs,1] + g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t,广播的 + m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths) + z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g) + z_p = self.flow(z, y_mask, g=g) + z_slice, ids_slice = commons.rand_slice_segments( + z, y_lengths, self.segment_size + ) + o = self.dec(z_slice, g=g) + return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q) + + @torch.jit.export + def infer( + self, + phone: torch.Tensor, + phone_lengths: torch.Tensor, + sid: torch.Tensor, + rate: Optional[torch.Tensor] = None, + ): + g = self.emb_g(sid).unsqueeze(-1) + m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths) + z_p = (m_p + 
torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask + if rate is not None: + head = int(z_p.shape[2] * (1.0 - rate.item())) + z_p = z_p[:, :, head:] + x_mask = x_mask[:, :, head:] + z = self.flow(z_p, x_mask, g=g, reverse=True) + o = self.dec(z * x_mask, g=g) + return o, x_mask, (z, z_p, m_p, logs_p) + + +class SynthesizerTrnMs768NSFsid_nono(nn.Module): + def __init__( + self, + spec_channels, + segment_size, + inter_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + spk_embed_dim, + gin_channels, + sr=None, + **kwargs + ): + super(SynthesizerTrnMs768NSFsid_nono, self).__init__() + self.spec_channels = spec_channels + self.inter_channels = inter_channels + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = float(p_dropout) + self.resblock = resblock + self.resblock_kernel_sizes = resblock_kernel_sizes + self.resblock_dilation_sizes = resblock_dilation_sizes + self.upsample_rates = upsample_rates + self.upsample_initial_channel = upsample_initial_channel + self.upsample_kernel_sizes = upsample_kernel_sizes + self.segment_size = segment_size + self.gin_channels = gin_channels + # self.hop_length = hop_length# + self.spk_embed_dim = spk_embed_dim + self.enc_p = TextEncoder768( + inter_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + float(p_dropout), + f0=False, + ) + self.dec = Generator( + inter_channels, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + gin_channels=gin_channels, + ) + self.enc_q = PosteriorEncoder( + spec_channels, + inter_channels, + hidden_channels, + 5, + 1, + 16, + gin_channels=gin_channels, + ) + 
self.flow = ResidualCouplingBlock( + inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels + ) + self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels) + + def remove_weight_norm(self): + self.dec.remove_weight_norm() + self.flow.remove_weight_norm() + self.enc_q.remove_weight_norm() + + def __prepare_scriptable__(self): + for hook in self.dec._forward_pre_hooks.values(): + # The hook we want to remove is an instance of WeightNorm class, so + # normally we would do `if isinstance(...)` but this class is not accessible + # because of shadowing, so we check the module name directly. + # https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3 + if ( + hook.__module__ == "torch.nn.utils.weight_norm" + and hook.__class__.__name__ == "WeightNorm" + ): + torch.nn.utils.remove_weight_norm(self.dec) + for hook in self.flow._forward_pre_hooks.values(): + if ( + hook.__module__ == "torch.nn.utils.weight_norm" + and hook.__class__.__name__ == "WeightNorm" + ): + torch.nn.utils.remove_weight_norm(self.flow) + if hasattr(self, "enc_q"): + for hook in self.enc_q._forward_pre_hooks.values(): + if ( + hook.__module__ == "torch.nn.utils.weight_norm" + and hook.__class__.__name__ == "WeightNorm" + ): + torch.nn.utils.remove_weight_norm(self.enc_q) + return self + + @torch.jit.ignore + def forward(self, phone, phone_lengths, y, y_lengths, ds): # 这里ds是id,[bs,1] + g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t,广播的 + m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths) + z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g) + z_p = self.flow(z, y_mask, g=g) + z_slice, ids_slice = commons.rand_slice_segments( + z, y_lengths, self.segment_size + ) + o = self.dec(z_slice, g=g) + return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q) + + @torch.jit.export + def infer( + self, + phone: torch.Tensor, + phone_lengths: torch.Tensor, + sid: torch.Tensor, + rate: Optional[torch.Tensor] = None, 
class MultiPeriodDiscriminator(torch.nn.Module):
    """Bundle of one ``DiscriminatorS`` followed by one ``DiscriminatorP`` per
    period in ``[2, 3, 5, 7, 11, 17]``."""

    def __init__(self, use_spectral_norm=False):
        super(MultiPeriodDiscriminator, self).__init__()
        sub_discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)]
        for period in (2, 3, 5, 7, 11, 17):
            sub_discs.append(
                DiscriminatorP(period, use_spectral_norm=use_spectral_norm)
            )
        self.discriminators = nn.ModuleList(sub_discs)

    def forward(self, y, y_hat):
        """Run every sub-discriminator on ``y`` and then on ``y_hat``.

        Returns four lists, each with one entry per sub-discriminator:
        outputs for ``y``, outputs for ``y_hat``, feature maps for ``y`` and
        feature maps for ``y_hat``.
        """
        real_outs, fake_outs = [], []
        real_fmaps, fake_fmaps = [], []
        for disc in self.discriminators:
            out_r, fmap_r = disc(y)
            out_g, fmap_g = disc(y_hat)
            real_outs.append(out_r)
            fake_outs.append(out_g)
            real_fmaps.append(fmap_r)
            fake_fmaps.append(fmap_g)

        return real_outs, fake_outs, real_fmaps, fake_fmaps
class DiscriminatorS(torch.nn.Module):
    """Stack of six normalised 1-D convolutions plus a one-channel output
    convolution; returns the flattened output and every intermediate map."""

    def __init__(self, use_spectral_norm=False):
        super(DiscriminatorS, self).__init__()
        # Equality (not truthiness) is kept on purpose so that non-bool
        # arguments select the same normalisation as before.
        if use_spectral_norm == False:  # noqa: E712
            norm_f = weight_norm
        else:
            norm_f = spectral_norm

        # (in_channels, out_channels, kernel_size, stride, groups, padding)
        layer_specs = (
            (1, 16, 15, 1, 1, 7),
            (16, 64, 41, 4, 4, 20),
            (64, 256, 41, 4, 16, 20),
            (256, 1024, 41, 4, 64, 20),
            (1024, 1024, 41, 4, 256, 20),
            (1024, 1024, 5, 1, 1, 2),
        )
        self.convs = nn.ModuleList(
            [
                norm_f(Conv1d(c_in, c_out, k, s, groups=g, padding=p))
                for c_in, c_out, k, s, g, p in layer_specs
            ]
        )
        self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1))

    def forward(self, x):
        """Return ``(flattened_output, fmap)``.

        ``fmap`` holds the leaky-ReLU activation after each layer in ``convs``
        followed by the raw output of ``conv_post``.
        """
        fmap = []

        for conv in self.convs:
            x = F.leaky_relu(conv(x), modules.LRELU_SLOPE)
            fmap.append(x)
        x = self.conv_post(x)
        fmap.append(x)

        return torch.flatten(x, 1, -1), fmap
) + self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) + + def forward(self, x): + fmap = [] + + # 1d to 2d + b, c, t = x.shape + if t % self.period != 0: # pad first + n_pad = self.period - (t % self.period) + if has_xpu and x.dtype == torch.bfloat16: + x = F.pad(x.to(dtype=torch.float16), (0, n_pad), "reflect").to( + dtype=torch.bfloat16 + ) + else: + x = F.pad(x, (0, n_pad), "reflect") + t = t + n_pad + x = x.view(b, c, t // self.period, self.period) + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, modules.LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap diff --git a/rvc/lib/infer_pack/modules.py b/rvc/lib/infer_pack/modules.py new file mode 100644 index 0000000000000000000000000000000000000000..7c18733a9e4124821dc0b2f934f262e56098626a --- /dev/null +++ b/rvc/lib/infer_pack/modules.py @@ -0,0 +1,521 @@ +import math +import torch +from torch import nn +from torch.nn import functional as F + +from torch.nn import Conv1d +from torch.nn.utils import remove_weight_norm +from torch.nn.utils.parametrizations import weight_norm + + +from . 
class ConvReluNorm(nn.Module):
    """Residual stack of ``n_layers`` Conv1d -> LayerNorm -> ReLU -> Dropout
    stages followed by a zero-initialised 1x1 projection.

    Args:
        in_channels: Channels of the input (and of the residual connection).
        hidden_channels: Channels used inside the stack.
        out_channels: Channels produced by the final projection.
        kernel_size: Kernel size of every convolution; padding is
            ``kernel_size // 2``.
        n_layers: Number of conv/norm stages; must be greater than 1.
        p_dropout: Dropout probability applied after each ReLU.

    Raises:
        AssertionError: If ``n_layers`` is not greater than 1.
    """

    def __init__(
        self,
        in_channels,
        hidden_channels,
        out_channels,
        kernel_size,
        n_layers,
        p_dropout,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.n_layers = n_layers
        self.p_dropout = p_dropout
        # The message now states the condition that is actually enforced
        # (it previously claimed "larger than 0").
        assert n_layers > 1, "Number of layers should be larger than 1."

        self.conv_layers = nn.ModuleList()
        self.norm_layers = nn.ModuleList()
        self.conv_layers.append(
            nn.Conv1d(
                in_channels, hidden_channels, kernel_size, padding=kernel_size // 2
            )
        )
        self.norm_layers.append(LayerNorm(hidden_channels))
        self.relu_drop = nn.Sequential(nn.ReLU(), nn.Dropout(p_dropout))
        for _ in range(n_layers - 1):
            self.conv_layers.append(
                nn.Conv1d(
                    hidden_channels,
                    hidden_channels,
                    kernel_size,
                    padding=kernel_size // 2,
                )
            )
            self.norm_layers.append(LayerNorm(hidden_channels))
        self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
        # Zero-initialised projection: the block starts out as ``x * x_mask``.
        self.proj.weight.data.zero_()
        self.proj.bias.data.zero_()

    def forward(self, x, x_mask):
        """Apply the stack to ``x`` (masked by ``x_mask`` before every
        convolution), add the projection to the original input and mask the
        result."""
        x_org = x
        for i in range(self.n_layers):
            x = self.conv_layers[i](x * x_mask)
            x = self.norm_layers[i](x)
            x = self.relu_drop(x)
        x = x_org + self.proj(x)
        return x * x_mask
channels + self.kernel_size = kernel_size + self.n_layers = n_layers + self.p_dropout = p_dropout + + self.drop = nn.Dropout(p_dropout) + self.convs_sep = nn.ModuleList() + self.convs_1x1 = nn.ModuleList() + self.norms_1 = nn.ModuleList() + self.norms_2 = nn.ModuleList() + for i in range(n_layers): + dilation = kernel_size**i + padding = (kernel_size * dilation - dilation) // 2 + self.convs_sep.append( + nn.Conv1d( + channels, + channels, + kernel_size, + groups=channels, + dilation=dilation, + padding=padding, + ) + ) + self.convs_1x1.append(nn.Conv1d(channels, channels, 1)) + self.norms_1.append(LayerNorm(channels)) + self.norms_2.append(LayerNorm(channels)) + + def forward(self, x, x_mask, g=None): + if g is not None: + x = x + g + for i in range(self.n_layers): + y = self.convs_sep[i](x * x_mask) + y = self.norms_1[i](y) + y = F.gelu(y) + y = self.convs_1x1[i](y) + y = self.norms_2[i](y) + y = F.gelu(y) + y = self.drop(y) + x = x + y + return x * x_mask + + +class WN(torch.nn.Module): + def __init__( + self, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=0, + p_dropout=0, + ): + super(WN, self).__init__() + assert kernel_size % 2 == 1 + self.hidden_channels = hidden_channels + self.kernel_size = (kernel_size,) + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + self.p_dropout = p_dropout + + self.in_layers = torch.nn.ModuleList() + self.res_skip_layers = torch.nn.ModuleList() + self.drop = nn.Dropout(p_dropout) + + if gin_channels != 0: + cond_layer = torch.nn.Conv1d( + gin_channels, 2 * hidden_channels * n_layers, 1 + ) + self.cond_layer = torch.nn.utils.parametrizations.weight_norm( + cond_layer, name="weight" + ) + + for i in range(n_layers): + dilation = dilation_rate**i + padding = int((kernel_size * dilation - dilation) / 2) + in_layer = torch.nn.Conv1d( + hidden_channels, + 2 * hidden_channels, + kernel_size, + dilation=dilation, + padding=padding, + ) + in_layer = 
torch.nn.utils.parametrizations.weight_norm( + in_layer, name="weight" + ) + self.in_layers.append(in_layer) + if i < n_layers - 1: + res_skip_channels = 2 * hidden_channels + else: + res_skip_channels = hidden_channels + + res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1) + res_skip_layer = torch.nn.utils.parametrizations.weight_norm( + res_skip_layer, name="weight" + ) + self.res_skip_layers.append(res_skip_layer) + + def forward(self, x, x_mask, g=None, **kwargs): + output = torch.zeros_like(x) + n_channels_tensor = torch.IntTensor([self.hidden_channels]) + + if g is not None: + g = self.cond_layer(g) + + for i in range(self.n_layers): + x_in = self.in_layers[i](x) + if g is not None: + cond_offset = i * 2 * self.hidden_channels + g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :] + else: + g_l = torch.zeros_like(x_in) + + acts = commons.fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor) + acts = self.drop(acts) + + res_skip_acts = self.res_skip_layers[i](acts) + if i < self.n_layers - 1: + res_acts = res_skip_acts[:, : self.hidden_channels, :] + x = (x + res_acts) * x_mask + output = output + res_skip_acts[:, self.hidden_channels :, :] + else: + output = output + res_skip_acts + return output * x_mask + + def remove_weight_norm(self): + if self.gin_channels != 0: + torch.nn.utils.remove_weight_norm(self.cond_layer) + for l in self.in_layers: + torch.nn.utils.remove_weight_norm(l) + for l in self.res_skip_layers: + torch.nn.utils.remove_weight_norm(l) + + +class ResBlock1(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)): + super(ResBlock1, self).__init__() + self.convs1 = nn.ModuleList( + [ + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]), 
+ ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]), + ) + ), + ] + ) + self.convs1.apply(init_weights) + + self.convs2 = nn.ModuleList( + [ + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=1, + padding=get_padding(kernel_size, 1), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=1, + padding=get_padding(kernel_size, 1), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=1, + padding=get_padding(kernel_size, 1), + ) + ), + ] + ) + self.convs2.apply(init_weights) + + def forward(self, x, x_mask=None): + for c1, c2 in zip(self.convs1, self.convs2): + xt = F.leaky_relu(x, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c1(xt) + xt = F.leaky_relu(xt, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c2(xt) + x = xt + x + if x_mask is not None: + x = x * x_mask + return x + + def remove_weight_norm(self): + for l in self.convs1: + remove_weight_norm(l) + for l in self.convs2: + remove_weight_norm(l) + + +class ResBlock2(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3)): + super(ResBlock2, self).__init__() + self.convs = nn.ModuleList( + [ + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]), + ) + ), + ] + ) + self.convs.apply(init_weights) + + def forward(self, x, x_mask=None): + for c in self.convs: + xt = F.leaky_relu(x, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c(xt) + x = xt + x + if x_mask is not None: + x = x * x_mask + return x + + def remove_weight_norm(self): + for l in self.convs: + remove_weight_norm(l) + + +class Log(nn.Module): + def 
class Flip(nn.Module):
    """Flow step that reverses the order of dimension 1 of its input."""

    def forward(self, x, *args, reverse=False, **kwargs):
        """Flip ``x`` along dimension 1.

        With ``reverse=False`` returns ``(flipped, logdet)`` where ``logdet``
        is a zero vector with one entry per batch element, using the dtype and
        device of ``x``.  With ``reverse=True`` returns only the flipped
        tensor.  Extra positional and keyword arguments are ignored.
        """
        flipped = torch.flip(x, [1])
        if reverse:
            return flipped
        # Reordering channels does not change volume, hence a zero log-det.
        logdet = torch.zeros(flipped.size(0)).to(
            dtype=flipped.dtype, device=flipped.device
        )
        return flipped, logdet
class ConvFlow(nn.Module):
    """Coupling flow whose second half is warped by a rational-quadratic spline.

    The input is split in two along dimension 1. The first half passes through
    ``pre`` -> ``DDSConv`` -> ``proj`` to produce spline parameters, and the
    second half is transformed with those parameters.

    Args:
        in_channels: Channel count of the input; each half has ``in_channels // 2``.
        filter_channels: Hidden width of the parameter network.
        kernel_size: Kernel size handed to ``DDSConv``.
        n_layers: Layer count handed to ``DDSConv``.
        num_bins: Number of spline bins.
        tail_bound: Spline interval bound passed to the transform.
    """

    def __init__(
        self,
        in_channels,
        filter_channels,
        kernel_size,
        n_layers,
        num_bins=10,
        tail_bound=5.0,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.filter_channels = filter_channels
        self.kernel_size = kernel_size
        self.n_layers = n_layers
        self.num_bins = num_bins
        self.tail_bound = tail_bound
        self.half_channels = in_channels // 2

        # Per channel: num_bins widths + num_bins heights + (num_bins - 1) derivatives.
        params_per_channel = num_bins * 3 - 1

        self.pre = nn.Conv1d(self.half_channels, filter_channels, 1)
        self.convs = DDSConv(filter_channels, kernel_size, n_layers, p_dropout=0.0)
        self.proj = nn.Conv1d(
            filter_channels, self.half_channels * params_per_channel, 1
        )
        # Zero-initialised projection: every spline parameter starts at 0.
        self.proj.weight.data.zero_()
        self.proj.bias.data.zero_()

    def forward(self, x, x_mask, g=None, reverse=False):
        """Transform ``x``; returns ``(x, logdet)`` or only ``x`` when ``reverse``."""
        halves = [self.half_channels] * 2
        x0, x1 = torch.split(x, halves, 1)

        params = self.pre(x0)
        params = self.convs(params, x_mask, g=g)
        params = self.proj(params) * x_mask

        # (b, c * P, t) -> (b, c, t, P): spline parameters on the last axis.
        batch, channels, frames = x0.shape
        params = params.reshape(batch, channels, -1, frames).permute(0, 1, 3, 2)

        scale = math.sqrt(self.filter_channels)
        bins = self.num_bins
        unnormalized_widths = params[..., :bins] / scale
        unnormalized_heights = params[..., bins : 2 * bins] / scale
        unnormalized_derivatives = params[..., 2 * bins :]

        x1, logabsdet = piecewise_rational_quadratic_transform(
            x1,
            unnormalized_widths,
            unnormalized_heights,
            unnormalized_derivatives,
            inverse=reverse,
            tails="linear",
            tail_bound=self.tail_bound,
        )

        x = torch.cat([x0, x1], 1) * x_mask
        logdet = torch.sum(logabsdet * x_mask, [1, 2])
        if reverse:
            return x
        return x, logdet
class HarvestF0Predictor(F0Predictor):
    """F0 predictor that runs ``pyworld.harvest`` followed by ``pyworld.stonemask``.

    The raw track is resampled to the requested number of frames and its
    non-positive frames are then filled in by :meth:`interpolate_f0`.

    Args:
        hop_length: Hop size in samples; also sets the analysis frame period.
        f0_min: Lower F0 bound handed to Harvest (``f0_floor``).
        f0_max: Upper F0 bound handed to Harvest (``f0_ceil``).
        sampling_rate: Sampling rate of the waveforms passed to this object.
    """

    def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
        self.hop_length = hop_length
        self.f0_min = f0_min
        self.f0_max = f0_max
        self.sampling_rate = sampling_rate

    def interpolate_f0(self, f0):
        """Fill the non-positive frames of ``f0`` and flag which frames were voiced.

        Gaps between two voiced frames are filled linearly, a leading gap takes
        the value of the first voiced frame after it, and a trailing gap
        repeats the last voiced value.

        Returns:
            ``(filled, vuv)``: 1-D arrays; ``vuv`` is 1.0 where the input frame
            was positive and 0.0 elsewhere.
        """
        data = np.reshape(f0, (f0.size, 1))

        # Voicing flags are taken before any frame is overwritten.
        vuv_vector = np.zeros((data.size, 1), dtype=np.float32)
        vuv_vector[data > 0.0] = 1.0
        vuv_vector[data <= 0.0] = 0.0

        # Deliberate alias, not a copy: once a gap has been filled, the outer
        # loop must see those frames as positive so it does not re-enter the gap.
        ip_data = data

        frame_number = data.size
        last_value = 0.0
        for i in range(frame_number):
            if data[i] <= 0.0:
                # Find the next voiced frame; j keeps its initial value when
                # the search range is empty.
                j = i + 1
                for j in range(i + 1, frame_number):
                    if data[j] > 0.0:
                        break
                if j < frame_number - 1:
                    if last_value > 0.0:
                        step = (data[j] - data[i - 1]) / float(j - i)
                        for k in range(i, j):
                            ip_data[k] = data[i - 1] + step * (k - i + 1)
                    else:
                        for k in range(i, j):
                            ip_data[k] = data[j]
                else:
                    for k in range(i, frame_number):
                        ip_data[k] = last_value
            else:
                ip_data[i] = data[i]
                last_value = data[i]

        return ip_data[:, 0], vuv_vector[:, 0]

    def resize_f0(self, x, target_len):
        """Linearly resample ``x`` to ``target_len`` frames.

        Values below 0.001 are turned into NaN before interpolation, and any
        NaN in the result is replaced with 0.
        """
        source = np.array(x)
        source[source < 0.001] = np.nan
        target = np.interp(
            np.arange(0, len(source) * target_len, len(source)) / target_len,
            np.arange(0, len(source)),
            source,
        )
        res = np.nan_to_num(target)
        return res

    def _harvest(self, wav, p_len):
        """Run Harvest + StoneMask on ``wav`` and resize the track to ``p_len`` frames."""
        if p_len is None:
            p_len = wav.shape[0] // self.hop_length
        signal = wav.astype(np.double)
        f0, t = pyworld.harvest(
            signal,
            fs=self.sampling_rate,
            f0_floor=self.f0_min,
            f0_ceil=self.f0_max,
            frame_period=1000 * self.hop_length / self.sampling_rate,
        )
        # The sampling rate lives in ``self.sampling_rate``; ``self.fs`` is never
        # assigned, so the previous ``compute_f0`` raised AttributeError here.
        f0 = pyworld.stonemask(signal, f0, t, self.sampling_rate)
        return self.resize_f0(f0, p_len)

    def compute_f0(self, wav, p_len=None):
        """Return the interpolated F0 track of ``wav``.

        ``p_len`` defaults to ``wav.shape[0] // hop_length`` frames.
        """
        return self.interpolate_f0(self._harvest(wav, p_len))[0]

    def compute_f0_uv(self, wav, p_len=None):
        """Return ``(f0, vuv)`` for ``wav``; see :meth:`interpolate_f0`."""
        return self.interpolate_f0(self._harvest(wav, p_len))
class PMF0Predictor(F0Predictor):
    """F0 predictor built on ``parselmouth``'s ``Sound.to_pitch_ac``."""

    def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
        self.hop_length = hop_length
        self.f0_min = f0_min
        self.f0_max = f0_max
        self.sampling_rate = sampling_rate

    def interpolate_f0(self, f0):
        """Fill non-positive frames of ``f0``; return ``(filled, vuv)`` as 1-D arrays.

        ``vuv`` is 1.0 where the input frame was positive and 0.0 elsewhere.
        """
        frames = np.reshape(f0, (f0.size, 1))
        total = frames.size

        voiced = np.zeros((total, 1), dtype=np.float32)
        voiced[frames > 0.0] = 1.0
        voiced[frames <= 0.0] = 0.0

        # Same object as ``frames`` on purpose: frames filled below must look
        # voiced to the following iterations.
        filled = frames

        previous = 0.0
        for idx in range(total):
            if not frames[idx] <= 0.0:
                filled[idx] = frames[idx]  # there may be an unnecessary copy here
                previous = frames[idx]
                continue

            # Locate the next voiced frame (nxt keeps idx + 1 if the range is empty).
            nxt = idx + 1
            for nxt in range(idx + 1, total):
                if frames[nxt] > 0.0:
                    break

            if nxt >= total - 1:
                # Trailing gap: repeat the last voiced value.
                for pos in range(idx, total):
                    filled[pos] = previous
            elif previous > 0.0:
                # Interior gap: straight line between the neighbouring voiced frames.
                slope = (frames[nxt] - frames[idx - 1]) / float(nxt - idx)
                for pos in range(idx, nxt):
                    filled[pos] = frames[idx - 1] + slope * (pos - idx + 1)
            else:
                # Leading gap: copy the first voiced value backwards.
                for pos in range(idx, nxt):
                    filled[pos] = frames[nxt]

        return filled[:, 0], voiced[:, 0]

    def _pitch_track(self, wav, p_len):
        """Extract, pad and interpolate the pitch of ``wav``; returns ``(f0, uv)``."""
        x = wav
        expected = x.shape[0] // self.hop_length
        if p_len is None:
            p_len = expected
        else:
            assert abs(p_len - expected) < 4, "pad length error"

        time_step = self.hop_length / self.sampling_rate * 1000
        sound = parselmouth.Sound(x, self.sampling_rate)
        pitch = sound.to_pitch_ac(
            time_step=time_step / 1000,
            voicing_threshold=0.6,
            pitch_floor=self.f0_min,
            pitch_ceiling=self.f0_max,
        )
        f0 = pitch.selected_array["frequency"]

        # Zero-pad on both sides until the track has p_len frames.
        missing = p_len - len(f0)
        pad_size = (missing + 1) // 2
        if pad_size > 0 or missing - pad_size > 0:
            f0 = np.pad(f0, [[pad_size, missing - pad_size]], mode="constant")
        return self.interpolate_f0(f0)

    def compute_f0(self, wav, p_len=None):
        """Return the interpolated F0 track of ``wav``."""
        f0, _ = self._pitch_track(wav, p_len)
        return f0

    def compute_f0_uv(self, wav, p_len=None):
        """Return ``(f0, uv)`` for ``wav``; ``uv`` flags the originally voiced frames."""
        f0, uv = self._pitch_track(wav, p_len)
        return f0, uv
def searchsorted(bin_locations, inputs, eps=1e-6):
    """Return, for each element of ``inputs``, the index of the bin containing it.

    ``bin_locations`` holds the bin edges on its last axis. The right-most
    edge is treated as ``edge + eps`` so an input equal to it still falls in
    the last bin.

    The widened edges are built in a new tensor; ``bin_locations`` itself is
    left untouched (an in-place ``+=`` would alter the caller's tensor).
    """
    edges = torch.cat(
        (bin_locations[..., :-1], bin_locations[..., -1:] + eps), dim=-1
    )
    return torch.sum(inputs[..., None] >= edges, dim=-1) - 1


def unconstrained_rational_quadratic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    tails="linear",
    tail_bound=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):
    """Rational-quadratic spline on ``[-tail_bound, tail_bound]`` with identity tails.

    Inputs inside the interval go through ``rational_quadratic_spline``;
    inputs outside it are returned unchanged with a log-determinant of 0.

    Returns:
        ``(outputs, logabsdet)``, both shaped like ``inputs``.

    Raises:
        RuntimeError: If ``tails`` is anything other than ``"linear"``.
    """
    if tails != "linear":
        raise RuntimeError("{} tails are not implemented.".format(tails))

    inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound)
    outside_interval_mask = ~inside_interval_mask

    outputs = torch.zeros_like(inputs)
    logabsdet = torch.zeros_like(inputs)

    # Add one boundary derivative on each side. The constant is chosen so that
    # min_derivative + softplus(constant) == 1, matching the identity tails.
    unnormalized_derivatives = F.pad(unnormalized_derivatives, pad=(1, 1))
    constant = np.log(np.exp(1 - min_derivative) - 1)
    unnormalized_derivatives[..., 0] = constant
    unnormalized_derivatives[..., -1] = constant

    outputs[outside_interval_mask] = inputs[outside_interval_mask]
    logabsdet[outside_interval_mask] = 0

    # With every input in the tails the masked selection is empty, and the
    # spline's torch.min / torch.max domain check fails on an empty tensor.
    if torch.any(inside_interval_mask):
        (
            outputs[inside_interval_mask],
            logabsdet[inside_interval_mask],
        ) = rational_quadratic_spline(
            inputs=inputs[inside_interval_mask],
            unnormalized_widths=unnormalized_widths[inside_interval_mask, :],
            unnormalized_heights=unnormalized_heights[inside_interval_mask, :],
            unnormalized_derivatives=unnormalized_derivatives[inside_interval_mask, :],
            inverse=inverse,
            left=-tail_bound,
            right=tail_bound,
            bottom=-tail_bound,
            top=tail_bound,
            min_bin_width=min_bin_width,
            min_bin_height=min_bin_height,
            min_derivative=min_derivative,
        )

    return outputs, logabsdet
def rational_quadratic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    left=0.0,
    right=1.0,
    bottom=0.0,
    top=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):
    """Evaluate a piecewise rational-quadratic spline, or its inverse, element-wise.

    The spline maps ``[left, right]`` onto ``[bottom, top]``. Bin widths and
    heights come from a softmax over the last axis of the unnormalised
    parameters; knot derivatives come from a softplus plus ``min_derivative``.

    Returns:
        ``(outputs, logabsdet)``. With ``inverse=True`` the second element is
        the negated log-derivative of the forward map at the recovered point.

    Raises:
        ValueError: If any input lies outside ``[left, right]``, or if
            ``min_bin_width`` / ``min_bin_height`` times the number of bins
            exceeds 1.
    """
    # NOTE(review): torch.min / torch.max fail on an empty ``inputs`` tensor.
    if torch.min(inputs) < left or torch.max(inputs) > right:
        raise ValueError("Input to a transform is not within its domain")

    num_bins = unnormalized_widths.shape[-1]

    if min_bin_width * num_bins > 1.0:
        raise ValueError("Minimal bin width too large for the number of bins")
    if min_bin_height * num_bins > 1.0:
        raise ValueError("Minimal bin height too large for the number of bins")

    # Widths: softmax, floor at min_bin_width, then cumulative edges rescaled
    # to [left, right] with the end points pinned exactly.
    widths = F.softmax(unnormalized_widths, dim=-1)
    widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
    cumwidths = torch.cumsum(widths, dim=-1)
    cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0)
    cumwidths = (right - left) * cumwidths + left
    cumwidths[..., 0] = left
    cumwidths[..., -1] = right
    widths = cumwidths[..., 1:] - cumwidths[..., :-1]

    # Knot derivatives are kept at or above min_derivative.
    derivatives = min_derivative + F.softplus(unnormalized_derivatives)

    # Heights: same construction as the widths, on [bottom, top].
    heights = F.softmax(unnormalized_heights, dim=-1)
    heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
    cumheights = torch.cumsum(heights, dim=-1)
    cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0)
    cumheights = (top - bottom) * cumheights + bottom
    cumheights[..., 0] = bottom
    cumheights[..., -1] = top
    heights = cumheights[..., 1:] - cumheights[..., :-1]

    # The inverse looks bins up along the output (height) axis, the forward
    # pass along the input (width) axis.
    if inverse:
        bin_idx = searchsorted(cumheights, inputs)[..., None]
    else:
        bin_idx = searchsorted(cumwidths, inputs)[..., None]

    # Gather the parameters of the bin each input falls into.
    input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
    input_bin_widths = widths.gather(-1, bin_idx)[..., 0]

    input_cumheights = cumheights.gather(-1, bin_idx)[..., 0]
    delta = heights / widths
    input_delta = delta.gather(-1, bin_idx)[..., 0]

    input_derivatives = derivatives.gather(-1, bin_idx)[..., 0]
    input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[..., 0]

    input_heights = heights.gather(-1, bin_idx)[..., 0]

    if inverse:
        # Solve a * root**2 + b * root + c = 0 for the position inside the bin.
        a = (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta
        ) + input_heights * (input_delta - input_derivatives)
        b = input_heights * input_derivatives - (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta
        )
        c = -input_delta * (inputs - input_cumheights)

        discriminant = b.pow(2) - 4 * a * c
        # NOTE(review): this check disappears under ``python -O``.
        assert (discriminant >= 0).all()

        root = (2 * c) / (-b - torch.sqrt(discriminant))
        outputs = root * input_bin_widths + input_cumwidths

        theta_one_minus_theta = root * (1 - root)
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
            * theta_one_minus_theta
        )
        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * root.pow(2)
            + 2 * input_delta * theta_one_minus_theta
            + input_derivatives * (1 - root).pow(2)
        )
        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)

        # Sign flipped: this is the log-derivative of the inverse map.
        return outputs, -logabsdet
    else:
        # theta is the relative position of the input inside its bin.
        theta = (inputs - input_cumwidths) / input_bin_widths
        theta_one_minus_theta = theta * (1 - theta)

        numerator = input_heights * (
            input_delta * theta.pow(2) + input_derivatives * theta_one_minus_theta
        )
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
            * theta_one_minus_theta
        )
        outputs = input_cumheights + numerator / denominator

        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * theta.pow(2)
            + 2 * input_delta * theta_one_minus_theta
            + input_derivatives * (1 - theta).pow(2)
        )
        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)

        return outputs, logabsdet
def extract(ckpt):
    """Wrap the entries of ``ckpt["model"]`` whose key lacks ``"enc_q"``.

    Returns:
        An ``OrderedDict`` with the single key ``"weight"`` holding the
        filtered ``{name: value}`` mapping.
    """
    model = ckpt["model"]
    opt = OrderedDict()
    opt["weight"] = {key: value for key, value in model.items() if "enc_q" not in key}
    return opt


def _load_weights(pth_path):
    """Load a checkpoint on the CPU and return its flat ``{name: tensor}`` mapping."""
    ckpt = torch.load(pth_path, map_location="cpu")
    if "model" in ckpt:
        # extract() wraps the mapping under "weight"; unwrap it so both
        # checkpoint layouts produce the same structure.
        return extract(ckpt)["weight"]
    return ckpt["weight"]


def model_fusion(model_name, pth_path_1, pth_path_2, alpha=0.5):
    """Blend the weights of two checkpoints and save them to ``logs/<model_name>.pth``.

    Every tensor is computed as ``alpha * w1 + (1 - alpha) * w2`` in float32.

    Args:
        model_name: Base name of the output file inside ``logs/``.
        pth_path_1: Path of the first checkpoint.
        pth_path_2: Path of the second checkpoint.
        alpha: Share of the first model in the blend. The ratio used to be
            hard-coded as ``1`` / ``(1 - 1)``, so the second model never
            contributed; pass ``alpha=1.0`` to reproduce that output.

    Returns:
        An error message string when the two checkpoints do not have the same
        weight names, otherwise ``None``.
    """
    ckpt1 = _load_weights(pth_path_1)
    ckpt2 = _load_weights(pth_path_2)
    if sorted(ckpt1.keys()) != sorted(ckpt2.keys()):
        return "Fail to merge the models. The model architectures are not the same."
    opt = OrderedDict(
        weight={
            key: alpha * value.float() + (1 - alpha) * ckpt2[key].float()
            for key, value in ckpt1.items()
        }
    )
    opt["info"] = f"Model fusion of {pth_path_1} and {pth_path_2}"
    torch.save(opt, f"logs/{model_name}.pth")
    print(f"Model fusion of {pth_path_1} and {pth_path_2} is done.")
class BiGRU(nn.Module):
    """Bidirectional, batch-first GRU that returns only the output sequence."""

    def __init__(self, input_features, hidden_features, num_layers):
        super().__init__()
        self.gru = nn.GRU(
            input_features,
            hidden_features,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, x):
        """Run the GRU over ``x`` and drop the final hidden state."""
        outputs, _hidden = self.gru(x)
        return outputs


class ConvBlockRes(nn.Module):
    """Two 3x3 conv -> batch-norm -> ReLU stages with a residual connection.

    A 1x1 convolution is used on the skip path only when the channel count
    changes; otherwise the input is added back unchanged.
    """

    def __init__(self, in_channels, out_channels, momentum=0.01):
        super().__init__()

        def conv3x3(source_channels):
            # Bias-free 3x3 convolution that keeps the spatial size.
            return nn.Conv2d(
                in_channels=source_channels,
                out_channels=out_channels,
                kernel_size=(3, 3),
                stride=(1, 1),
                padding=(1, 1),
                bias=False,
            )

        self.conv = nn.Sequential(
            conv3x3(in_channels),
            nn.BatchNorm2d(out_channels, momentum=momentum),
            nn.ReLU(),
            conv3x3(out_channels),
            nn.BatchNorm2d(out_channels, momentum=momentum),
            nn.ReLU(),
        )
        needs_projection = in_channels != out_channels
        if needs_projection:
            self.shortcut = nn.Conv2d(in_channels, out_channels, (1, 1))
        self.is_shortcut = needs_projection

    def forward(self, x):
        """Return ``conv(x)`` plus the (optionally projected) input."""
        main = self.conv(x)
        if not self.is_shortcut:
            return main + x
        return main + self.shortcut(x)
class ResEncoderBlock(nn.Module):
    """Chain of ``ConvBlockRes`` stages, optionally followed by average pooling.

    Args:
        in_channels: Channels entering the first stage.
        out_channels: Channels produced by every stage.
        kernel_size: ``AvgPool2d`` kernel size, or ``None`` to skip pooling.
        n_blocks: Number of stages applied in ``forward``.
        momentum: Momentum handed to each ``ConvBlockRes``.
    """

    def __init__(
        self, in_channels, out_channels, kernel_size, n_blocks=1, momentum=0.01
    ):
        super().__init__()
        self.n_blocks = n_blocks
        # The first stage changes the channel count; the rest keep it.
        stages = [ConvBlockRes(in_channels, out_channels, momentum)]
        for _ in range(n_blocks - 1):
            stages.append(ConvBlockRes(out_channels, out_channels, momentum))
        self.conv = nn.ModuleList(stages)
        self.kernel_size = kernel_size
        if kernel_size is not None:
            self.pool = nn.AvgPool2d(kernel_size=kernel_size)

    def forward(self, x):
        """Return ``(features, pooled)`` when pooling is enabled, else ``features``."""
        for index in range(self.n_blocks):
            x = self.conv[index](x)
        if self.kernel_size is None:
            return x
        return x, self.pool(x)


class Intermediate(nn.Module):
    """Stack of ``n_inters`` pooling-free ``ResEncoderBlock`` layers."""

    def __init__(self, in_channels, out_channels, n_inters, n_blocks, momentum=0.01):
        super().__init__()
        self.n_inters = n_inters
        blocks = [ResEncoderBlock(in_channels, out_channels, None, n_blocks, momentum)]
        for _ in range(self.n_inters - 1):
            blocks.append(
                ResEncoderBlock(out_channels, out_channels, None, n_blocks, momentum)
            )
        self.layers = nn.ModuleList(blocks)

    def forward(self, x):
        """Feed ``x`` through the first ``n_inters`` layers in order."""
        for index in range(self.n_inters):
            x = self.layers[index](x)
        return x
class Decoder(nn.Module):
    """Stack of ``ResDecoderBlock`` layers that halves the channel count per layer."""

    def __init__(self, in_channels, n_decoders, stride, n_blocks, momentum=0.01):
        super().__init__()
        self.layers = nn.ModuleList()
        self.n_decoders = n_decoders
        channels = in_channels
        for _ in range(self.n_decoders):
            halved = channels // 2
            self.layers.append(
                ResDecoderBlock(channels, halved, stride, n_blocks, momentum)
            )
            channels = halved

    def forward(self, x, concat_tensors):
        """Decode ``x``; layer ``i`` receives ``concat_tensors[-1 - i]`` as its skip input."""
        for depth in range(self.n_decoders):
            skip = concat_tensors[-1 - depth]
            x = self.layers[depth](x, skip)
        return x


class DeepUnet(nn.Module):
    """Encoder -> intermediate -> decoder network with skip connections.

    The encoder is built with an input size of 128, and ``kernel_size`` is
    used both as the encoder's pooling kernel and as the decoder's stride.
    """

    def __init__(
        self,
        kernel_size,
        n_blocks,
        en_de_layers=5,
        inter_layers=4,
        in_channels=1,
        en_out_channels=16,
    ):
        super().__init__()
        self.encoder = Encoder(
            in_channels, 128, en_de_layers, kernel_size, n_blocks, en_out_channels
        )
        bottleneck = self.encoder.out_channel
        self.intermediate = Intermediate(
            bottleneck // 2,
            bottleneck,
            inter_layers,
            n_blocks,
        )
        self.decoder = Decoder(bottleneck, en_de_layers, kernel_size, n_blocks)

    def forward(self, x):
        """Run the full network and return the decoder output."""
        encoded, skips = self.encoder(x)
        return self.decoder(self.intermediate(encoded), skips)
class MelSpectrogram(torch.nn.Module):
    """Log-mel spectrogram extractor with optional key shift and speed change.

    Args:
        is_half: When equal to ``True`` the mel output is cast to half
            precision before the log.
        n_mel_channels: Number of mel bands.
        sampling_rate: Sampling rate used to build the mel filter bank.
        win_length: STFT window length.
        hop_length: STFT hop length.
        n_fft: FFT size; defaults to ``win_length``.
        mel_fmin: Lowest filter-bank frequency.
        mel_fmax: Highest filter-bank frequency.
        clamp: Lower bound applied to the mel output before the log.
    """

    def __init__(
        self,
        is_half,
        n_mel_channels,
        sampling_rate,
        win_length,
        hop_length,
        n_fft=None,
        mel_fmin=0,
        mel_fmax=None,
        clamp=1e-5,
    ):
        super().__init__()
        if n_fft is None:
            n_fft = win_length
        # Hann windows cached per "<keyshift>_<device>" key.
        self.hann_window = {}
        filter_bank = mel(
            sr=sampling_rate,
            n_fft=n_fft,
            n_mels=n_mel_channels,
            fmin=mel_fmin,
            fmax=mel_fmax,
            htk=True,
        )
        self.register_buffer("mel_basis", torch.from_numpy(filter_bank).float())
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        self.sampling_rate = sampling_rate
        self.n_mel_channels = n_mel_channels
        self.clamp = clamp
        self.is_half = is_half

    def forward(self, audio, keyshift=0, speed=1, center=True):
        """Return the log-mel spectrogram of ``audio``.

        ``keyshift`` (in semitones) rescales the FFT and window sizes by
        ``2 ** (keyshift / 12)``; ``speed`` rescales the hop length.
        """
        factor = 2 ** (keyshift / 12)
        n_fft_new = int(np.round(self.n_fft * factor))
        win_length_new = int(np.round(self.win_length * factor))
        hop_length_new = int(np.round(self.hop_length * speed))

        cache_key = str(keyshift) + "_" + str(audio.device)
        window = self.hann_window.get(cache_key)
        if window is None:
            window = torch.hann_window(win_length_new).to(audio.device)
            self.hann_window[cache_key] = window

        spectrum = torch.stft(
            audio,
            n_fft=n_fft_new,
            hop_length=hop_length_new,
            win_length=win_length_new,
            window=window,
            center=center,
            return_complex=True,
        )
        magnitude = torch.sqrt(spectrum.real.pow(2) + spectrum.imag.pow(2))

        if keyshift != 0:
            # Bring the frequency axis back to n_fft // 2 + 1 bins (zero-pad or
            # crop) and rescale for the changed window length.
            wanted_bins = self.n_fft // 2 + 1
            current_bins = magnitude.size(1)
            if current_bins < wanted_bins:
                magnitude = F.pad(magnitude, (0, 0, 0, wanted_bins - current_bins))
            magnitude = magnitude[:, :wanted_bins, :] * self.win_length / win_length_new

        mel_output = torch.matmul(self.mel_basis, magnitude)
        if self.is_half == True:
            mel_output = mel_output.half()
        return torch.log(torch.clamp(mel_output, min=self.clamp))
def indent(text, prefix):
    """Prepend ``prefix`` to every line of ``text`` that is not whitespace-only."""
    return "".join(
        prefix + line if line.strip() else line for line in text.splitlines(True)
    )


class FileURLRetrievalError(Exception):
    """Error raised with a message describing why a file URL could not be obtained."""


class FolderContentsMaximumLimitError(Exception):
    """Error type for folder-content limit failures (not raised in this section)."""


def parse_url(url, warning=True):
    """Parse URLs especially for Google Drive links.

    Returns ``(file_id, is_download_link)``:

    file_id: ID of file on Google Drive; ``None`` when it cannot be
        determined, and ``False`` when the host is not a Google Drive host.
    is_download_link: Flag if it is download link of Google Drive
        (the URL path ends with ``/uc``).
    """
    parsed = urllib_parse.urlparse(url)
    query = urllib_parse.parse_qs(parsed.query)
    is_gdrive = parsed.hostname in ["drive.google.com", "docs.google.com"]
    is_download_link = parsed.path.endswith("/uc")

    if not is_gdrive:
        return is_gdrive, is_download_link

    file_id = None
    if "id" in query:
        # A repeated "id" parameter is ambiguous and leaves file_id unset.
        candidates = query["id"]
        if len(candidates) == 1:
            file_id = candidates[0]
    else:
        # Viewer/editor paths, with and without the "/u/<n>" account segment.
        path_patterns = (
            r"^/file/d/(.*?)/(edit|view)$",
            r"^/file/u/[0-9]+/d/(.*?)/(edit|view)$",
            r"^/document/d/(.*?)/(edit|htmlview|view)$",
            r"^/document/u/[0-9]+/d/(.*?)/(edit|htmlview|view)$",
            r"^/presentation/d/(.*?)/(edit|htmlview|view)$",
            r"^/presentation/u/[0-9]+/d/(.*?)/(edit|htmlview|view)$",
            r"^/spreadsheets/d/(.*?)/(edit|htmlview|view)$",
            r"^/spreadsheets/u/[0-9]+/d/(.*?)/(edit|htmlview|view)$",
        )
        for path_pattern in path_patterns:
            found = re.match(path_pattern, parsed.path)
            if found:
                file_id = found.group(1)
                break

    if warning and not is_download_link:
        suggested = "https://drive.google.com/uc?id={}".format(file_id)
        warnings.warn(
            "You specified a Google Drive link that is not the correct link "
            "to download a file. You might want to try `--fuzzy` option "
            "or the following url: {url}".format(url=suggested)
        )

    return file_id, is_download_link

def _stream_response(res, f, quiet, speed):
    """Copy the body of response *res* into the binary file object *f*.

    Shows a tqdm progress bar unless *quiet* is true and, when *speed* is
    given (bytes per second), sleeps between chunks to stay under that rate.
    """
    total = res.headers.get("Content-Length")
    if total is not None:
        total = int(total)
    pbar = None if quiet else tqdm.tqdm(total=total, unit="B", unit_scale=True)
    # Count bytes ourselves so throttling also works without a progress bar.
    downloaded = 0
    t_start = time.time()
    try:
        for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
            f.write(chunk)
            downloaded += len(chunk)
            if pbar is not None:
                pbar.update(len(chunk))
            if speed is not None:
                elapsed_time_expected = 1.0 * downloaded / speed
                elapsed_time = time.time() - t_start
                if elapsed_time < elapsed_time_expected:
                    time.sleep(elapsed_time_expected - elapsed_time)
    finally:
        if pbar is not None:
            pbar.close()


def download(
    url=None,
    output=None,
    quiet=False,
    proxy=None,
    speed=None,
    use_cookies=True,
    verify=True,
    id=None,
    fuzzy=True,
    resume=False,
    format=None,
):
    """Download file from URL.

    Parameters
    ----------
    url: str
        URL. Google Drive URL is also supported.
    output: str
        Output filename. Default is basename of URL.
    quiet: bool
        Suppress terminal output. Default is False.
    proxy: str
        Proxy.
    speed: float
        Download byte size per second (e.g., 256KB/s = 256 * 1024).
    use_cookies: bool
        Flag to use cookies. Default is True.
    verify: bool or string
        Either a bool, in which case it controls whether the server's TLS
        certificate is verified, or a string, in which case it must be a path
        to a CA bundle to use. Default is True.
    id: str
        Google Drive's file ID.
    fuzzy: bool
        Fuzzy extraction of Google Drive's file Id. Default is True.
    resume: bool
        Resume the download from existing tmp file if possible.
        Default is False.
    format: str, optional
        Format of Google Docs, Spreadsheets and Slides. Default is:
            - Google Docs: 'docx'
            - Google Spreadsheet: 'xlsx'
            - Google Slides: 'pptx'

    Returns
    -------
    output: str
        Output filename. ``None`` is returned when ``resume`` is requested
        but several candidate temporary files exist.

    Raises
    ------
    ValueError
        If neither or both of ``url`` and ``id`` are given.
    FileURLRetrievalError
        If the Google Drive download URL cannot be retrieved.
    """
    if not (id is None) ^ (url is None):
        raise ValueError("Either url or id has to be specified")
    if id is not None:
        url = "https://drive.google.com/uc?id={id}".format(id=id)

    url_origin = url

    sess, cookies_file = _get_session(
        proxy=proxy, use_cookies=use_cookies, return_cookies_file=True
    )

    gdrive_file_id, is_gdrive_download_link = parse_url(url, warning=not fuzzy)

    if fuzzy and gdrive_file_id:
        # overwrite the url with fuzzy match of a file id
        url = "https://drive.google.com/uc?id={id}".format(id=gdrive_file_id)
        url_origin = url
        is_gdrive_download_link = True

    # (page-title suffix, export path component, default export format)
    exports = (
        (" - Google Docs", "document", "docx"),
        (" - Google Sheets", "spreadsheets", "xlsx"),
        (" - Google Slides", "presentation", "pptx"),
    )
    export_template = "https://docs.google.com/{kind}/d/{id}/export?format={format}"

    while True:
        res = sess.get(url, stream=True, verify=verify)

        if url == url_origin and res.status_code == 500:
            # The file could be Google Docs or Spreadsheets.
            url = "https://drive.google.com/open?id={id}".format(
                id=gdrive_file_id
            )
            continue

        # A missing Content-Type header is treated as "not HTML".
        if res.headers.get("Content-Type", "").startswith("text/html"):
            # The editor type is recognised from the HTML page title.
            m = re.search("<title>(.+)</title>", res.text)
            title = m.groups()[0] if m else ""
            export_url = None
            for suffix, kind, default_format in exports:
                if title.endswith(suffix):
                    export_url = export_template.format(
                        kind=kind,
                        id=gdrive_file_id,
                        format=default_format if format is None else format,
                    )
                    break
            if export_url is not None:
                url = export_url
                continue
        elif (
            "Content-Disposition" in res.headers
            and res.headers["Content-Disposition"].endswith("pptx")
            and format not in {None, "pptx"}
        ):
            url = export_template.format(
                kind="presentation",
                id=gdrive_file_id,
                format="pptx" if format is None else format,
            )
            continue

        if use_cookies:
            if not osp.exists(osp.dirname(cookies_file)):
                os.makedirs(osp.dirname(cookies_file))
            # Save cookies
            with open(cookies_file, "w") as f:
                cookies = [
                    (k, v)
                    for k, v in sess.cookies.items()
                    if not k.startswith("download_warning_")
                ]
                json.dump(cookies, f, indent=2)

        if "Content-Disposition" in res.headers:
            # This is the file
            break
        if not (gdrive_file_id and is_gdrive_download_link):
            break

        # Need to redirect with confirmation
        try:
            url = get_url_from_gdrive_confirmation(res.text)
        except FileURLRetrievalError as e:
            message = (
                "Failed to retrieve file url:\n\n{}\n\n"
                "You may still be able to access the file from the browser:"
                "\n\n\t{}\n\n"
                "but Gdown can't. Please check connections and permissions."
            ).format(
                indent("\n".join(textwrap.wrap(str(e))), prefix="\t"),
                url_origin,
            )
            sess.close()
            raise FileURLRetrievalError(message)

    if gdrive_file_id and is_gdrive_download_link:
        content_disposition = six.moves.urllib_parse.unquote(
            res.headers["Content-Disposition"]
        )

        m = re.search(r"filename\*=UTF-8''(.*)", content_disposition)
        if not m:
            m = re.search(r'filename=["\']?(.*?)["\']?$', content_disposition)
        if m:
            filename_from_url = m.groups()[0]
            filename_from_url = filename_from_url.replace(osp.sep, "_")
        else:
            # Header carries no filename at all: fall back to the URL.
            filename_from_url = osp.basename(url)
    else:
        filename_from_url = osp.basename(url)

    if output is None:
        output = filename_from_url

    output_is_path = isinstance(output, six.string_types)
    if output_is_path and output.endswith(osp.sep):
        if not osp.exists(output):
            os.makedirs(output)
        output = osp.join(output, filename_from_url)

    if output_is_path:
        existing_tmp_files = []
        for file in os.listdir(osp.dirname(output) or "."):
            if file.startswith(osp.basename(output)):
                existing_tmp_files.append(osp.join(osp.dirname(output), file))
        if resume and existing_tmp_files:
            if len(existing_tmp_files) != 1:
                print(
                    "There are multiple temporary files to resume:",
                    file=sys.stderr,
                )
                print("\n", file=sys.stderr)
                for file in existing_tmp_files:
                    print("\t", file, file=sys.stderr)
                print("\n", file=sys.stderr)
                print(
                    "Please remove them except one to resume downloading.",
                    file=sys.stderr,
                )
                sess.close()
                return
            tmp_file = existing_tmp_files[0]
        else:
            resume = False
            # mkstemp is preferred, but does not work on Windows
            # https://github.com/wkentaro/gdown/issues/153
            tmp_file = tempfile.mktemp(
                suffix=tempfile.template,
                prefix=osp.basename(output),
                dir=osp.dirname(output),
            )
        f = open(tmp_file, "ab")
    else:
        # ``output`` is an already opened file-like object; write to it directly.
        tmp_file = None
        f = output

    try:
        if tmp_file is not None and f.tell() != 0:
            # Partial data already on disk: request only the remaining bytes.
            headers = {"Range": "bytes={}-".format(f.tell())}
            res = sess.get(url, headers=headers, stream=True, verify=verify)

        if not quiet:
            if resume:
                print("Resume:", tmp_file, file=sys.stderr)
            print(
                "To:",
                osp.abspath(output) if output_is_path else output,
                file=sys.stderr,
            )

        _stream_response(res, f, quiet, speed)

        if tmp_file:
            f.close()
            shutil.move(tmp_file, output)
    finally:
        # Only close handles this function opened itself.
        if tmp_file and not f.closed:
            f.close()
        sess.close()

    return output
def search_pth_index(folder):
    """Collect the model files located directly inside *folder*.

    The directory is listed once, so both result lists come from the same
    snapshot and each entry is checked with ``os.path.isfile`` only once.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        A ``(pth_paths, index_paths)`` tuple of lists holding the joined
        paths of regular files ending in ``.pth`` and ``.index``, in
        ``os.listdir`` order.
    """
    pth_paths = []
    index_paths = []
    for entry in os.listdir(folder):
        full_path = os.path.join(folder, entry)
        if not os.path.isfile(full_path):
            continue
        if entry.endswith(".pth"):
            pth_paths.append(full_path)
        elif entry.endswith(".index"):
            index_paths.append(full_path)

    return pth_paths, index_paths
def download_from_url(url):
    """Download a model archive from *url* into ``zips_path``.

    Supported sources are Google Drive links, Hugging Face ``/blob/`` or
    ``/resolve/`` file links, Hugging Face ``/tree/main`` listings (the first
    ``.zip`` link found is used) and, as a fallback, any URL ``wget`` can
    fetch. After a successful download every file under ``zips_path`` has the
    dots in its stem replaced by underscores.

    The working directory is always restored to ``now_dir`` before returning,
    including when an exception propagates.

    Args:
        url: Source URL; an empty string is rejected.

    Returns:
        ``"downloaded"`` on success, ``"too much use"`` or ``"private link"``
        for the corresponding Google Drive errors, ``None`` otherwise.
    """
    os.makedirs(zips_path, exist_ok=True)
    try:
        if url == "":
            return None

        if "drive.google.com" in url:
            if "file/d/" in url:
                file_id = url.split("file/d/")[1].split("/")[0]
            elif "id=" in url:
                file_id = url.split("id=")[1].split("&")[0]
            else:
                return None

            if not file_id:
                # Nothing to download; do not report success.
                return None

            os.chdir(zips_path)
            try:
                gdown.download(
                    f"https://drive.google.com/uc?id={file_id}",
                    quiet=False,
                    fuzzy=True,
                )
            except Exception as error:
                error_message = str(error)
                if (
                    "Too many users have viewed or downloaded this file recently"
                    in error_message
                ):
                    return "too much use"
                if "Cannot retrieve the public link of the file." in error_message:
                    return "private link"
                print(error_message)
                return None

        elif "/blob/" in url or "/resolve/" in url:
            os.chdir(zips_path)
            url = url.replace("/blob/", "/resolve/")

            response = requests.get(url, stream=True)
            if response.status_code != 200:
                return None

            # Drop any query string (e.g. "?download=true") before sanitizing,
            # otherwise the saved file loses its ".zip" suffix.
            file_name = unquote(url.split("/")[-1].split("?")[0])
            file_name = re.sub(r"[^a-zA-Z0-9_.-]", "_", file_name)

            total_size_in_bytes = int(response.headers.get("content-length", 0))
            block_size = 1024
            progress_bar_length = 50
            progress = 0

            with open(os.path.join(zips_path, file_name), "wb") as file:
                for data in response.iter_content(block_size):
                    file.write(data)
                    progress += len(data)
                    if total_size_in_bytes > 0:
                        fraction = min(progress / total_size_in_bytes, 1.0)
                        num_dots = int(fraction * progress_bar_length)
                        progress_bar = (
                            "["
                            + "." * num_dots
                            + " " * (progress_bar_length - num_dots)
                            + "]"
                        )
                        print(
                            f"{int(fraction * 100)}% {progress_bar} {progress}/{total_size_in_bytes} ",
                            end="\r",
                        )
                    else:
                        # Size unknown (no content-length header): a percentage
                        # cannot be computed, so only show the byte count.
                        print(f"{progress} bytes downloaded ", end="\r")
            print("\n")

        elif "/tree/main" in url:
            os.chdir(zips_path)
            response = requests.get(url)
            soup = BeautifulSoup(response.content, "html.parser")
            temp_url = ""
            for link in soup.find_all("a", href=True):
                if link["href"].endswith(".zip"):
                    temp_url = link["href"]
                    break
            if not temp_url:
                return None

            url = temp_url.replace("blob", "resolve")
            if "huggingface.co" not in url:
                # Relative link from the listing page: prepend the host.
                url = "https://huggingface.co." + url

            wget.download(url)

        else:
            try:
                os.chdir(zips_path)
                wget.download(url)
            except Exception as error:
                print(error)
                return None

        # Normalize names: "my.model.zip" -> "my_model.zip". Files without an
        # extension are left untouched instead of becoming hidden ".name".
        for current_path, _, file_names in os.walk(zips_path):
            for name in file_names:
                stem, dot, extension = name.rpartition(".")
                if not dot:
                    continue
                new_name = stem.replace(".", "_") + "." + extension
                if new_name != name:
                    os.rename(
                        os.path.join(current_path, name),
                        os.path.join(zips_path, new_name),
                    )

        return "downloaded"
    finally:
        os.chdir(now_dir)
"G48k.pth", + "f0D32k.pth", + "f0D40k.pth", + "f0D48k.pth", + "f0G32k.pth", + "f0G40k.pth", + "f0G48k.pth", + ], + ), + ( + "pretrained_v2/", + [ + "D32k.pth", + "D40k.pth", + "D48k.pth", + "G32k.pth", + "G40k.pth", + "G48k.pth", + "f0D32k.pth", + "f0D40k.pth", + "f0D48k.pth", + "f0G32k.pth", + "f0G40k.pth", + "f0G48k.pth", + ], + ), +] + +models_file = [ + "hubert_base.pt", + "rmvpe.pt", + # "rmvpe.onnx", +] + +executables_file = [ + "ffmpeg.exe", + "ffprobe.exe", +] + +folder_mapping = { + "pretrained/": "rvc/pretraineds/pretrained_v1/", + "pretrained_v2/": "rvc/pretraineds/pretrained_v2/", +} + +for file_name in models_file: + destination_path = os.path.join(file_name) + url = f"{url_base}/{file_name}" + if not os.path.exists(destination_path): + os.makedirs(os.path.dirname(destination_path) or ".", exist_ok=True) + print(f"\nDownloading {url} to {destination_path}...") + wget.download(url, out=destination_path) + +for file_name in executables_file: + if sys.platform == "win32": + destination_path = os.path.join(file_name) + url = f"{url_base}/{file_name}" + if not os.path.exists(destination_path): + os.makedirs(os.path.dirname(destination_path) or ".", exist_ok=True) + print(f"\nDownloading {url} to {destination_path}...") + wget.download(url, out=destination_path) + +for remote_folder, file_list in models_download: + local_folder = folder_mapping.get(remote_folder, "") + for file in file_list: + destination_path = os.path.join(local_folder, file) + url = f"{url_base}/{remote_folder}{file}" + if not os.path.exists(destination_path): + os.makedirs(os.path.dirname(destination_path) or ".", exist_ok=True) + print(f"\nDownloading {url} to {destination_path}...") + wget.download(url, out=destination_path) \ No newline at end of file diff --git a/rvc/lib/tools/pretrained_selector.py b/rvc/lib/tools/pretrained_selector.py new file mode 100644 index 0000000000000000000000000000000000000000..59960dacf60a1958790f0159283a15682c3062ea --- /dev/null +++ 
def pretrained_selector(pitch_guidance):
    """Return the table of pretrained generator/discriminator checkpoints.

    The table is generated from a single path template instead of four
    hand-copied literal tables, so the variants cannot drift apart.

    Args:
        pitch_guidance: When truthy, the ``f0``-prefixed checkpoints are
            selected; otherwise the plain ones.

    Returns:
        A dict keyed by model version (``"v1"``, ``"v2"``), each mapping a
        sample-rate string (``"32000"``, ``"40000"``, ``"48000"``) to a
        ``(generator_path, discriminator_path)`` tuple.
    """
    prefix = "f0" if pitch_guidance else ""
    # Sample-rate key -> suffix used in the checkpoint file names.
    rate_suffixes = {"32000": "32k", "40000": "40k", "48000": "48k"}

    return {
        version: {
            sample_rate: (
                f"rvc/pretraineds/pretrained_{version}/{prefix}G{suffix}.pth",
                f"rvc/pretraineds/pretrained_{version}/{prefix}D{suffix}.pth",
            )
            for sample_rate, suffix in rate_suffixes.items()
        }
        for version in ("v1", "v2")
    }
def merge_audio(timestamps_file):
    """Rebuild one audio track from the chunks listed in a timestamps file.

    Each matching line has the form ``<path>/chunkN.wav starts at <ms> ms``.
    Chunks are loaded from ``<dir of timestamps_file>/<prefix>/chunkN.wav``,
    where ``<prefix>`` is the timestamps file name without
    ``_timestamps.txt``. Silence is inserted between chunks so each one
    starts at its recorded offset.

    Args:
        timestamps_file: Path to the ``*_timestamps.txt`` file.

    Returns:
        ``(frame_rate, samples)`` where ``samples`` is a NumPy array of the
        merged audio, or ``None`` when merging fails or the file lists no
        chunks (the reason is printed).
    """
    try:
        # Extract prefix from the timestamps filename
        prefix = os.path.basename(timestamps_file).replace("_timestamps.txt", "")
        timestamps_dir = os.path.dirname(timestamps_file)

        with open(timestamps_file, "r", encoding="utf-8") as f:
            lines = f.readlines()

        # The dot is escaped so only real "chunkN.wav" names match.
        chunk_pattern = re.compile(r"(chunk\d+\.wav) starts at (\d+) ms")
        audio_segments = []
        last_end_time = 0

        print(f"Processing file: {timestamps_file}")

        for line in lines:
            match = chunk_pattern.search(line)
            if not match:
                continue

            filename, start_time = match.groups()
            start_time = int(start_time)

            # Construct the complete path to the chunk file
            chunk_file = os.path.join(timestamps_dir, prefix, filename)

            # Pad with silence from the end of the previous chunk up to this
            # chunk's start (never negative if chunks overlap).
            silence_duration = max(start_time - last_end_time, 0)
            audio_segments.append(AudioSegment.silent(duration=silence_duration))

            audio = AudioSegment.from_wav(chunk_file)
            audio_segments.append(audio)

            last_end_time = start_time + len(audio)

            print(f"Processed chunk: {chunk_file}")

        if not audio_segments:
            # sum([]) would be the int 0 and fail later with an opaque error.
            print(f"An error occurred: no chunks listed in {timestamps_file}")
            return None

        # Concatenate all audio_segments and export as a sample array
        merged_audio = sum(audio_segments)
        merged_audio_np = np.array(merged_audio.get_array_of_samples())
        return merged_audio.frame_rate, merged_audio_np

    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None.
        print(f"An error occurred: {e}")
        return None
Output TTS file: '{output_file}'") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/rvc/lib/tools/tts_voices.json b/rvc/lib/tools/tts_voices.json new file mode 100644 index 0000000000000000000000000000000000000000..ce6c03fff9d2bee7a4a5f6937ea80597af36356b --- /dev/null +++ b/rvc/lib/tools/tts_voices.json @@ -0,0 +1,5462 @@ +[ + { + "Name": "Microsoft Server Speech Text to Speech Voice (af-ZA, AdriNeural)", + "ShortName": "af-ZA-AdriNeural", + "Gender": "Female", + "Locale": "af-ZA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Adri Online (Natural) - Afrikaans (South Africa)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (af-ZA, WillemNeural)", + "ShortName": "af-ZA-WillemNeural", + "Gender": "Male", + "Locale": "af-ZA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Willem Online (Natural) - Afrikaans (South Africa)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sq-AL, AnilaNeural)", + "ShortName": "sq-AL-AnilaNeural", + "Gender": "Female", + "Locale": "sq-AL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Anila Online (Natural) - Albanian (Albania)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sq-AL, IlirNeural)", + "ShortName": "sq-AL-IlirNeural", + "Gender": "Male", + "Locale": "sq-AL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ilir Online (Natural) - Albanian (Albania)", + "Status": "GA", + "VoiceTag": { + 
"ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (am-ET, AmehaNeural)", + "ShortName": "am-ET-AmehaNeural", + "Gender": "Male", + "Locale": "am-ET", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ameha Online (Natural) - Amharic (Ethiopia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (am-ET, MekdesNeural)", + "ShortName": "am-ET-MekdesNeural", + "Gender": "Female", + "Locale": "am-ET", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Mekdes Online (Natural) - Amharic (Ethiopia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-DZ, AminaNeural)", + "ShortName": "ar-DZ-AminaNeural", + "Gender": "Female", + "Locale": "ar-DZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Amina Online (Natural) - Arabic (Algeria)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-DZ, IsmaelNeural)", + "ShortName": "ar-DZ-IsmaelNeural", + "Gender": "Male", + "Locale": "ar-DZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ismael Online (Natural) - Arabic (Algeria)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-BH, AliNeural)", + "ShortName": "ar-BH-AliNeural", + "Gender": "Male", + "Locale": "ar-BH", + 
"SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ali Online (Natural) - Arabic (Bahrain)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-BH, LailaNeural)", + "ShortName": "ar-BH-LailaNeural", + "Gender": "Female", + "Locale": "ar-BH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Laila Online (Natural) - Arabic (Bahrain)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-EG, SalmaNeural)", + "ShortName": "ar-EG-SalmaNeural", + "Gender": "Female", + "Locale": "ar-EG", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Salma Online (Natural) - Arabic (Egypt)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-EG, ShakirNeural)", + "ShortName": "ar-EG-ShakirNeural", + "Gender": "Male", + "Locale": "ar-EG", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Shakir Online (Natural) - Arabic (Egypt)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-IQ, BasselNeural)", + "ShortName": "ar-IQ-BasselNeural", + "Gender": "Male", + "Locale": "ar-IQ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Bassel Online (Natural) - Arabic (Iraq)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + 
"Name": "Microsoft Server Speech Text to Speech Voice (ar-IQ, RanaNeural)", + "ShortName": "ar-IQ-RanaNeural", + "Gender": "Female", + "Locale": "ar-IQ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Rana Online (Natural) - Arabic (Iraq)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-JO, SanaNeural)", + "ShortName": "ar-JO-SanaNeural", + "Gender": "Female", + "Locale": "ar-JO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sana Online (Natural) - Arabic (Jordan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-JO, TaimNeural)", + "ShortName": "ar-JO-TaimNeural", + "Gender": "Male", + "Locale": "ar-JO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Taim Online (Natural) - Arabic (Jordan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-KW, FahedNeural)", + "ShortName": "ar-KW-FahedNeural", + "Gender": "Male", + "Locale": "ar-KW", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Fahed Online (Natural) - Arabic (Kuwait)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-KW, NouraNeural)", + "ShortName": "ar-KW-NouraNeural", + "Gender": "Female", + "Locale": "ar-KW", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Noura Online (Natural) - Arabic (Kuwait)", + 
"Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-LB, LaylaNeural)", + "ShortName": "ar-LB-LaylaNeural", + "Gender": "Female", + "Locale": "ar-LB", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Layla Online (Natural) - Arabic (Lebanon)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-LB, RamiNeural)", + "ShortName": "ar-LB-RamiNeural", + "Gender": "Male", + "Locale": "ar-LB", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Rami Online (Natural) - Arabic (Lebanon)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-LY, ImanNeural)", + "ShortName": "ar-LY-ImanNeural", + "Gender": "Female", + "Locale": "ar-LY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Iman Online (Natural) - Arabic (Libya)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-LY, OmarNeural)", + "ShortName": "ar-LY-OmarNeural", + "Gender": "Male", + "Locale": "ar-LY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Omar Online (Natural) - Arabic (Libya)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-MA, JamalNeural)", + "ShortName": "ar-MA-JamalNeural", + "Gender": "Male", + 
"Locale": "ar-MA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Jamal Online (Natural) - Arabic (Morocco)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-MA, MounaNeural)", + "ShortName": "ar-MA-MounaNeural", + "Gender": "Female", + "Locale": "ar-MA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Mouna Online (Natural) - Arabic (Morocco)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-OM, AbdullahNeural)", + "ShortName": "ar-OM-AbdullahNeural", + "Gender": "Male", + "Locale": "ar-OM", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Abdullah Online (Natural) - Arabic (Oman)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-OM, AyshaNeural)", + "ShortName": "ar-OM-AyshaNeural", + "Gender": "Female", + "Locale": "ar-OM", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Aysha Online (Natural) - Arabic (Oman)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-QA, AmalNeural)", + "ShortName": "ar-QA-AmalNeural", + "Gender": "Female", + "Locale": "ar-QA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Amal Online (Natural) - Arabic (Qatar)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" 
+ ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-QA, MoazNeural)", + "ShortName": "ar-QA-MoazNeural", + "Gender": "Male", + "Locale": "ar-QA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Moaz Online (Natural) - Arabic (Qatar)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-SA, HamedNeural)", + "ShortName": "ar-SA-HamedNeural", + "Gender": "Male", + "Locale": "ar-SA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Hamed Online (Natural) - Arabic (Saudi Arabia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-SA, ZariyahNeural)", + "ShortName": "ar-SA-ZariyahNeural", + "Gender": "Female", + "Locale": "ar-SA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Zariyah Online (Natural) - Arabic (Saudi Arabia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-SY, AmanyNeural)", + "ShortName": "ar-SY-AmanyNeural", + "Gender": "Female", + "Locale": "ar-SY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Amany Online (Natural) - Arabic (Syria)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-SY, LaithNeural)", + "ShortName": "ar-SY-LaithNeural", + "Gender": "Male", + "Locale": "ar-SY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Laith 
Online (Natural) - Arabic (Syria)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-TN, HediNeural)", + "ShortName": "ar-TN-HediNeural", + "Gender": "Male", + "Locale": "ar-TN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Hedi Online (Natural) - Arabic (Tunisia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-TN, ReemNeural)", + "ShortName": "ar-TN-ReemNeural", + "Gender": "Female", + "Locale": "ar-TN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Reem Online (Natural) - Arabic (Tunisia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-AE, FatimaNeural)", + "ShortName": "ar-AE-FatimaNeural", + "Gender": "Female", + "Locale": "ar-AE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Fatima Online (Natural) - Arabic (United Arab Emirates)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-AE, HamdanNeural)", + "ShortName": "ar-AE-HamdanNeural", + "Gender": "Male", + "Locale": "ar-AE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Hamdan Online (Natural) - Arabic (United Arab Emirates)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-YE, 
MaryamNeural)", + "ShortName": "ar-YE-MaryamNeural", + "Gender": "Female", + "Locale": "ar-YE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Maryam Online (Natural) - Arabic (Yemen)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ar-YE, SalehNeural)", + "ShortName": "ar-YE-SalehNeural", + "Gender": "Male", + "Locale": "ar-YE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Saleh Online (Natural) - Arabic (Yemen)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (az-AZ, BabekNeural)", + "ShortName": "az-AZ-BabekNeural", + "Gender": "Male", + "Locale": "az-AZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Babek Online (Natural) - Azerbaijani (Azerbaijan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (az-AZ, BanuNeural)", + "ShortName": "az-AZ-BanuNeural", + "Gender": "Female", + "Locale": "az-AZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Banu Online (Natural) - Azerbaijani (Azerbaijan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (bn-BD, NabanitaNeural)", + "ShortName": "bn-BD-NabanitaNeural", + "Gender": "Female", + "Locale": "bn-BD", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Nabanita Online (Natural) - Bangla (Bangladesh)", + "Status": "GA", + 
"VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (bn-BD, PradeepNeural)", + "ShortName": "bn-BD-PradeepNeural", + "Gender": "Male", + "Locale": "bn-BD", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Pradeep Online (Natural) - Bangla (Bangladesh)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (bn-IN, BashkarNeural)", + "ShortName": "bn-IN-BashkarNeural", + "Gender": "Male", + "Locale": "bn-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Bashkar Online (Natural) - Bangla (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (bn-IN, TanishaaNeural)", + "ShortName": "bn-IN-TanishaaNeural", + "Gender": "Female", + "Locale": "bn-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Tanishaa Online (Natural) - Bengali (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (bs-BA, GoranNeural)", + "ShortName": "bs-BA-GoranNeural", + "Gender": "Male", + "Locale": "bs-BA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Goran Online (Natural) - Bosnian (Bosnia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (bs-BA, VesnaNeural)", + "ShortName": "bs-BA-VesnaNeural", + "Gender": 
"Female", + "Locale": "bs-BA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Vesna Online (Natural) - Bosnian (Bosnia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (bg-BG, BorislavNeural)", + "ShortName": "bg-BG-BorislavNeural", + "Gender": "Male", + "Locale": "bg-BG", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Borislav Online (Natural) - Bulgarian (Bulgaria)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (bg-BG, KalinaNeural)", + "ShortName": "bg-BG-KalinaNeural", + "Gender": "Female", + "Locale": "bg-BG", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Kalina Online (Natural) - Bulgarian (Bulgaria)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (my-MM, NilarNeural)", + "ShortName": "my-MM-NilarNeural", + "Gender": "Female", + "Locale": "my-MM", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Nilar Online (Natural) - Burmese (Myanmar)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (my-MM, ThihaNeural)", + "ShortName": "my-MM-ThihaNeural", + "Gender": "Male", + "Locale": "my-MM", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Thiha Online (Natural) - Burmese (Myanmar)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + 
"VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ca-ES, EnricNeural)", + "ShortName": "ca-ES-EnricNeural", + "Gender": "Male", + "Locale": "ca-ES", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Enric Online (Natural) - Catalan (Spain)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ca-ES, JoanaNeural)", + "ShortName": "ca-ES-JoanaNeural", + "Gender": "Female", + "Locale": "ca-ES", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Joana Online (Natural) - Catalan (Spain)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-HK, HiuGaaiNeural)", + "ShortName": "zh-HK-HiuGaaiNeural", + "Gender": "Female", + "Locale": "zh-HK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft HiuGaai Online (Natural) - Chinese (Cantonese Traditional)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-HK, HiuMaanNeural)", + "ShortName": "zh-HK-HiuMaanNeural", + "Gender": "Female", + "Locale": "zh-HK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft HiuMaan Online (Natural) - Chinese (Hong Kong)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-HK, WanLungNeural)", + "ShortName": "zh-HK-WanLungNeural", + "Gender": "Male", + "Locale": "zh-HK", + 
"SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft WanLung Online (Natural) - Chinese (Hong Kong)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)", + "ShortName": "zh-CN-XiaoxiaoNeural", + "Gender": "Female", + "Locale": "zh-CN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Xiaoxiao Online (Natural) - Chinese (Mainland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "News", + "Novel" + ], + "VoicePersonalities": [ + "Warm" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoyiNeural)", + "ShortName": "zh-CN-XiaoyiNeural", + "Gender": "Female", + "Locale": "zh-CN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Xiaoyi Online (Natural) - Chinese (Mainland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "Cartoon", + "Novel" + ], + "VoicePersonalities": [ + "Lively" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, YunjianNeural)", + "ShortName": "zh-CN-YunjianNeural", + "Gender": "Male", + "Locale": "zh-CN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Yunjian Online (Natural) - Chinese (Mainland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "Sports", + " Novel" + ], + "VoicePersonalities": [ + "Passion" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiNeural)", + "ShortName": "zh-CN-YunxiNeural", + "Gender": "Male", + "Locale": "zh-CN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Yunxi Online (Natural) - Chinese (Mainland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "Novel" + ], + "VoicePersonalities": [ + "Lively", + "Sunshine" + ] + } + }, 
+ { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiaNeural)", + "ShortName": "zh-CN-YunxiaNeural", + "Gender": "Male", + "Locale": "zh-CN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Yunxia Online (Natural) - Chinese (Mainland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "Cartoon", + "Novel" + ], + "VoicePersonalities": [ + "Cute" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, YunyangNeural)", + "ShortName": "zh-CN-YunyangNeural", + "Gender": "Male", + "Locale": "zh-CN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Yunyang Online (Natural) - Chinese (Mainland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "News" + ], + "VoicePersonalities": [ + "Professional", + "Reliable" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN-liaoning, XiaobeiNeural)", + "ShortName": "zh-CN-liaoning-XiaobeiNeural", + "Gender": "Female", + "Locale": "zh-CN-liaoning", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Xiaobei Online (Natural) - Chinese (Northeastern Mandarin)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "Dialect" + ], + "VoicePersonalities": [ + "Humorous" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoChenNeural)", + "ShortName": "zh-TW-HsiaoChenNeural", + "Gender": "Female", + "Locale": "zh-TW", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft HsiaoChen Online (Natural) - Chinese (Taiwan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-TW, YunJheNeural)", + "ShortName": "zh-TW-YunJheNeural", + "Gender": "Male", + "Locale": "zh-TW", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + 
"FriendlyName": "Microsoft YunJhe Online (Natural) - Chinese (Taiwan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoYuNeural)", + "ShortName": "zh-TW-HsiaoYuNeural", + "Gender": "Female", + "Locale": "zh-TW", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft HsiaoYu Online (Natural) - Chinese (Taiwanese Mandarin)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN-shaanxi, XiaoniNeural)", + "ShortName": "zh-CN-shaanxi-XiaoniNeural", + "Gender": "Female", + "Locale": "zh-CN-shaanxi", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Xiaoni Online (Natural) - Chinese (Zhongyuan Mandarin Shaanxi)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "Dialect" + ], + "VoicePersonalities": [ + "Bright" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (hr-HR, GabrijelaNeural)", + "ShortName": "hr-HR-GabrijelaNeural", + "Gender": "Female", + "Locale": "hr-HR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Gabrijela Online (Natural) - Croatian (Croatia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (hr-HR, SreckoNeural)", + "ShortName": "hr-HR-SreckoNeural", + "Gender": "Male", + "Locale": "hr-HR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Srecko Online (Natural) - Croatian (Croatia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] 
+ } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (cs-CZ, AntoninNeural)", + "ShortName": "cs-CZ-AntoninNeural", + "Gender": "Male", + "Locale": "cs-CZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Antonin Online (Natural) - Czech (Czech)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (cs-CZ, VlastaNeural)", + "ShortName": "cs-CZ-VlastaNeural", + "Gender": "Female", + "Locale": "cs-CZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Vlasta Online (Natural) - Czech (Czech)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (da-DK, ChristelNeural)", + "ShortName": "da-DK-ChristelNeural", + "Gender": "Female", + "Locale": "da-DK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Christel Online (Natural) - Danish (Denmark)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (da-DK, JeppeNeural)", + "ShortName": "da-DK-JeppeNeural", + "Gender": "Male", + "Locale": "da-DK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Jeppe Online (Natural) - Danish (Denmark)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (nl-BE, ArnaudNeural)", + "ShortName": "nl-BE-ArnaudNeural", + "Gender": "Male", + "Locale": "nl-BE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Arnaud 
Online (Natural) - Dutch (Belgium)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (nl-BE, DenaNeural)", + "ShortName": "nl-BE-DenaNeural", + "Gender": "Female", + "Locale": "nl-BE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Dena Online (Natural) - Dutch (Belgium)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (nl-NL, ColetteNeural)", + "ShortName": "nl-NL-ColetteNeural", + "Gender": "Female", + "Locale": "nl-NL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Colette Online (Natural) - Dutch (Netherlands)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (nl-NL, FennaNeural)", + "ShortName": "nl-NL-FennaNeural", + "Gender": "Female", + "Locale": "nl-NL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Fenna Online (Natural) - Dutch (Netherlands)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (nl-NL, MaartenNeural)", + "ShortName": "nl-NL-MaartenNeural", + "Gender": "Male", + "Locale": "nl-NL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Maarten Online (Natural) - Dutch (Netherlands)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-AU, 
NatashaNeural)", + "ShortName": "en-AU-NatashaNeural", + "Gender": "Female", + "Locale": "en-AU", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Natasha Online (Natural) - English (Australia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-AU, WilliamNeural)", + "ShortName": "en-AU-WilliamNeural", + "Gender": "Male", + "Locale": "en-AU", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft William Online (Natural) - English (Australia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-CA, ClaraNeural)", + "ShortName": "en-CA-ClaraNeural", + "Gender": "Female", + "Locale": "en-CA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Clara Online (Natural) - English (Canada)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-CA, LiamNeural)", + "ShortName": "en-CA-LiamNeural", + "Gender": "Male", + "Locale": "en-CA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Liam Online (Natural) - English (Canada)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-HK, SamNeural)", + "ShortName": "en-HK-SamNeural", + "Gender": "Male", + "Locale": "en-HK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sam Online (Natural) - English (Hongkong)", + "Status": "GA", + "VoiceTag": { + 
"ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-HK, YanNeural)", + "ShortName": "en-HK-YanNeural", + "Gender": "Female", + "Locale": "en-HK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Yan Online (Natural) - English (Hongkong)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-IN, NeerjaExpressiveNeural)", + "ShortName": "en-IN-NeerjaExpressiveNeural", + "Gender": "Female", + "Locale": "en-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Neerja Online (Natural) - English (India) (Preview)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-IN, NeerjaNeural)", + "ShortName": "en-IN-NeerjaNeural", + "Gender": "Female", + "Locale": "en-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Neerja Online (Natural) - English (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-IN, PrabhatNeural)", + "ShortName": "en-IN-PrabhatNeural", + "Gender": "Male", + "Locale": "en-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Prabhat Online (Natural) - English (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-IE, ConnorNeural)", + "ShortName": "en-IE-ConnorNeural", + "Gender": 
"Male", + "Locale": "en-IE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Connor Online (Natural) - English (Ireland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-IE, EmilyNeural)", + "ShortName": "en-IE-EmilyNeural", + "Gender": "Female", + "Locale": "en-IE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Emily Online (Natural) - English (Ireland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-KE, AsiliaNeural)", + "ShortName": "en-KE-AsiliaNeural", + "Gender": "Female", + "Locale": "en-KE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Asilia Online (Natural) - English (Kenya)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-KE, ChilembaNeural)", + "ShortName": "en-KE-ChilembaNeural", + "Gender": "Male", + "Locale": "en-KE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Chilemba Online (Natural) - English (Kenya)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-NZ, MitchellNeural)", + "ShortName": "en-NZ-MitchellNeural", + "Gender": "Male", + "Locale": "en-NZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Mitchell Online (Natural) - English (New Zealand)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + 
"VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-NZ, MollyNeural)", + "ShortName": "en-NZ-MollyNeural", + "Gender": "Female", + "Locale": "en-NZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Molly Online (Natural) - English (New Zealand)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-NG, AbeoNeural)", + "ShortName": "en-NG-AbeoNeural", + "Gender": "Male", + "Locale": "en-NG", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Abeo Online (Natural) - English (Nigeria)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-NG, EzinneNeural)", + "ShortName": "en-NG-EzinneNeural", + "Gender": "Female", + "Locale": "en-NG", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ezinne Online (Natural) - English (Nigeria)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-PH, JamesNeural)", + "ShortName": "en-PH-JamesNeural", + "Gender": "Male", + "Locale": "en-PH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft James Online (Natural) - English (Philippines)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-PH, RosaNeural)", + "ShortName": "en-PH-RosaNeural", + "Gender": "Female", + "Locale": "en-PH", + "SuggestedCodec": 
"audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Rosa Online (Natural) - English (Philippines)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-SG, LunaNeural)", + "ShortName": "en-SG-LunaNeural", + "Gender": "Female", + "Locale": "en-SG", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Luna Online (Natural) - English (Singapore)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-SG, WayneNeural)", + "ShortName": "en-SG-WayneNeural", + "Gender": "Male", + "Locale": "en-SG", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Wayne Online (Natural) - English (Singapore)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-ZA, LeahNeural)", + "ShortName": "en-ZA-LeahNeural", + "Gender": "Female", + "Locale": "en-ZA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Leah Online (Natural) - English (South Africa)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-ZA, LukeNeural)", + "ShortName": "en-ZA-LukeNeural", + "Gender": "Male", + "Locale": "en-ZA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Luke Online (Natural) - English (South Africa)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + 
"Name": "Microsoft Server Speech Text to Speech Voice (en-TZ, ElimuNeural)", + "ShortName": "en-TZ-ElimuNeural", + "Gender": "Male", + "Locale": "en-TZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Elimu Online (Natural) - English (Tanzania)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-TZ, ImaniNeural)", + "ShortName": "en-TZ-ImaniNeural", + "Gender": "Female", + "Locale": "en-TZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Imani Online (Natural) - English (Tanzania)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-GB, LibbyNeural)", + "ShortName": "en-GB-LibbyNeural", + "Gender": "Female", + "Locale": "en-GB", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Libby Online (Natural) - English (United Kingdom)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-GB, MaisieNeural)", + "ShortName": "en-GB-MaisieNeural", + "Gender": "Female", + "Locale": "en-GB", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Maisie Online (Natural) - English (United Kingdom)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-GB, RyanNeural)", + "ShortName": "en-GB-RyanNeural", + "Gender": "Male", + "Locale": "en-GB", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ryan Online 
(Natural) - English (United Kingdom)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-GB, SoniaNeural)", + "ShortName": "en-GB-SoniaNeural", + "Gender": "Female", + "Locale": "en-GB", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sonia Online (Natural) - English (United Kingdom)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-GB, ThomasNeural)", + "ShortName": "en-GB-ThomasNeural", + "Gender": "Male", + "Locale": "en-GB", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Thomas Online (Natural) - English (United Kingdom)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)", + "ShortName": "en-US-AriaNeural", + "Gender": "Female", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Aria Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "News", + "Novel" + ], + "VoicePersonalities": [ + "Positive", + "Confident" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-US, AnaNeural)", + "ShortName": "en-US-AnaNeural", + "Gender": "Female", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ana Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "Cartoon", + "Conversation" + ], + "VoicePersonalities": [ + "Cute" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice 
(en-US, AndrewNeural)", + "ShortName": "en-US-AndrewNeural", + "Gender": "Male", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Andrew Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "Conversation", + "Copilot" + ], + "VoicePersonalities": [ + "Warm", + "Confident", + "Engaging" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-US, AvaNeural)", + "ShortName": "en-US-AvaNeural", + "Gender": "Female", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ava Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "Conversation", + "Copilot" + ], + "VoicePersonalities": [ + "Pleasant", + "Genuine", + "Engaging" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-US, ChristopherNeural)", + "ShortName": "en-US-ChristopherNeural", + "Gender": "Male", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Christopher Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "News", + "Novel" + ], + "VoicePersonalities": [ + "Reliable", + "Authority" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-US, EricNeural)", + "ShortName": "en-US-EricNeural", + "Gender": "Male", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Eric Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "News", + "Novel" + ], + "VoicePersonalities": [ + "Rational" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-US, GuyNeural)", + "ShortName": "en-US-GuyNeural", + "Gender": "Male", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": 
"Microsoft Guy Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "News", + "Novel" + ], + "VoicePersonalities": [ + "Passion" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-US, JennyNeural)", + "ShortName": "en-US-JennyNeural", + "Gender": "Female", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Jenny Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Considerate", + "Comfort" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-US, MichelleNeural)", + "ShortName": "en-US-MichelleNeural", + "Gender": "Female", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Michelle Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "News", + "Novel" + ], + "VoicePersonalities": [ + "Friendly", + "Pleasant" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-US, RogerNeural)", + "ShortName": "en-US-RogerNeural", + "Gender": "Male", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Roger Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "News", + "Novel" + ], + "VoicePersonalities": [ + "Lively" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (en-US, SteffanNeural)", + "ShortName": "en-US-SteffanNeural", + "Gender": "Male", + "Locale": "en-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Steffan Online (Natural) - English (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "News", + "Novel" + ], + "VoicePersonalities": [ + "Rational" + ] + } + }, + { + "Name": "Microsoft 
Server Speech Text to Speech Voice (et-EE, AnuNeural)", + "ShortName": "et-EE-AnuNeural", + "Gender": "Female", + "Locale": "et-EE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Anu Online (Natural) - Estonian (Estonia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (et-EE, KertNeural)", + "ShortName": "et-EE-KertNeural", + "Gender": "Male", + "Locale": "et-EE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Kert Online (Natural) - Estonian (Estonia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fil-PH, AngeloNeural)", + "ShortName": "fil-PH-AngeloNeural", + "Gender": "Male", + "Locale": "fil-PH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Angelo Online (Natural) - Filipino (Philippines)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fil-PH, BlessicaNeural)", + "ShortName": "fil-PH-BlessicaNeural", + "Gender": "Female", + "Locale": "fil-PH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Blessica Online (Natural) - Filipino (Philippines)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fi-FI, HarriNeural)", + "ShortName": "fi-FI-HarriNeural", + "Gender": "Male", + "Locale": "fi-FI", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Harri Online (Natural) - 
Finnish (Finland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fi-FI, NooraNeural)", + "ShortName": "fi-FI-NooraNeural", + "Gender": "Female", + "Locale": "fi-FI", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Noora Online (Natural) - Finnish (Finland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fr-BE, CharlineNeural)", + "ShortName": "fr-BE-CharlineNeural", + "Gender": "Female", + "Locale": "fr-BE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Charline Online (Natural) - French (Belgium)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fr-BE, GerardNeural)", + "ShortName": "fr-BE-GerardNeural", + "Gender": "Male", + "Locale": "fr-BE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Gerard Online (Natural) - French (Belgium)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fr-CA, AntoineNeural)", + "ShortName": "fr-CA-AntoineNeural", + "Gender": "Male", + "Locale": "fr-CA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Antoine Online (Natural) - French (Canada)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fr-CA, JeanNeural)", + 
"ShortName": "fr-CA-JeanNeural", + "Gender": "Male", + "Locale": "fr-CA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Jean Online (Natural) - French (Canada)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fr-CA, SylvieNeural)", + "ShortName": "fr-CA-SylvieNeural", + "Gender": "Female", + "Locale": "fr-CA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sylvie Online (Natural) - French (Canada)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fr-FR, DeniseNeural)", + "ShortName": "fr-FR-DeniseNeural", + "Gender": "Female", + "Locale": "fr-FR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Denise Online (Natural) - French (France)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fr-FR, EloiseNeural)", + "ShortName": "fr-FR-EloiseNeural", + "Gender": "Female", + "Locale": "fr-FR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Eloise Online (Natural) - French (France)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fr-FR, HenriNeural)", + "ShortName": "fr-FR-HenriNeural", + "Gender": "Male", + "Locale": "fr-FR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Henri Online (Natural) - French (France)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + 
"General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fr-CH, ArianeNeural)", + "ShortName": "fr-CH-ArianeNeural", + "Gender": "Female", + "Locale": "fr-CH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ariane Online (Natural) - French (Switzerland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fr-CH, FabriceNeural)", + "ShortName": "fr-CH-FabriceNeural", + "Gender": "Male", + "Locale": "fr-CH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Fabrice Online (Natural) - French (Switzerland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (gl-ES, RoiNeural)", + "ShortName": "gl-ES-RoiNeural", + "Gender": "Male", + "Locale": "gl-ES", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Roi Online (Natural) - Galician (Spain)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (gl-ES, SabelaNeural)", + "ShortName": "gl-ES-SabelaNeural", + "Gender": "Female", + "Locale": "gl-ES", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sabela Online (Natural) - Galician (Spain)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ka-GE, EkaNeural)", + "ShortName": "ka-GE-EkaNeural", + "Gender": "Female", + "Locale": "ka-GE", + "SuggestedCodec": 
"audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Eka Online (Natural) - Georgian (Georgia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ka-GE, GiorgiNeural)", + "ShortName": "ka-GE-GiorgiNeural", + "Gender": "Male", + "Locale": "ka-GE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Giorgi Online (Natural) - Georgian (Georgia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (de-AT, IngridNeural)", + "ShortName": "de-AT-IngridNeural", + "Gender": "Female", + "Locale": "de-AT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ingrid Online (Natural) - German (Austria)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (de-AT, JonasNeural)", + "ShortName": "de-AT-JonasNeural", + "Gender": "Male", + "Locale": "de-AT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Jonas Online (Natural) - German (Austria)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (de-DE, AmalaNeural)", + "ShortName": "de-DE-AmalaNeural", + "Gender": "Female", + "Locale": "de-DE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Amala Online (Natural) - German (Germany)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": 
"Microsoft Server Speech Text to Speech Voice (de-DE, ConradNeural)", + "ShortName": "de-DE-ConradNeural", + "Gender": "Male", + "Locale": "de-DE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Conrad Online (Natural) - German (Germany)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (de-DE, KatjaNeural)", + "ShortName": "de-DE-KatjaNeural", + "Gender": "Female", + "Locale": "de-DE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Katja Online (Natural) - German (Germany)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (de-DE, KillianNeural)", + "ShortName": "de-DE-KillianNeural", + "Gender": "Male", + "Locale": "de-DE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Killian Online (Natural) - German (Germany)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (de-CH, JanNeural)", + "ShortName": "de-CH-JanNeural", + "Gender": "Male", + "Locale": "de-CH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Jan Online (Natural) - German (Switzerland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (de-CH, LeniNeural)", + "ShortName": "de-CH-LeniNeural", + "Gender": "Female", + "Locale": "de-CH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Leni Online (Natural) - German 
(Switzerland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (el-GR, AthinaNeural)", + "ShortName": "el-GR-AthinaNeural", + "Gender": "Female", + "Locale": "el-GR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Athina Online (Natural) - Greek (Greece)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (el-GR, NestorasNeural)", + "ShortName": "el-GR-NestorasNeural", + "Gender": "Male", + "Locale": "el-GR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Nestoras Online (Natural) - Greek (Greece)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (gu-IN, DhwaniNeural)", + "ShortName": "gu-IN-DhwaniNeural", + "Gender": "Female", + "Locale": "gu-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Dhwani Online (Natural) - Gujarati (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (gu-IN, NiranjanNeural)", + "ShortName": "gu-IN-NiranjanNeural", + "Gender": "Male", + "Locale": "gu-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Niranjan Online (Natural) - Gujarati (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (he-IL, AvriNeural)", + "ShortName": 
"he-IL-AvriNeural", + "Gender": "Male", + "Locale": "he-IL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Avri Online (Natural) - Hebrew (Israel)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (he-IL, HilaNeural)", + "ShortName": "he-IL-HilaNeural", + "Gender": "Female", + "Locale": "he-IL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Hila Online (Natural) - Hebrew (Israel)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (hi-IN, MadhurNeural)", + "ShortName": "hi-IN-MadhurNeural", + "Gender": "Male", + "Locale": "hi-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Madhur Online (Natural) - Hindi (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (hi-IN, SwaraNeural)", + "ShortName": "hi-IN-SwaraNeural", + "Gender": "Female", + "Locale": "hi-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Swara Online (Natural) - Hindi (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (hu-HU, NoemiNeural)", + "ShortName": "hu-HU-NoemiNeural", + "Gender": "Female", + "Locale": "hu-HU", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Noemi Online (Natural) - Hungarian (Hungary)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + 
"VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (hu-HU, TamasNeural)", + "ShortName": "hu-HU-TamasNeural", + "Gender": "Male", + "Locale": "hu-HU", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Tamas Online (Natural) - Hungarian (Hungary)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (is-IS, GudrunNeural)", + "ShortName": "is-IS-GudrunNeural", + "Gender": "Female", + "Locale": "is-IS", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Gudrun Online (Natural) - Icelandic (Iceland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (is-IS, GunnarNeural)", + "ShortName": "is-IS-GunnarNeural", + "Gender": "Male", + "Locale": "is-IS", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Gunnar Online (Natural) - Icelandic (Iceland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (id-ID, ArdiNeural)", + "ShortName": "id-ID-ArdiNeural", + "Gender": "Male", + "Locale": "id-ID", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ardi Online (Natural) - Indonesian (Indonesia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (id-ID, GadisNeural)", + "ShortName": "id-ID-GadisNeural", + "Gender": "Female", + "Locale": "id-ID", + "SuggestedCodec": 
"audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Gadis Online (Natural) - Indonesian (Indonesia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ga-IE, ColmNeural)", + "ShortName": "ga-IE-ColmNeural", + "Gender": "Male", + "Locale": "ga-IE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Colm Online (Natural) - Irish (Ireland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ga-IE, OrlaNeural)", + "ShortName": "ga-IE-OrlaNeural", + "Gender": "Female", + "Locale": "ga-IE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Orla Online (Natural) - Irish (Ireland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (it-IT, DiegoNeural)", + "ShortName": "it-IT-DiegoNeural", + "Gender": "Male", + "Locale": "it-IT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Diego Online (Natural) - Italian (Italy)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (it-IT, ElsaNeural)", + "ShortName": "it-IT-ElsaNeural", + "Gender": "Female", + "Locale": "it-IT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Elsa Online (Natural) - Italian (Italy)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server 
Speech Text to Speech Voice (it-IT, IsabellaNeural)", + "ShortName": "it-IT-IsabellaNeural", + "Gender": "Female", + "Locale": "it-IT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Isabella Online (Natural) - Italian (Italy)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ja-JP, KeitaNeural)", + "ShortName": "ja-JP-KeitaNeural", + "Gender": "Male", + "Locale": "ja-JP", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Keita Online (Natural) - Japanese (Japan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ja-JP, NanamiNeural)", + "ShortName": "ja-JP-NanamiNeural", + "Gender": "Female", + "Locale": "ja-JP", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Nanami Online (Natural) - Japanese (Japan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (jv-ID, DimasNeural)", + "ShortName": "jv-ID-DimasNeural", + "Gender": "Male", + "Locale": "jv-ID", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Dimas Online (Natural) - Javanese (Indonesia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (jv-ID, SitiNeural)", + "ShortName": "jv-ID-SitiNeural", + "Gender": "Female", + "Locale": "jv-ID", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Siti Online (Natural) - Javanese (Indonesia)", + 
"Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (kn-IN, GaganNeural)", + "ShortName": "kn-IN-GaganNeural", + "Gender": "Male", + "Locale": "kn-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Gagan Online (Natural) - Kannada (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (kn-IN, SapnaNeural)", + "ShortName": "kn-IN-SapnaNeural", + "Gender": "Female", + "Locale": "kn-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sapna Online (Natural) - Kannada (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (kk-KZ, AigulNeural)", + "ShortName": "kk-KZ-AigulNeural", + "Gender": "Female", + "Locale": "kk-KZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Aigul Online (Natural) - Kazakh (Kazakhstan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (kk-KZ, DauletNeural)", + "ShortName": "kk-KZ-DauletNeural", + "Gender": "Male", + "Locale": "kk-KZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Daulet Online (Natural) - Kazakh (Kazakhstan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (km-KH, PisethNeural)", + "ShortName": "km-KH-PisethNeural", + 
"Gender": "Male", + "Locale": "km-KH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Piseth Online (Natural) - Khmer (Cambodia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (km-KH, SreymomNeural)", + "ShortName": "km-KH-SreymomNeural", + "Gender": "Female", + "Locale": "km-KH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sreymom Online (Natural) - Khmer (Cambodia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ko-KR, InJoonNeural)", + "ShortName": "ko-KR-InJoonNeural", + "Gender": "Male", + "Locale": "ko-KR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft InJoon Online (Natural) - Korean (Korea)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ko-KR, SunHiNeural)", + "ShortName": "ko-KR-SunHiNeural", + "Gender": "Female", + "Locale": "ko-KR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft SunHi Online (Natural) - Korean (Korea)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (lo-LA, ChanthavongNeural)", + "ShortName": "lo-LA-ChanthavongNeural", + "Gender": "Male", + "Locale": "lo-LA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Chanthavong Online (Natural) - Lao (Laos)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + 
"VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (lo-LA, KeomanyNeural)", + "ShortName": "lo-LA-KeomanyNeural", + "Gender": "Female", + "Locale": "lo-LA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Keomany Online (Natural) - Lao (Laos)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (lv-LV, EveritaNeural)", + "ShortName": "lv-LV-EveritaNeural", + "Gender": "Female", + "Locale": "lv-LV", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Everita Online (Natural) - Latvian (Latvia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (lv-LV, NilsNeural)", + "ShortName": "lv-LV-NilsNeural", + "Gender": "Male", + "Locale": "lv-LV", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Nils Online (Natural) - Latvian (Latvia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (lt-LT, LeonasNeural)", + "ShortName": "lt-LT-LeonasNeural", + "Gender": "Male", + "Locale": "lt-LT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Leonas Online (Natural) - Lithuanian (Lithuania)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (lt-LT, OnaNeural)", + "ShortName": "lt-LT-OnaNeural", + "Gender": "Female", + "Locale": "lt-LT", + "SuggestedCodec": 
"audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ona Online (Natural) - Lithuanian (Lithuania)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (mk-MK, AleksandarNeural)", + "ShortName": "mk-MK-AleksandarNeural", + "Gender": "Male", + "Locale": "mk-MK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Aleksandar Online (Natural) - Macedonian (Republic of North Macedonia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (mk-MK, MarijaNeural)", + "ShortName": "mk-MK-MarijaNeural", + "Gender": "Female", + "Locale": "mk-MK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Marija Online (Natural) - Macedonian (Republic of North Macedonia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ms-MY, OsmanNeural)", + "ShortName": "ms-MY-OsmanNeural", + "Gender": "Male", + "Locale": "ms-MY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Osman Online (Natural) - Malay (Malaysia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ms-MY, YasminNeural)", + "ShortName": "ms-MY-YasminNeural", + "Gender": "Female", + "Locale": "ms-MY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Yasmin Online (Natural) - Malay (Malaysia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + 
"VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ml-IN, MidhunNeural)", + "ShortName": "ml-IN-MidhunNeural", + "Gender": "Male", + "Locale": "ml-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Midhun Online (Natural) - Malayalam (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ml-IN, SobhanaNeural)", + "ShortName": "ml-IN-SobhanaNeural", + "Gender": "Female", + "Locale": "ml-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sobhana Online (Natural) - Malayalam (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (mt-MT, GraceNeural)", + "ShortName": "mt-MT-GraceNeural", + "Gender": "Female", + "Locale": "mt-MT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Grace Online (Natural) - Maltese (Malta)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (mt-MT, JosephNeural)", + "ShortName": "mt-MT-JosephNeural", + "Gender": "Male", + "Locale": "mt-MT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Joseph Online (Natural) - Maltese (Malta)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (mr-IN, AarohiNeural)", + "ShortName": "mr-IN-AarohiNeural", + "Gender": "Female", + "Locale": "mr-IN", + "SuggestedCodec": 
"audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Aarohi Online (Natural) - Marathi (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (mr-IN, ManoharNeural)", + "ShortName": "mr-IN-ManoharNeural", + "Gender": "Male", + "Locale": "mr-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Manohar Online (Natural) - Marathi (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (mn-MN, BataaNeural)", + "ShortName": "mn-MN-BataaNeural", + "Gender": "Male", + "Locale": "mn-MN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Bataa Online (Natural) - Mongolian (Mongolia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (mn-MN, YesuiNeural)", + "ShortName": "mn-MN-YesuiNeural", + "Gender": "Female", + "Locale": "mn-MN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Yesui Online (Natural) - Mongolian (Mongolia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ne-NP, HemkalaNeural)", + "ShortName": "ne-NP-HemkalaNeural", + "Gender": "Female", + "Locale": "ne-NP", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Hemkala Online (Natural) - Nepali (Nepal)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + 
"Name": "Microsoft Server Speech Text to Speech Voice (ne-NP, SagarNeural)", + "ShortName": "ne-NP-SagarNeural", + "Gender": "Male", + "Locale": "ne-NP", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sagar Online (Natural) - Nepali (Nepal)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (nb-NO, FinnNeural)", + "ShortName": "nb-NO-FinnNeural", + "Gender": "Male", + "Locale": "nb-NO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Finn Online (Natural) - Norwegian (Bokmål Norway)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (nb-NO, PernilleNeural)", + "ShortName": "nb-NO-PernilleNeural", + "Gender": "Female", + "Locale": "nb-NO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Pernille Online (Natural) - Norwegian (Bokmål, Norway)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ps-AF, GulNawazNeural)", + "ShortName": "ps-AF-GulNawazNeural", + "Gender": "Male", + "Locale": "ps-AF", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft GulNawaz Online (Natural) - Pashto (Afghanistan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ps-AF, LatifaNeural)", + "ShortName": "ps-AF-LatifaNeural", + "Gender": "Female", + "Locale": "ps-AF", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft 
Latifa Online (Natural) - Pashto (Afghanistan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fa-IR, DilaraNeural)", + "ShortName": "fa-IR-DilaraNeural", + "Gender": "Female", + "Locale": "fa-IR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Dilara Online (Natural) - Persian (Iran)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (fa-IR, FaridNeural)", + "ShortName": "fa-IR-FaridNeural", + "Gender": "Male", + "Locale": "fa-IR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Farid Online (Natural) - Persian (Iran)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (pl-PL, MarekNeural)", + "ShortName": "pl-PL-MarekNeural", + "Gender": "Male", + "Locale": "pl-PL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Marek Online (Natural) - Polish (Poland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (pl-PL, ZofiaNeural)", + "ShortName": "pl-PL-ZofiaNeural", + "Gender": "Female", + "Locale": "pl-PL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Zofia Online (Natural) - Polish (Poland)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (pt-BR, AntonioNeural)", + 
"ShortName": "pt-BR-AntonioNeural", + "Gender": "Male", + "Locale": "pt-BR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Antonio Online (Natural) - Portuguese (Brazil)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (pt-BR, FranciscaNeural)", + "ShortName": "pt-BR-FranciscaNeural", + "Gender": "Female", + "Locale": "pt-BR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Francisca Online (Natural) - Portuguese (Brazil)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (pt-PT, DuarteNeural)", + "ShortName": "pt-PT-DuarteNeural", + "Gender": "Male", + "Locale": "pt-PT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Duarte Online (Natural) - Portuguese (Portugal)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (pt-PT, RaquelNeural)", + "ShortName": "pt-PT-RaquelNeural", + "Gender": "Female", + "Locale": "pt-PT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Raquel Online (Natural) - Portuguese (Portugal)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ro-RO, AlinaNeural)", + "ShortName": "ro-RO-AlinaNeural", + "Gender": "Female", + "Locale": "ro-RO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Alina Online (Natural) - Romanian (Romania)", + "Status": "GA", + 
"VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ro-RO, EmilNeural)", + "ShortName": "ro-RO-EmilNeural", + "Gender": "Male", + "Locale": "ro-RO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Emil Online (Natural) - Romanian (Romania)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ru-RU, DmitryNeural)", + "ShortName": "ru-RU-DmitryNeural", + "Gender": "Male", + "Locale": "ru-RU", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Dmitry Online (Natural) - Russian (Russia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ru-RU, SvetlanaNeural)", + "ShortName": "ru-RU-SvetlanaNeural", + "Gender": "Female", + "Locale": "ru-RU", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Svetlana Online (Natural) - Russian (Russia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sr-RS, NicholasNeural)", + "ShortName": "sr-RS-NicholasNeural", + "Gender": "Male", + "Locale": "sr-RS", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Nicholas Online (Natural) - Serbian (Serbia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sr-RS, SophieNeural)", + "ShortName": "sr-RS-SophieNeural", + "Gender": 
"Female", + "Locale": "sr-RS", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sophie Online (Natural) - Serbian (Serbia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (si-LK, SameeraNeural)", + "ShortName": "si-LK-SameeraNeural", + "Gender": "Male", + "Locale": "si-LK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sameera Online (Natural) - Sinhala (Sri Lanka)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (si-LK, ThiliniNeural)", + "ShortName": "si-LK-ThiliniNeural", + "Gender": "Female", + "Locale": "si-LK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Thilini Online (Natural) - Sinhala (Sri Lanka)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sk-SK, LukasNeural)", + "ShortName": "sk-SK-LukasNeural", + "Gender": "Male", + "Locale": "sk-SK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Lukas Online (Natural) - Slovak (Slovakia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sk-SK, ViktoriaNeural)", + "ShortName": "sk-SK-ViktoriaNeural", + "Gender": "Female", + "Locale": "sk-SK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Viktoria Online (Natural) - Slovak (Slovakia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + 
"VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sl-SI, PetraNeural)", + "ShortName": "sl-SI-PetraNeural", + "Gender": "Female", + "Locale": "sl-SI", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Petra Online (Natural) - Slovenian (Slovenia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sl-SI, RokNeural)", + "ShortName": "sl-SI-RokNeural", + "Gender": "Male", + "Locale": "sl-SI", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Rok Online (Natural) - Slovenian (Slovenia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (so-SO, MuuseNeural)", + "ShortName": "so-SO-MuuseNeural", + "Gender": "Male", + "Locale": "so-SO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Muuse Online (Natural) - Somali (Somalia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (so-SO, UbaxNeural)", + "ShortName": "so-SO-UbaxNeural", + "Gender": "Female", + "Locale": "so-SO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ubax Online (Natural) - Somali (Somalia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-AR, ElenaNeural)", + "ShortName": "es-AR-ElenaNeural", + "Gender": "Female", + "Locale": "es-AR", + "SuggestedCodec": 
"audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Elena Online (Natural) - Spanish (Argentina)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-AR, TomasNeural)", + "ShortName": "es-AR-TomasNeural", + "Gender": "Male", + "Locale": "es-AR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Tomas Online (Natural) - Spanish (Argentina)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-BO, MarceloNeural)", + "ShortName": "es-BO-MarceloNeural", + "Gender": "Male", + "Locale": "es-BO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Marcelo Online (Natural) - Spanish (Bolivia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-BO, SofiaNeural)", + "ShortName": "es-BO-SofiaNeural", + "Gender": "Female", + "Locale": "es-BO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sofia Online (Natural) - Spanish (Bolivia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-CL, CatalinaNeural)", + "ShortName": "es-CL-CatalinaNeural", + "Gender": "Female", + "Locale": "es-CL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Catalina Online (Natural) - Spanish (Chile)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { 
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-CL, LorenzoNeural)", + "ShortName": "es-CL-LorenzoNeural", + "Gender": "Male", + "Locale": "es-CL", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Lorenzo Online (Natural) - Spanish (Chile)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-CO, GonzaloNeural)", + "ShortName": "es-CO-GonzaloNeural", + "Gender": "Male", + "Locale": "es-CO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Gonzalo Online (Natural) - Spanish (Colombia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-CO, SalomeNeural)", + "ShortName": "es-CO-SalomeNeural", + "Gender": "Female", + "Locale": "es-CO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Salome Online (Natural) - Spanish (Colombia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-CR, JuanNeural)", + "ShortName": "es-CR-JuanNeural", + "Gender": "Male", + "Locale": "es-CR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Juan Online (Natural) - Spanish (Costa Rica)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-CR, MariaNeural)", + "ShortName": "es-CR-MariaNeural", + "Gender": "Female", + "Locale": "es-CR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Maria Online 
(Natural) - Spanish (Costa Rica)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-CU, BelkysNeural)", + "ShortName": "es-CU-BelkysNeural", + "Gender": "Female", + "Locale": "es-CU", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Belkys Online (Natural) - Spanish (Cuba)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-CU, ManuelNeural)", + "ShortName": "es-CU-ManuelNeural", + "Gender": "Male", + "Locale": "es-CU", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Manuel Online (Natural) - Spanish (Cuba)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-DO, EmilioNeural)", + "ShortName": "es-DO-EmilioNeural", + "Gender": "Male", + "Locale": "es-DO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Emilio Online (Natural) - Spanish (Dominican Republic)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-DO, RamonaNeural)", + "ShortName": "es-DO-RamonaNeural", + "Gender": "Female", + "Locale": "es-DO", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ramona Online (Natural) - Spanish (Dominican Republic)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice 
(es-EC, AndreaNeural)", + "ShortName": "es-EC-AndreaNeural", + "Gender": "Female", + "Locale": "es-EC", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Andrea Online (Natural) - Spanish (Ecuador)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-EC, LuisNeural)", + "ShortName": "es-EC-LuisNeural", + "Gender": "Male", + "Locale": "es-EC", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Luis Online (Natural) - Spanish (Ecuador)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-SV, LorenaNeural)", + "ShortName": "es-SV-LorenaNeural", + "Gender": "Female", + "Locale": "es-SV", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Lorena Online (Natural) - Spanish (El Salvador)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-SV, RodrigoNeural)", + "ShortName": "es-SV-RodrigoNeural", + "Gender": "Male", + "Locale": "es-SV", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Rodrigo Online (Natural) - Spanish (El Salvador)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-GQ, JavierNeural)", + "ShortName": "es-GQ-JavierNeural", + "Gender": "Male", + "Locale": "es-GQ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Javier Online (Natural) - Spanish (Equatorial Guinea)", + "Status": 
"GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-GQ, TeresaNeural)", + "ShortName": "es-GQ-TeresaNeural", + "Gender": "Female", + "Locale": "es-GQ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Teresa Online (Natural) - Spanish (Equatorial Guinea)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-GT, AndresNeural)", + "ShortName": "es-GT-AndresNeural", + "Gender": "Male", + "Locale": "es-GT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Andres Online (Natural) - Spanish (Guatemala)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-GT, MartaNeural)", + "ShortName": "es-GT-MartaNeural", + "Gender": "Female", + "Locale": "es-GT", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Marta Online (Natural) - Spanish (Guatemala)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-HN, CarlosNeural)", + "ShortName": "es-HN-CarlosNeural", + "Gender": "Male", + "Locale": "es-HN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Carlos Online (Natural) - Spanish (Honduras)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-HN, KarlaNeural)", + "ShortName": 
"es-HN-KarlaNeural", + "Gender": "Female", + "Locale": "es-HN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Karla Online (Natural) - Spanish (Honduras)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-MX, DaliaNeural)", + "ShortName": "es-MX-DaliaNeural", + "Gender": "Female", + "Locale": "es-MX", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Dalia Online (Natural) - Spanish (Mexico)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-MX, JorgeNeural)", + "ShortName": "es-MX-JorgeNeural", + "Gender": "Male", + "Locale": "es-MX", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Jorge Online (Natural) - Spanish (Mexico)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-NI, FedericoNeural)", + "ShortName": "es-NI-FedericoNeural", + "Gender": "Male", + "Locale": "es-NI", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Federico Online (Natural) - Spanish (Nicaragua)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-NI, YolandaNeural)", + "ShortName": "es-NI-YolandaNeural", + "Gender": "Female", + "Locale": "es-NI", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Yolanda Online (Natural) - Spanish (Nicaragua)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + 
"General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-PA, MargaritaNeural)", + "ShortName": "es-PA-MargaritaNeural", + "Gender": "Female", + "Locale": "es-PA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Margarita Online (Natural) - Spanish (Panama)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-PA, RobertoNeural)", + "ShortName": "es-PA-RobertoNeural", + "Gender": "Male", + "Locale": "es-PA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Roberto Online (Natural) - Spanish (Panama)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-PY, MarioNeural)", + "ShortName": "es-PY-MarioNeural", + "Gender": "Male", + "Locale": "es-PY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Mario Online (Natural) - Spanish (Paraguay)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-PY, TaniaNeural)", + "ShortName": "es-PY-TaniaNeural", + "Gender": "Female", + "Locale": "es-PY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Tania Online (Natural) - Spanish (Paraguay)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-PE, AlexNeural)", + "ShortName": "es-PE-AlexNeural", + "Gender": "Male", + "Locale": "es-PE", + 
"SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Alex Online (Natural) - Spanish (Peru)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-PE, CamilaNeural)", + "ShortName": "es-PE-CamilaNeural", + "Gender": "Female", + "Locale": "es-PE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Camila Online (Natural) - Spanish (Peru)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-PR, KarinaNeural)", + "ShortName": "es-PR-KarinaNeural", + "Gender": "Female", + "Locale": "es-PR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Karina Online (Natural) - Spanish (Puerto Rico)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-PR, VictorNeural)", + "ShortName": "es-PR-VictorNeural", + "Gender": "Male", + "Locale": "es-PR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Victor Online (Natural) - Spanish (Puerto Rico)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-ES, AlvaroNeural)", + "ShortName": "es-ES-AlvaroNeural", + "Gender": "Male", + "Locale": "es-ES", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Alvaro Online (Natural) - Spanish (Spain)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] 
+ } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-ES, ElviraNeural)", + "ShortName": "es-ES-ElviraNeural", + "Gender": "Female", + "Locale": "es-ES", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Elvira Online (Natural) - Spanish (Spain)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-US, AlonsoNeural)", + "ShortName": "es-US-AlonsoNeural", + "Gender": "Male", + "Locale": "es-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Alonso Online (Natural) - Spanish (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-US, PalomaNeural)", + "ShortName": "es-US-PalomaNeural", + "Gender": "Female", + "Locale": "es-US", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Paloma Online (Natural) - Spanish (United States)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-UY, MateoNeural)", + "ShortName": "es-UY-MateoNeural", + "Gender": "Male", + "Locale": "es-UY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Mateo Online (Natural) - Spanish (Uruguay)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-UY, ValentinaNeural)", + "ShortName": "es-UY-ValentinaNeural", + "Gender": "Female", + "Locale": "es-UY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": 
"Microsoft Valentina Online (Natural) - Spanish (Uruguay)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-VE, PaolaNeural)", + "ShortName": "es-VE-PaolaNeural", + "Gender": "Female", + "Locale": "es-VE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Paola Online (Natural) - Spanish (Venezuela)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (es-VE, SebastianNeural)", + "ShortName": "es-VE-SebastianNeural", + "Gender": "Male", + "Locale": "es-VE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sebastian Online (Natural) - Spanish (Venezuela)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (su-ID, JajangNeural)", + "ShortName": "su-ID-JajangNeural", + "Gender": "Male", + "Locale": "su-ID", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Jajang Online (Natural) - Sundanese (Indonesia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (su-ID, TutiNeural)", + "ShortName": "su-ID-TutiNeural", + "Gender": "Female", + "Locale": "su-ID", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Tuti Online (Natural) - Sundanese (Indonesia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text 
to Speech Voice (sw-KE, RafikiNeural)", + "ShortName": "sw-KE-RafikiNeural", + "Gender": "Male", + "Locale": "sw-KE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Rafiki Online (Natural) - Swahili (Kenya)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sw-KE, ZuriNeural)", + "ShortName": "sw-KE-ZuriNeural", + "Gender": "Female", + "Locale": "sw-KE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Zuri Online (Natural) - Swahili (Kenya)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sw-TZ, DaudiNeural)", + "ShortName": "sw-TZ-DaudiNeural", + "Gender": "Male", + "Locale": "sw-TZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Daudi Online (Natural) - Swahili (Tanzania)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sw-TZ, RehemaNeural)", + "ShortName": "sw-TZ-RehemaNeural", + "Gender": "Female", + "Locale": "sw-TZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Rehema Online (Natural) - Swahili (Tanzania)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sv-SE, MattiasNeural)", + "ShortName": "sv-SE-MattiasNeural", + "Gender": "Male", + "Locale": "sv-SE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Mattias Online (Natural) - Swedish (Sweden)", + "Status": "GA", + 
"VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (sv-SE, SofieNeural)", + "ShortName": "sv-SE-SofieNeural", + "Gender": "Female", + "Locale": "sv-SE", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sofie Online (Natural) - Swedish (Sweden)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ta-IN, PallaviNeural)", + "ShortName": "ta-IN-PallaviNeural", + "Gender": "Female", + "Locale": "ta-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Pallavi Online (Natural) - Tamil (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ta-IN, ValluvarNeural)", + "ShortName": "ta-IN-ValluvarNeural", + "Gender": "Male", + "Locale": "ta-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Valluvar Online (Natural) - Tamil (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ta-MY, KaniNeural)", + "ShortName": "ta-MY-KaniNeural", + "Gender": "Female", + "Locale": "ta-MY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Kani Online (Natural) - Tamil (Malaysia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ta-MY, SuryaNeural)", + "ShortName": "ta-MY-SuryaNeural", + "Gender": "Male", + 
"Locale": "ta-MY", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Surya Online (Natural) - Tamil (Malaysia)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ta-SG, AnbuNeural)", + "ShortName": "ta-SG-AnbuNeural", + "Gender": "Male", + "Locale": "ta-SG", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Anbu Online (Natural) - Tamil (Singapore)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ta-SG, VenbaNeural)", + "ShortName": "ta-SG-VenbaNeural", + "Gender": "Female", + "Locale": "ta-SG", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Venba Online (Natural) - Tamil (Singapore)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ta-LK, KumarNeural)", + "ShortName": "ta-LK-KumarNeural", + "Gender": "Male", + "Locale": "ta-LK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Kumar Online (Natural) - Tamil (Sri Lanka)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ta-LK, SaranyaNeural)", + "ShortName": "ta-LK-SaranyaNeural", + "Gender": "Female", + "Locale": "ta-LK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Saranya Online (Natural) - Tamil (Sri Lanka)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + 
"Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (te-IN, MohanNeural)", + "ShortName": "te-IN-MohanNeural", + "Gender": "Male", + "Locale": "te-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Mohan Online (Natural) - Telugu (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (te-IN, ShrutiNeural)", + "ShortName": "te-IN-ShrutiNeural", + "Gender": "Female", + "Locale": "te-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Shruti Online (Natural) - Telugu (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (th-TH, NiwatNeural)", + "ShortName": "th-TH-NiwatNeural", + "Gender": "Male", + "Locale": "th-TH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Niwat Online (Natural) - Thai (Thailand)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (th-TH, PremwadeeNeural)", + "ShortName": "th-TH-PremwadeeNeural", + "Gender": "Female", + "Locale": "th-TH", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Premwadee Online (Natural) - Thai (Thailand)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (tr-TR, AhmetNeural)", + "ShortName": "tr-TR-AhmetNeural", + "Gender": "Male", + "Locale": "tr-TR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": 
"Microsoft Ahmet Online (Natural) - Turkish (Turkey)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (tr-TR, EmelNeural)", + "ShortName": "tr-TR-EmelNeural", + "Gender": "Female", + "Locale": "tr-TR", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Emel Online (Natural) - Turkish (Turkey)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (uk-UA, OstapNeural)", + "ShortName": "uk-UA-OstapNeural", + "Gender": "Male", + "Locale": "uk-UA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Ostap Online (Natural) - Ukrainian (Ukraine)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (uk-UA, PolinaNeural)", + "ShortName": "uk-UA-PolinaNeural", + "Gender": "Female", + "Locale": "uk-UA", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Polina Online (Natural) - Ukrainian (Ukraine)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ur-IN, GulNeural)", + "ShortName": "ur-IN-GulNeural", + "Gender": "Female", + "Locale": "ur-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Gul Online (Natural) - Urdu (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ur-IN, 
SalmanNeural)", + "ShortName": "ur-IN-SalmanNeural", + "Gender": "Male", + "Locale": "ur-IN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Salman Online (Natural) - Urdu (India)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ur-PK, AsadNeural)", + "ShortName": "ur-PK-AsadNeural", + "Gender": "Male", + "Locale": "ur-PK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Asad Online (Natural) - Urdu (Pakistan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (ur-PK, UzmaNeural)", + "ShortName": "ur-PK-UzmaNeural", + "Gender": "Female", + "Locale": "ur-PK", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Uzma Online (Natural) - Urdu (Pakistan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (uz-UZ, MadinaNeural)", + "ShortName": "uz-UZ-MadinaNeural", + "Gender": "Female", + "Locale": "uz-UZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Madina Online (Natural) - Uzbek (Uzbekistan)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (uz-UZ, SardorNeural)", + "ShortName": "uz-UZ-SardorNeural", + "Gender": "Male", + "Locale": "uz-UZ", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Sardor Online (Natural) - Uzbek (Uzbekistan)", + "Status": "GA", + "VoiceTag": { + 
"ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (vi-VN, HoaiMyNeural)", + "ShortName": "vi-VN-HoaiMyNeural", + "Gender": "Female", + "Locale": "vi-VN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft HoaiMy Online (Natural) - Vietnamese (Vietnam)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (vi-VN, NamMinhNeural)", + "ShortName": "vi-VN-NamMinhNeural", + "Gender": "Male", + "Locale": "vi-VN", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft NamMinh Online (Natural) - Vietnamese (Vietnam)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (cy-GB, AledNeural)", + "ShortName": "cy-GB-AledNeural", + "Gender": "Male", + "Locale": "cy-GB", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Aled Online (Natural) - Welsh (United Kingdom)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (cy-GB, NiaNeural)", + "ShortName": "cy-GB-NiaNeural", + "Gender": "Female", + "Locale": "cy-GB", + "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3", + "FriendlyName": "Microsoft Nia Online (Natural) - Welsh (United Kingdom)", + "Status": "GA", + "VoiceTag": { + "ContentCategories": [ + "General" + ], + "VoicePersonalities": [ + "Friendly", + "Positive" + ] + } + }, + { + "Name": "Microsoft Server Speech Text to Speech Voice (zu-ZA, ThandoNeural)", + "ShortName": "zu-ZA-ThandoNeural", + "Gender": "Female", + 
def validate_sampling_rate(value):
    """Validate a sampling-rate command-line argument.

    Args:
        value: Raw argument string.

    Returns:
        ``value`` unchanged (still a string) when it is a supported rate.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a supported rate.
    """
    valid_sampling = [
        "32000",
        "40000",
        "48000",
    ]
    if value in valid_sampling:
        return value
    raise argparse.ArgumentTypeError(
        f"Invalid sampling_rate. Please choose from {valid_sampling} not {value}"
    )


def validate_f0up_key(value):
    """Validate a pitch-shift command-line argument and convert it to ``int``.

    Args:
        value: Raw argument string.

    Returns:
        The argument as an ``int`` in the inclusive range -24..24.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not an integer or lies
            outside the allowed range.
    """
    try:
        f0up_key = int(value)
    except (TypeError, ValueError) as error:
        # Report non-integers the same way as every other validator in this
        # module instead of leaking a bare ValueError.
        raise argparse.ArgumentTypeError(
            f"f0up_key must be an integer in the range of -24 to +24, not {value!r}"
        ) from error
    if -24 <= f0up_key <= 24:
        return f0up_key
    raise argparse.ArgumentTypeError("f0up_key must be in the range of -24 to +24")


def validate_true_false(value):
    """Validate a boolean-like command-line argument.

    Args:
        value: Raw argument string.

    Returns:
        ``value`` unchanged -- the string ``"True"`` or ``"False"``, not a
        ``bool``.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is neither literal.
    """
    valid_tf = [
        "True",
        "False",
    ]
    if value in valid_tf:
        return value
    raise argparse.ArgumentTypeError(
        f"Invalid true_false. Please choose from {valid_tf} not {value}"
    )


def validate_f0method(value):
    """Validate the name of a pitch-extraction (f0) method.

    Args:
        value: Raw argument string.

    Returns:
        ``value`` unchanged when it is one of the known method names.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a known method.
    """
    valid_f0methods = [
        "pm",
        "dio",
        "crepe",
        "crepe-tiny",
        "harvest",
        "rmvpe",
    ]
    if value in valid_f0methods:
        return value
    raise argparse.ArgumentTypeError(
        f"Invalid f0method. Please choose from {valid_f0methods} not {value}"
    )
def validate_tts_voices(value):
    """Validate a TTS voice name against the bundled voice list.

    The list is read from ``rvc/lib/tools/tts_voices.json``. The path is
    relative, so it is resolved against the current working directory.

    Args:
        value: Voice short name, e.g. ``"es-MX-DaliaNeural"``.

    Returns:
        ``value`` unchanged when it matches a ``"ShortName"`` in the file.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a listed voice.
        OSError: If the voice file cannot be opened.
    """
    json_path = os.path.join("rvc", "lib", "tools", "tts_voices.json")
    # JSON is UTF-8; do not depend on the platform's default text encoding.
    with open(json_path, "r", encoding="utf-8") as file:
        tts_voices_data = json.load(file)

    # Collect the "ShortName" value of every voice in the JSON file.
    short_names = [voice.get("ShortName", "") for voice in tts_voices_data]
    if value in short_names:
        return value
    raise argparse.ArgumentTypeError(
        f"Invalid voice. Please choose from {short_names} not {value}"
    )


def load_audio(file, sampling_rate):
    """Decode an audio file into a flat float32 NumPy array via ffmpeg.

    The ffmpeg output is requested as raw 32-bit float PCM with one channel
    (``ac=1``) at ``sampling_rate`` (``ar=sampling_rate``).

    Args:
        file: Path to the audio file. Surrounding spaces, double quotes and
            newlines are stripped before use.
        sampling_rate: Target sampling rate handed to ffmpeg.

    Returns:
        One-dimensional ``numpy.float32`` array of samples.

    Raises:
        RuntimeError: If anything goes wrong while cleaning the path or
            running ffmpeg; the original exception is chained as the cause.
    """
    try:
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sampling_rate)
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )
    except Exception as error:
        raise RuntimeError(f"Failed to load audio: {error}") from error

    return np.frombuffer(out, np.float32).flatten()


def format_title(title):
    """Turn an arbitrary title into an ASCII, underscore-separated name.

    Args:
        title: Text to sanitise.

    Returns:
        ``title`` with accents folded to their base letters, all other
        non-ASCII characters dropped, every character except word
        characters, whitespace, ``.`` and ``-`` removed, and each run of
        whitespace replaced by a single ``_``.
    """
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII round-trip then drops the marks and every other non-ASCII
    # character (box-drawing glyphs included, so no extra pass is needed).
    formatted_title = (
        unicodedata.normalize("NFKD", title).encode("ascii", "ignore").decode("ascii")
    )
    formatted_title = re.sub(r"[^\w\s.-]", "", formatted_title)
    formatted_title = re.sub(r"\s+", "_", formatted_title)
    return formatted_title
class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
    """Dataset of (spec, wav, phone, pitch, pitchf, speaker id) items.

    Every entry of the list loaded from ``hparams.training_files`` has five
    fields: audio path, phone-feature ``.npy`` path, pitch ``.npy`` path,
    pitchf ``.npy`` path and a speaker id convertible with ``int()``.
    """

    def __init__(self, hparams):
        """Read the file list and the audio settings from ``hparams``."""
        self.audiopaths_and_text = load_filepaths_and_text(hparams.training_files)
        self.max_wav_value = hparams.max_wav_value
        self.sampling_rate = hparams.sampling_rate
        self.filter_length = hparams.filter_length
        self.hop_length = hparams.hop_length
        self.win_length = hparams.win_length
        self.min_text_len = getattr(hparams, "min_text_len", 1)
        self.max_text_len = getattr(hparams, "max_text_len", 5000)
        self._filter()

    @staticmethod
    def _spec_cache_path(filename):
        """Return the spectrogram cache path that sits next to ``filename``.

        Only the final extension is swapped for ``.spec.pt``. A plain
        ``str.replace(".wav", ...)`` would also rewrite ``.wav`` inside
        directory names, and would return the audio path itself for any
        other extension, so the cache would overwrite the audio file.
        """
        return os.path.splitext(filename)[0] + ".spec.pt"

    def _filter(self):
        """Drop out-of-range entries and record a length proxy for the rest.

        The length check is applied to the second field as a string (it is
        the value later passed to ``np.load`` by ``get_labels``).
        ``self.lengths`` gets one value per kept entry, derived from the
        audio file size in bytes divided by ``3 * hop_length``.
        """
        kept = []
        lengths = []
        for audiopath, text, pitch, pitchf, dv in self.audiopaths_and_text:
            if self.min_text_len <= len(text) <= self.max_text_len:
                kept.append([audiopath, text, pitch, pitchf, dv])
                lengths.append(os.path.getsize(audiopath) // (3 * self.hop_length))
        self.audiopaths_and_text = kept
        self.lengths = lengths

    def get_sid(self, sid):
        """Return the speaker id as a one-element ``LongTensor``."""
        return torch.LongTensor([int(sid)])

    def get_audio_text_pair(self, audiopath_and_text):
        """Load one entry and trim all parts to a common frame count.

        Returns:
            Tuple ``(spec, wav, phone, pitch, pitchf, dv)``.
        """
        file, phone, pitch, pitchf, dv = audiopath_and_text[:5]

        phone, pitch, pitchf = self.get_labels(phone, pitch, pitchf)
        spec, wav = self.get_audio(file)
        dv = self.get_sid(dv)

        len_phone = phone.size()[0]
        len_spec = spec.size()[-1]
        if len_phone != len_spec:
            # Keep only the frames both sides have; the waveform is cut to
            # the matching number of samples (frames * hop_length).
            len_min = min(len_phone, len_spec)
            len_wav = len_min * self.hop_length

            spec = spec[:, :len_min]
            wav = wav[:, :len_wav]

            phone = phone[:len_min, :]
            pitch = pitch[:len_min]
            pitchf = pitchf[:len_min]

        return (spec, wav, phone, pitch, pitchf, dv)

    def get_labels(self, phone, pitch, pitchf):
        """Load phone / pitch / pitchf arrays from ``.npy`` files as tensors.

        Every phone row is duplicated (``np.repeat(..., 2, axis=0)``) and all
        three sequences are truncated to at most 900 frames.

        Returns:
            ``(phone, pitch, pitchf)`` as Float, Long and Float tensors.
        """
        phone = np.load(phone)
        phone = np.repeat(phone, 2, axis=0)
        pitch = np.load(pitch)
        pitchf = np.load(pitchf)
        n_num = min(phone.shape[0], 900)
        phone = torch.FloatTensor(phone[:n_num, :])
        pitch = torch.LongTensor(pitch[:n_num])
        pitchf = torch.FloatTensor(pitchf[:n_num])
        return phone, pitch, pitchf

    def get_audio(self, filename):
        """Load a wav file and its spectrogram, using an on-disk cache.

        The spectrogram is read from the cache file when that file exists
        and loads cleanly; otherwise it is computed and written there.

        Returns:
            ``(spec, audio)`` where ``audio`` has a leading dimension of 1.

        Raises:
            ValueError: If the file's sampling rate differs from
                ``self.sampling_rate``.
        """
        audio, sampling_rate = load_wav_to_torch(filename)
        if sampling_rate != self.sampling_rate:
            raise ValueError(
                "{} SR doesn't match target {} SR".format(
                    sampling_rate, self.sampling_rate
                )
            )
        audio_norm = audio.unsqueeze(0)
        spec_filename = self._spec_cache_path(filename)

        spec = None
        if os.path.exists(spec_filename):
            try:
                spec = torch.load(spec_filename)
            except Exception as error:
                # Best effort: an unreadable cache is reported and rebuilt.
                print(f"{spec_filename}: {error}")
        if spec is None:
            spec = spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.hop_length,
                self.win_length,
                center=False,
            )
            spec = torch.squeeze(spec, 0)
            torch.save(spec, spec_filename, _use_new_zipfile_serialization=False)
        return spec, audio_norm

    def __getitem__(self, index):
        return self.get_audio_text_pair(self.audiopaths_and_text[index])

    def __len__(self):
        return len(self.audiopaths_and_text)
class TextAudioCollateMultiNSFsid:
    """Collate (spec, wav, phone, pitch, pitchf, sid) items into a batch.

    Items are ordered by decreasing spectrogram length and every sequence is
    zero-padded to the longest one in the batch.
    """

    def __init__(self, return_ids=False):
        # Stored only; nothing in this class reads it.
        self.return_ids = return_ids

    def __call__(self, batch):
        """Build the padded batch tensors.

        Args:
            batch: Non-empty sequence of ``(spec, wav, phone, pitch, pitchf,
                sid)`` tuples. ``spec`` and ``wav`` are indexed as
                ``[channels, time]`` and ``phone`` as ``[time, features]``.

        Returns:
            Tuple ``(phone_padded, phone_lengths, pitch_padded,
            pitchf_padded, spec_padded, spec_lengths, wave_padded,
            wave_lengths, sid)``.

        Raises:
            ValueError: If ``batch`` is empty.
        """
        if not batch:
            raise ValueError("Cannot collate an empty batch")
        batch_size = len(batch)

        _, ids_sorted_decreasing = torch.sort(
            torch.LongTensor([x[0].size(1) for x in batch]), dim=0, descending=True
        )

        max_spec_len = max(x[0].size(1) for x in batch)
        max_wave_len = max(x[1].size(1) for x in batch)
        max_phone_len = max(x[2].size(0) for x in batch)

        # Allocate zero-filled outputs directly instead of uninitialised
        # legacy tensors followed by zero_().
        spec_padded = torch.zeros(
            batch_size, batch[0][0].size(0), max_spec_len, dtype=torch.float32
        )
        wave_padded = torch.zeros(batch_size, 1, max_wave_len, dtype=torch.float32)
        phone_padded = torch.zeros(
            batch_size, max_phone_len, batch[0][2].shape[1], dtype=torch.float32
        )
        pitch_padded = torch.zeros(batch_size, max_phone_len, dtype=torch.long)
        pitchf_padded = torch.zeros(batch_size, max_phone_len, dtype=torch.float32)
        spec_lengths = torch.zeros(batch_size, dtype=torch.long)
        wave_lengths = torch.zeros(batch_size, dtype=torch.long)
        phone_lengths = torch.zeros(batch_size, dtype=torch.long)
        sid = torch.zeros(batch_size, dtype=torch.long)

        for i, source_index in enumerate(ids_sorted_decreasing.tolist()):
            spec, wave, phone, pitch, pitchf, speaker = batch[source_index][:6]

            spec_padded[i, :, : spec.size(1)] = spec
            spec_lengths[i] = spec.size(1)

            wave_padded[i, :, : wave.size(1)] = wave
            wave_lengths[i] = wave.size(1)

            phone_padded[i, : phone.size(0), :] = phone
            phone_lengths[i] = phone.size(0)

            pitch_padded[i, : pitch.size(0)] = pitch
            pitchf_padded[i, : pitchf.size(0)] = pitchf

            sid[i] = speaker

        return (
            phone_padded,
            phone_lengths,
            pitch_padded,
            pitchf_padded,
            spec_padded,
            spec_lengths,
            wave_padded,
            wave_lengths,
            sid,
        )
class TextAudioLoader(torch.utils.data.Dataset):
    """Dataset of (spec, wav, phone, speaker id) items (no pitch features).

    The first three fields of every entry loaded from
    ``hparams.training_files`` are used: audio path, phone-feature ``.npy``
    path, and a path whose parent directory name carries the speaker id.
    """

    def __init__(self, hparams):
        """Read the file list and the audio settings from ``hparams``."""
        self.audiopaths_and_text = load_filepaths_and_text(hparams.training_files)
        self.max_wav_value = hparams.max_wav_value
        self.sampling_rate = hparams.sampling_rate
        self.filter_length = hparams.filter_length
        self.hop_length = hparams.hop_length
        self.win_length = hparams.win_length
        self.min_text_len = getattr(hparams, "min_text_len", 1)
        self.max_text_len = getattr(hparams, "max_text_len", 5000)
        self._filter()

    @staticmethod
    def _spec_cache_path(filename):
        """Return the spectrogram cache path that sits next to ``filename``.

        Only the final extension is swapped for ``.spec.pt``. A plain
        ``str.replace(".wav", ...)`` would also rewrite ``.wav`` inside
        directory names, and would return the audio path itself for any
        other extension, so the cache would overwrite the audio file.
        """
        return os.path.splitext(filename)[0] + ".spec.pt"

    def _filter(self):
        """Drop unusable entries and record a length proxy for the rest.

        Entries with fewer than three fields are skipped, as are entries
        whose second field (as a string) is outside the configured length
        range. ``self.lengths`` gets one value per kept entry, derived from
        the audio file size in bytes divided by ``3 * hop_length``.
        """
        kept = []
        lengths = []
        for entry in self.audiopaths_and_text:
            if len(entry) < 3:
                continue
            audiopath, text, dv = entry[:3]
            if self.min_text_len <= len(text) <= self.max_text_len:
                kept.append([audiopath, text, dv])
                lengths.append(os.path.getsize(audiopath) // (3 * self.hop_length))

        self.audiopaths_and_text = kept
        self.lengths = lengths

    def get_sid(self, sid):
        """Derive the speaker id from the parent directory name of ``sid``.

        All digits in that directory name are concatenated and parsed as an
        integer; when there are none, a message is printed and id 0 is used.

        Returns:
            One-element ``LongTensor`` holding the speaker id.
        """
        sid = os.path.basename(os.path.dirname(sid))

        try:
            sid = torch.LongTensor([int("".join(filter(str.isdigit, sid)))])
        except ValueError as error:
            print(f"Error converting speaker ID '{sid}' to integer. Exception: {error}")
            sid = torch.LongTensor([0])

        return sid

    def get_audio_text_pair(self, audiopath_and_text):
        """Load one entry and trim all parts to a common frame count.

        Returns:
            Tuple ``(spec, wav, phone, dv)``.
        """
        file, phone, dv = audiopath_and_text[:3]

        phone = self.get_labels(phone)
        spec, wav = self.get_audio(file)
        dv = self.get_sid(dv)

        len_phone = phone.size()[0]
        len_spec = spec.size()[-1]
        if len_phone != len_spec:
            # Keep only the frames both sides have; the waveform is cut to
            # the matching number of samples (frames * hop_length).
            len_min = min(len_phone, len_spec)
            len_wav = len_min * self.hop_length
            spec = spec[:, :len_min]
            wav = wav[:, :len_wav]
            phone = phone[:len_min, :]
        return (spec, wav, phone, dv)

    def get_labels(self, phone):
        """Load the phone features from a ``.npy`` file as a FloatTensor.

        Every row is duplicated (``np.repeat(..., 2, axis=0)``) and the
        result is truncated to at most 900 frames.
        """
        phone = np.load(phone)
        phone = np.repeat(phone, 2, axis=0)
        n_num = min(phone.shape[0], 900)
        return torch.FloatTensor(phone[:n_num, :])

    def get_audio(self, filename):
        """Load a wav file and its spectrogram, using an on-disk cache.

        The spectrogram is read from the cache file when that file exists
        and loads cleanly; otherwise it is computed and written there.

        Returns:
            ``(spec, audio)`` where ``audio`` has a leading dimension of 1.

        Raises:
            ValueError: If the file's sampling rate differs from
                ``self.sampling_rate``.
        """
        audio, sampling_rate = load_wav_to_torch(filename)
        if sampling_rate != self.sampling_rate:
            raise ValueError(
                "{} SR doesn't match target {} SR".format(
                    sampling_rate, self.sampling_rate
                )
            )
        audio_norm = audio.unsqueeze(0)
        spec_filename = self._spec_cache_path(filename)

        spec = None
        if os.path.exists(spec_filename):
            try:
                spec = torch.load(spec_filename)
            except Exception as error:
                # Best effort: an unreadable cache is reported and rebuilt.
                print(f"{spec_filename}: {error}")
        if spec is None:
            spec = spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.hop_length,
                self.win_length,
                center=False,
            )
            spec = torch.squeeze(spec, 0)
            torch.save(spec, spec_filename, _use_new_zipfile_serialization=False)
        return spec, audio_norm

    def __getitem__(self, index):
        return self.get_audio_text_pair(self.audiopaths_and_text[index])

    def __len__(self):
        return len(self.audiopaths_and_text)
class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
    """Batch sampler that groups samples of similar length into buckets.

    A sample of length ``L`` belongs to bucket ``i`` when
    ``boundaries[i] < L <= boundaries[i + 1]``; samples that fall outside
    every interval are ignored. Empty buckets are dropped. Each remaining
    bucket is padded, by repeating its own indices, up to a multiple of
    ``num_replicas * batch_size``. Iterating yields lists of dataset indices,
    one list per batch, restricted to this replica's share.

    Args:
        dataset: Object exposing ``lengths`` (one length per sample) and
            whatever ``DistributedSampler`` itself requires.
        batch_size: Number of samples per yielded batch.
        boundaries: Increasing sequence of bucket edges. It is copied, so the
            caller's sequence is never modified.
        num_replicas: Forwarded to ``DistributedSampler``.
        rank: Forwarded to ``DistributedSampler``.
        shuffle: Shuffle within buckets and across batches when true.
    """

    def __init__(
        self,
        dataset,
        batch_size,
        boundaries,
        num_replicas=None,
        rank=None,
        shuffle=True,
    ):
        super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
        self.lengths = dataset.lengths
        self.batch_size = batch_size
        # _create_buckets() removes edges of empty buckets; work on a private
        # copy so the list passed by the caller is left untouched.
        self.boundaries = list(boundaries)

        self.buckets, self.num_samples_per_bucket = self._create_buckets()
        self.total_size = sum(self.num_samples_per_bucket)
        self.num_samples = self.total_size // self.num_replicas

    def _create_buckets(self):
        """Assign sample indices to buckets and compute padded bucket sizes.

        Returns:
            ``(buckets, num_samples_per_bucket)``; the second list holds each
            bucket's size rounded up to a multiple of
            ``num_replicas * batch_size``.
        """
        buckets = [[] for _ in range(len(self.boundaries) - 1)]
        for index, length in enumerate(self.lengths):
            idx_bucket = self._bisect(length)
            if idx_bucket != -1:
                buckets[idx_bucket].append(index)

        # Walk backwards so popping does not shift indices still to be visited.
        for i in range(len(buckets) - 1, -1, -1):
            if not buckets[i]:
                buckets.pop(i)
                self.boundaries.pop(i + 1)

        total_batch_size = self.num_replicas * self.batch_size
        num_samples_per_bucket = []
        for bucket in buckets:
            len_bucket = len(bucket)
            rem = (
                total_batch_size - (len_bucket % total_batch_size)
            ) % total_batch_size
            num_samples_per_bucket.append(len_bucket + rem)
        return buckets, num_samples_per_bucket

    def __iter__(self):
        """Yield this replica's batches (lists of dataset indices)."""
        g = torch.Generator()
        g.manual_seed(self.epoch)

        if self.shuffle:
            indices = [
                torch.randperm(len(bucket), generator=g).tolist()
                for bucket in self.buckets
            ]
        else:
            indices = [list(range(len(bucket))) for bucket in self.buckets]

        batches = []
        for bucket, ids_bucket, num_samples_bucket in zip(
            self.buckets, indices, self.num_samples_per_bucket
        ):
            len_bucket = len(bucket)

            # Repeat the bucket's own positions to reach the padded size.
            rem = num_samples_bucket - len_bucket
            ids_bucket = (
                ids_bucket
                + ids_bucket * (rem // len_bucket)
                + ids_bucket[: (rem % len_bucket)]
            )

            # Keep only the positions that belong to this replica.
            ids_bucket = ids_bucket[self.rank :: self.num_replicas]

            for j in range(len(ids_bucket) // self.batch_size):
                batch = [
                    bucket[idx]
                    for idx in ids_bucket[
                        j * self.batch_size : (j + 1) * self.batch_size
                    ]
                ]
                batches.append(batch)

        if self.shuffle:
            batch_ids = torch.randperm(len(batches), generator=g).tolist()
            batches = [batches[i] for i in batch_ids]
        self.batches = batches

        assert len(self.batches) * self.batch_size == self.num_samples
        return iter(self.batches)

    def _bisect(self, x, lo=0, hi=None):
        """Return the bucket index ``i`` with ``boundaries[i] < x <= boundaries[i + 1]``.

        Returns ``-1`` when no such bucket exists between ``lo`` and ``hi``.
        """
        if hi is None:
            hi = len(self.boundaries) - 1

        while hi > lo:
            mid = (hi + lo) // 2
            if self.boundaries[mid] < x <= self.boundaries[mid + 1]:
                return mid
            if x <= self.boundaries[mid]:
                hi = mid
            else:
                lo = mid + 1
        return -1

    def __len__(self):
        """Number of batches this replica yields per epoch."""
        return self.num_samples // self.batch_size
def get_pm(self, x, p_len):
    """Estimate F0 with parselmouth's ``to_pitch_ac`` and pad it to ``p_len``.

    Args:
        x: Audio samples handed to ``parselmouth.Sound`` together with
            ``self.fs``.
        p_len: Target number of frames. When the tracker returns fewer
            frames, zeros are added on both sides (the extra frame goes to
            the left when the deficit is odd); longer results are returned
            unchanged.

    Returns:
        The ``"frequency"`` field of the pitch object's selected array,
        zero-padded as described above.
    """
    f0 = (
        parselmouth.Sound(x, self.fs)
        .to_pitch_ac(
            # Follow the configured hop and sample rate, as get_harvest and
            # get_dio do, instead of a fixed 160 / 16000 step; with the
            # constructor defaults the value is the same.
            time_step=self.hop / self.fs,
            voicing_threshold=0.6,
            pitch_floor=self.f0_min,
            pitch_ceiling=self.f0_max,
        )
        .selected_array["frequency"]
    )

    missing = p_len - len(f0)
    pad_left = max(0, (missing + 1) // 2)
    pad_right = max(0, missing - (missing + 1) // 2)
    return np.pad(f0, [[pad_left, pad_right]], mode="constant")
def coarse_f0(self, f0):
    """Quantise an F0 contour into integer bins on a mel-style scale.

    Values are mapped with ``1127 * log(1 + f0 / 700)``; positive results are
    rescaled linearly so that ``self.f0_mel_min`` maps to 1 and
    ``self.f0_mel_max`` maps to ``self.f0_bin - 1``, then everything is
    limited to ``[1, self.f0_bin - 1]`` and rounded to the nearest integer.

    Args:
        f0: NumPy array of F0 values.

    Returns:
        Integer array with the same shape as ``f0``.
    """
    mel = 1127 * np.log(1 + f0 / 700)

    positive = mel > 0
    mel[positive] = (mel[positive] - self.f0_mel_min) * (self.f0_bin - 2) / (
        self.f0_mel_max - self.f0_mel_min
    ) + 1

    # Everything at or below 1 becomes 1; everything above the top bin is capped.
    np.clip(mel, 1, self.f0_bin - 1, out=mel)

    f0_coarse = np.rint(mel).astype(int)
    lowest, highest = f0_coarse.min(), f0_coarse.max()
    assert highest <= 255 and lowest >= 1, (
        f0_coarse.max(),
        f0_coarse.min(),
    )
    return f0_coarse
def read_wave(wav_path, normalize=False):
    """Read an audio file into a ``(1, n)`` tensor ready for the model.

    Args:
        wav_path: File read with ``soundfile.read``; its sample rate must be
            16000.
        normalize: When true, apply ``layer_norm`` over the whole tensor.

    Returns:
        The samples as a tensor viewed as ``(1, -1)``; two-dimensional input
        is averaged over its last axis first.
    """
    samples, sample_rate = sf.read(wav_path)
    assert sample_rate == 16000

    tensor = torch.from_numpy(samples)
    # NOTE(review): ``is_half`` is assigned straight from ``sys.argv`` in this
    # script, so it is a string and any non-empty value (including "False")
    # is truthy - confirm whether the float branch is meant to be reachable.
    if is_half:
        tensor = tensor.half()
    else:
        tensor = tensor.float()

    if tensor.dim() == 2:
        tensor = tensor.mean(-1)
    tensor = tensor.view(1, -1)

    if not normalize:
        return tensor
    with torch.no_grad():
        return F.layer_norm(tensor, tensor.shape)
+todo = sorted(os.listdir(wav_path))[i_part::n_parts] +n = max(1, len(todo) // 10) + +if len(todo) == 0: + print( + "An error occurred in the feature extraction, make sure you have provided the audios correctly." + ) +else: + print(f"{len(todo)}") + with tqdm.tqdm(total=len(todo)) as pbar: + for idx, file in enumerate(todo): + try: + if file.endswith(".wav"): + wav_file_path = os.path.join(wav_path, file) + out_file_path = os.path.join(out_path, file.replace("wav", "npy")) + + if os.path.exists(out_file_path): + continue + + feats = read_wave(wav_file_path, normalize=saved_cfg.task.normalize) + padding_mask = torch.BoolTensor(feats.shape).fill_(False) + inputs = { + "source": feats.to(device), + "padding_mask": padding_mask.to(device), + "output_layer": 9 if version == "v1" else 12, + } + with torch.no_grad(): + logits = model.extract_features(**inputs) + feats = ( + model.final_proj(logits[0]) + if version == "v1" + else logits[0] + ) + + feats = feats.squeeze(0).float().cpu().numpy() + if np.isnan(feats).sum() == 0: + np.save(out_file_path, feats, allow_pickle=False) + else: + print(f"{file} - contains nan") + pbar.set_description(f"Processing {file} {feats.shape}") + except Exception as error: + print(error) + pbar.update(1) + + print("Feature extraction completed successfully!") diff --git a/rvc/train/extract/preparing_files.py b/rvc/train/extract/preparing_files.py new file mode 100644 index 0000000000000000000000000000000000000000..73bf6810c73bd939a0a766129aa22bbed169f9ff --- /dev/null +++ b/rvc/train/extract/preparing_files.py @@ -0,0 +1,71 @@ +import os +import json +import pathlib +from random import shuffle + +from rvc.configs.config import Config + +config = Config() +current_directory = os.getcwd() + + +def generate_config(rvc_version, sampling_rate, model_path): + if rvc_version == "v1" or sampling_rate == "40000": + config_path = f"v1/{sampling_rate}.json" + else: + config_path = f"v2/{sampling_rate}.json" + config_save_path = os.path.join(model_path, 
def generate_filelist(f0_method, model_path, rvc_version, sampling_rate):
    """Write ``{model_path}/filelist.txt`` listing the usable training samples.

    A sample is usable when a file with the same stem (the part of the name
    before its first ".") exists in ``0_gt_wavs`` and in the feature
    directory (``3_feature256`` for ``"v1"``, ``3_feature768`` otherwise)
    and, when ``f0_method`` is truthy, also in ``2a_f0`` and ``2b-f0nsf``.
    Two entries pointing at the "mute" sample under
    ``{current_directory}/logs/mute`` are appended, the entries are shuffled,
    and the result is written one entry per line.

    Args:
        f0_method: Truthy to include the two F0 columns in every entry.
        model_path: Experiment directory containing the sub-directories above.
        rvc_version: ``"v1"`` selects 256-dimensional features, anything else
            768-dimensional ones.
        sampling_rate: Used to pick the mute wav (``mute{sampling_rate}.wav``).
    """

    def _stems(directory):
        # Stem = file name up to its first dot.
        return {name.split(".")[0] for name in os.listdir(directory)}

    fea_dim = 256 if rvc_version == "v1" else 768
    gt_wavs_dir = f"{model_path}/0_gt_wavs"
    feature_dir = f"{model_path}/3_feature{fea_dim}"

    names = _stems(gt_wavs_dir) & _stems(feature_dir)
    if f0_method:
        f0_dir = f"{model_path}/2a_f0"
        f0nsf_dir = f"{model_path}/2b-f0nsf"
        names &= _stems(f0_dir) & _stems(f0nsf_dir)

    options = []
    for name in names:
        if f0_method:
            options.append(
                f"{gt_wavs_dir}/{name}.wav|{feature_dir}/{name}.npy|{f0_dir}/{name}.wav.npy|{f0nsf_dir}/{name}.wav.npy|0"
            )
        else:
            options.append(f"{gt_wavs_dir}/{name}.wav|{feature_dir}/{name}.npy|0")

    mute_root = f"{current_directory}/logs/mute"
    if f0_method:
        mute_entry = f"{mute_root}/0_gt_wavs/mute{sampling_rate}.wav|{mute_root}/3_feature{fea_dim}/mute.npy|{mute_root}/2a_f0/mute.wav.npy|{mute_root}/2b-f0nsf/mute.wav.npy|0"
    else:
        mute_entry = f"{mute_root}/0_gt_wavs/mute{sampling_rate}.wav|{mute_root}/3_feature{fea_dim}/mute.npy|0"
    options.extend([mute_entry] * 2)

    shuffle(options)
    # Explicit UTF-8, matching generate_config in this module, so paths with
    # non-ASCII characters do not depend on the platform's default encoding.
    with open(f"{model_path}/filelist.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(options))
0000000000000000000000000000000000000000..32bfb5e0c141525d8cce0a1d57ef406ab58a0848 --- /dev/null +++ b/rvc/train/index_generator.py @@ -0,0 +1,85 @@ +import os +import sys +import faiss +import numpy as np +from sklearn.cluster import MiniBatchKMeans +from multiprocessing import cpu_count + +exp_dir = sys.argv[1] +version = sys.argv[2] + +try: + if version == "v1": + feature_dir = os.path.join(exp_dir, "3_feature256") + elif version == "v2": + feature_dir = os.path.join(exp_dir, "3_feature768") + + npys = [] + listdir_res = sorted(os.listdir(feature_dir)) + + for name in listdir_res: + file_path = os.path.join(feature_dir, name) + phone = np.load(file_path) + npys.append(phone) + + big_npy = np.concatenate(npys, axis=0) + + big_npy_idx = np.arange(big_npy.shape[0]) + np.random.shuffle(big_npy_idx) + big_npy = big_npy[big_npy_idx] + + if big_npy.shape[0] > 2e5: + big_npy = ( + MiniBatchKMeans( + n_clusters=10000, + verbose=True, + batch_size=256 * cpu_count(), + compute_labels=False, + init="random", + ) + .fit(big_npy) + .cluster_centers_ + ) + + np.save(os.path.join(exp_dir, "total_fea.npy"), big_npy) + + n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) + + # index_trained + index_trained = faiss.index_factory( + 256 if version == "v1" else 768, f"IVF{n_ivf},Flat" + ) + index_ivf_trained = faiss.extract_index_ivf(index_trained) + index_ivf_trained.nprobe = 1 + index_trained.train(big_npy) + + index_filename_trained = ( + f"trained_IVF{n_ivf}_Flat_nprobe_{index_ivf_trained.nprobe}_{version}.index" + ) + index_filepath_trained = os.path.join(exp_dir, index_filename_trained) + + faiss.write_index(index_trained, index_filepath_trained) + + # index_added + index_added = faiss.index_factory( + 256 if version == "v1" else 768, f"IVF{n_ivf},Flat" + ) + index_ivf_added = faiss.extract_index_ivf(index_added) + index_ivf_added.nprobe = 1 + index_added.train(big_npy) + + index_filename_added = ( + 
import torch


def feature_loss(fmap_r, fmap_g):
    """Sum of mean absolute differences between paired feature maps, times 2.

    ``fmap_r`` and ``fmap_g`` are nested iterables walked in lockstep; the
    first one's tensors are detached before the comparison.
    """
    total = 0
    for real_maps, gen_maps in zip(fmap_r, fmap_g):
        for real_map, gen_map in zip(real_maps, gen_maps):
            difference = real_map.float().detach() - gen_map.float()
            total = total + torch.mean(torch.abs(difference))

    return total * 2


def discriminator_loss(disc_real_outputs, disc_generated_outputs):
    """Least-squares loss over paired real / generated outputs.

    Returns:
        ``(loss, r_losses, g_losses)``: the summed loss and, per pair, the
        real term ``mean((1 - dr) ** 2)`` and the generated term
        ``mean(dg ** 2)`` as Python floats.
    """
    total = 0
    real_terms = []
    generated_terms = []
    for real_out, generated_out in zip(disc_real_outputs, disc_generated_outputs):
        real_term = torch.mean((1 - real_out.float()) ** 2)
        generated_term = torch.mean(generated_out.float() ** 2)
        total = total + (real_term + generated_term)
        real_terms.append(real_term.item())
        generated_terms.append(generated_term.item())

    return total, real_terms, generated_terms


def generator_loss(disc_outputs):
    """Least-squares loss ``mean((1 - dg) ** 2)`` summed over the outputs.

    Returns:
        ``(loss, gen_losses)``; ``gen_losses`` holds the per-output terms as
        tensors (not floats).
    """
    total = 0
    per_output = []
    for output in disc_outputs:
        term = torch.mean((1 - output.float()) ** 2)
        per_output.append(term)
        total = total + term

    return total, per_output


def kl_loss(z_p, logs_q, m_p, logs_p, z_mask):
    """
    z_p, logs_q: [b, h, t_t]
    m_p, logs_p: [b, h, t_t]

    Returns the masked sum of the element-wise term divided by the sum of
    ``z_mask``.
    """
    z_p, logs_q, m_p, logs_p, z_mask = (
        tensor.float() for tensor in (z_p, logs_q, m_p, logs_p, z_mask)
    )

    divergence = (logs_p - logs_q - 0.5) + 0.5 * ((z_p - m_p) ** 2) * torch.exp(
        -2.0 * logs_p
    )
    masked_sum = torch.sum(divergence * z_mask)
    return masked_sum / torch.sum(z_mask)
def spectrogram_torch(y, n_fft, hop_size, win_size, center=False):
    """Compute a magnitude spectrogram of ``y`` with ``torch.stft``.

    The signal is reflect-padded by ``int((n_fft - hop_size) / 2)`` samples on
    each side before the transform. Hann windows are cached in the
    module-level ``hann_window`` dict under a key made of the window size,
    dtype and device.

    Returns:
        ``sqrt(real ** 2 + imag ** 2 + 1e-6)`` of the one-sided STFT.
    """
    window_key = f"{win_size}_{y.dtype}_{y.device}"
    window = hann_window.get(window_key)
    if window is None:
        window = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
        hann_window[window_key] = window

    pad_each_side = int((n_fft - hop_size) / 2)
    padded = torch.nn.functional.pad(
        y.unsqueeze(1),
        (pad_each_side, pad_each_side),
        mode="reflect",
    ).squeeze(1)

    transformed = torch.stft(
        padded,
        n_fft,
        hop_length=hop_size,
        win_length=win_size,
        window=window,
        center=center,
        pad_mode="reflect",
        normalized=False,
        onesided=True,
        return_complex=True,
    )

    # The small constant is added before the square root.
    return torch.sqrt(transformed.real.pow(2) + transformed.imag.pow(2) + 1e-6)
def mel_spectrogram_torch(
    y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False
):
    """Return the mel spectrogram of ``y``.

    Chains ``spectrogram_torch`` (linear magnitude spectrogram) and
    ``spec_to_mel_torch`` (mel projection and normalisation) with the given
    parameters.
    """
    linear_spec = spectrogram_torch(y, n_fft, hop_size, win_size, center)
    return spec_to_mel_torch(linear_spec, n_fft, num_mels, sampling_rate, fmin, fmax)
def process_audio(self, path, idx0):
    """Slice one input file into training chunks and write them out.

    The audio is loaded at ``self.sr``, filtered with the ``self.b_high`` /
    ``self.a_high`` coefficients, and split by ``self.slicer``. Each slice is
    cut into windows of ``self.per`` seconds that start every
    ``self.per - self.overlap`` seconds; once the remainder is no longer than
    ``self.tail`` seconds it is written as a final, shorter chunk.

    Every chunk is passed to ``normalize_and_write`` with ``idx0`` and a
    running chunk index. The index advances after every write, including the
    tail: otherwise the next slice's first chunk would reuse the tail's index
    and overwrite its file.

    Args:
        path: Audio file to process.
        idx0: Index of the file, used as the first part of the output names.

    Any exception is reported with the file path and swallowed so one bad
    file does not stop the batch.
    """
    try:
        audio = load_audio(path, self.sr)
        audio = signal.lfilter(self.b_high, self.a_high, audio)

        chunk_len = int(self.per * self.sr)
        idx1 = 0
        for audio_segment in self.slicer.slice(audio):
            i = 0
            while True:
                start = int(self.sr * (self.per - self.overlap) * i)
                i += 1
                if len(audio_segment[start:]) > self.tail * self.sr:
                    tmp_audio = audio_segment[start : start + chunk_len]
                    self.normalize_and_write(tmp_audio, idx0, idx1)
                    idx1 += 1
                    continue

                tmp_audio = audio_segment[start:]
                # Skip an empty remainder: normalising it would raise and
                # abort the rest of this file.
                if len(tmp_audio) > 0:
                    self.normalize_and_write(tmp_audio, idx0, idx1)
                    idx1 += 1
                break
    except Exception as error:
        print(f"{path}: {error}")
def replace_keys_in_dict(d, old_key_part, new_key_part):
    """Return a copy of ``d`` with ``old_key_part`` replaced in every key.

    Nested dictionaries are processed recursively. The result is an
    ``OrderedDict`` when ``d`` is one and a plain ``dict`` otherwise; ``d``
    itself is not modified.
    """
    renamed = OrderedDict() if isinstance(d, OrderedDict) else {}
    for original_key, item in d.items():
        renamed_key = original_key.replace(old_key_part, new_key_part)
        if isinstance(item, dict):
            # Rename inside nested mappings as well.
            item = replace_keys_in_dict(item, old_key_part, new_key_part)
        renamed[renamed_key] = item
    return renamed
pth_file_path) + + model = torch.load(pth_file_path, map_location=torch.device("cpu")) + torch.save( + replace_keys_in_dict( + replace_keys_in_dict( + model, ".parametrizations.weight.original1", ".weight_v" + ), + ".parametrizations.weight.original0", + ".weight_g", + ), + pth_file_old_version_path, + ) + os.remove(pth_file_path) + os.rename(pth_file_old_version_path, pth_file_path) + + return "Success!" + except Exception as error: + print(error) + + +def extract_small_model(path, name, sr, if_f0, info, version): + try: + ckpt = torch.load(path, map_location="cpu") + if "model" in ckpt: + ckpt = ckpt["model"] + opt = OrderedDict( + weight={ + key: value.half() for key, value in ckpt.items() if "enc_q" not in key + } + ) + opt["config"] = { + "40000": [ + 1025, + 32, + 192, + 192, + 768, + 2, + 6, + 3, + 0, + "1", + [3, 7, 11], + [[1, 3, 5], [1, 3, 5], [1, 3, 5]], + [10, 10, 2, 2], + 512, + [16, 16, 4, 4], + 109, + 256, + 40000, + ], + "48000": { + "v1": [ + 1025, + 32, + 192, + 192, + 768, + 2, + 6, + 3, + 0, + "1", + [3, 7, 11], + [[1, 3, 5], [1, 3, 5], [1, 3, 5]], + [10, 6, 2, 2, 2], + 512, + [16, 16, 4, 4, 4], + 109, + 256, + 48000, + ], + "v2": [ + 1025, + 32, + 192, + 192, + 768, + 2, + 6, + 3, + 0, + "1", + [3, 7, 11], + [[1, 3, 5], [1, 3, 5], [1, 3, 5]], + [12, 10, 2, 2], + 512, + [24, 20, 4, 4], + 109, + 256, + 48000, + ], + }, + "32000": { + "v1": [ + 513, + 32, + 192, + 192, + 768, + 2, + 6, + 3, + 0, + "1", + [3, 7, 11], + [[1, 3, 5], [1, 3, 5], [1, 3, 5]], + [10, 4, 2, 2, 2], + 512, + [16, 16, 4, 4, 4], + 109, + 256, + 32000, + ], + "v2": [ + 513, + 32, + 192, + 192, + 768, + 2, + 6, + 3, + 0, + "1", + [3, 7, 11], + [[1, 3, 5], [1, 3, 5], [1, 3, 5]], + [10, 8, 2, 2], + 512, + [20, 16, 4, 4], + 109, + 256, + 32000, + ], + }, + } + opt["config"] = ( + opt["config"][sr] + if isinstance(opt["config"][sr], list) + else opt["config"][sr][version] + ) + if info == "": + info = "Extracted model." 
class Slicer:
    """Split a waveform into chunks by cutting inside low-energy regions.

    All time arguments are given in milliseconds and converted once, in
    ``__init__``, to units of analysis frames (one frame = ``hop_size``
    samples). ``threshold`` is given in dB and converted to a linear
    amplitude that is compared against the per-frame RMS.
    """

    def __init__(
        self,
        sr: int,
        threshold: float = -40.0,
        min_length: int = 5000,
        min_interval: int = 300,
        hop_size: int = 20,
        max_sil_kept: int = 5000,
    ):
        """Validate the millisecond settings and convert them to frames.

        Raises:
            ValueError: if ``min_length >= min_interval >= hop_size`` or
                ``max_sil_kept >= hop_size`` does not hold.
        """
        if not min_length >= min_interval >= hop_size:
            raise ValueError("min_length >= min_interval >= hop_size is required")
        if not max_sil_kept >= hop_size:
            raise ValueError("max_sil_kept >= hop_size is required")

        min_interval = sr * min_interval / 1000  # now in samples
        self.threshold = 10 ** (threshold / 20.0)  # dB -> linear amplitude
        self.hop_size = round(sr * hop_size / 1000)  # samples per frame
        self.win_size = min(round(min_interval), 4 * self.hop_size)
        # Everything below is expressed in frames.
        self.min_length = round(sr * min_length / 1000 / self.hop_size)
        self.min_interval = round(min_interval / self.hop_size)
        self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)

    def _apply_slice(self, waveform, begin, end):
        """Return ``waveform`` between frame indices ``begin`` and ``end``.

        Frame indices are converted to sample offsets; the end offset is
        clamped to the waveform length. Arrays with more than one dimension
        are sliced along axis 1.
        """
        start_idx = begin * self.hop_size
        if len(waveform.shape) > 1:
            end_idx = min(waveform.shape[1], end * self.hop_size)
            return waveform[:, start_idx:end_idx]
        end_idx = min(waveform.shape[0], end * self.hop_size)
        return waveform[start_idx:end_idx]

    def slice(self, waveform):
        """Cut ``waveform`` at detected silences and return the chunk list.

        Arrays with more than one dimension are averaged over axis 0 for
        analysis only; the returned chunks are slices of the original array.
        When the audio is not longer than ``min_length`` or no cut point is
        found, the result is ``[waveform]``.
        """
        samples = waveform.mean(axis=0) if len(waveform.shape) > 1 else waveform

        # ``self.min_length`` is measured in frames, so the sample count has
        # to be converted (rounding up) before comparing. Comparing the raw
        # sample count made this guard effectively unreachable.
        frame_count = (samples.shape[0] + self.hop_size - 1) // self.hop_size
        if frame_count <= self.min_length:
            return [waveform]

        rms_list = get_rms(
            y=samples, frame_length=self.win_size, hop_length=self.hop_size
        ).squeeze(0)
        sil_tags = []
        silence_start, clip_start = None, 0

        for i, rms in enumerate(rms_list):
            # Silent frame: remember where the current silent run began.
            if rms < self.threshold:
                if silence_start is None:
                    silence_start = i
                continue

            # Voiced frame with no pending silent run: nothing to decide.
            if silence_start is None:
                continue

            # First voiced frame after a silent run: decide whether to cut.
            is_leading_silence = silence_start == 0 and i > self.max_sil_kept
            need_slice_middle = (
                i - silence_start >= self.min_interval
                and i - clip_start >= self.min_length
            )
            if not is_leading_silence and not need_slice_middle:
                silence_start = None
                continue

            silence_len = i - silence_start
            if silence_len <= self.max_sil_kept:
                # Short silence: single cut at its quietest frame.
                pos = rms_list[silence_start : i + 1].argmin() + silence_start
                if silence_start == 0:
                    sil_tags.append((0, pos))
                else:
                    sil_tags.append((pos, pos))
                clip_start = pos
            else:
                # Longer silence: quietest frame within ``max_sil_kept`` of
                # each edge of the silent run.
                pos_l = (
                    rms_list[
                        silence_start : silence_start + self.max_sil_kept + 1
                    ].argmin()
                    + silence_start
                )
                pos_r = (
                    rms_list[i - self.max_sil_kept : i + 1].argmin()
                    + i
                    - self.max_sil_kept
                )
                if silence_len <= self.max_sil_kept * 2:
                    # The two edge windows overlap; also consider the
                    # quietest frame of the overlap.
                    pos = rms_list[
                        i - self.max_sil_kept : silence_start + self.max_sil_kept + 1
                    ].argmin()
                    pos += i - self.max_sil_kept
                    if silence_start == 0:
                        sil_tags.append((0, pos_r))
                        clip_start = pos_r
                    else:
                        sil_tags.append((min(pos_l, pos), max(pos_r, pos)))
                        clip_start = max(pos_r, pos)
                else:
                    if silence_start == 0:
                        sil_tags.append((0, pos_r))
                    else:
                        sil_tags.append((pos_l, pos_r))
                    clip_start = pos_r
            silence_start = None

        total_frames = rms_list.shape[0]

        # Silent run still open at the end of the audio.
        if (
            silence_start is not None
            and total_frames - silence_start >= self.min_interval
        ):
            silence_end = min(total_frames, silence_start + self.max_sil_kept)
            pos = rms_list[silence_start : silence_end + 1].argmin() + silence_start
            sil_tags.append((pos, total_frames + 1))

        if not sil_tags:
            return [waveform]

        # Chunks are the spans between consecutive silence tags.
        chunks = []
        if sil_tags[0][0] > 0:
            chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0]))
        for current_tag, next_tag in zip(sil_tags, sil_tags[1:]):
            chunks.append(self._apply_slice(waveform, current_tag[1], next_tag[0]))
        if sil_tags[-1][1] < total_frames:
            chunks.append(self._apply_slice(waveform, sil_tags[-1][1], total_frames))
        return chunks


def get_rms(
    y,
    frame_length=2048,
    hop_length=512,
    pad_mode="constant",
):
    """Return the root-mean-square of ``y`` over sliding frames.

    ``y`` is padded by ``frame_length // 2`` on both sides of its last axis,
    framed with a strided view (no copy of the windows), and every
    ``hop_length``-th frame is reduced to its RMS. The frame axis is kept as
    a length-1 dimension, so a 1-D input yields shape ``(1, n_frames)``.
    """
    padding = (int(frame_length // 2), int(frame_length // 2))
    y = np.pad(y, padding, mode=pad_mode)

    axis = -1
    # Build a view of all overlapping windows: the extra trailing axis walks
    # through the samples of one window.
    out_strides = y.strides + tuple([y.strides[axis]])
    x_shape_trimmed = list(y.shape)
    x_shape_trimmed[axis] -= frame_length - 1
    out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
    xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)

    if axis < 0:
        target_axis = axis - 1
    else:
        target_axis = axis + 1

    xw = np.moveaxis(xw, -1, target_axis)
    # Keep only every ``hop_length``-th window.
    slices = [slice(None)] * xw.ndim
    slices[axis] = slice(0, None, hop_length)
    x = xw[tuple(slices)]

    power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)
    return np.sqrt(power)
parser.add_argument( + "--hop_size", type=int, default=10, help="Frame length in milliseconds" + ) + parser.add_argument( + "--max_sil_kept", + type=int, + default=500, + help="The maximum silence length kept around the sliced clip, presented in milliseconds", + ) + args = parser.parse_args() + + out = args.out or os.path.dirname(os.path.abspath(args.audio)) + audio, sr = librosa.load(args.audio, sr=None, mono=False) + + slicer = Slicer( + sr=sr, + threshold=args.db_thresh, + min_length=args.min_length, + min_interval=args.min_interval, + hop_size=args.hop_size, + max_sil_kept=args.max_sil_kept, + ) + + chunks = slicer.slice(audio) + + if not os.path.exists(out): + os.makedirs(out) + + for i, chunk in enumerate(chunks): + if len(chunk.shape) > 1: + chunk = chunk.T + soundfile.write( + os.path.join( + out, + f"{os.path.basename(args.audio).rsplit('.', maxsplit=1)[0]}_{i}.wav", + ), + chunk, + sr, + ) + + +if __name__ == "__main__": + main() diff --git a/rvc/train/train.py b/rvc/train/train.py new file mode 100644 index 0000000000000000000000000000000000000000..8fb4a14e40e5aaf93a190bbb3dad409fed1ef966 --- /dev/null +++ b/rvc/train/train.py @@ -0,0 +1,602 @@ +import torch +import sys +import os +import datetime + +from utils import ( + get_hparams, + plot_spectrogram_to_numpy, + summarize, + load_checkpoint, + save_checkpoint, + latest_checkpoint_path, +) +from random import randint, shuffle +from time import sleep +from time import time as ttime + +from torch.cuda.amp import GradScaler, autocast + +from torch.nn import functional as F +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.data import DataLoader +from torch.utils.tensorboard import SummaryWriter +import torch.distributed as dist +import torch.multiprocessing as mp + +now_dir = os.getcwd() +sys.path.append(os.path.join(now_dir)) + + +from data_utils import ( + DistributedBucketSampler, + TextAudioCollate, + TextAudioCollateMultiNSFsid, + TextAudioLoader, + 
class EpochRecorder:
    """Stopwatch that reports the time elapsed between successive calls."""

    def __init__(self):
        # Reference point for the first ``record`` call.
        self.last_time = ttime()

    def record(self):
        """Return a status string and restart the stopwatch.

        The string contains the current wall-clock time (``HH:MM:SS``) and
        the whole seconds elapsed since the previous call (or since
        construction), rendered as a ``datetime.timedelta``.
        """
        previous, self.last_time = self.last_time, ttime()
        whole_seconds = int(round(self.last_time - previous, 1))
        clock = datetime.datetime.now().strftime("%H:%M:%S")
        duration = str(datetime.timedelta(seconds=whole_seconds))
        return f"time={clock} | training_speed={duration}"
def run(
    rank,
    n_gpus,
    hps,
):
    """Train the generator/discriminator pair in one worker process.

    Args:
        rank: index of this worker; also used as the CUDA device index.
        n_gpus: number of workers (world size of the process group).
        hps: hyper-parameter object produced by ``get_hparams``.

    The worker joins a ``gloo`` process group, builds the dataset, sampler
    and loader, creates both networks and their optimizers, resumes from the
    latest ``G_*.pth`` / ``D_*.pth`` checkpoints when they can be loaded
    (otherwise starts from the optional pretrained weights), and then calls
    ``train_and_evaluate`` once per epoch.
    """
    global global_step

    # Only rank 0 owns TensorBoard writers; other ranks pass ``None``.
    writers = None
    if rank == 0:
        writers = [
            SummaryWriter(log_dir=hps.model_dir),
            SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval")),
        ]

    os.environ["MASTER_ADDR"] = "localhost"
    # Every worker must agree on the rendezvous port. Drawing a random port
    # here gave each rank a different one, so multi-GPU runs could never
    # connect. Derive it from the shared parent PID instead (same range as
    # before: 20000..55555).
    os.environ["MASTER_PORT"] = str(20000 + os.getppid() % 35556)
    dist.init_process_group(
        backend="gloo", init_method="env://", world_size=n_gpus, rank=rank
    )
    torch.manual_seed(hps.train.seed)
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(rank)

    use_f0 = hps.if_f0 == 1
    if use_f0:
        train_dataset = TextAudioLoaderMultiNSFsid(hps.data)
    else:
        train_dataset = TextAudioLoader(hps.data)

    train_sampler = DistributedBucketSampler(
        train_dataset,
        hps.train.batch_size * n_gpus,
        [100, 200, 300, 400, 500, 600, 700, 800, 900],
        num_replicas=n_gpus,
        rank=rank,
        shuffle=True,
    )

    if use_f0:
        collate_fn = TextAudioCollateMultiNSFsid()
    else:
        collate_fn = TextAudioCollate()
    train_loader = DataLoader(
        train_dataset,
        num_workers=4,
        shuffle=False,
        pin_memory=True,
        collate_fn=collate_fn,
        batch_sampler=train_sampler,
        persistent_workers=True,
        prefetch_factor=8,
    )

    if use_f0:
        net_g = RVC_Model_f0(
            hps.data.filter_length // 2 + 1,
            hps.train.segment_size // hps.data.hop_length,
            **hps.model,
            is_half=hps.train.fp16_run,
            sr=hps.sample_rate,
        )
    else:
        net_g = RVC_Model_nof0(
            hps.data.filter_length // 2 + 1,
            hps.train.segment_size // hps.data.hop_length,
            **hps.model,
            is_half=hps.train.fp16_run,
        )
    if use_cuda:
        net_g = net_g.cuda(rank)
    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm)
    if use_cuda:
        net_d = net_d.cuda(rank)

    optim_g = torch.optim.AdamW(
        net_g.parameters(),
        hps.train.learning_rate,
        betas=hps.train.betas,
        eps=hps.train.eps,
    )
    optim_d = torch.optim.AdamW(
        net_d.parameters(),
        hps.train.learning_rate,
        betas=hps.train.betas,
        eps=hps.train.eps,
    )

    if use_cuda:
        net_g = DDP(net_g, device_ids=[rank])
        net_d = DDP(net_d, device_ids=[rank])
    else:
        net_g = DDP(net_g)
        net_d = DDP(net_d)

    def _load_pretrained(net, path, label):
        """Load ``path`` (if given) into ``net`` and print the load report."""
        if path == "":
            return
        if rank == 0:
            print(f"Loaded pretrained_{label} {path}")
        target = net.module if hasattr(net, "module") else net
        print(target.load_state_dict(torch.load(path, map_location="cpu")["model"]))

    try:
        print("Starting training...")
        _, _, _, epoch_str = load_checkpoint(
            latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d
        )
        _, _, _, epoch_str = load_checkpoint(
            latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g, optim_g
        )
        global_step = (epoch_str - 1) * len(train_loader)
    except Exception:
        # Best effort: any failure to resume (typically "no checkpoint yet")
        # means a fresh start. ``Exception`` rather than a bare ``except`` so
        # KeyboardInterrupt/SystemExit still stop the worker.
        epoch_str = 1
        global_step = 0
        _load_pretrained(net_g, hps.pretrainG, "G")
        _load_pretrained(net_d, hps.pretrainD, "D")

    # ``last_epoch`` is set so the schedule continues from the resumed epoch.
    scheduler_g = torch.optim.lr_scheduler.ExponentialLR(
        optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2
    )
    scheduler_d = torch.optim.lr_scheduler.ExponentialLR(
        optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2
    )

    scaler = GradScaler(enabled=hps.train.fp16_run)

    cache = []  # filled by train_and_evaluate when GPU caching is enabled
    for epoch in range(epoch_str, hps.train.epochs + 1):
        train_and_evaluate(
            rank,
            epoch,
            hps,
            [net_g, net_d],
            [optim_g, optim_d],
            scaler,
            [train_loader, None],
            writers,
            cache,
        )
        scheduler_g.step()
        scheduler_d.step()
def train_and_evaluate(rank, epoch, hps, nets, optims, scaler, loaders, writers, cache):
    """Run one training epoch and handle logging and checkpointing.

    Args:
        rank: worker index; used as the CUDA device index and to restrict
            logging/saving to rank 0.
        epoch: current epoch number.
        hps: hyper-parameter object.
        nets: ``[net_g, net_d]``.
        optims: ``[optim_g, optim_d]``.
        scaler: gradient scaler shared by both optimizers.
        loaders: sequence whose first item is the training loader.
        writers: sequence whose first item is the summary writer, or ``None``.
        cache: list reused across epochs to hold batches when
            ``hps.if_cache_data_in_gpu`` is enabled.

    When ``epoch >= hps.total_epoch`` on rank 0 the final model is saved and
    the process is terminated with ``os._exit``.
    """
    net_g, net_d = nets
    optim_g, optim_d = optims
    train_loader = loaders[0] if loaders is not None else None
    if writers is not None:
        writer = writers[0]

    train_loader.batch_sampler.set_epoch(epoch)
    global global_step

    net_g.train()
    net_d.train()

    if hps.if_cache_data_in_gpu == True:
        # Cached mode: iterate over the shared ``cache`` list. It is filled on
        # the first epoch and only shuffled on later ones.
        data_iterator = cache
        if cache == []:
            for batch_idx, info in enumerate(train_loader):
                if hps.if_f0 == 1:
                    (
                        phone,
                        phone_lengths,
                        pitch,
                        pitchf,
                        spec,
                        spec_lengths,
                        wave,
                        wave_lengths,
                        sid,
                    ) = info
                else:
                    (
                        phone,
                        phone_lengths,
                        spec,
                        spec_lengths,
                        wave,
                        wave_lengths,
                        sid,
                    ) = info
                if torch.cuda.is_available():
                    phone = phone.cuda(rank, non_blocking=True)
                    phone_lengths = phone_lengths.cuda(rank, non_blocking=True)
                    if hps.if_f0 == 1:
                        pitch = pitch.cuda(rank, non_blocking=True)
                        pitchf = pitchf.cuda(rank, non_blocking=True)
                    sid = sid.cuda(rank, non_blocking=True)
                    spec = spec.cuda(rank, non_blocking=True)
                    spec_lengths = spec_lengths.cuda(rank, non_blocking=True)
                    wave = wave.cuda(rank, non_blocking=True)
                    wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
                # Store (batch_idx, batch) pairs so the cache can be iterated
                # exactly like ``enumerate(train_loader)``.
                if hps.if_f0 == 1:
                    cache.append(
                        (
                            batch_idx,
                            (
                                phone,
                                phone_lengths,
                                pitch,
                                pitchf,
                                spec,
                                spec_lengths,
                                wave,
                                wave_lengths,
                                sid,
                            ),
                        )
                    )
                else:
                    cache.append(
                        (
                            batch_idx,
                            (
                                phone,
                                phone_lengths,
                                spec,
                                spec_lengths,
                                wave,
                                wave_lengths,
                                sid,
                            ),
                        )
                    )
        else:
            shuffle(cache)
    else:
        data_iterator = enumerate(train_loader)

    epoch_recorder = EpochRecorder()
    for batch_idx, info in data_iterator:
        if hps.if_f0 == 1:
            (
                phone,
                phone_lengths,
                pitch,
                pitchf,
                spec,
                spec_lengths,
                wave,
                wave_lengths,
                sid,
            ) = info
        else:
            phone, phone_lengths, spec, spec_lengths, wave, wave_lengths, sid = info
        # Cached batches were already moved to the device when the cache was
        # built, so the transfer is only needed in the non-cached mode.
        if (hps.if_cache_data_in_gpu == False) and torch.cuda.is_available():
            phone = phone.cuda(rank, non_blocking=True)
            phone_lengths = phone_lengths.cuda(rank, non_blocking=True)
            if hps.if_f0 == 1:
                pitch = pitch.cuda(rank, non_blocking=True)
                pitchf = pitchf.cuda(rank, non_blocking=True)
            sid = sid.cuda(rank, non_blocking=True)
            spec = spec.cuda(rank, non_blocking=True)
            spec_lengths = spec_lengths.cuda(rank, non_blocking=True)
            wave = wave.cuda(rank, non_blocking=True)

        # ---- Generator forward pass and discriminator loss ----
        with autocast(enabled=hps.train.fp16_run):
            if hps.if_f0 == 1:
                (
                    y_hat,
                    ids_slice,
                    x_mask,
                    z_mask,
                    (z, z_p, m_p, logs_p, m_q, logs_q),
                ) = net_g(phone, phone_lengths, pitch, pitchf, spec, spec_lengths, sid)
            else:
                (
                    y_hat,
                    ids_slice,
                    x_mask,
                    z_mask,
                    (z, z_p, m_p, logs_p, m_q, logs_q),
                ) = net_g(phone, phone_lengths, spec, spec_lengths, sid)
            mel = spec_to_mel_torch(
                spec,
                hps.data.filter_length,
                hps.data.n_mel_channels,
                hps.data.sampling_rate,
                hps.data.mel_fmin,
                hps.data.mel_fmax,
            )
            # Cut the target mel with the same ``ids_slice`` the generator
            # returned, so it lines up with ``y_hat``.
            y_mel = commons.slice_segments(
                mel, ids_slice, hps.train.segment_size // hps.data.hop_length
            )
            # The mel of the generated audio is computed with autocast off,
            # from a float32 copy of ``y_hat``.
            with autocast(enabled=False):
                y_hat_mel = mel_spectrogram_torch(
                    y_hat.float().squeeze(1),
                    hps.data.filter_length,
                    hps.data.n_mel_channels,
                    hps.data.sampling_rate,
                    hps.data.hop_length,
                    hps.data.win_length,
                    hps.data.mel_fmin,
                    hps.data.mel_fmax,
                )
            if hps.train.fp16_run == True:
                y_hat_mel = y_hat_mel.half()
            # ``ids_slice`` is scaled by hop_length to index the waveform.
            wave = commons.slice_segments(
                wave, ids_slice * hps.data.hop_length, hps.train.segment_size
            )

            # ``y_hat`` is detached so the discriminator loss does not
            # back-propagate into the generator.
            y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach())
            with autocast(enabled=False):
                loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(
                    y_d_hat_r, y_d_hat_g
                )
        # ---- Discriminator update ----
        optim_d.zero_grad()
        scaler.scale(loss_disc).backward()
        scaler.unscale_(optim_d)
        # NOTE(review): called with ``None`` as the clip value; presumably this
        # only measures the gradient norm -- confirm in commons.clip_grad_value_.
        grad_norm_d = commons.clip_grad_value_(net_d.parameters(), None)
        scaler.step(optim_d)

        # ---- Generator losses and update ----
        with autocast(enabled=hps.train.fp16_run):
            y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
            with autocast(enabled=False):
                loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
                loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl
                loss_fm = feature_loss(fmap_r, fmap_g)
                loss_gen, losses_gen = generator_loss(y_d_hat_g)
                loss_gen_all = loss_gen + loss_fm + loss_mel + loss_kl
        optim_g.zero_grad()
        scaler.scale(loss_gen_all).backward()
        scaler.unscale_(optim_g)
        grad_norm_g = commons.clip_grad_value_(net_g.parameters(), None)
        scaler.step(optim_g)
        # One scaler serves both optimizers, so it is updated once per step.
        scaler.update()

        if rank == 0:
            if global_step % hps.train.log_interval == 0:
                lr = optim_g.param_groups[0]["lr"]
                # print("Epoch: {} [{:.0f}%]".format(epoch, 100.0 * batch_idx / len(train_loader)))

                # Cap the reported values; this runs after backward(), so it
                # affects only what is logged/printed, not the gradients.
                if loss_mel > 75:
                    loss_mel = 75
                if loss_kl > 9:
                    loss_kl = 9

                scalar_dict = {
                    "loss/g/total": loss_gen_all,
                    "loss/d/total": loss_disc,
                    "learning_rate": lr,
                    "grad_norm_d": grad_norm_d,
                    "grad_norm_g": grad_norm_g,
                }
                scalar_dict.update(
                    {
                        "loss/g/fm": loss_fm,
                        "loss/g/mel": loss_mel,
                        "loss/g/kl": loss_kl,
                    }
                )

                scalar_dict.update(
                    {"loss/g/{}".format(i): v for i, v in enumerate(losses_gen)}
                )
                scalar_dict.update(
                    {"loss/d_r/{}".format(i): v for i, v in enumerate(losses_disc_r)}
                )
                scalar_dict.update(
                    {"loss/d_g/{}".format(i): v for i, v in enumerate(losses_disc_g)}
                )
                image_dict = {
                    "slice/mel_org": plot_spectrogram_to_numpy(
                        y_mel[0].data.cpu().numpy()
                    ),
                    "slice/mel_gen": plot_spectrogram_to_numpy(
                        y_hat_mel[0].data.cpu().numpy()
                    ),
                    "all/mel": plot_spectrogram_to_numpy(mel[0].data.cpu().numpy()),
                }
                summarize(
                    writer=writer,
                    global_step=global_step,
                    images=image_dict,
                    scalars=scalar_dict,
                )
        global_step += 1

    # ---- Periodic checkpointing (rank 0 only) ----
    if epoch % hps.save_every_epoch == 0 and rank == 0:
        if hps.if_latest == 0:
            # Keep a separate G/D checkpoint per save, named by global step.
            save_checkpoint(
                net_g,
                optim_g,
                hps.train.learning_rate,
                epoch,
                os.path.join(hps.model_dir, "G_{}.pth".format(global_step)),
            )
            save_checkpoint(
                net_d,
                optim_d,
                hps.train.learning_rate,
                epoch,
                os.path.join(hps.model_dir, "D_{}.pth".format(global_step)),
            )
        else:
            # "Latest only" mode: a fixed file name, so each save overwrites
            # the previous one.
            save_checkpoint(
                net_g,
                optim_g,
                hps.train.learning_rate,
                epoch,
                os.path.join(hps.model_dir, "G_{}.pth".format(2333333)),
            )
            save_checkpoint(
                net_d,
                optim_d,
                hps.train.learning_rate,
                epoch,
                os.path.join(hps.model_dir, "D_{}.pth".format(2333333)),
            )
        if rank == 0 and hps.save_every_weights == "1":
            # Also export the generator weights through ``save_final``.
            if hasattr(net_g, "module"):
                ckpt = net_g.module.state_dict()
            else:
                ckpt = net_g.state_dict()
            print(
                "saving ckpt %s_e%s:%s"
                % (
                    hps.name,
                    epoch,
                    save_final(
                        ckpt,
                        hps.sample_rate,
                        hps.if_f0,
                        hps.name + "_e%s_s%s" % (epoch, global_step),
                        epoch,
                        hps.version,
                        hps,
                    ),
                )
            )

    # Per-epoch summary; the loss values are those of the last batch.
    if rank == 0:
        print(
            f"{hps.name} | epoch={epoch} | step={global_step} | {epoch_recorder.record()} | loss_disc={loss_disc:.3f} | loss_gen={loss_gen:.3f} | loss_fm={loss_fm:.3f} | loss_mel={loss_mel:.3f} | loss_kl={loss_kl:.3f}"
        )
    if epoch >= hps.total_epoch and rank == 0:
        print(
            f"Training has been successfully completed with {epoch} epoch and {global_step} steps."
        )

        if hasattr(net_g, "module"):
            ckpt = net_g.module.state_dict()
        else:
            ckpt = net_g.state_dict()
        print(
            "Saving final checkpoint: %s"
            % (
                save_final(
                    ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version, hps
                )
            )
        )
        sleep(1)
        # Terminates this process immediately instead of returning.
        # NOTE(review): the exit status passed here is far outside the usual
        # 0-255 range -- confirm what callers expect.
        os._exit(2333333)
def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
    """Load a training checkpoint into ``model`` (and optionally ``optimizer``).

    Args:
        checkpoint_path: file written by ``save_checkpoint``; must contain
            the keys ``model``, ``iteration``, ``learning_rate`` and, when an
            optimizer is restored, ``optimizer``.
        model: target network; if it has a ``module`` attribute (wrapped
            model), the weights are loaded into ``model.module``.
        optimizer: optimizer to restore, or ``None`` to skip.
        load_opt: the optimizer state is restored only when this equals 1.

    Returns:
        ``(model, optimizer, learning_rate, iteration)``.

    Parameters that are missing from the checkpoint, or whose shape differs
    from the model's, keep the model's current values; each such case is
    reported on stdout.
    """
    assert os.path.isfile(checkpoint_path), f"{checkpoint_path} is not a file"
    checkpoint_dict = torch.load(checkpoint_path, map_location="cpu")

    saved_state_dict = checkpoint_dict["model"]
    target = model.module if hasattr(model, "module") else model
    state_dict = target.state_dict()

    new_state_dict = {}
    for k, v in state_dict.items():
        saved = saved_state_dict.get(k)
        if saved is None:
            # The previous ``print("%s ...", k)`` never interpolated ``k``.
            print(f"{k} is not in the checkpoint")
            new_state_dict[k] = v
        elif getattr(saved, "shape", None) != v.shape:
            print(
                f"shape-{k}-mismatch|need-{v.shape}|get-{getattr(saved, 'shape', None)}"
            )
            new_state_dict[k] = v
        else:
            new_state_dict[k] = saved
    target.load_state_dict(new_state_dict, strict=False)

    iteration = checkpoint_dict["iteration"]
    learning_rate = checkpoint_dict["learning_rate"]
    if optimizer is not None and load_opt == 1:
        optimizer.load_state_dict(checkpoint_dict["optimizer"])
    print(f"Loaded checkpoint '{checkpoint_path}' (epoch {iteration})")
    return model, optimizer, learning_rate, iteration
def summarize(
    writer,
    global_step,
    scalars=None,
    histograms=None,
    images=None,
    audios=None,
    audio_sampling_rate=22050,
):
    """Write a batch of summaries to ``writer`` for one training step.

    Args:
        writer: object exposing ``add_scalar``, ``add_histogram``,
            ``add_image`` and ``add_audio``.
        global_step: step value attached to every entry.
        scalars, histograms, images, audios: ``tag -> value`` mappings;
            ``None`` (the default) means "nothing of this kind".
        audio_sampling_rate: sampling rate passed along with each audio.

    Images are written with ``dataformats="HWC"``.
    """
    # ``None`` defaults replace the former shared ``{}`` default objects.
    for tag, value in (scalars or {}).items():
        writer.add_scalar(tag, value, global_step)
    for tag, value in (histograms or {}).items():
        writer.add_histogram(tag, value, global_step)
    for tag, value in (images or {}).items():
        writer.add_image(tag, value, global_step, dataformats="HWC")
    for tag, value in (audios or {}).items():
        writer.add_audio(tag, value, global_step, audio_sampling_rate)
required=True, + help="checkpoint save frequency (epoch)", + ) + parser.add_argument( + "-te", "--total_epoch", type=int, required=True, help="total_epoch" + ) + parser.add_argument( + "-pg", "--pretrainG", type=str, default="", help="Pretrained Discriminator path" + ) + parser.add_argument( + "-pd", "--pretrainD", type=str, default="", help="Pretrained Generator path" + ) + parser.add_argument("-g", "--gpus", type=str, default="0", help="split by -") + parser.add_argument( + "-bs", "--batch_size", type=int, required=True, help="batch size" + ) + parser.add_argument( + "-e", "--experiment_dir", type=str, required=True, help="experiment dir" + ) + parser.add_argument( + "-sr", "--sample_rate", type=str, required=True, help="sample rate, 32k/40k/48k" + ) + parser.add_argument( + "-sw", + "--save_every_weights", + type=str, + default="0", + help="save the extracted model in weights directory when saving checkpoints", + ) + parser.add_argument( + "-v", "--version", type=str, required=True, help="model version" + ) + parser.add_argument( + "-f0", + "--if_f0", + type=int, + required=True, + help="use f0 as one of the inputs of the model, 1 or 0", + ) + parser.add_argument( + "-l", + "--if_latest", + type=int, + required=True, + help="if only save the latest G/D pth file, 1 or 0", + ) + parser.add_argument( + "-c", + "--if_cache_data_in_gpu", + type=int, + required=True, + help="if caching the dataset in GPU memory, 1 or 0", + ) + args = parser.parse_args() + name = args.experiment_dir + experiment_dir = os.path.join("./logs", args.experiment_dir) + config_save_path = os.path.join(experiment_dir, "config.json") + with open(config_save_path, "r") as f: + config = json.load(f) + hparams = HParams(**config) + hparams.model_dir = hparams.experiment_dir = experiment_dir + hparams.save_every_epoch = args.save_every_epoch + hparams.name = name + hparams.total_epoch = args.total_epoch + hparams.pretrainG = args.pretrainG + hparams.pretrainD = args.pretrainD + hparams.version = 
class HParams:
    """Attribute-style container for (nested) hyper-parameters.

    Keyword arguments become attributes; values that are dictionaries are
    converted recursively into ``HParams``. The object also supports the
    mapping operations used elsewhere (``keys``/``items``/``values``,
    ``len``, ``in`` and ``[]`` access), all backed by the instance
    ``__dict__``.
    """

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            # isinstance (not an exact type comparison) so dict subclasses
            # such as OrderedDict are wrapped as nested HParams too.
            if isinstance(v, dict):
                v = HParams(**v)
            self[k] = v

    def keys(self):
        return self.__dict__.keys()

    def items(self):
        return self.__dict__.items()

    def values(self):
        return self.__dict__.values()

    def __len__(self):
        return len(self.__dict__)

    def __getitem__(self, key):
        return getattr(self, key)

    def __setitem__(self, key, value):
        return setattr(self, key, value)

    def __contains__(self, key):
        return key in self.__dict__

    def __repr__(self):
        return self.__dict__.__repr__()
+ ) + else: + file_name = format_title(os.path.basename(dropbox)) + if ".pth" in dropbox: + model_name = format_title(file_name.split(".pth")[0]) + else: + if "v2" not in dropbox: + model_name = format_title( + file_name.split("_nprobe_1_")[1].split("_v1")[0] + ) + else: + model_name = format_title( + file_name.split("_nprobe_1_")[1].split("_v2")[0] + ) + model_path = os.path.join(now_dir, "logs", model_name) + if not os.path.exists(model_path): + os.makedirs(model_path) + if os.path.exists(os.path.join(model_path, file_name)): + os.remove(os.path.join(model_path, file_name)) + os.rename(dropbox, os.path.join(model_path, file_name)) + print(f"{file_name} saved in {model_path}") + gr.Info(f"{file_name} saved in {model_path}") + return None + + +def download_tab(): + with gr.Column(): + gr.Markdown(value=i18n("## Download Model")) + model_link = gr.Textbox( + label=i18n("Model Link"), + placeholder=i18n("Introduce the model link"), + interactive=True, + ) + model_download_output_info = gr.Textbox( + label=i18n("Output Information"), + value="", + max_lines=8, + interactive=False, + ) + model_download_button = gr.Button(i18n("Download Model")) + model_download_button.click( + run_download_script, + [model_link], + model_download_output_info, + api_name="model_download", + ) + gr.Markdown(value=i18n("## Drop files")) + + dropbox = gr.File( + label=i18n( + "Drag your .pth file and .index file into this space. Drag one and then the other." 
def get_audio_info(audio_file):
    """Build a Markdown summary and a spectrogram image for an audio file.

    The audio is read, averaged over axis 1 when it has more than one
    dimension, and passed to ``generate_spectrogram`` (which writes
    ``spectrogram.png``). File metadata is then read with ``sf.info``.

    Returns:
        ``(info_table, "spectrogram.png")`` where ``info_table`` is a
        Markdown bullet list.
    """
    audio_data, sample_rate = sf.read(audio_file)

    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)

    generate_spectrogram(audio_data, sample_rate, os.path.basename(audio_file))

    audio_info = sf.info(audio_file)
    # Bits per sample for the fixed-width subtypes; anything else
    # (compressed formats) stays 0, as before. Previously only PCM_16 and
    # FLOAT were known, so e.g. 24-bit files were reported as 0 bit.
    bit_depth = {
        "PCM_S8": 8,
        "PCM_U8": 8,
        "PCM_16": 16,
        "PCM_24": 24,
        "PCM_32": 32,
        "FLOAT": 32,
        "DOUBLE": 64,
    }.get(audio_info.subtype, 0)

    minutes, seconds = divmod(audio_info.duration, 60)
    seconds, milliseconds = divmod(seconds, 1)
    milliseconds *= 1000

    # Per-channel rate; the last table row includes the channel count.
    speed_in_kbps = audio_info.samplerate * bit_depth / 1000

    info_table = f"""
    - **File Name:** {os.path.basename(audio_file)}
    - **Duration:** {int(minutes)} minutes, {int(seconds)} seconds, {int(milliseconds)} milliseconds
    - **Bitrate:** {speed_in_kbps} kbps
    - **Audio Channels:** {audio_info.channels}
    - **Sampling rate:** {audio_info.samplerate} Hz
    - **Bit per second:** {audio_info.samplerate * audio_info.channels * bit_depth} bit/s
    """

    return info_table, "spectrogram.png"
[Ilaria-Audio-Analyzer](https://github.com/TheStingerX/Ilaria-Audio-Analyzer) code." + ) + audio_input = gr.Audio(type="filepath") + get_info_button = gr.Button( + value=i18n("Get information about the audio"), variant="primary" + ) + with gr.Column(): + with gr.Row(): + with gr.Column(): + gr.Markdown( + value=i18n("Information about the audio file"), + visible=True, + ) + output_markdown = gr.Markdown( + value=i18n("Waiting for information..."), visible=True + ) + image_output = gr.Image(type="filepath", interactive=False) + + get_info_button.click( + fn=get_audio_info, + inputs=[audio_input], + outputs=[output_markdown, image_output], + ) diff --git a/tabs/extra/extra.py b/tabs/extra/extra.py new file mode 100644 index 0000000000000000000000000000000000000000..a6e82b9783e9e584e0f10b003f03d7139857f2f3 --- /dev/null +++ b/tabs/extra/extra.py @@ -0,0 +1,22 @@ +import gradio as gr + +import tabs.extra.processing.processing as processing +import tabs.extra.analyzer.analyzer as analyzer + +from assets.i18n.i18n import I18nAuto + +i18n = I18nAuto() + + +def extra_tab(): + gr.Markdown( + value=i18n( + "This section contains some extra utilities that often may be in experimental phases." 
def model_information_tab():
    """Build the "model information" UI: a path input, a read-only output
    box, and a button that runs ``run_model_information_script`` on the path.
    """
    with gr.Column():
        path_box = gr.Textbox(
            label=i18n("Model Path"),
            placeholder=i18n("Introduce the model .pth path"),
            interactive=True,
        )
        result_box = gr.Textbox(
            label=i18n("Output Information"),
            value="",
            max_lines=8,
            interactive=False,
        )
        # The click handler receives the path text and fills the result box.
        gr.Button(i18n("See Model Information")).click(
            run_model_information_script,
            [path_box],
            result_box,
            api_name="model_information",
        )
label=i18n("Path to Model A"), + value="", + interactive=True, + placeholder=i18n("Path to model"), + ) + model_fusion_b = gr.Textbox( + label=i18n("Path to Model B"), + value="", + interactive=True, + placeholder=i18n("Path to model"), + ) + model_fusion_output_info = gr.Textbox( + label=i18n("Output Information"), + value="", + ) + + model_fusion_button = gr.Button( + i18n("Fusion"), variant="primary", interactive=False + ) + + model_fusion_button.click( + model_fusion, + [ + model_fusion_name, + model_fusion_a, + model_fusion_b, + ], + model_fusion_output_info, + api_name="model_fusion", + ) + + with gr.Accordion(label=i18n("View model information")): + with gr.Row(): + with gr.Column(): + model_view_model_path = gr.Textbox( + label=i18n("Path to Model"), + value="", + interactive=True, + placeholder=i18n("Path to model"), + ) + + model_view_output_info = gr.Textbox( + label=i18n("Output Information"), value="", max_lines=8 + ) + model_view_button = gr.Button(i18n("View"), variant="primary") + model_view_button.click( + model_information, + [model_view_model_path], + model_view_output_info, + api_name="model_info", + ) + + with gr.Accordion(label=i18n("Model extraction")): + with gr.Row(): + with gr.Column(): + model_extract_name = gr.Textbox( + label=i18n("Model Name"), + value="", + interactive=True, + placeholder=i18n("Enter model name"), + ) + model_extract_path = gr.Textbox( + label=i18n("Path to Model"), + placeholder=i18n("Path to model"), + interactive=True, + ) + model_extract_info = gr.Textbox( + label=i18n("Model information to be placed"), + value="", + max_lines=8, + interactive=True, + placeholder=i18n("Model information to be placed"), + ) + with gr.Column(): + model_extract_pitch_guidance = gr.Checkbox( + label=i18n("Pitch Guidance"), + value=True, + interactive=True, + ) + model_extract_rvc_version = gr.Radio( + label=i18n("RVC Version"), + choices=["v1", "v2"], + value="v2", + interactive=True, + ) + model_extract_sampling_rate = gr.Radio( + 
label=i18n("Sampling Rate"), + choices=["32000", "40000", "48000"], + value="40000", + interactive=True, + ) + model_extract_output_info = gr.Textbox( + label=i18n("Output Information"), value="", max_lines=8 + ) + + model_extract_button = gr.Button(i18n("Extract"), variant="primary") + model_extract_button.click( + extract_small_model, + [ + model_extract_path, + model_extract_name, + model_extract_sampling_rate, + model_extract_pitch_guidance, + model_extract_info, + model_extract_rvc_version, + ], + model_extract_output_info, + api_name="model_extract", + ) diff --git a/tabs/inference/inference.py b/tabs/inference/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..c6426188a255c6678653ea9a81b85b021340760f --- /dev/null +++ b/tabs/inference/inference.py @@ -0,0 +1,466 @@ +import os, sys +import gradio as gr +import regex as re +import shutil +import datetime +import random + +from core import ( + run_infer_script, + run_batch_infer_script, +) + +from assets.i18n.i18n import I18nAuto + +from rvc.lib.utils import format_title + +i18n = I18nAuto() + +now_dir = os.getcwd() +sys.path.append(now_dir) + +model_root = os.path.join(now_dir, "logs") +audio_root = os.path.join(now_dir, "assets", "audios") + +model_root_relative = os.path.relpath(model_root, now_dir) +audio_root_relative = os.path.relpath(audio_root, now_dir) + +sup_audioext = { + "wav", + "mp3", + "flac", + "ogg", + "opus", + "m4a", + "mp4", + "aac", + "alac", + "wma", + "aiff", + "webm", + "ac3", +} + +names = [ + os.path.join(root, file) + for root, _, files in os.walk(model_root_relative, topdown=False) + for file in files + if ( + file.endswith((".pth", ".onnx")) + and not (file.startswith("G_") or file.startswith("D_")) + ) +] + +indexes_list = [ + os.path.join(root, name) + for root, _, files in os.walk(model_root_relative, topdown=False) + for name in files + if name.endswith(".index") and "trained" not in name +] + +audio_paths = [ + os.path.join(root, name) + for root, 
def match_index(model_file: str) -> str:
    """Return the best matching ``.index`` file for a voice model.

    The model's file name (minus a trailing ``.pth``/``.onnx`` and an optional
    ``_e<epoch>_s<step>`` checkpoint tail) is used to locate candidate index
    files in ``<model_root>/<name>``, ``<model_root>/<name>/<name>`` and the
    model root itself. Only paths that are present in the module-level
    ``indexes_list`` are considered.

    Args:
        model_file: Path of the selected model file.

    Returns:
        The path of the preferred index file (names without spaces win, then
        the largest file), or ``""`` when nothing matches.
    """
    if not model_file:
        # An empty name would match every index file via the substring test.
        return ""

    # Strip only a *trailing* extension. The former pattern r"\.pth|\.onnx$"
    # anchored just the ".onnx" branch, so ".pth" was removed anywhere in the
    # path (e.g. inside a directory name).
    model_path_no_ext = re.sub(r"\.(?:pth|onnx)$", "", model_file)
    model_file_name = os.path.split(model_path_no_ext)[-1]

    # Training checkpoints end in _eXXX_sXXX; drop that tail to get the base name.
    if re.match(r".+_e\d+_s\d+$", model_file_name):
        base_model_name = model_file_name.rsplit("_", 2)[0]
    else:
        base_model_name = model_file_name

    sid_directory = os.path.join(model_root_relative, base_model_name)
    double_sid_directory = os.path.join(sid_directory, base_model_name)
    # Skip folders that do not exist so os.listdir cannot raise, including
    # the model root itself on a fresh install.
    directories_to_search = [
        directory
        for directory in (sid_directory, double_sid_directory, model_root_relative)
        if os.path.isdir(directory)
    ]

    names_to_match = (model_file_name.lower(), base_model_name.lower())
    matching_index_files = []

    for directory in directories_to_search:
        # Anything inside the model's own folder counts as a match.
        folder_match = directory == sid_directory
        for filename in os.listdir(directory):
            if not filename.endswith(".index") or "trained" in filename:
                continue
            lowered = filename.lower()
            name_match = any(name in lowered for name in names_to_match)
            if not (name_match or folder_match):
                continue
            index_path = os.path.join(directory, filename)
            if index_path in indexes_list:
                matching_index_files.append(
                    (index_path, os.path.getsize(index_path), " " not in filename)
                )

    if not matching_index_files:
        return ""

    # Prefer names without spaces, then the largest file; max() keeps the
    # first candidate on ties, like the stable sort it replaces.
    return max(matching_index_files, key=lambda match: (match[2], match[1]))[0]
def delete_outputs():
    """Delete generated output audios and notify the user.

    Walks ``audio_root_relative`` recursively and removes every file whose
    name contains ``"_output"`` and ends with one of the supported audio
    extensions, then shows a Gradio info toast.
    """
    # str.endswith needs a tuple; build it once rather than once per file.
    audio_suffixes = tuple(sup_audioext)
    for root, _, files in os.walk(audio_root_relative, topdown=False):
        for name in files:
            if "_output" in name and name.endswith(audio_suffixes):
                os.remove(os.path.join(root, name))
    gr.Info("Outputs cleared!")
interactive=True, + ) + split_audio = gr.Checkbox( + label=i18n("Split Audio"), + visible=True, + value=False, + interactive=True, + ) + pitch = gr.Slider( + minimum=-24, + maximum=24, + step=1, + label=i18n("Pitch"), + value=0, + interactive=True, + ) + filter_radius = gr.Slider( + minimum=0, + maximum=7, + label=i18n( + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness" + ), + value=3, + step=1, + interactive=True, + ) + index_rate = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Search Feature Ratio"), + value=0.75, + interactive=True, + ) + hop_length = gr.Slider( + minimum=1, + maximum=512, + step=1, + label=i18n("Hop Length"), + value=128, + interactive=True, + ) + with gr.Column(): + f0method = gr.Radio( + label=i18n("Pitch extraction algorithm"), + choices=[ + "pm", + "harvest", + "dio", + "crepe", + "crepe-tiny", + "rmvpe", + ], + value="rmvpe", + interactive=True, + ) + + convert_button1 = gr.Button(i18n("Convert")) + + with gr.Row(): # Defines output info + output audio download after conversion + vc_output1 = gr.Textbox(label=i18n("Output Information")) + vc_output2 = gr.Audio(label=i18n("Export Audio")) + + # Batch inference tab + with gr.Tab(i18n("Batch")): + with gr.Row(): + with gr.Column(): + input_folder_batch = gr.Textbox( + label=i18n("Input Folder"), + placeholder=i18n("Enter input path"), + value=os.path.join(now_dir, "assets", "audios"), + interactive=True, + ) + output_folder_batch = gr.Textbox( + label=i18n("Output Folder"), + placeholder=i18n("Enter output path"), + value=os.path.join(now_dir, "assets", "audios"), + interactive=True, + ) + with gr.Accordion(i18n("Advanced Settings"), open=False): + with gr.Column(): + clear_outputs = gr.Button( + i18n("Clear Outputs (Deletes all audios in assets/audios)") + ) + pitch_batch = gr.Slider( + minimum=-24, + maximum=24, + step=1, + label=i18n("Pitch"), + value=0, + interactive=True, + ) + filter_radius_batch = 
gr.Slider( + minimum=0, + maximum=7, + label=i18n( + "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness" + ), + value=3, + step=1, + interactive=True, + ) + index_rate_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Search Feature Ratio"), + value=0.75, + interactive=True, + ) + hop_length_batch = gr.Slider( + minimum=1, + maximum=512, + step=1, + label=i18n("Hop Length"), + value=128, + interactive=True, + ) + with gr.Column(): + f0method_batch = gr.Radio( + label=i18n("Pitch extraction algorithm"), + choices=[ + "pm", + "harvest", + "dio", + "crepe", + "crepe-tiny", + "rmvpe", + ], + value="rmvpe", + interactive=True, + ) + + convert_button2 = gr.Button(i18n("Convert")) + + with gr.Row(): # Defines output info + output audio download after conversion + vc_output3 = gr.Textbox(label=i18n("Output Information")) + + def toggle_visible(checkbox): + return {"visible": checkbox, "__type__": "update"} + + refresh_button.click( + fn=change_choices, + inputs=[], + outputs=[model_file, index_file, audio], + ) + audio.change( + fn=output_path_fn, + inputs=[audio], + outputs=[output_path], + ) + upload_audio.upload( + fn=save_to_wav2, + inputs=[upload_audio], + outputs=[audio, output_path], + ) + upload_audio.stop_recording( + fn=save_to_wav, + inputs=[upload_audio], + outputs=[audio, output_path], + ) + clear_outputs.click( + fn=delete_outputs, + inputs=[], + outputs=[], + ) + convert_button1.click( + fn=run_infer_script, + inputs=[ + pitch, + filter_radius, + index_rate, + hop_length, + f0method, + audio, + output_path, + model_file, + index_file, + split_audio, + ], + outputs=[vc_output1, vc_output2], + ) + convert_button2.click( + fn=run_batch_infer_script, + inputs=[ + pitch_batch, + filter_radius_batch, + index_rate_batch, + hop_length_batch, + f0method_batch, + input_folder_batch, + output_folder_batch, + model_file, + index_file, + ], + outputs=[vc_output3], + ) diff --git 
/**
 * Stop the current screen recording and publish the captured video.
 *
 * On success the blob is stored in `outputBlob`, an object URL for it is
 * exposed as `window.videoSource`, `window.currentState` becomes
 * "PREVIEW_FILE", and a download of "recording.webm" is triggered.
 *
 * This function is also installed as the recorder's `onErrorOrStop` callback,
 * so it can run after the MediaRecorder has already stopped by itself. In that
 * case the chunks collected so far are exported instead of being dropped;
 * previously this path only logged, leaving `window.currentState` stuck at
 * "RECORDING" with no (or a stale) `window.videoSource`.
 *
 * @returns {Promise<void>}
 */
const stopRecording = () => __awaiter(void 0, void 0, void 0, function* () {
    // We should do nothing if the user tries to stop a recording that was never set up.
    if (recorder == null) {
        return;
    }
    if (recorder.getState() === "inactive") {
        console.log("Inactive");
        if (recorder.recordedChunks.length === 0) {
            // Nothing was captured: just leave the "RECORDING" state, if we were in it.
            if (window.currentState === "RECORDING") {
                window.currentState = "OFF";
            }
            return;
        }
        // The recorder stopped on its own; build the blob from what it collected.
        outputBlob = recorder.buildOutputBlob();
    }
    else {
        outputBlob = yield recorder.stop();
    }
    console.log("Done recording");
    window.currentState = "PREVIEW_FILE";
    const videoSource = URL.createObjectURL(outputBlob);
    window.videoSource = videoSource;
    // Offer the recording as a download through a temporary anchor element.
    const fileName = "recording";
    const link = document.createElement("a");
    link.setAttribute("href", videoSource);
    link.setAttribute("download", `${fileName}.webm`);
    link.click();
});
// Setup if needed and start recording.
// Returns "Record" when a recording was started, or the base64-encoded video
// when a running recording was stopped.
async () => {
    // Set up recording functions if not already initialized.
    // NOTE: the two declarations in this block are textual placeholders that
    // report.py swaps for the contents of recorder.js and main.js, so their
    // text must stay exactly as written.
    if (!window.startRecording) {
        let recorder_js = null;
        let main_js = null;
    }

    // Fetch the blob behind an object URL and resolve with its base64 payload
    // (the data-URI prefix is stripped).
    async function getVideoBlobAsBase64(objectURL) {
        const response = await fetch(objectURL);
        if (!response.ok) {
            throw new Error('Failed to fetch video blob.');
        }

        const blob = await response.blob();

        return new Promise((resolve, reject) => {
            const reader = new FileReader();
            // Attach the handler before starting the read. loadend fires for
            // success, error and abort alike, so the promise always settles.
            reader.onloadend = () => {
                if (reader.error || !reader.result) {
                    // Reject with an Error so callers get a message and a stack.
                    reject(reader.error || new Error('Failed to convert blob to base64.'));
                    return;
                }
                resolve(reader.result.split(',')[1]);
            };
            reader.readAsDataURL(blob);
        });
    }

    if (window.currentState === "RECORDING") {
        await window.stopRecording();
        const base64String = await getVideoBlobAsBase64(window.videoSource);
        return base64String;
    }

    window.startRecording();
    return "Record";
}
P, generator) { + function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } + return new (P || (P = Promise))(function (resolve, reject) { + function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } + function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } + function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } + step((generator = generator.apply(thisArg, _arguments || [])).next()); + }); +}; +const BLOB_TYPE = "video/webm"; +class ScreenCastRecorder { + /** True if the current browser likely supports screencasts. */ + static isSupportedBrowser() { + return (navigator.mediaDevices != null && + navigator.mediaDevices.getUserMedia != null && + navigator.mediaDevices.getDisplayMedia != null && + MediaRecorder.isTypeSupported(BLOB_TYPE)); + } + constructor({ recordAudio, onErrorOrStop }) { + this.recordAudio = recordAudio; + this.onErrorOrStopCallback = onErrorOrStop; + this.inputStream = null; + this.recordedChunks = []; + this.mediaRecorder = null; + } + /** + * This asynchronous method will initialize the screen recording object asking + * for permissions to the user which are needed to start recording. 
+ */ + initialize() { + return __awaiter(this, void 0, void 0, function* () { + const desktopStream = yield navigator.mediaDevices.getDisplayMedia({ + video: true, + }); + let tracks = desktopStream.getTracks(); + if (this.recordAudio) { + const voiceStream = yield navigator.mediaDevices.getUserMedia({ + video: false, + audio: true, + }); + tracks = tracks.concat(voiceStream.getAudioTracks()); + } + this.recordedChunks = []; + this.inputStream = new MediaStream(tracks); + this.mediaRecorder = new MediaRecorder(this.inputStream, { + mimeType: BLOB_TYPE, + }); + this.mediaRecorder.ondataavailable = e => this.recordedChunks.push(e.data); + }); + } + getState() { + if (this.mediaRecorder) { + return this.mediaRecorder.state; + } + return "inactive"; + } + /** + * This method will start the screen recording if the user has granted permissions + * and the mediaRecorder has been initialized + * + * @returns {boolean} + */ + start() { + if (!this.mediaRecorder) { + console.warn(`ScreenCastRecorder.start: mediaRecorder is null`); + return false; + } + const logRecorderError = (e) => { + console.warn(`mediaRecorder.start threw an error: ${e}`); + }; + this.mediaRecorder.onerror = (e) => { + logRecorderError(e); + this.onErrorOrStopCallback(); + }; + this.mediaRecorder.onstop = () => this.onErrorOrStopCallback(); + try { + this.mediaRecorder.start(); + } + catch (e) { + logRecorderError(e); + return false; + } + return true; + } + /** + * This method will stop recording and then return the generated Blob + * + * @returns {(Promise|undefined)} + * A Promise which will return the generated Blob + * Undefined if the MediaRecorder could not initialize + */ + stop() { + if (!this.mediaRecorder) { + return undefined; + } + let resolver; + const promise = new Promise(r => { + resolver = r; + }); + this.mediaRecorder.onstop = () => resolver(); + this.mediaRecorder.stop(); + if (this.inputStream) { + this.inputStream.getTracks().forEach(s => s.stop()); + this.inputStream = null; + } + 
def save_base64_video(base64_string, suffix=".mp4"):
    """Decode a base64 video payload into a new temporary file.

    Args:
        base64_string: The base64-encoded video data, without a data-URI
            prefix.
        suffix: Extension given to the temporary file. Defaults to ``".mp4"``
            to keep the previous behaviour.
            NOTE(review): recorder.js records ``video/webm``, so ``".webm"``
            may be the more accurate choice for callers - confirm.

    Returns:
        The path of the temporary file. It is created with ``delete=False``,
        so removing it is left to the caller.

    Raises:
        ValueError: If the payload is empty or is not valid base64
            (``binascii.Error`` is a ``ValueError`` subclass).
    """
    # validate=True rejects stray characters instead of silently dropping
    # them, which could otherwise produce a corrupt or empty video file.
    video_data = base64.b64decode(base64_string, validate=True)
    if not video_data:
        raise ValueError("The recorded video payload is empty.")

    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
        temp_file.write(video_data)
        temp_filename = temp_file.name
    print(f"Temporary MP4 file saved as: {temp_filename}")
    return temp_filename
Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step." + ), + ] + components = [gr.Markdown(value=instruction) for instruction in instructions] + + start_button = gr.Button("Record Screen") + video_component = gr.Video(interactive=False) + + def toggle_button_label(returned_string): + if returned_string.startswith("Record"): + return gr.Button(value="Stop Recording"), None + else: + try: + temp_filename = save_base64_video(returned_string) + except Exception as e: + return gr.Button(value="Record Screen"), gr.Warning( + f"Failed to convert video to mp4:\n{e}" + ) + return gr.Button(value="Record Screen"), gr.Video( + value=temp_filename, interactive=False + ) + + start_button.click( + toggle_button_label, + start_button, + [start_button, video_component], + js=record_button_js, + ) diff --git a/tabs/train/train.py b/tabs/train/train.py new file mode 100644 index 0000000000000000000000000000000000000000..051a991343a9f0dc799e8e6c311ec186b0c53137 --- /dev/null +++ b/tabs/train/train.py @@ -0,0 +1,428 @@ +import os +import subprocess +import sys +import gradio as gr +from assets.i18n.i18n import I18nAuto +from core import ( + run_preprocess_script, + run_extract_script, + run_train_script, + run_index_script, +) +from rvc.configs.config import max_vram_gpu, get_gpu_info +from rvc.lib.utils import format_title + +i18n = I18nAuto() +now_dir = os.getcwd() +sys.path.append(now_dir) + +sup_audioext = { + "wav", + "mp3", + "flac", + "ogg", + "opus", + "m4a", + "mp4", + "aac", + "alac", + "wma", + "aiff", + "webm", + "ac3", +} + +# Custom Pretraineds +pretraineds_custom_path = os.path.join( + now_dir, "rvc", "pretraineds", "pretraineds_custom" +) + +pretraineds_custom_path_relative = os.path.relpath(pretraineds_custom_path, now_dir) + +if not os.path.exists(pretraineds_custom_path_relative): + os.makedirs(pretraineds_custom_path_relative) + + +def 
def run_train(
    model_name,
    rvc_version,
    save_every_epoch,
    save_only_latest,
    save_every_weights,
    total_epoch,
    sampling_rate,
    batch_size,
    gpu,
    pitch_guidance,
    pretrained,
    custom_pretrained,
    g_pretrained_path,
    d_pretrained_path,
):
    """Run ``core.py train`` in a child process and report how it ended.

    Every argument is converted with ``str()`` and passed positionally, in
    signature order, after the ``train`` sub-command. ``core.py`` is resolved
    relative to the current working directory, and the call blocks until the
    child exits.

    Returns:
        A short status message for the output textbox: success when the child
        exits with code 0, otherwise a failure message with the exit code.
    """
    train_args = (
        model_name,
        rvc_version,
        save_every_epoch,
        save_only_latest,
        save_every_weights,
        total_epoch,
        sampling_rate,
        batch_size,
        gpu,
        pitch_guidance,
        pretrained,
        custom_pretrained,
        g_pretrained_path,
        d_pretrained_path,
    )
    command = [
        # Use the interpreter running this app: a bare "python" from PATH may
        # be a different installation without the project's dependencies.
        sys.executable or "python",
        "core.py",
        "train",
        *(str(arg) for arg in train_args),
    ]
    completed = subprocess.run(command)
    if completed.returncode != 0:
        return (
            f"Training of model {model_name} failed "
            f"(exit code {completed.returncode})."
        )
    return f"Model {model_name} trained successfully."
def save_drop_dataset_audio(dropbox, dataset_name):
    """Move an uploaded audio file into ``assets/datasets/<dataset_name>``.

    Args:
        dropbox: Path of the uploaded file as provided by the upload widget.
        dataset_name: Name of the dataset folder; normalised with
            ``format_title`` before use.

    Returns:
        A 2-tuple for the ``(upload widget, dataset path dropdown)`` outputs.
        The first item is always ``None`` to clear the upload widget. The
        second is the dataset folder relative to ``now_dir`` on success, or
        ``None`` when the input was rejected.
    """
    # Local import: this module does not import shutil at top level.
    import shutil

    if not dataset_name:
        gr.Info("Please enter a valid dataset name. Please try again.")
        return None, None

    file_extension = os.path.splitext(dropbox)[1][1:].lower()
    if file_extension not in sup_audioext:
        gr.Info("The file you dropped is not a valid audio file. Please try again.")
        # Two outputs are wired to this handler, so return two values here
        # too; the bare implicit None returned before did not match them.
        return None, None

    dataset_name = format_title(dataset_name)
    audio_file = format_title(os.path.basename(dropbox))
    dataset_path = os.path.join(now_dir, "assets", "datasets", dataset_name)
    os.makedirs(dataset_path, exist_ok=True)

    destination_path = os.path.join(dataset_path, audio_file)
    if os.path.exists(destination_path):
        os.remove(destination_path)
    # shutil.move falls back to copy + delete when the upload lives on another
    # filesystem (e.g. the system temp dir), where os.rename would fail.
    shutil.move(dropbox, destination_path)

    gr.Info(
        i18n(
            "The audio file has been successfully added to the dataset. Please click the preprocess button."
        )
    )
    # The dropdown lists dataset folders, so hand back the folder (relative
    # to now_dir, like get_datasets_list) rather than the audio file itself.
    return None, os.path.relpath(dataset_path, now_dir)
value="rmvpe", + interactive=True, + ) + + extract_output_info = gr.Textbox( + label=i18n("Output Information"), + value="", + max_lines=8, + interactive=False, + ) + extract_button = gr.Button(i18n("Extract Features")) + extract_button.click( + run_extract_script, + [model_name, rvc_version, f0method, hop_length, sampling_rate], + extract_output_info, + api_name="extract_features", + ) + + with gr.Accordion(i18n("Train")): + with gr.Row(): + batch_size = gr.Slider( + 1, + 50, + max_vram_gpu(0), + step=1, + label=i18n("Batch Size"), + interactive=True, + ) + save_every_epoch = gr.Slider( + 1, 100, 10, step=1, label=i18n("Save Every Epoch"), interactive=True + ) + total_epoch = gr.Slider( + 1, 1000, 500, step=1, label=i18n("Total Epoch"), interactive=True + ) + with gr.Row(): + pitch_guidance = gr.Checkbox( + label=i18n("Pitch Guidance"), value=True, interactive=True + ) + pretrained = gr.Checkbox( + label=i18n("Pretrained"), value=True, interactive=True + ) + save_only_latest = gr.Checkbox( + label=i18n("Save Only Latest"), value=False, interactive=True + ) + save_every_weights = gr.Checkbox( + label=i18n("Save Every Weights"), + value=True, + interactive=True, + ) + custom_pretrained = gr.Checkbox( + label=i18n("Custom Pretrained"), value=False, interactive=True + ) + multiple_gpu = gr.Checkbox( + label=i18n("GPU Settings"), value=False, interactive=True + ) + + with gr.Row(): + with gr.Column(visible=False) as pretrained_custom_settings: + with gr.Accordion("Pretrained Custom Settings"): + upload_pretrained = gr.File( + label=i18n("Upload Pretrained Model"), + type="filepath", + interactive=True, + ) + refresh_custom_pretaineds_button = gr.Button( + i18n("Refresh Custom Pretraineds") + ) + g_pretrained_path = gr.Dropdown( + label=i18n("Custom Pretrained G"), + choices=sorted(pretraineds_list_g), + interactive=True, + allow_custom_value=True, + ) + d_pretrained_path = gr.Dropdown( + label=i18n("Custom Pretrained D"), + choices=sorted(pretraineds_list_d), + 
interactive=True, + allow_custom_value=True, + ) + with gr.Column(visible=False) as gpu_custom_settings: + with gr.Accordion("GPU Settings"): + gpu = gr.Textbox( + label=i18n("GPU Number"), + placeholder=i18n("0 to ∞ separated by -"), + value="0", + interactive=True, + ) + gr.Textbox( + label=i18n("GPU Information"), + value=get_gpu_info(), + interactive=False, + ) + + with gr.Row(): + train_output_info = gr.Textbox( + label=i18n("Output Information"), + value="", + max_lines=8, + interactive=False, + ) + + with gr.Row(): + train_button = gr.Button(i18n("Start Training")) + train_button.click( + run_train, + [ + model_name, + rvc_version, + save_every_epoch, + save_only_latest, + save_every_weights, + total_epoch, + sampling_rate, + batch_size, + gpu, + pitch_guidance, + pretrained, + custom_pretrained, + g_pretrained_path, + d_pretrained_path, + ], + train_output_info, + api_name="start_training", + ) + + index_button = gr.Button(i18n("Generate Index")) + index_button.click( + run_index_script, + [model_name, rvc_version], + train_output_info, + api_name="generate_index", + ) + + def toggle_visible(checkbox): + return {"visible": checkbox, "__type__": "update"} + + refresh_datasets_button.click( + fn=refresh_datasets, + inputs=[], + outputs=[dataset_path], + ) + + dataset_creator.change( + fn=toggle_visible, + inputs=[dataset_creator], + outputs=[dataset_creator_settings], + ) + + upload_audio_dataset.upload( + fn=save_drop_dataset_audio, + inputs=[upload_audio_dataset, dataset_name], + outputs=[upload_audio_dataset, dataset_path], + ) + + custom_pretrained.change( + fn=toggle_visible, + inputs=[custom_pretrained], + outputs=[pretrained_custom_settings], + ) + + refresh_custom_pretaineds_button.click( + fn=refresh_custom_pretraineds, + inputs=[], + outputs=[g_pretrained_path, d_pretrained_path], + ) + + upload_pretrained.upload( + fn=save_drop_model, + inputs=[upload_pretrained], + outputs=[upload_pretrained], + ) + + multiple_gpu.change( + fn=toggle_visible, + 
inputs=[multiple_gpu], + outputs=[gpu_custom_settings], + ) diff --git a/tabs/tts/tts.py b/tabs/tts/tts.py new file mode 100644 index 0000000000000000000000000000000000000000..833c88b9057078acf375426c24c2377c4aea272c --- /dev/null +++ b/tabs/tts/tts.py @@ -0,0 +1,339 @@ +import os, sys +import gradio as gr +import regex as re +import json +import shutil +import datetime +import random + +from core import ( + run_tts_script, +) + +from assets.i18n.i18n import I18nAuto + +i18n = I18nAuto() + +now_dir = os.getcwd() +sys.path.append(now_dir) + +model_root = os.path.join(now_dir, "logs") +audio_root = os.path.join(now_dir, "assets", "audios") + +model_root_relative = os.path.relpath(model_root, now_dir) +audio_root_relative = os.path.relpath(audio_root, now_dir) + +sup_audioext = { + "wav", + "mp3", + "flac", + "ogg", + "opus", + "m4a", + "mp4", + "aac", + "alac", + "wma", + "aiff", + "webm", + "ac3", +} + +names = [ + os.path.join(root, file) + for root, _, files in os.walk(model_root_relative, topdown=False) + for file in files + if ( + file.endswith((".pth", ".onnx")) + and not (file.startswith("G_") or file.startswith("D_")) + ) +] + +indexes_list = [ + os.path.join(root, name) + for root, _, files in os.walk(model_root_relative, topdown=False) + for name in files + if name.endswith(".index") and "trained" not in name +] + +audio_paths = [ + os.path.join(root, name) + for root, _, files in os.walk(audio_root_relative, topdown=False) + for name in files + if name.endswith(tuple(sup_audioext)) + and root == audio_root_relative + and "_output" not in name +] + + +def change_choices(): + names = [ + os.path.join(root, file) + for root, _, files in os.walk(model_root_relative, topdown=False) + for file in files + if ( + file.endswith((".pth", ".onnx")) + and not (file.startswith("G_") or file.startswith("D_")) + ) + ] + + indexes_list = [ + os.path.join(root, name) + for root, _, files in os.walk(model_root_relative, topdown=False) + for name in files + if 
def _is_model_weight(filename):
    """Return True for inference weights, skipping G_/D_ training checkpoints."""
    return filename.endswith((".pth", ".onnx")) and not filename.startswith(
        ("G_", "D_")
    )


def _is_usable_index(filename):
    """Return True for ``.index`` files whose name does not contain "trained"."""
    return filename.endswith(".index") and "trained" not in filename


def change_choices():
    """Rescan the model and audio folders and build dropdown updates.

    Returns:
        A 3-tuple of Gradio update dicts, in this order: voice models,
        index files, input audio files. Each dict carries a sorted
        ``choices`` list.
    """
    model_paths = [
        os.path.join(root, file)
        for root, _, files in os.walk(model_root_relative, topdown=False)
        for file in files
        if _is_model_weight(file)
    ]

    index_paths = [
        os.path.join(root, name)
        for root, _, files in os.walk(model_root_relative, topdown=False)
        for name in files
        if _is_usable_index(name)
    ]

    audio_extensions = tuple(sup_audioext)
    # Only files directly inside the audio folder are offered, and generated
    # "*_output*" files are left out.
    input_audio_paths = [
        os.path.join(root, name)
        for root, _, files in os.walk(audio_root_relative, topdown=False)
        for name in files
        if name.endswith(audio_extensions)
        and root == audio_root_relative
        and "_output" not in name
    ]

    return (
        {"choices": sorted(model_paths), "__type__": "update"},
        {"choices": sorted(index_paths), "__type__": "update"},
        {"choices": sorted(input_audio_paths), "__type__": "update"},
    )


def get_indexes():
    """Return every usable ``.index`` file found under the model root.

    Returns:
        A list of paths; an empty list (rather than an empty string) when
        nothing is found, so the result is always a valid ``choices`` value.
    """
    return [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(model_root_relative)
        for filename in filenames
        if _is_usable_index(filename)
    ]


def match_index(model_file: str) -> str:
    """Pick the index file that best matches a voice model.

    Candidates are ``.index`` files (without "trained" in the name) located
    either in the model's own folder under the model root, or directly in the
    model root with the model name contained in the file name. Files without
    spaces in their name win over files with spaces; among those the largest
    file wins.

    Args:
        model_file: Path of the selected ``.pth`` / ``.onnx`` model. May be
            empty or ``None`` when no model is selected.

    Returns:
        The path of the best matching index file, or ``""`` if none matches.
    """
    if not model_file:
        # Without this guard an empty name is "contained" in every file name
        # and an arbitrary index would be returned.
        return ""

    # Strip only a *trailing* extension. The alternation is grouped so that
    # "$" applies to both ".pth" and ".onnx".
    model_file_name = re.sub(r"\.(?:pth|onnx)$", "", os.path.basename(model_file))
    if not model_file_name:
        return ""

    # Checkpoints are named "<model>_e<epoch>_s<step>"; drop that suffix to
    # recover the model's folder name.
    if re.match(r".+_e\d+_s\d+$", model_file_name):
        base_model_name = model_file_name.rsplit("_", 2)[0]
    else:
        base_model_name = model_file_name

    sid_directory = os.path.join(model_root_relative, base_model_name)
    directories_to_search = [
        directory
        for directory in (sid_directory, model_root_relative)
        if os.path.isdir(directory)
    ]

    wanted_names = {model_file_name.lower(), base_model_name.lower()}
    candidates = []

    for directory in directories_to_search:
        # Anything inside the model's own folder is automatically a match.
        folder_match = directory == sid_directory
        for filename in os.listdir(directory):
            if not _is_usable_index(filename):
                continue

            lowered = filename.lower()
            if not (folder_match or any(name in lowered for name in wanted_names)):
                continue

            index_path = os.path.join(directory, filename)
            # Checked against the file system instead of the list captured at
            # import time, so indexes created later are found as well.
            if not os.path.isfile(index_path):
                continue

            candidates.append(
                (" " not in filename, os.path.getsize(index_path), index_path)
            )

    if not candidates:
        return ""

    # Prefer names without spaces, then the largest file. max() keeps the
    # first of several equal candidates.
    return max(candidates, key=lambda item: (item[0], item[1]))[2]


def save_to_wav(record_button):
    """Move a recorded audio file into the audio folder under a timestamp name.

    Args:
        record_button: Path of the recording, or ``None`` if nothing was
            recorded.

    Returns:
        The new path of the recording, or ``None`` when there is no recording.
    """
    if record_button is None:
        return None

    new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".wav"
    os.makedirs(audio_root_relative, exist_ok=True)
    target_path = os.path.join(audio_root_relative, new_name)

    shutil.move(record_button, target_path)
    return target_path


def save_to_wav2(upload_audio):
    """Copy an uploaded audio file into the audio folder, replacing any copy.

    Args:
        upload_audio: Path of the uploaded file, or ``None``/empty when the
            upload was cleared.

    Returns:
        The path of the file inside the audio folder, or ``None`` when there
        is nothing to copy.
    """
    if not upload_audio:
        return None

    os.makedirs(audio_root_relative, exist_ok=True)
    target_path = os.path.join(audio_root_relative, os.path.basename(upload_audio))

    if os.path.exists(target_path):
        # The upload may already be the file in the audio folder; removing it
        # first would destroy the only copy.
        if os.path.samefile(upload_audio, target_path):
            return target_path
        os.remove(target_path)

    shutil.copy(upload_audio, target_path)
    return target_path


def delete_outputs():
    """Delete generated ``*_output*`` audio files below the audio folder."""
    audio_extensions = tuple(sup_audioext)
    for root, _, files in os.walk(audio_root_relative, topdown=False):
        for name in files:
            if name.endswith(audio_extensions) and "_output" in name:
                os.remove(os.path.join(root, name))
    gr.Info("Outputs cleared!")


def tts_tab():
    """Build the TTS tab: model/index selection, voice, text and settings.

    Creates the Gradio components and wires the refresh, unload and convert
    buttons. Nothing is returned.

    NOTE(review): the container nesting below was reconstructed from the
    statement order of the original; confirm the visual layout against the
    running UI.
    """
    default_weight = random.choice(names) if names else ""
    best_default_index_path = match_index(default_weight)

    with gr.Row():
        with gr.Row():
            model_file = gr.Dropdown(
                label=i18n("Voice Model"),
                choices=sorted(names, key=os.path.getsize),
                interactive=True,
                value=default_weight,
                allow_custom_value=True,
            )
            index_file = gr.Dropdown(
                label=i18n("Index File"),
                choices=get_indexes(),
                value=best_default_index_path,
                interactive=True,
                allow_custom_value=True,
            )
        with gr.Column():
            refresh_button = gr.Button(i18n("Refresh"))
            unload_button = gr.Button(i18n("Unload Voice"))

    json_path = os.path.join("rvc", "lib", "tools", "tts_voices.json")
    # Explicit encoding so the voice list parses the same on every platform.
    with open(json_path, "r", encoding="utf-8") as file:
        tts_voices_data = json.load(file)

    short_names = [voice.get("ShortName", "") for voice in tts_voices_data]

    tts_voice = gr.Dropdown(
        label=i18n("TTS Voices"),
        choices=short_names,
        interactive=True,
        value=None,
    )

    tts_text = gr.Textbox(
        label=i18n("Text to Synthesize"),
        placeholder=i18n("Enter text to synthesize"),
        lines=3,
    )

    with gr.Accordion(i18n("Advanced Settings"), open=False):
        with gr.Column():
            output_tts_path = gr.Textbox(
                label=i18n("Output Path for TTS Audio"),
                placeholder=i18n("Enter output path"),
                value=os.path.join(now_dir, "assets", "audios", "tts_output.wav"),
                interactive=True,
            )

            output_rvc_path = gr.Textbox(
                label=i18n("Output Path for RVC Audio"),
                placeholder=i18n("Enter output path"),
                value=os.path.join(now_dir, "assets", "audios", "tts_rvc_output.wav"),
                interactive=True,
            )

            pitch = gr.Slider(
                minimum=-24,
                maximum=24,
                step=1,
                label=i18n("Pitch"),
                value=0,
                interactive=True,
            )
            filter_radius = gr.Slider(
                minimum=0,
                maximum=7,
                label=i18n(
                    "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
                ),
                value=3,
                step=1,
                interactive=True,
            )
            index_rate = gr.Slider(
                minimum=0,
                maximum=1,
                label=i18n("Search Feature Ratio"),
                value=0.75,
                interactive=True,
            )
            hop_length = gr.Slider(
                minimum=1,
                maximum=512,
                step=1,
                label=i18n("Hop Length"),
                value=128,
                interactive=True,
            )
        with gr.Column():
            f0method = gr.Radio(
                label=i18n("Pitch extraction algorithm"),
                choices=[
                    "pm",
                    "harvest",
                    "dio",
                    "crepe",
                    "crepe-tiny",
                    "rmvpe",
                ],
                value="rmvpe",
                interactive=True,
            )

    convert_button1 = gr.Button(i18n("Convert"))

    with gr.Row():  # Defines output info + output audio download after conversion
        vc_output1 = gr.Textbox(label=i18n("Output Information"))
        vc_output2 = gr.Audio(label=i18n("Export Audio"))

    def refresh_model_choices():
        """Return only the model and index updates from change_choices().

        This tab has no input-audio dropdown, so the third update is dropped
        to keep the number of returned values equal to the wired outputs.
        """
        model_update, index_update, _ = change_choices()
        return model_update, index_update

    unload_button.click(
        fn=lambda: ({"value": "", "__type__": "update"}),
        inputs=[],
        outputs=[model_file],
    )

    # Selecting a model pre-selects its best matching index file.
    model_file.select(
        fn=match_index,
        inputs=[model_file],
        outputs=[index_file],
    )

    refresh_button.click(
        fn=refresh_model_choices,
        inputs=[],
        outputs=[model_file, index_file],
    )
    convert_button1.click(
        fn=run_tts_script,
        inputs=[
            tts_text,
            tts_voice,
            pitch,
            filter_radius,
            index_rate,
            hop_length,
            f0method,
            output_tts_path,
            output_rvc_path,
            model_file,
            index_file,
        ],
        outputs=[vc_output1, vc_output2],
    )