{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "1d072ab1", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:15:32.946837Z", "iopub.status.busy": "2024-12-30T02:15:32.946198Z", "iopub.status.idle": "2024-12-30T02:16:48.720761Z", "shell.execute_reply": "2024-12-30T02:16:48.719905Z" }, "papermill": { "duration": 75.781451, "end_time": "2024-12-30T02:16:48.722867", "exception": false, "start_time": "2024-12-30T02:15:32.941416", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\r\n", "cudf 24.10.1 requires cubinlinker, which is not installed.\r\n", "cudf 24.10.1 requires cupy-cuda11x>=12.0.0, which is not installed.\r\n", "cudf 24.10.1 requires libcudf==24.10.*, which is not installed.\r\n", "cudf 24.10.1 requires ptxcompiler, which is not installed.\r\n", "cuml 24.10.0 requires cupy-cuda11x>=12.0.0, which is not installed.\r\n", "cuml 24.10.0 requires cuvs==24.10.*, which is not installed.\r\n", "cuml 24.10.0 requires nvidia-cublas, which is not installed.\r\n", "cuml 24.10.0 requires nvidia-cufft, which is not installed.\r\n", "cuml 24.10.0 requires nvidia-curand, which is not installed.\r\n", "cuml 24.10.0 requires nvidia-cusolver, which is not installed.\r\n", "cuml 24.10.0 requires nvidia-cusparse, which is not installed.\r\n", "dask-cudf 24.10.1 requires cupy-cuda11x>=12.0.0, which is not installed.\r\n", "pylibcudf 24.10.1 requires libcudf==24.10.*, which is not installed.\r\n", "cudf 24.10.1 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.6.2.post1 which is incompatible.\r\n", "cudf 24.10.1 requires pandas<2.2.3dev0,>=2.0, but you have pandas 2.2.3 which is incompatible.\r\n", "dask-cudf 24.10.1 requires pandas<2.2.3dev0,>=2.0, but you have pandas 2.2.3 which is 
incompatible.\r\n", "distributed 2024.9.0 requires dask==2024.9.0, but you have dask 2024.11.2 which is incompatible.\r\n", "google-cloud-bigquery 2.34.4 requires packaging<22.0dev,>=14.3, but you have packaging 24.2 which is incompatible.\r\n", "jupyterlab 4.3.1 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.\r\n", "jupyterlab-lsp 5.1.0 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.\r\n", "libpysal 4.9.2 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\r\n", "mlxtend 0.23.3 requires scikit-learn>=1.3.1, but you have scikit-learn 1.2.2 which is incompatible.\r\n", "plotnine 0.14.3 requires matplotlib>=3.8.0, but you have matplotlib 3.7.5 which is incompatible.\r\n", "pylibcudf 24.10.1 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.6.2.post1 which is incompatible.\r\n", "rapids-dask-dependency 24.10.0a0 requires dask==2024.9.0, but you have dask 2024.11.2 which is incompatible.\r\n", "rapids-dask-dependency 24.10.0a0 requires dask-expr==1.1.14, but you have dask-expr 1.1.19 which is incompatible.\r\n", "tensorflow 2.16.1 requires tensorboard<2.17,>=2.16, but you have tensorboard 2.18.0 which is incompatible.\r\n", "ydata-profiling 4.12.0 requires scipy<1.14,>=1.4.1, but you have scipy 1.14.1 which is incompatible.\u001b[0m\u001b[31m\r\n", "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\r\n", "cudf 24.10.1 requires cubinlinker, which is not installed.\r\n", "cudf 24.10.1 requires cupy-cuda11x>=12.0.0, which is not installed.\r\n", "cudf 24.10.1 requires libcudf==24.10.*, which is not installed.\r\n", "cudf 24.10.1 requires ptxcompiler, which is not installed.\r\n", "cuml 24.10.0 requires cupy-cuda11x>=12.0.0, which is not installed.\r\n", "cuml 24.10.0 requires cuvs==24.10.*, which is not installed.\r\n", "cuml 24.10.0 requires nvidia-cublas, which is not installed.\r\n", "cuml 24.10.0 requires nvidia-cufft, which is not installed.\r\n", "cuml 24.10.0 requires nvidia-curand, which is not installed.\r\n", "cuml 24.10.0 requires nvidia-cusolver, which is not installed.\r\n", "cuml 24.10.0 requires nvidia-cusparse, which is not installed.\r\n", "dask-cudf 24.10.1 requires cupy-cuda11x>=12.0.0, which is not installed.\r\n", "pylibcudf 24.10.1 requires libcudf==24.10.*, which is not installed.\r\n", "conda 24.5.0 requires packaging>=23.0, but you have packaging 21.0 which is incompatible.\r\n", "coqui-tts 0.25.1 requires packaging>=23.1, but you have packaging 21.0 which is incompatible.\r\n", "cudf 24.10.1 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.6.2.post1 which is incompatible.\r\n", "cudf 24.10.1 requires pandas<2.2.3dev0,>=2.0, but you have pandas 2.2.3 which is incompatible.\r\n", "dask-cudf 24.10.1 requires pandas<2.2.3dev0,>=2.0, but you have pandas 2.2.3 which is incompatible.\r\n", "distributed 2024.9.0 requires dask==2024.9.0, but you have dask 2024.11.2 which is incompatible.\r\n", "jupyterlab 4.3.1 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.\r\n", "jupyterlab-lsp 5.1.0 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.\r\n", "jupyterlab-server 2.27.2 requires packaging>=21.3, but you have packaging 21.0 which is incompatible.\r\n", "libpysal 4.9.2 requires packaging>=22, but you 
have packaging 21.0 which is incompatible.\r\n", "libpysal 4.9.2 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\r\n", "mlxtend 0.23.3 requires scikit-learn>=1.3.1, but you have scikit-learn 1.2.2 which is incompatible.\r\n", "plotnine 0.14.3 requires matplotlib>=3.8.0, but you have matplotlib 3.7.5 which is incompatible.\r\n", "pylibcudf 24.10.1 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.6.2.post1 which is incompatible.\r\n", "pytesseract 0.3.13 requires packaging>=21.3, but you have packaging 21.0 which is incompatible.\r\n", "pytoolconfig 1.3.1 requires packaging>=23.2, but you have packaging 21.0 which is incompatible.\r\n", "rapids-dask-dependency 24.10.0a0 requires dask==2024.9.0, but you have dask 2024.11.2 which is incompatible.\r\n", "rapids-dask-dependency 24.10.0a0 requires dask-expr==1.1.14, but you have dask-expr 1.1.19 which is incompatible.\r\n", "scikit-optimize 0.10.2 requires packaging>=21.3, but you have packaging 21.0 which is incompatible.\r\n", "statsmodels 0.14.2 requires packaging>=21.3, but you have packaging 21.0 which is incompatible.\r\n", "tensorflow 2.16.1 requires tensorboard<2.17,>=2.16, but you have tensorboard 2.18.0 which is incompatible.\r\n", "xarray 2024.11.0 requires packaging>=23.2, but you have packaging 21.0 which is incompatible.\r\n", "ydata-profiling 4.12.0 requires scipy<1.14,>=1.4.1, but you have scipy 1.14.1 which is incompatible.\u001b[0m\u001b[31m\r\n", "\u001b[0m" ] } ], "source": [ "# Install the TTS stack. The version is pinned to the one this notebook was last run with,\n", "# so a fresh kernel resolves the same release instead of whatever is newest.\n", "%pip install -q coqui-tts==0.25.1\n", "# NOTE(review): coqui-tts 0.25.1 declares packaging>=23.1 (pip reports the conflict in this\n", "# cell's recorded output), yet packaging is downgraded here - confirm this is still required.\n", "%pip install -q packaging==21.0" ] }, { "cell_type": "code", "execution_count": 2, "id": "816c25b2", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:16:48.731021Z", "iopub.status.busy": "2024-12-30T02:16:48.730700Z", "iopub.status.idle": "2024-12-30T02:17:25.960420Z", "shell.execute_reply": "2024-12-30T02:17:25.959749Z" }, "papermill": { "duration": 37.235977, "end_time": "2024-12-30T02:17:25.962504", "exception": 
false, "start_time": "2024-12-30T02:16:48.726527", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import math\n", "import os\n", "import random\n", "\n", "from trainer import Trainer, TrainerArgs\n", "\n", "from TTS.config.shared_configs import BaseDatasetConfig\n", "from TTS.tts.datasets import load_tts_samples\n", "from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig\n", "from TTS.tts.models.xtts import XttsAudioConfig\n", "from TTS.utils.manage import ModelManager" ] }, { "cell_type": "code", "execution_count": 3, "id": "3edcc56a", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:17:25.971167Z", "iopub.status.busy": "2024-12-30T02:17:25.970335Z", "iopub.status.idle": "2024-12-30T02:17:25.975180Z", "shell.execute_reply": "2024-12-30T02:17:25.974583Z" }, "papermill": { "duration": 0.010486, "end_time": "2024-12-30T02:17:25.976717", "exception": false, "start_time": "2024-12-30T02:17:25.966231", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Logging parameters\n", "RUN_NAME = \"viXTTS-FT-Code\"\n", "PROJECT_NAME = \"XTTS_trainer\"\n", "DASHBOARD_LOGGER = \"tensorboard\"\n", "LOGGER_URI = None\n", "\n", "# Set here the path that the checkpoints will be saved. 
Default: ./run/training/\n", "OUT_PATH = \"/kaggle/working/finetuned\"\n", "\n", "# Directory the original viXTTS / DVAE files are downloaded into\n", "CHECKPOINTS_OUT_PATH = \"/kaggle/temp/viXTTS_original_model_files/\"\n", "os.makedirs(CHECKPOINTS_OUT_PATH, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": 4, "id": "f68b6395", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:17:25.985680Z", "iopub.status.busy": "2024-12-30T02:17:25.985063Z", "iopub.status.idle": "2024-12-30T02:17:31.354653Z", "shell.execute_reply": "2024-12-30T02:17:31.353989Z" }, "papermill": { "duration": 5.375451, "end_time": "2024-12-30T02:17:31.356638", "exception": false, "start_time": "2024-12-30T02:17:25.981187", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " > Downloading DVAE files!\n" ] } ], "source": [ "# DVAE files\n", "# NOTE(review): the host is spelled 'huggingface.co.' (trailing dot); the recorded run completed with it - confirm it is intentional.\n", "DVAE_CHECKPOINT_LINK = \"https://huggingface.co./coqui/XTTS-v2/resolve/main/dvae.pth\"\n", "MEL_NORM_LINK = \"https://huggingface.co./coqui/XTTS-v2/resolve/main/mel_stats.pth\"\n", "\n", "# Local paths the two files are stored under (file name taken from the link)\n", "DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(DVAE_CHECKPOINT_LINK))\n", "MEL_NORM_FILE = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(MEL_NORM_LINK))\n", "\n", "# Download both files unless they are already on disk.\n", "# _download_model_files is underscore-prefixed (private by convention), so it may change between coqui-tts releases.\n", "if not os.path.isfile(DVAE_CHECKPOINT) or not os.path.isfile(MEL_NORM_FILE):\n", "    print(\" > Downloading DVAE files!\")\n", "    ModelManager._download_model_files([MEL_NORM_LINK, DVAE_CHECKPOINT_LINK], CHECKPOINTS_OUT_PATH, progress_bar=False)" ] }, { "cell_type": "code", "execution_count": 5, "id": "5677fea0", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:17:31.365250Z", "iopub.status.busy": "2024-12-30T02:17:31.364953Z", "iopub.status.idle": "2024-12-30T02:18:17.392273Z", "shell.execute_reply": "2024-12-30T02:18:17.391535Z" }, "papermill": { "duration": 46.0338, "end_time": 
"2024-12-30T02:18:17.394173", "exception": false, "start_time": "2024-12-30T02:17:31.360373", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " > Downloading viXTTS files!\n" ] } ], "source": [ "# Download viXTTS checkpoint if needed\n", "TOKENIZER_FILE_LINK = \"https://huggingface.co./capleaf/viXTTS/resolve/main/vocab.json\"\n", "XTTS_CHECKPOINT_LINK = \"https://huggingface.co./capleaf/viXTTS/resolve/main/model.pth\"\n", "# Alternative links to the coqui/XTTS-v2 files:\n", "# TOKENIZER_FILE_LINK = \"https://huggingface.co./coqui/XTTS-v2/resolve/main/vocab.json\"\n", "# XTTS_CHECKPOINT_LINK = \"https://huggingface.co./coqui/XTTS-v2/resolve/main/model.pth\"\n", "\n", "# XTTS transfer learning parameters: provide the paths of the XTTS checkpoint and tokenizer that you want to fine-tune.\n", "TOKENIZER_FILE = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(TOKENIZER_FILE_LINK)) # vocab.json file\n", "XTTS_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(XTTS_CHECKPOINT_LINK)) # model.pth file\n", "\n", "# download viXTTS files if needed\n", "if not os.path.isfile(TOKENIZER_FILE) or not os.path.isfile(XTTS_CHECKPOINT):\n", " print(\" > Downloading viXTTS files!\")\n", " ModelManager._download_model_files(\n", " [TOKENIZER_FILE_LINK, XTTS_CHECKPOINT_LINK], CHECKPOINTS_OUT_PATH, progress_bar=False\n", " )" ] }, { "cell_type": "code", "execution_count": 6, "id": "3f21c409", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:18:17.402347Z", "iopub.status.busy": "2024-12-30T02:18:17.402089Z", "iopub.status.idle": "2024-12-30T02:18:17.406558Z", "shell.execute_reply": "2024-12-30T02:18:17.405788Z" }, "papermill": { "duration": 0.010295, "end_time": "2024-12-30T02:18:17.408196", "exception": false, "start_time": "2024-12-30T02:18:17.397901", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# init args and config\n", "model_args = GPTArgs(\n", " max_conditioning_length=132300, # 6 secs\n", " 
min_conditioning_length=66150, # 3 secs\n", " debug_loading_failures=False,\n", " max_wav_length=255995, # ~11.6 seconds\n", " max_text_length=200,\n", " mel_norm_file=MEL_NORM_FILE,\n", " dvae_checkpoint=DVAE_CHECKPOINT,\n", " xtts_checkpoint=XTTS_CHECKPOINT, # checkpoint path of the model that you want to fine-tune\n", " tokenizer_file=TOKENIZER_FILE,\n", " gpt_num_audio_tokens=1026,\n", " gpt_start_audio_token=1024,\n", " gpt_stop_audio_token=1025,\n", " gpt_use_masking_gt_prompt_approach=True,\n", " gpt_use_perceiver_resampler=True,\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "id": "55363300", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:18:17.415760Z", "iopub.status.busy": "2024-12-30T02:18:17.415478Z", "iopub.status.idle": "2024-12-30T02:18:17.419424Z", "shell.execute_reply": "2024-12-30T02:18:17.418628Z" }, "papermill": { "duration": 0.009552, "end_time": "2024-12-30T02:18:17.421024", "exception": false, "start_time": "2024-12-30T02:18:17.411472", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Training Parameters\n", "OPTIMIZER_WD_ONLY_ON_WEIGHTS = True # for multi-gpu training please make it False\n", "START_WITH_EVAL = False # if True it will start with evaluation\n", "BATCH_SIZE = 4 # set here the batch size\n", "GRAD_ACUMM_STEPS = math.ceil(252 / BATCH_SIZE) # grad accumulation steps, derived from BATCH_SIZE\n", "# Note: we recommend that BATCH_SIZE * GRAD_ACUMM_STEPS be at least 252 for more efficient training. You can increase/decrease BATCH_SIZE; GRAD_ACUMM_STEPS is computed from it so that the product stays at or above 252." 
] }, { "cell_type": "code", "execution_count": 8, "id": "d952daae", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:18:17.428759Z", "iopub.status.busy": "2024-12-30T02:18:17.428482Z", "iopub.status.idle": "2024-12-30T02:18:17.551553Z", "shell.execute_reply": "2024-12-30T02:18:17.550603Z" }, "papermill": { "duration": 0.128736, "end_time": "2024-12-30T02:18:17.553262", "exception": false, "start_time": "2024-12-30T02:18:17.424526", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " > Downloading XTTS v2.0 files!\n" ] } ], "source": [ "# Training sentences generations\n", "SPEAKER_REFERENCE_LINK = \"https://huggingface.co./capleaf/viXTTS/resolve/main/vi_sample.wav\"\n", "# Speaker reference to be used in training test sentences (a one-element list of local paths)\n", "SPEAKER_REFERENCE = [\n", "    os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(SPEAKER_REFERENCE_LINK))\n", "]\n", "\n", "# Download the reference clip only when it is not on disk yet\n", "if not os.path.isfile(SPEAKER_REFERENCE[0]):\n", "    print(\" > Downloading speaker reference file!\")\n", "    ModelManager._download_model_files(\n", "        [SPEAKER_REFERENCE_LINK], CHECKPOINTS_OUT_PATH, progress_bar=False\n", "    )\n", "\n", "# Language code used for the test sentences\n", "LANGUAGE = 'en'" ] }, { "cell_type": "code", "execution_count": 9, "id": "96c4f72f", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:18:17.561314Z", "iopub.status.busy": "2024-12-30T02:18:17.561033Z", "iopub.status.idle": "2024-12-30T02:18:18.597609Z", "shell.execute_reply": "2024-12-30T02:18:18.596727Z" }, "papermill": { "duration": 1.042764, "end_time": "2024-12-30T02:18:18.599536", "exception": false, "start_time": "2024-12-30T02:18:17.556772", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/pty.py:89: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " pid, fd = os.forkpty()\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "dir: cannot access 'viXTTS_original_model_files': No such file or directory\r\n" ] } ], "source": [ "# List the downloaded model files (absolute path: the files are not in the working directory)\n", "!ls -lh {CHECKPOINTS_OUT_PATH}" ] }, { "cell_type": "code", "execution_count": 10, "id": "9d3dbf6c", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:18:18.608550Z", "iopub.status.busy": "2024-12-30T02:18:18.607968Z", "iopub.status.idle": "2024-12-30T02:18:18.615187Z", "shell.execute_reply": "2024-12-30T02:18:18.614364Z" }, "papermill": { "duration": 0.013514, "end_time": "2024-12-30T02:18:18.616879", "exception": false, "start_time": "2024-12-30T02:18:18.603365", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# define audio config\n", "audio_config = XttsAudioConfig(sample_rate=22050, dvae_sample_rate=22050, output_sample_rate=24000)\n", "# training parameters config\n", "config = GPTTrainerConfig(\n", " # NOTE(review): the recorded run logged 'Mixed precision: False' / 'Precision: float32', so this\n", " # value appears to have had no effect - confirm whether mixed_precision=True was intended.\n", " precision=\"fp16\",\n", " output_path=OUT_PATH,\n", " model_args=model_args,\n", " run_name=RUN_NAME,\n", " project_name=PROJECT_NAME,\n", " run_description=\"\"\"\n", " GPT XTTS training\n", " \"\"\",\n", " dashboard_logger=DASHBOARD_LOGGER,\n", " logger_uri=LOGGER_URI,\n", " audio=audio_config,\n", " epochs=1,\n", " batch_size=BATCH_SIZE,\n", " batch_group_size=48,\n", " eval_batch_size=BATCH_SIZE,\n", " # 8 workers on the 4-CPU host of the recorded run triggered PyTorch's excessive-worker warning;\n", " # never request more workers than there are CPUs.\n", " num_loader_workers=min(8, os.cpu_count() or 1),\n", " eval_split_max_size=256,\n", " print_step=50,\n", " plot_step=100,\n", " log_model_step=1,\n", " save_step=1000,\n", " save_n_checkpoints=1,\n", " save_checkpoints=True,\n", " # target_loss=\"loss\",\n", " print_eval=False,\n", " # Optimizer values like tortoise, pytorch implementation with modifications to not apply WD to non-weight parameters.\n", " optimizer=\"AdamW\",\n", " optimizer_wd_only_on_weights=OPTIMIZER_WD_ONLY_ON_WEIGHTS,\n", " optimizer_params={\"betas\": [0.9, 0.96], 
\"eps\": 1e-8, \"weight_decay\": 1e-2},\n", " lr=1e-5, # learning rate\n", " lr_scheduler=\"MultiStepLR\",\n", " # the milestones were adjusted accordingly for the new step scheme\n", " lr_scheduler_params={\"milestones\": [50000 * 18, 150000 * 18, 300000 * 18], \"gamma\": 0.5, \"last_epoch\": -1},\n", " test_sentences=[\n", " # {\n", " # \"text\": \"My favorite programming languages are C++ and Java.\",\n", " # \"speaker_wav\": SPEAKER_REFERENCE,\n", " # \"language\": LANGUAGE,\n", " # },\n", " # {\n", " # \"text\": \"I am learning HTML, CSS and JavaScript.\",\n", " # \"speaker_wav\": SPEAKER_REFERENCE,\n", " # \"language\": LANGUAGE,\n", " # },\n", " ],\n", ")" ] }, { "cell_type": "code", "execution_count": 11, "id": "792888d0", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:18:18.625187Z", "iopub.status.busy": "2024-12-30T02:18:18.624522Z", "iopub.status.idle": "2024-12-30T02:18:18.628380Z", "shell.execute_reply": "2024-12-30T02:18:18.627756Z" }, "papermill": { "duration": 0.009566, "end_time": "2024-12-30T02:18:18.629894", "exception": false, "start_time": "2024-12-30T02:18:18.620328", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Define here the dataset that you want to fine-tune on.\n", "cs50_config_dataset = BaseDatasetConfig(\n", " formatter=\"ljspeech\",\n", " dataset_name=\"cs-50\",\n", " path=\"/kaggle/input/cs50-dataset/\",\n", " meta_file_train=\"/kaggle/input/cs50-dataset/metadata.csv\",\n", " language=\"en\",\n", ")" ] }, { "cell_type": "code", "execution_count": 12, "id": "2d744fee", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:18:18.637857Z", "iopub.status.busy": "2024-12-30T02:18:18.637569Z", "iopub.status.idle": "2024-12-30T02:18:18.988602Z", "shell.execute_reply": "2024-12-30T02:18:18.987454Z" }, "papermill": { "duration": 0.357093, "end_time": "2024-12-30T02:18:18.990479", "exception": false, "start_time": "2024-12-30T02:18:18.633386", "status": "completed" }, "tags": [] }, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. Train 24007\n", "No. Eval 242\n" ] } ], "source": [ "# Load the samples and split off the evaluation set\n", "DATASETS_CONFIG_LIST = [cs50_config_dataset]\n", "train_samples, eval_samples = load_tts_samples(\n", "    DATASETS_CONFIG_LIST,\n", "    eval_split=True,\n", "    eval_split_max_size=config.eval_split_max_size,\n", "    eval_split_size=config.eval_split_size,\n", ")\n", "\n", "# Shuffle the training samples in place, seeding the global RNG first for a repeatable shuffle\n", "SHUFFLE_SEED = 42\n", "random.seed(SHUFFLE_SEED)\n", "random.shuffle(train_samples)\n", "\n", "print(\"No. Train\", len(train_samples))\n", "print(\"No. Eval\", len(eval_samples))" ] }, { "cell_type": "code", "execution_count": 13, "id": "bedf8c92", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:18:18.999608Z", "iopub.status.busy": "2024-12-30T02:18:18.999317Z", "iopub.status.idle": "2024-12-30T02:18:30.571580Z", "shell.execute_reply": "2024-12-30T02:18:30.570882Z" }, "papermill": { "duration": 11.578803, "end_time": "2024-12-30T02:18:30.573605", "exception": false, "start_time": "2024-12-30T02:18:18.994802", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# init the model from config\n", "model = GPTTrainer.init_from_config(config)" ] }, { "cell_type": "code", "execution_count": 14, "id": "ca61f3c9", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T02:18:30.582985Z", "iopub.status.busy": "2024-12-30T02:18:30.582417Z", "iopub.status.idle": "2024-12-30T03:21:16.667628Z", "shell.execute_reply": "2024-12-30T03:21:16.666783Z" }, "papermill": { "duration": 3766.093873, "end_time": "2024-12-30T03:21:16.671796", "exception": false, "start_time": "2024-12-30T02:18:30.577923", "status": "completed" }, "scrolled": true, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "fatal: not a git repository (or any parent up to mount point /kaggle)\n", "Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\n", "fatal: not a git 
repository (or any parent up to mount point /kaggle)\n", "Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\n", " > Training Environment:\n", " | > Backend: Torch\n", " | > Mixed precision: False\n", " | > Precision: float32\n", " | > Current device: 0\n", " | > Num. of GPUs: 1\n", " | > Num. of CPUs: 4\n", " | > Num. of Torch Threads: 1\n", " | > Torch seed: 1\n", " | > Torch CUDNN: True\n", " | > Torch CUDNN deterministic: False\n", " | > Torch CUDNN benchmark: False\n", " | > Torch TF32 MatMul: False\n", " > Start Tensorboard: tensorboard --logdir=/kaggle/working/finetuned/viXTTS-FT-Code-December-30-2024_02+18AM-0000000\n", "\n", " > Model has 520210334 parameters\n", "\n", "\u001b[4m\u001b[1m > EPOCH: 0/1\u001b[0m\n", " --> /kaggle/working/finetuned/viXTTS-FT-Code-December-30-2024_02+18AM-0000000\n", "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:557: UserWarning: This DataLoader will create 8 worker processes in total. Our suggested max number of worker in current system is 4, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n", " warnings.warn(_create_warning_msg(\n", "\n", "\u001b[1m > TRAINING (2024-12-30 02:18:31) \u001b[0m\n", "/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:18:37 -- STEP: 0/6002 -- GLOBAL_STEP: 0\u001b[0m\n", " | > loss_text_ce: 0.06515223532915115 (0.06515223532915115)\n", " | > loss_mel_ce: 4.35933780670166 (4.35933780670166)\n", " | > loss: 0.07023000717163086 (0.07023000717163086)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.8685 (0.8684570789337158)\n", " | > loader_time: 4.6888 (4.68878173828125)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:19:06 -- STEP: 50/6002 -- GLOBAL_STEP: 50\u001b[0m\n", " | > loss_text_ce: 0.0714154914021492 (0.056773004457354545)\n", " | > loss_mel_ce: 4.538961887359619 (4.309474244117736)\n", " | > loss: 0.07318059355020523 (0.06930551603436469)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2895 (0.3494372653961182)\n", " | > loader_time: 0.0084 (0.012985472679138183)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:19:35 -- STEP: 100/6002 -- GLOBAL_STEP: 100\u001b[0m\n", " | > loss_text_ce: 0.05715508013963699 (0.056441128402948376)\n", " | > loss_mel_ce: 3.86618709564209 (4.239097881317137)\n", " | > loss: 0.062275275588035583 (0.0681831630691886)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.332 (0.35537487030029297)\n", " | > loader_time: 0.0121 (0.012576158046722413)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:20:06 -- STEP: 150/6002 -- GLOBAL_STEP: 150\u001b[0m\n", " | > loss_text_ce: 0.04979538172483444 (0.056126846099893254)\n", " | > loss_mel_ce: 4.362597465515137 (4.193226407368979)\n", " | > loss: 0.07003798335790634 (0.0674500556786855)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5006 (0.3627257076899211)\n", " | > loader_time: 0.0159 (0.012435827255249022)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:20:37 -- STEP: 200/6002 -- GLOBAL_STEP: 200\u001b[0m\n", " | > loss_text_ce: 0.0546397902071476 (0.05623059207573533)\n", " | > loss_mel_ce: 
4.149126052856445 (4.17115321278572)\n", " | > loss: 0.0667264461517334 (0.06710133409127594)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.44 (0.364131268262863)\n", " | > loader_time: 0.0089 (0.012122513055801391)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:21:05 -- STEP: 250/6002 -- GLOBAL_STEP: 250\u001b[0m\n", " | > loss_text_ce: 0.0533536821603775 (0.056623691856861114)\n", " | > loss_mel_ce: 3.9586517810821533 (4.151856688499453)\n", " | > loss: 0.06368263065814972 (0.06680127978324893)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4338 (0.35923325157165514)\n", " | > loader_time: 0.0152 (0.011978547096252441)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:21:35 -- STEP: 300/6002 -- GLOBAL_STEP: 300\u001b[0m\n", " | > loss_text_ce: 0.05818209424614906 (0.0565457459166646)\n", " | > loss_mel_ce: 4.232809543609619 (4.133488818009696)\n", " | > loss: 0.06811098754405975 (0.06650848906487232)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3457 (0.3592371201515197)\n", " | > loader_time: 0.0109 (0.011683204968770346)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:22:05 -- STEP: 350/6002 -- GLOBAL_STEP: 350\u001b[0m\n", " | > loss_text_ce: 0.045653726905584335 (0.056255612458501546)\n", " | > loss_mel_ce: 3.756685972213745 (4.10903904438019)\n", " | > loss: 0.060354601591825485 (0.06611579206373012)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3865 (0.3607252025604247)\n", " | > loader_time: 0.009 (0.01155953407287597)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:22:36 -- STEP: 400/6002 -- GLOBAL_STEP: 400\u001b[0m\n", " | > loss_text_ce: 0.05484054610133171 (0.05597156930714846)\n", " | > loss_mel_ce: 3.901956558227539 (4.095725513100628)\n", " | > loss: 0.06280630826950073 (0.06589995758607986)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4763 (0.36137989819049826)\n", " | > loader_time: 0.011 (0.011484247446060176)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:23:06 -- STEP: 450/6002 -- GLOBAL_STEP: 
450\u001b[0m\n", " | > loss_text_ce: 0.04944393038749695 (0.055591924157407546)\n", " | > loss_mel_ce: 3.6606862545013428 (4.079583916134309)\n", " | > loss: 0.05889095738530159 (0.06563771559960316)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2936 (0.3618443160586886)\n", " | > loader_time: 0.0087 (0.01133997970157199)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:23:36 -- STEP: 500/6002 -- GLOBAL_STEP: 500\u001b[0m\n", " | > loss_text_ce: 0.054693520069122314 (0.0554021243751049)\n", " | > loss_mel_ce: 3.959427833557129 (4.064631403446203)\n", " | > loss: 0.06371621787548065 (0.06539736142754554)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3073 (0.3619099946022033)\n", " | > loader_time: 0.0096 (0.011189272403717037)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:24:07 -- STEP: 550/6002 -- GLOBAL_STEP: 550\u001b[0m\n", " | > loss_text_ce: 0.05067973583936691 (0.055131359506737095)\n", " | > loss_mel_ce: 3.4955108165740967 (4.048640600984751)\n", " | > loss: 0.056288741528987885 (0.06513924134048547)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4068 (0.3631135008551857)\n", " | > loader_time: 0.0089 (0.011077651544050734)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:24:37 -- STEP: 600/6002 -- GLOBAL_STEP: 600\u001b[0m\n", " | > loss_text_ce: 0.05747831612825394 (0.05492468555768331)\n", " | > loss_mel_ce: 4.033565044403076 (4.038732697566357)\n", " | > loss: 0.06493720412254333 (0.06497869241982701)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.261 (0.3629092776775359)\n", " | > loader_time: 0.0083 (0.010997156302134197)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:25:08 -- STEP: 650/6002 -- GLOBAL_STEP: 650\u001b[0m\n", " | > loss_text_ce: 0.04280412569642067 (0.05469198384537147)\n", " | > loss_mel_ce: 3.6755707263946533 (4.0283779650468166)\n", " | > loss: 0.05902182683348656 (0.06481063789473136)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3539 (0.36425338818476727)\n", " | > loader_time: 0.0093 
(0.01088279907520001)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:25:37 -- STEP: 700/6002 -- GLOBAL_STEP: 700\u001b[0m\n", " | > loss_text_ce: 0.05117706209421158 (0.05459854888596705)\n", " | > loss_mel_ce: 3.2699708938598633 (4.01818464279175)\n", " | > loss: 0.052716635167598724 (0.06464735606419196)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3813 (0.3632683897018431)\n", " | > loader_time: 0.0097 (0.010784306185586113)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:26:07 -- STEP: 750/6002 -- GLOBAL_STEP: 750\u001b[0m\n", " | > loss_text_ce: 0.05292212963104248 (0.05440769362449646)\n", " | > loss_mel_ce: 3.8137242794036865 (4.009889735539755)\n", " | > loss: 0.061375342309474945 (0.0645126614421607)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3532 (0.36326850477854394)\n", " | > loader_time: 0.0088 (0.010701715787251796)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:26:38 -- STEP: 800/6002 -- GLOBAL_STEP: 800\u001b[0m\n", " | > loss_text_ce: 0.05412683263421059 (0.05420490347780287)\n", " | > loss_mel_ce: 3.8943874835968018 (3.99855845719576)\n", " | > loss: 0.06267483532428741 (0.06432958097662782)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2913 (0.3639240592718123)\n", " | > loader_time: 0.0081 (0.010638329088687901)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:27:06 -- STEP: 850/6002 -- GLOBAL_STEP: 850\u001b[0m\n", " | > loss_text_ce: 0.04695494472980499 (0.054128460520330594)\n", " | > loss_mel_ce: 3.8355696201324463 (3.99081969429465)\n", " | > loss: 0.061627376824617386 (0.06420553006231791)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4307 (0.36264264246996697)\n", " | > loader_time: 0.0095 (0.010562590150272149)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:27:36 -- STEP: 900/6002 -- GLOBAL_STEP: 900\u001b[0m\n", " | > loss_text_ce: 0.05314582586288452 (0.05395668339812093)\n", " | > loss_mel_ce: 3.8255321979522705 (3.9823400161001428)\n", " | > loss: 0.061566323041915894 
(0.06406820535659803)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2621 (0.3626030037138196)\n", " | > loader_time: 0.0079 (0.010517186588711213)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:28:09 -- STEP: 950/6002 -- GLOBAL_STEP: 950\u001b[0m\n", " | > loss_text_ce: 0.04585574194788933 (0.05369826306637965)\n", " | > loss_mel_ce: 3.754807233810425 (3.9733545900646017)\n", " | > loss: 0.06032798811793327 (0.06392147763779303)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4364 (0.3641341555745977)\n", " | > loader_time: 0.0085 (0.01046099888650995)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:28:39 -- STEP: 1000/6002 -- GLOBAL_STEP: 1000\u001b[0m\n", " | > loss_text_ce: 0.037731580436229706 (0.053464883763343095)\n", " | > loss_mel_ce: 3.707043170928955 (3.9641590559482585)\n", " | > loss: 0.05944087356328964 (0.06377181232720625)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4365 (0.3643548517227172)\n", " | > loader_time: 0.0097 (0.010418024539947513)\n", "\n", "\n", " > CHECKPOINT : /kaggle/working/finetuned/viXTTS-FT-Code-December-30-2024_02+18AM-0000000/checkpoint_1000.pth\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:29:17 -- STEP: 1050/6002 -- GLOBAL_STEP: 1050\u001b[0m\n", " | > loss_text_ce: 0.0405663326382637 (0.05336961474447023)\n", " | > loss_mel_ce: 3.710972785949707 (3.957252526964461)\n", " | > loss: 0.05954824388027191 (0.06366067266180415)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5816 (0.36400171098254963)\n", " | > loader_time: 0.0095 (0.010373912311735612)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:29:46 -- STEP: 1100/6002 -- GLOBAL_STEP: 1100\u001b[0m\n", " | > loss_text_ce: 0.0485985204577446 (0.053199640295722286)\n", " | > loss_mel_ce: 3.9071521759033203 (3.950506059473212)\n", " | > loss: 0.06278970092535019 (0.06355088788677357)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4023 (0.3636908225579695)\n", " | > loader_time: 0.0086 (0.010335644158450043)\n", "\n", "\n", "\u001b[1m --> 
TIME: 2024-12-30 02:30:17 -- STEP: 1150/6002 -- GLOBAL_STEP: 1150\u001b[0m\n", " | > loss_text_ce: 0.050223011523485184 (0.053011336893491155)\n", " | > loss_mel_ce: 3.9060816764831543 (3.946488602057748)\n", " | > loss: 0.06279849261045456 (0.0634841297696467)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3013 (0.36415399074554455)\n", " | > loader_time: 0.009 (0.01030124332593835)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:30:49 -- STEP: 1200/6002 -- GLOBAL_STEP: 1200\u001b[0m\n", " | > loss_text_ce: 0.03817598521709442 (0.0527884591650218)\n", " | > loss_mel_ce: 3.7098653316497803 (3.941276328166327)\n", " | > loss: 0.059492722153663635 (0.06339785751886676)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4389 (0.3649731520811718)\n", " | > loader_time: 0.0107 (0.010270679990450535)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:31:18 -- STEP: 1250/6002 -- GLOBAL_STEP: 1250\u001b[0m\n", " | > loss_text_ce: 0.04458443075418472 (0.052597165021300324)\n", " | > loss_mel_ce: 3.6091222763061523 (3.9350670309066778)\n", " | > loss: 0.057995349168777466 (0.06329626083076006)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4401 (0.3647538839340211)\n", " | > loader_time: 0.0124 (0.010239666175842283)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:31:50 -- STEP: 1300/6002 -- GLOBAL_STEP: 1300\u001b[0m\n", " | > loss_text_ce: 0.04871862009167671 (0.05238850174328457)\n", " | > loss_mel_ce: 3.7566115856170654 (3.9302088198295015)\n", " | > loss: 0.060402072966098785 (0.06321583423763519)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3462 (0.36529964061883774)\n", " | > loader_time: 0.0089 (0.010207668084364668)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:32:19 -- STEP: 1350/6002 -- GLOBAL_STEP: 1350\u001b[0m\n", " | > loss_text_ce: 0.0542672760784626 (0.05221975600829832)\n", " | > loss_mel_ce: 4.055333137512207 (3.9249400560944174)\n", " | > loss: 0.06523175537586212 (0.06312952456650918)\n", " | > current_lr: 1e-05 \n", " | > 
step_time: 0.2632 (0.3649682166841295)\n", " | > loader_time: 0.0082 (0.010164705382453072)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:32:51 -- STEP: 1400/6002 -- GLOBAL_STEP: 1400\u001b[0m\n", " | > loss_text_ce: 0.05162594094872475 (0.05203412518969605)\n", " | > loss_mel_ce: 3.5535080432891846 (3.920573236601694)\n", " | > loss: 0.05722435191273689 (0.06305726345628503)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2548 (0.36558932559830837)\n", " | > loader_time: 0.0095 (0.010136415788105554)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:33:20 -- STEP: 1450/6002 -- GLOBAL_STEP: 1450\u001b[0m\n", " | > loss_text_ce: 0.051494110375642776 (0.05186354096336612)\n", " | > loss_mel_ce: 3.7324509620666504 (3.914501560474265)\n", " | > loss: 0.060062624514102936 (0.06295817994865882)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3261 (0.36527410671628785)\n", " | > loader_time: 0.0083 (0.010109037366406661)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:33:51 -- STEP: 1500/6002 -- GLOBAL_STEP: 1500\u001b[0m\n", " | > loss_text_ce: 0.050328031182289124 (0.051655744239687926)\n", " | > loss_mel_ce: 3.9715049266815186 (3.908826083183289)\n", " | > loss: 0.06383862346410751 (0.06286479464173325)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2666 (0.3654310743014021)\n", " | > loader_time: 0.0101 (0.010084115505218504)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:34:22 -- STEP: 1550/6002 -- GLOBAL_STEP: 1550\u001b[0m\n", " | > loss_text_ce: 0.04234788939356804 (0.0514678742352032)\n", " | > loss_mel_ce: 3.449240207672119 (3.9025823163986213)\n", " | > loss: 0.055422037839889526 (0.06276270515976423)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3027 (0.3657886335926674)\n", " | > loader_time: 0.0086 (0.010062441364411388)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:34:52 -- STEP: 1600/6002 -- GLOBAL_STEP: 1600\u001b[0m\n", " | > loss_text_ce: 0.04169786348938942 (0.05128284019418062)\n", " | > loss_mel_ce: 
3.800319194793701 (3.8982348279654984)\n", " | > loss: 0.060984402894973755 (0.06269076035125189)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.384 (0.3659165552258494)\n", " | > loader_time: 0.0124 (0.010050762146711367)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:35:22 -- STEP: 1650/6002 -- GLOBAL_STEP: 1650\u001b[0m\n", " | > loss_text_ce: 0.046749047935009 (0.051116718995300216)\n", " | > loss_mel_ce: 3.671217918395996 (3.894774938207685)\n", " | > loss: 0.05901535227894783 (0.06263320462496003)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3278 (0.3656764378692167)\n", " | > loader_time: 0.01 (0.010028723369945196)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:35:51 -- STEP: 1700/6002 -- GLOBAL_STEP: 1700\u001b[0m\n", " | > loss_text_ce: 0.04395017400383949 (0.05096588992678068)\n", " | > loss_mel_ce: 3.8848042488098145 (3.8896661135729627)\n", " | > loss: 0.062361184507608414 (0.0625497180552168)\n", " | > grad_norm: 0 (0.0)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4367 (0.36524616479873684)\n", " | > loader_time: 0.009 (0.010008859073414538)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:36:20 -- STEP: 1750/6002 -- GLOBAL_STEP: 1750\u001b[0m\n", " | > loss_text_ce: 0.04031569883227348 (0.05076855845536505)\n", " | > loss_mel_ce: 3.4842817783355713 (3.8842643218721666)\n", " | > loss: 0.0559459924697876 (0.06246084307559902)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4414 (0.364896200180054)\n", " | > loader_time: 0.0094 (0.009986371994018571)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:36:50 -- STEP: 1800/6002 -- GLOBAL_STEP: 1800\u001b[0m\n", " | > loss_text_ce: 0.04884432628750801 (0.05060019393761953)\n", " | > loss_mel_ce: 3.7332818508148193 (3.879839418066873)\n", " | > loss: 0.060033753514289856 (0.062387934039450386)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.261 (0.36471144066916594)\n", " | > loader_time: 0.0088 (0.0099648247824775)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 
02:37:20 -- STEP: 1850/6002 -- GLOBAL_STEP: 1850\u001b[0m\n", " | > loss_text_ce: 0.03939485922455788 (0.050419671128730525)\n", " | > loss_mel_ce: 3.6386282444000244 (3.8758774540875414)\n", " | > loss: 0.058381322771310806 (0.06232218027718972)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3264 (0.36475672902287687)\n", " | > loader_time: 0.0081 (0.009949095700238218)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:37:51 -- STEP: 1900/6002 -- GLOBAL_STEP: 1900\u001b[0m\n", " | > loss_text_ce: 0.03898278996348381 (0.05022828536206171)\n", " | > loss_mel_ce: 3.6770479679107666 (3.8711325939078085)\n", " | > loss: 0.05898461863398552 (0.06224382716378102)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3324 (0.36513000011444113)\n", " | > loader_time: 0.0084 (0.009936457433198643)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:38:21 -- STEP: 1950/6002 -- GLOBAL_STEP: 1950\u001b[0m\n", " | > loss_text_ce: 0.0413382388651371 (0.0500581090629865)\n", " | > loss_mel_ce: 3.601344108581543 (3.8671812502543137)\n", " | > loss: 0.057820357382297516 (0.06217840620722529)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.506 (0.3650426622537469)\n", " | > loader_time: 0.0097 (0.009917224981845968)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:38:52 -- STEP: 2000/6002 -- GLOBAL_STEP: 2000\u001b[0m\n", " | > loss_text_ce: 0.03463033214211464 (0.04992346741259099)\n", " | > loss_mel_ce: 3.6247920989990234 (3.863026950478554)\n", " | > loss: 0.05808607488870621 (0.06211032776907089)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.574 (0.36516597342491175)\n", " | > loader_time: 0.0123 (0.009906328916549696)\n", "\n", "\n", " > CHECKPOINT : /kaggle/working/finetuned/viXTTS-FT-Code-December-30-2024_02+18AM-0000000/checkpoint_2000.pth\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:39:31 -- STEP: 2050/6002 -- GLOBAL_STEP: 2050\u001b[0m\n", " | > loss_text_ce: 0.04263600334525108 (0.049765649721389874)\n", " | > loss_mel_ce: 4.009232521057129 
(3.8596888982958912)\n", " | > loss: 0.06431537866592407 (0.062054837760401965)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3732 (0.3654085600085377)\n", " | > loader_time: 0.01 (0.009900610970287802)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:40:02 -- STEP: 2100/6002 -- GLOBAL_STEP: 2100\u001b[0m\n", " | > loss_text_ce: 0.04792046919465065 (0.04958703994750977)\n", " | > loss_mel_ce: 3.889775276184082 (3.856694917224704)\n", " | > loss: 0.06250311434268951 (0.06200447916807166)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2656 (0.3656259855769931)\n", " | > loader_time: 0.0083 (0.009884082703363339)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:40:36 -- STEP: 2150/6002 -- GLOBAL_STEP: 2150\u001b[0m\n", " | > loss_text_ce: 0.04725513979792595 (0.049380414485931425)\n", " | > loss_mel_ce: 3.76875376701355 (3.8526085227034823)\n", " | > loss: 0.0605715736746788 (0.06193633598816952)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.258 (0.3668120364255686)\n", " | > loader_time: 0.0081 (0.009878406302873493)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:41:06 -- STEP: 2200/6002 -- GLOBAL_STEP: 2200\u001b[0m\n", " | > loss_text_ce: 0.038186680525541306 (0.049228163349696215)\n", " | > loss_mel_ce: 3.604285478591919 (3.849216685836966)\n", " | > loss: 0.05781702324748039 (0.0618800806169483)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5215 (0.36665335720235664)\n", " | > loader_time: 0.0092 (0.009862192110581848)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:41:34 -- STEP: 2250/6002 -- GLOBAL_STEP: 2250\u001b[0m\n", " | > loss_text_ce: 0.046331796795129776 (0.04907235008974875)\n", " | > loss_mel_ce: 3.6905477046966553 (3.845727434158326)\n", " | > loss: 0.059315551072359085 (0.061822222441434885)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2335 (0.36605500570933036)\n", " | > loader_time: 0.0084 (0.009848753717210576)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:42:05 -- STEP: 2300/6002 -- GLOBAL_STEP: 
2300\u001b[0m\n", " | > loss_text_ce: 0.04034408926963806 (0.04891125987405368)\n", " | > loss_mel_ce: 3.9499354362487793 (3.8429036462825286)\n", " | > loss: 0.0633377730846405 (0.0617748434290938)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5097 (0.3661970526239148)\n", " | > loader_time: 0.0092 (0.009844462560570776)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:42:36 -- STEP: 2350/6002 -- GLOBAL_STEP: 2350\u001b[0m\n", " | > loss_text_ce: 0.03489398583769798 (0.04872991116123002)\n", " | > loss_mel_ce: 3.5750088691711426 (3.8391739529751723)\n", " | > loss: 0.05730004981160164 (0.06171276339191074)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4346 (0.3664931631088259)\n", " | > loader_time: 0.0093 (0.009836713303910942)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:43:07 -- STEP: 2400/6002 -- GLOBAL_STEP: 2400\u001b[0m\n", " | > loss_text_ce: 0.04089752212166786 (0.04854642809213455)\n", " | > loss_mel_ce: 3.5654296875 (3.836248270769915)\n", " | > loss: 0.05724329501390457 (0.06166341154991341)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3534 (0.3666420244177186)\n", " | > loader_time: 0.0102 (0.009824784100055715)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:43:37 -- STEP: 2450/6002 -- GLOBAL_STEP: 2450\u001b[0m\n", " | > loss_text_ce: 0.03833115100860596 (0.048388583381869396)\n", " | > loss_mel_ce: 3.391525983810425 (3.8325945786067424)\n", " | > loss: 0.054442182183265686 (0.06160291095625384)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4235 (0.3667355056684846)\n", " | > loader_time: 0.0096 (0.009811785561697841)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:44:09 -- STEP: 2500/6002 -- GLOBAL_STEP: 2500\u001b[0m\n", " | > loss_text_ce: 0.03442290797829628 (0.048233727233111895)\n", " | > loss_mel_ce: 3.639181613922119 (3.8299599958419805)\n", " | > loss: 0.058311186730861664 (0.061558634148538134)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4955 (0.3670914826393129)\n", " | > loader_time: 0.0093 
(0.00980192937850953)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:44:39 -- STEP: 2550/6002 -- GLOBAL_STEP: 2550\u001b[0m\n", " | > loss_text_ce: 0.03878515958786011 (0.04809201906563022)\n", " | > loss_mel_ce: 3.7524030208587646 (3.827229133680756)\n", " | > loss: 0.06017759442329407 (0.06151303780137326)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3828 (0.36689552185582186)\n", " | > loader_time: 0.0084 (0.009791368409699092)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:45:09 -- STEP: 2600/6002 -- GLOBAL_STEP: 2600\u001b[0m\n", " | > loss_text_ce: 0.03481372818350792 (0.04795890944938248)\n", " | > loss_mel_ce: 3.816746473312378 (3.8245432611612182)\n", " | > loss: 0.061135876923799515 (0.06146829205780077)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4317 (0.36693254186556895)\n", " | > loader_time: 0.0126 (0.00978282644198492)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:45:39 -- STEP: 2650/6002 -- GLOBAL_STEP: 2650\u001b[0m\n", " | > loss_text_ce: 0.03825747221708298 (0.04780789489172539)\n", " | > loss_mel_ce: 3.951406478881836 (3.8218966377006396)\n", " | > loss: 0.06332799792289734 (0.061423885095513116)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3519 (0.36685921713991027)\n", " | > loader_time: 0.0084 (0.009775883836566279)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:46:09 -- STEP: 2700/6002 -- GLOBAL_STEP: 2700\u001b[0m\n", " | > loss_text_ce: 0.04506264626979828 (0.047668707388694616)\n", " | > loss_mel_ce: 3.7299747467041016 (3.818908161675489)\n", " | > loss: 0.059921231120824814 (0.06137423963182505)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.266 (0.36675769611641257)\n", " | > loader_time: 0.0078 (0.009761120036796288)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:46:40 -- STEP: 2750/6002 -- GLOBAL_STEP: 2750\u001b[0m\n", " | > loss_text_ce: 0.0430850125849247 (0.04750408110835334)\n", " | > loss_mel_ce: 3.8795690536499023 (3.816381351124157)\n", " | > loss: 0.06226435676217079 
(0.06133151840486312)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2644 (0.3669947886033495)\n", " | > loader_time: 0.0084 (0.009750937115062374)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:47:12 -- STEP: 2800/6002 -- GLOBAL_STEP: 2800\u001b[0m\n", " | > loss_text_ce: 0.03652523085474968 (0.04736481287117513)\n", " | > loss_mel_ce: 3.5829005241394043 (3.813901749593872)\n", " | > loss: 0.05745120719075203 (0.06128994903926341)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.522 (0.36725169318062945)\n", " | > loader_time: 0.0094 (0.00974452027252743)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:47:41 -- STEP: 2850/6002 -- GLOBAL_STEP: 2850\u001b[0m\n", " | > loss_text_ce: 0.04009255766868591 (0.04721228562295435)\n", " | > loss_mel_ce: 3.655750274658203 (3.8119479245470287)\n", " | > loss: 0.058664172887802124 (0.06125651487132961)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2962 (0.3671331299396986)\n", " | > loader_time: 0.009 (0.009736711602461976)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:48:09 -- STEP: 2900/6002 -- GLOBAL_STEP: 2900\u001b[0m\n", " | > loss_text_ce: 0.0491073839366436 (0.04708538558719485)\n", " | > loss_mel_ce: 3.8320281505584717 (3.809413806405561)\n", " | > loss: 0.06160532683134079 (0.06121427648540203)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3317 (0.3665771188407111)\n", " | > loader_time: 0.0103 (0.009723841897372553)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:48:40 -- STEP: 2950/6002 -- GLOBAL_STEP: 2950\u001b[0m\n", " | > loss_text_ce: 0.034916952252388 (0.04693668976615544)\n", " | > loss_mel_ce: 3.6368796825408936 (3.8070163840762645)\n", " | > loss: 0.05828248709440231 (0.061173861909468316)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.581 (0.36671117952314497)\n", " | > loader_time: 0.011 (0.009716794288764577)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:49:10 -- STEP: 3000/6002 -- GLOBAL_STEP: 3000\u001b[0m\n", " | > loss_text_ce: 0.03703349083662033 
(0.04681156018127996)\n", " | > loss_mel_ce: 3.720595359802246 (3.804192325433096)\n", " | > loss: 0.0596449077129364 (0.06112704939891896)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3866 (0.3665878016948702)\n", " | > loader_time: 0.0086 (0.009708314021428433)\n", "\n", "\n", " > CHECKPOINT : /kaggle/working/finetuned/viXTTS-FT-Code-December-30-2024_02+18AM-0000000/checkpoint_3000.pth\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:49:49 -- STEP: 3050/6002 -- GLOBAL_STEP: 3050\u001b[0m\n", " | > loss_text_ce: 0.04241381585597992 (0.04667059979600006)\n", " | > loss_mel_ce: 3.842857837677002 (3.8014708446283816)\n", " | > loss: 0.06167098134756088 (0.061081613823771495)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4039 (0.3667699028234015)\n", " | > loader_time: 0.0086 (0.00970199405169878)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:50:17 -- STEP: 3100/6002 -- GLOBAL_STEP: 3100\u001b[0m\n", " | > loss_text_ce: 0.03888985142111778 (0.046538259420664065)\n", " | > loss_mel_ce: 3.8008034229278564 (3.799110640402764)\n", " | > loss: 0.060947515070438385 (0.06104204962330482)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2657 (0.366319617917461)\n", " | > loader_time: 0.0086 (0.009700484583454743)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:50:50 -- STEP: 3150/6002 -- GLOBAL_STEP: 3150\u001b[0m\n", " | > loss_text_ce: 0.03398139774799347 (0.046380371865298996)\n", " | > loss_mel_ce: 3.5990543365478516 (3.79709165921287)\n", " | > loss: 0.057667236775159836 (0.061007496143380816)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4176 (0.3668183668076048)\n", " | > loader_time: 0.0104 (0.009696355017404706)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:51:22 -- STEP: 3200/6002 -- GLOBAL_STEP: 3200\u001b[0m\n", " | > loss_text_ce: 0.04039013385772705 (0.046232308440376055)\n", " | > loss_mel_ce: 3.8681564331054688 (3.7945044656842954)\n", " | > loss: 0.06204042211174965 (0.060964079361874625)\n", " | > current_lr: 1e-05 \n", " | > 
step_time: 0.2898 (0.36717331722378754)\n", " | > loader_time: 0.0089 (0.009688454717397687)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:51:52 -- STEP: 3250/6002 -- GLOBAL_STEP: 3250\u001b[0m\n", " | > loss_text_ce: 0.04073147103190422 (0.04609538769664671)\n", " | > loss_mel_ce: 3.8346657752990723 (3.792284564531767)\n", " | > loss: 0.061514243483543396 (0.060926669487586405)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3045 (0.3671379034335799)\n", " | > loader_time: 0.0086 (0.009680179522587694)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:52:21 -- STEP: 3300/6002 -- GLOBAL_STEP: 3300\u001b[0m\n", " | > loss_text_ce: 0.03657256439328194 (0.045967790225351385)\n", " | > loss_mel_ce: 3.714445114135742 (3.7901291346549995)\n", " | > loss: 0.05953996628522873 (0.06089043095256345)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3864 (0.3669941351630473)\n", " | > loader_time: 0.0088 (0.009674786148649259)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:52:53 -- STEP: 3350/6002 -- GLOBAL_STEP: 3350\u001b[0m\n", " | > loss_text_ce: 0.03447043523192406 (0.04584117062556653)\n", " | > loss_mel_ce: 3.758842945098877 (3.787633190866727)\n", " | > loss: 0.06021132692694664 (0.06084880295974107)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5261 (0.36727490460694745)\n", " | > loader_time: 0.0093 (0.009672800035619015)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:53:24 -- STEP: 3400/6002 -- GLOBAL_STEP: 3400\u001b[0m\n", " | > loss_text_ce: 0.031052475795149803 (0.04571005828897741)\n", " | > loss_mel_ce: 3.616142988204956 (3.785918262145099)\n", " | > loss: 0.05789199844002724 (0.06081950072199108)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5829 (0.3673302212182215)\n", " | > loader_time: 0.0099 (0.009665432116564574)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:53:55 -- STEP: 3450/6002 -- GLOBAL_STEP: 3450\u001b[0m\n", " | > loss_text_ce: 0.03431342914700508 (0.04558153031528858)\n", " | > loss_mel_ce: 
3.5432984828948975 (3.7836409228089933)\n", " | > loss: 0.05678749457001686 (0.06078131234948188)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5697 (0.3674875206878222)\n", " | > loader_time: 0.0098 (0.009662290725155145)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:54:26 -- STEP: 3500/6002 -- GLOBAL_STEP: 3500\u001b[0m\n", " | > loss_text_ce: 0.029954245314002037 (0.04544649321798766)\n", " | > loss_mel_ce: 3.6239559650421143 (3.7814774033001495)\n", " | > loss: 0.05799857899546623 (0.06074482732159753)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5863 (0.3676566194125586)\n", " | > loader_time: 0.0097 (0.009656347615378235)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:54:54 -- STEP: 3550/6002 -- GLOBAL_STEP: 3550\u001b[0m\n", " | > loss_text_ce: 0.04094313457608223 (0.045348405600853355)\n", " | > loss_mel_ce: 4.057122707366943 (3.779134326518422)\n", " | > loss: 0.06504866480827332 (0.06070607867773991)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2896 (0.36722744793959083)\n", " | > loader_time: 0.0091 (0.009648049918698585)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:55:24 -- STEP: 3600/6002 -- GLOBAL_STEP: 3600\u001b[0m\n", " | > loss_text_ce: 0.03854451701045036 (0.045231095257525625)\n", " | > loss_mel_ce: 3.5051193237304688 (3.777313461105029)\n", " | > loss: 0.056248635053634644 (0.06067531397980122)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4178 (0.36712758084138253)\n", " | > loader_time: 0.0106 (0.00964237968126932)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:55:54 -- STEP: 3650/6002 -- GLOBAL_STEP: 3650\u001b[0m\n", " | > loss_text_ce: 0.035935577005147934 (0.045108372943144126)\n", " | > loss_mel_ce: 3.6601147651672363 (3.7751044286440503)\n", " | > loss: 0.05866746976971626 (0.060638301999193366)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2651 (0.3670803853910265)\n", " | > loader_time: 0.0095 (0.009635487974506527)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:56:24 -- STEP: 
3700/6002 -- GLOBAL_STEP: 3700\u001b[0m\n", " | > loss_text_ce: 0.0341482013463974 (0.04498326027564502)\n", " | > loss_mel_ce: 3.4101722240448 (3.773071005279954)\n", " | > loss: 0.0546717569231987 (0.060604039522240305)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4942 (0.3669952942229608)\n", " | > loader_time: 0.0091 (0.009627564017837105)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:56:54 -- STEP: 3750/6002 -- GLOBAL_STEP: 3750\u001b[0m\n", " | > loss_text_ce: 0.04054053872823715 (0.044863220184048)\n", " | > loss_mel_ce: 3.473175048828125 (3.770879880714417)\n", " | > loss: 0.05577326565980911 (0.060567354366183296)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2345 (0.36696857668558774)\n", " | > loader_time: 0.0084 (0.009625581550598138)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:57:24 -- STEP: 3800/6002 -- GLOBAL_STEP: 3800\u001b[0m\n", " | > loss_text_ce: 0.038733165711164474 (0.044742963467479524)\n", " | > loss_mel_ce: 3.4512112140655518 (3.7686209077584123)\n", " | > loss: 0.0553959459066391 (0.060529588814824835)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4042 (0.3670554188678141)\n", " | > loader_time: 0.0093 (0.009618771389911043)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:57:54 -- STEP: 3850/6002 -- GLOBAL_STEP: 3850\u001b[0m\n", " | > loss_text_ce: 0.03406408056616783 (0.04462288109561451)\n", " | > loss_mel_ce: 3.4600462913513184 (3.7673394096052495)\n", " | > loss: 0.0554620735347271 (0.06050734150816095)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2992 (0.36696773293730517)\n", " | > loader_time: 0.01 (0.00961218629564557)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:58:25 -- STEP: 3900/6002 -- GLOBAL_STEP: 3900\u001b[0m\n", " | > loss_text_ce: 0.03539643809199333 (0.04449879817234781)\n", " | > loss_mel_ce: 3.5272157192230225 (3.765289707550636)\n", " | > loss: 0.056549400091171265 (0.06047283698350957)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3865 (0.3670891075256545)\n", " 
| > loader_time: 0.0089 (0.00960772813894809)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:58:55 -- STEP: 3950/6002 -- GLOBAL_STEP: 3950\u001b[0m\n", " | > loss_text_ce: 0.033615842461586 (0.044388653156406506)\n", " | > loss_mel_ce: 3.618290424346924 (3.7629610045348545)\n", " | > loss: 0.057966768741607666 (0.06043412510824356)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3854 (0.36704017216646234)\n", " | > loader_time: 0.0096 (0.009599945756453496)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 02:59:27 -- STEP: 4000/6002 -- GLOBAL_STEP: 4000\u001b[0m\n", " | > loss_text_ce: 0.042274005711078644 (0.04427077241521327)\n", " | > loss_mel_ce: 3.634159564971924 (3.7605544337630277)\n", " | > loss: 0.058356091380119324 (0.06039405444916339)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3078 (0.36724365979433077)\n", " | > loader_time: 0.009 (0.009597137868404382)\n", "\n", "\n", " > CHECKPOINT : /kaggle/working/finetuned/viXTTS-FT-Code-December-30-2024_02+18AM-0000000/checkpoint_4000.pth\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:00:05 -- STEP: 4050/6002 -- GLOBAL_STEP: 4050\u001b[0m\n", " | > loss_text_ce: 0.03768029436469078 (0.04416600082575538)\n", " | > loss_mel_ce: 3.5807502269744873 (3.7592164147341696)\n", " | > loss: 0.05743540823459625 (0.06037115300695103)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2356 (0.3671922085020279)\n", " | > loader_time: 0.008 (0.009593721142521603)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:00:37 -- STEP: 4100/6002 -- GLOBAL_STEP: 4100\u001b[0m\n", " | > loss_text_ce: 0.033415537327528 (0.04404218139081465)\n", " | > loss_mel_ce: 3.831455945968628 (3.757545557022095)\n", " | > loss: 0.061347171664237976 (0.060342666066092704)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2585 (0.36744110683115533)\n", " | > loader_time: 0.0103 (0.009590543421303346)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:01:06 -- STEP: 4150/6002 -- GLOBAL_STEP: 4150\u001b[0m\n", " | > loss_text_ce: 
0.038190800696611404 (0.043932985918349515)\n", " | > loss_mel_ce: 3.661740303039551 (3.7563100005345182)\n", " | > loss: 0.05872906744480133 (0.06032132079443302)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2886 (0.36725289304572434)\n", " | > loader_time: 0.0088 (0.009587388900389147)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:01:36 -- STEP: 4200/6002 -- GLOBAL_STEP: 4200\u001b[0m\n", " | > loss_text_ce: 0.03321900591254234 (0.04382747496966094)\n", " | > loss_mel_ce: 3.582749128341675 (3.7544083845047727)\n", " | > loss: 0.05739632621407509 (0.06028946163664973)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2658 (0.36712515524455497)\n", " | > loader_time: 0.0081 (0.009579819781439635)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:02:07 -- STEP: 4250/6002 -- GLOBAL_STEP: 4250\u001b[0m\n", " | > loss_text_ce: 0.03277873620390892 (0.04370882437421994)\n", " | > loss_mel_ce: 3.5806267261505127 (3.752393494998708)\n", " | > loss: 0.057355646044015884 (0.06025559592159357)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4248 (0.3673566850774429)\n", " | > loader_time: 0.0101 (0.009576921238618728)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:02:38 -- STEP: 4300/6002 -- GLOBAL_STEP: 4300\u001b[0m\n", " | > loss_text_ce: 0.037086814641952515 (0.04359607292729061)\n", " | > loss_mel_ce: 3.5529839992523193 (3.7501997979851653)\n", " | > loss: 0.05698525160551071 (0.060218985625651006)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2655 (0.3674028971028882)\n", " | > loader_time: 0.0082 (0.00957841363064077)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:03:08 -- STEP: 4350/6002 -- GLOBAL_STEP: 4350\u001b[0m\n", " | > loss_text_ce: 0.02830120176076889 (0.04348899993788579)\n", " | > loss_mel_ce: 3.6531119346618652 (3.748075530282382)\n", " | > loss: 0.05843513458967209 (0.060183567519503094)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3529 (0.3673193570389144)\n", " | > loader_time: 0.0088 (0.00957431409550808)\n", 
"\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:03:38 -- STEP: 4400/6002 -- GLOBAL_STEP: 4400\u001b[0m\n", " | > loss_text_ce: 0.03639117255806923 (0.043384043332612636)\n", " | > loss_mel_ce: 3.778865098953247 (3.746729432994669)\n", " | > loss: 0.060559626668691635 (0.06016053491610696)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.34 (0.36735853888771763)\n", " | > loader_time: 0.0102 (0.009569678740067904)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:04:07 -- STEP: 4450/6002 -- GLOBAL_STEP: 4450\u001b[0m\n", " | > loss_text_ce: 0.04266228899359703 (0.04328831750355409)\n", " | > loss_mel_ce: 3.6449623107910156 (3.7449520748116996)\n", " | > loss: 0.05853372812271118 (0.060130803419298016)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2628 (0.3671370781137705)\n", " | > loader_time: 0.0101 (0.009567777172902985)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:04:37 -- STEP: 4500/6002 -- GLOBAL_STEP: 4500\u001b[0m\n", " | > loss_text_ce: 0.03037821315228939 (0.043183142736968064)\n", " | > loss_mel_ce: 3.5409481525421143 (3.7430683227645027)\n", " | > loss: 0.056687720119953156 (0.060099233151310036)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4751 (0.3670953090455799)\n", " | > loader_time: 0.0093 (0.009562611262003569)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:05:07 -- STEP: 4550/6002 -- GLOBAL_STEP: 4550\u001b[0m\n", " | > loss_text_ce: 0.03391455113887787 (0.04307953738085517)\n", " | > loss_mel_ce: 4.040862083435059 (3.741742706822825)\n", " | > loss: 0.06467899680137634 (0.060076547094753824)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4958 (0.367052145109072)\n", " | > loader_time: 0.0095 (0.00956034021063164)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:05:36 -- STEP: 4600/6002 -- GLOBAL_STEP: 4600\u001b[0m\n", " | > loss_text_ce: 0.03460186347365379 (0.04297828236108885)\n", " | > loss_mel_ce: 3.584301233291626 (3.7400744056183357)\n", " | > loss: 0.057442910969257355 (0.06004845889528162)\n", " | > 
current_lr: 1e-05 \n", " | > step_time: 0.4426 (0.36684356622074005)\n", " | > loader_time: 0.0094 (0.009557484751162316)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:06:07 -- STEP: 4650/6002 -- GLOBAL_STEP: 4650\u001b[0m\n", " | > loss_text_ce: 0.03437335044145584 (0.04286759679836608)\n", " | > loss_mel_ce: 3.3940107822418213 (3.7384464833044238)\n", " | > loss: 0.05441879853606224 (0.060020861944524204)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3041 (0.3668617288015227)\n", " | > loader_time: 0.0083 (0.009552984647853391)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:06:38 -- STEP: 4700/6002 -- GLOBAL_STEP: 4700\u001b[0m\n", " | > loss_text_ce: 0.03274039551615715 (0.042755716802512674)\n", " | > loss_mel_ce: 3.656752586364746 (3.7369474238030453)\n", " | > loss: 0.05856338515877724 (0.05999529147718816)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2646 (0.36699822674406396)\n", " | > loader_time: 0.008 (0.009552650299478072)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:07:08 -- STEP: 4750/6002 -- GLOBAL_STEP: 4750\u001b[0m\n", " | > loss_text_ce: 0.03280211612582207 (0.042660427815035734)\n", " | > loss_mel_ce: 3.257740020751953 (3.7356189927050942)\n", " | > loss: 0.0522308312356472 (0.0599726927421595)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2959 (0.36704730179435385)\n", " | > loader_time: 0.0094 (0.009549721818221224)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:07:39 -- STEP: 4800/6002 -- GLOBAL_STEP: 4800\u001b[0m\n", " | > loss_text_ce: 0.03497743606567383 (0.04256372307543639)\n", " | > loss_mel_ce: 3.8069522380828857 (3.734246160984039)\n", " | > loss: 0.06098301336169243 (0.05994936676152673)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2574 (0.3670590261618302)\n", " | > loader_time: 0.0083 (0.009545817772547387)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:08:08 -- STEP: 4850/6002 -- GLOBAL_STEP: 4850\u001b[0m\n", " | > loss_text_ce: 0.030550243332982063 (0.042462338532906825)\n", " | 
> loss_mel_ce: 3.634552001953125 (3.733124888557749)\n", " | > loss: 0.058176230639219284 (0.059929959507486266)\n", " | > grad_norm: 0 (0.0)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4009 (0.3669585727416369)\n", " | > loader_time: 0.0095 (0.009540836737327958)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:08:38 -- STEP: 4900/6002 -- GLOBAL_STEP: 4900\u001b[0m\n", " | > loss_text_ce: 0.03258618339896202 (0.042365890206122865)\n", " | > loss_mel_ce: 3.7526540756225586 (3.7318143571152977)\n", " | > loss: 0.06008318066596985 (0.05990762649294066)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.522 (0.3668591218092009)\n", " | > loader_time: 0.0101 (0.009537843052221794)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:09:09 -- STEP: 4950/6002 -- GLOBAL_STEP: 4950\u001b[0m\n", " | > loss_text_ce: 0.027767976745963097 (0.04226126361186752)\n", " | > loss_mel_ce: 3.824904203414917 (3.730494757806412)\n", " | > loss: 0.06115352734923363 (0.059885019732244096)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5009 (0.3669634467423571)\n", " | > loader_time: 0.0111 (0.009536925710812957)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:09:40 -- STEP: 5000/6002 -- GLOBAL_STEP: 5000\u001b[0m\n", " | > loss_text_ce: 0.029422897845506668 (0.042151218434795704)\n", " | > loss_mel_ce: 3.4236443042755127 (3.728866738271713)\n", " | > loss: 0.05481059476733208 (0.05985743140354754)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5752 (0.3671079103946695)\n", " | > loader_time: 0.0098 (0.009532590532302853)\n", "\n", "\n", " > CHECKPOINT : /kaggle/working/finetuned/viXTTS-FT-Code-December-30-2024_02+18AM-0000000/checkpoint_5000.pth\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:10:20 -- STEP: 5050/6002 -- GLOBAL_STEP: 5050\u001b[0m\n", " | > loss_text_ce: 0.03708619251847267 (0.04204935689115579)\n", " | > loss_mel_ce: 3.676577568054199 (3.727431866438082)\n", " | > loss: 0.05894704908132553 (0.059833038813701965)\n", " | > current_lr: 1e-05 \n", " | > 
step_time: 0.3188 (0.3672838431065634)\n", " | > loader_time: 0.0087 (0.00953208597579805)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:10:51 -- STEP: 5100/6002 -- GLOBAL_STEP: 5100\u001b[0m\n", " | > loss_text_ce: 0.029041942209005356 (0.04195359596361712)\n", " | > loss_mel_ce: 3.567103385925293 (3.725712189066644)\n", " | > loss: 0.0570816770195961 (0.059804222331035384)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5245 (0.36738481951694957)\n", " | > loader_time: 0.0098 (0.009529042103711301)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:11:21 -- STEP: 5150/6002 -- GLOBAL_STEP: 5150\u001b[0m\n", " | > loss_text_ce: 0.026832789182662964 (0.041861445266861905)\n", " | > loss_mel_ce: 3.2161343097686768 (3.7248462036280956)\n", " | > loss: 0.05147567018866539 (0.05978901381993179)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4032 (0.3673991538482973)\n", " | > loader_time: 0.0094 (0.009526236450787897)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:11:53 -- STEP: 5200/6002 -- GLOBAL_STEP: 5200\u001b[0m\n", " | > loss_text_ce: 0.03141617402434349 (0.04176154844570329)\n", " | > loss_mel_ce: 3.535151720046997 (3.7237811005574004)\n", " | > loss: 0.05661219358444214 (0.05977052175296615)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2909 (0.36756745104606425)\n", " | > loader_time: 0.0095 (0.009525807133087759)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:12:24 -- STEP: 5250/6002 -- GLOBAL_STEP: 5250\u001b[0m\n", " | > loss_text_ce: 0.038742657750844955 (0.04165987514101321)\n", " | > loss_mel_ce: 3.4281036853790283 (3.722420889672779)\n", " | > loss: 0.055029306560754776 (0.059747317236803835)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2343 (0.36766499369485117)\n", " | > loader_time: 0.0093 (0.009522705895560151)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:12:55 -- STEP: 5300/6002 -- GLOBAL_STEP: 5300\u001b[0m\n", " | > loss_text_ce: 0.029864758253097534 (0.04156238605813032)\n", " | > loss_mel_ce: 
3.668837547302246 (3.721413628965054)\n", " | > loss: 0.05870956555008888 (0.0597297815240498)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2537 (0.3677742493827399)\n", " | > loader_time: 0.0088 (0.00952106898685673)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:13:25 -- STEP: 5350/6002 -- GLOBAL_STEP: 5350\u001b[0m\n", " | > loss_text_ce: 0.030643125995993614 (0.041462636637353396)\n", " | > loss_mel_ce: 3.716447114944458 (3.719917015048945)\n", " | > loss: 0.05947762727737427 (0.05970444242113106)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3069 (0.36768384327398407)\n", " | > loader_time: 0.01 (0.009521202639998701)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:13:55 -- STEP: 5400/6002 -- GLOBAL_STEP: 5400\u001b[0m\n", " | > loss_text_ce: 0.025833215564489365 (0.04136838864121169)\n", " | > loss_mel_ce: 3.5363640785217285 (3.718659540768023)\n", " | > loss: 0.05654281750321388 (0.059682986513470065)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5803 (0.36770368836544304)\n", " | > loader_time: 0.0089 (0.009517786900202463)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:14:27 -- STEP: 5450/6002 -- GLOBAL_STEP: 5450\u001b[0m\n", " | > loss_text_ce: 0.026077324524521828 (0.04127162150457636)\n", " | > loss_mel_ce: 3.778614044189453 (3.7173237835595367)\n", " | > loss: 0.060391929000616074 (0.05966024802912267)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.5267 (0.36787155671950994)\n", " | > loader_time: 0.0111 (0.009516589619697811)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:14:58 -- STEP: 5500/6002 -- GLOBAL_STEP: 5500\u001b[0m\n", " | > loss_text_ce: 0.03143662214279175 (0.04117507810552008)\n", " | > loss_mel_ce: 3.4044787883758545 (3.7160354377573186)\n", " | > loss: 0.05453834310173988 (0.05963826565512203)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3013 (0.3679243249893202)\n", " | > loader_time: 0.0096 (0.009517332683910048)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:15:28 -- STEP: 5550/6002 
-- GLOBAL_STEP: 5550\u001b[0m\n", " | > loss_text_ce: 0.02779071219265461 (0.04108735355607287)\n", " | > loss_mel_ce: 3.483369827270508 (3.7150219983023565)\n", " | > loss: 0.055732712149620056 (0.059620786862330404)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4243 (0.36783921632680977)\n", " | > loader_time: 0.0104 (0.009515736124537027)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:15:59 -- STEP: 5600/6002 -- GLOBAL_STEP: 5600\u001b[0m\n", " | > loss_text_ce: 0.02839227207005024 (0.04099662612990604)\n", " | > loss_mel_ce: 3.386875629425049 (3.713761034309864)\n", " | > loss: 0.0542106069624424 (0.059599331443064986)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3857 (0.3679373273253459)\n", " | > loader_time: 0.0102 (0.00951358543975014)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:16:28 -- STEP: 5650/6002 -- GLOBAL_STEP: 5650\u001b[0m\n", " | > loss_text_ce: 0.03031308576464653 (0.04090433009298497)\n", " | > loss_mel_ce: 3.399599075317383 (3.712649542715697)\n", " | > loss: 0.054443053901195526 (0.059580223699454725)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2655 (0.3678098459159396)\n", " | > loader_time: 0.0081 (0.009510872385143183)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:17:00 -- STEP: 5700/6002 -- GLOBAL_STEP: 5700\u001b[0m\n", " | > loss_text_ce: 0.03042440116405487 (0.04081507612869399)\n", " | > loss_mel_ce: 3.501142978668213 (3.7111153245808786)\n", " | > loss: 0.05605662614107132 (0.059554454299310854)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.2609 (0.3679519853675592)\n", " | > loader_time: 0.0089 (0.009508784110086009)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:17:30 -- STEP: 5750/6002 -- GLOBAL_STEP: 5750\u001b[0m\n", " | > loss_text_ce: 0.030299242585897446 (0.04072545524880931)\n", " | > loss_mel_ce: 3.496628999710083 (3.709938796292181)\n", " | > loss: 0.05598299205303192 (0.05953435669321082)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4831 (0.3679715455096718)\n", " | > 
loader_time: 0.0106 (0.00950829750558605)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:18:01 -- STEP: 5800/6002 -- GLOBAL_STEP: 5800\u001b[0m\n", " | > loss_text_ce: 0.02732502669095993 (0.040636571648626954)\n", " | > loss_mel_ce: 3.55778431892395 (3.709117834074744)\n", " | > loss: 0.05690649896860123 (0.059519914694387346)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4305 (0.36801260939959995)\n", " | > loader_time: 0.0101 (0.009507879758703302)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:18:30 -- STEP: 5850/6002 -- GLOBAL_STEP: 5850\u001b[0m\n", " | > loss_text_ce: 0.030632538720965385 (0.04054841572753128)\n", " | > loss_mel_ce: 3.4206042289733887 (3.707707550056979)\n", " | > loss: 0.054781537503004074 (0.05949612993142035)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.294 (0.367902494332731)\n", " | > loader_time: 0.0105 (0.009507853964455108)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:19:01 -- STEP: 5900/6002 -- GLOBAL_STEP: 5900\u001b[0m\n", " | > loss_text_ce: 0.029053425416350365 (0.04046156533820147)\n", " | > loss_mel_ce: 3.265700340270996 (3.7064380621505997)\n", " | > loss: 0.0522976815700531 (0.0594746007517738)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.259 (0.36799900483277054)\n", " | > loader_time: 0.0097 (0.009507248684511346)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:19:31 -- STEP: 5950/6002 -- GLOBAL_STEP: 5950\u001b[0m\n", " | > loss_text_ce: 0.025232641026377678 (0.04036859335111719)\n", " | > loss_mel_ce: 3.5740296840667725 (3.7055943239035725)\n", " | > loss: 0.057131148874759674 (0.059459732332024264)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.4226 (0.36790635645890607)\n", " | > loader_time: 0.0084 (0.009504874614106505)\n", "\n", "\n", "\u001b[1m --> TIME: 2024-12-30 03:20:02 -- STEP: 6000/6002 -- GLOBAL_STEP: 6000\u001b[0m\n", " | > loss_text_ce: 0.0265456885099411 (0.0402774459362651)\n", " | > loss_mel_ce: 3.6577162742614746 (3.704034495631854)\n", " | > loss: 
0.05848035216331482 (0.05943352636694909)\n", " | > current_lr: 1e-05 \n", " | > step_time: 0.3533 (0.36799129267533787)\n", " | > loader_time: 0.0086 (0.009502121210098262)\n", "\n", "\n", " > CHECKPOINT : /kaggle/working/finetuned/viXTTS-FT-Code-December-30-2024_02+18AM-0000000/checkpoint_6000.pth\n", "/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n", "\n", "\u001b[1m > EVALUATION \u001b[0m\n", "\n", "\n", " \u001b[1m--> EVAL PERFORMANCE\u001b[0m\n", " | > avg_loader_time: 0.15799555608204432 \u001b[0m(+0)\n", " | > avg_loss_text_ce: 0.027340748846264824 \u001b[0m(+0)\n", " | > avg_loss_mel_ce: 3.517146889652525 \u001b[0m(+0)\n", " | > avg_loss: 3.5444876296179637 \u001b[0m(+0)\n", "\n", " > BEST MODEL : /kaggle/working/finetuned/viXTTS-FT-Code-December-30-2024_02+18AM-0000000/best_model_6002.pth\n" ] } ], "source": [ "# init the trainer and 🚀\n", "trainer = Trainer(\n", " TrainerArgs(\n", " restore_path=None, # the xtts checkpoint is restored via the xtts_checkpoint key, so there is no need to restore it through the Trainer restore_path parameter\n", " skip_train_epoch=False,\n", " start_with_eval=START_WITH_EVAL,\n", " grad_accum_steps=GRAD_ACUMM_STEPS,\n", " # use_accelerate=True, # causes NaN loss\n", " # small_run=20\n", " ),\n", " config,\n", " output_path=OUT_PATH,\n", " model=model,\n", " train_samples=train_samples,\n", " eval_samples=eval_samples,\n", ")\n", "try:\n", " trainer.fit()\n", "except Exception:\n", " # Best-effort: a training failure must not abort the notebook, so the error is\n", " # reported and execution continues. Print the full traceback (exception type,\n", " # message and stack) instead of only str(e), which can be empty or ambiguous.\n", " import traceback\n", " traceback.print_exc()" ] }, { "cell_type": "code", "execution_count": 15, "id": "e2771685", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T03:21:16.710310Z", "iopub.status.busy": "2024-12-30T03:21:16.709909Z", "iopub.status.idle": "2024-12-30T03:21:16.876046Z", "shell.execute_reply": "2024-12-30T03:21:16.874875Z" }, "papermill": { "duration": 0.187582, "end_time": 
"2024-12-30T03:21:16.877931", "exception": false, "start_time": "2024-12-30T03:21:16.690349", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "from kaggle_secrets import UserSecretsClient\n", "user_secrets = UserSecretsClient()\n", "HF_TOKEN = user_secrets.get_secret(\"HF_TOKEN\")" ] }, { "cell_type": "code", "execution_count": 16, "id": "fbc4013e", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T03:21:16.911630Z", "iopub.status.busy": "2024-12-30T03:21:16.911379Z", "iopub.status.idle": "2024-12-30T03:21:17.005317Z", "shell.execute_reply": "2024-12-30T03:21:17.004362Z" }, "papermill": { "duration": 0.11248, "end_time": "2024-12-30T03:21:17.007004", "exception": false, "start_time": "2024-12-30T03:21:16.894524", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "409 Client Error: Conflict for url: https://huggingface.co./api/repos/create (Request ID: Root=1-677211ac-238f389e186a26876f72fac4;df039791-dd04-4fd6-891f-68be2f37d1c6)\n", "\n", "You already created this model repo\n" ] } ], "source": [ "from huggingface_hub import HfApi\n", "\n", "api = HfApi(token=HF_TOKEN)\n", "\n", "# Ensure the private Hub repo exists before anything is uploaded to it.\n", "# exist_ok=True makes re-runs idempotent: an already-existing repo is reused,\n", "# while real failures (invalid token, network error) are raised here instead of\n", "# being printed and ignored.\n", "repo_id = \"thng292/viXTTS-ft-code-test\"\n", "api.create_repo(repo_id, private=True, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": 17, "id": "1f3bc7ff", "metadata": { "execution": { "iopub.execute_input": "2024-12-30T03:21:17.042525Z", "iopub.status.busy": "2024-12-30T03:21:17.042293Z", "iopub.status.idle": "2024-12-30T03:24:49.321472Z", "shell.execute_reply": "2024-12-30T03:24:49.320536Z" }, "papermill": { "duration": 212.302223, "end_time": "2024-12-30T03:24:49.325329", "exception": false, "start_time": "2024-12-30T03:21:17.023106", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { 
"application/vnd.jupyter.widget-view+json": { "model_id": "edd0f276e6c140daa81e037fbd6605e4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "checkpoint_6000.pth: 0%| | 0.00/5.63G [00:00