Commit: init

This view is limited to 50 files because the commit contains too many changes.
- .gitignore +1 -0
- Dockerfile +100 -0
- __pycache__/app.cpython-311.pyc +0 -0
- __pycache__/inference_functions.cpython-311.pyc +0 -0
- __pycache__/load_models.cpython-311.pyc +0 -0
- __pycache__/server.cpython-311.pyc +0 -0
- __pycache__/silence_removal.cpython-311.pyc +0 -0
- __pycache__/stream_VAD.cpython-311.pyc +0 -0
- __pycache__/stream_VAD2.cpython-311.pyc +0 -0
- __pycache__/stream_prod_main2.cpython-311.pyc +0 -0
- app.py +79 -0
- app/package-lock.json +985 -0
- app/package.json +19 -0
- app/public/app.js +99 -0
- app/public/index.html +35 -0
- app/public/styles.css +96 -0
- app/server.js +102 -0
- app/temp_wav_files/audio-1718725396714.wav +0 -0
- app/uploads/1/audio_2.wav +0 -0
- app/uploads/1/transcription_2.txt +1 -0
- audio_segments/readme +0 -0
- inference_functions.py +80 -0
- load_models.py +18 -0
- main.ipynb +395 -0
- main.py +79 -0
- main_stream.ipynb +87 -0
- models/TTS_utils.py +365 -0
- models/__init__.py +0 -0
- models/__pycache__/TTS_utils.cpython-311.pyc +0 -0
- models/__pycache__/__init__.cpython-311.pyc +0 -0
- models/__pycache__/__init__.cpython-38.pyc +0 -0
- models/__pycache__/es_fastconformer.cpython-311.pyc +0 -0
- models/__pycache__/nllb.cpython-311.pyc +0 -0
- models/__pycache__/nllb.cpython-38.pyc +0 -0
- models/__pycache__/noise_red.cpython-311.pyc +0 -0
- models/__pycache__/parakeet.cpython-311.pyc +0 -0
- models/__pycache__/parakeet.cpython-38.pyc +0 -0
- models/es_fastconformer.py +37 -0
- models/nllb.py +72 -0
- models/noise_red.py +28 -0
- models/parakeet.py +43 -0
- models/status.txt +1 -0
- record_per.json +1 -0
- record_temp.json +1 -0
- requirements.txt +25 -0
- results/readme +0 -0
- run.py +73 -0
- setup.sh +18 -0
- status.txt +1 -0
- stream_VAD.py +249 -0
.gitignore (ADDED)
@@ -0,0 +1 @@
app/node_modules
Dockerfile (ADDED)
@@ -0,0 +1,100 @@
# Use an official CUDA-enabled image from NVIDIA with CUDA 12.1
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04

# Set the working directory in the container
WORKDIR /app

# Set the environment variable to suppress interactive prompts
ENV DEBIAN_FRONTEND=noninteractive

# Install necessary OS packages and Python 3.9
RUN apt-get update && apt-get install -y \
    software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y \
    python3.9 \
    python3.9-distutils \
    python3.9-venv \
    python3.9-dev \
    build-essential \
    cmake \
    libsndfile1 \
    ffmpeg \
    portaudio19-dev \
    alsa-utils \
    curl \
    git \
    nodejs \
    npm \
    && rm -rf /var/lib/apt/lists/*

# Install pip for Python 3.9
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9

# Create symlinks for python3.9 and pip
RUN ln -s /usr/bin/python3.9 /usr/bin/python
RUN ln -s /usr/local/bin/pip /usr/bin/pip

# Set CUDA_HOME environment variable
ENV CUDA_HOME=/usr/local/cuda

# Add CUDA to PATH
ENV PATH=${CUDA_HOME}/bin:${PATH}

# Optionally set LD_LIBRARY_PATH for CUDA libraries
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}

# Set environment variable for NeMo cache directory
ENV NEMO_NLP_TMP=/app/.cache

# Create cache directory
RUN mkdir -p /app/.cache

# Copy the setup script and requirements file into the container
COPY setup.sh requirements.txt /app/

# Make the setup script executable
RUN chmod +x setup.sh

# Copy the application code into the container
COPY . /app

# Copy wait-for-it script and make it executable
COPY wait-for-it.sh /app/wait-for-it.sh
RUN chmod +x /app/wait-for-it.sh

# Install dependencies
RUN pip install --upgrade pip setuptools wheel
RUN pip install pybind11
RUN pip install fasttext
RUN pip install Cython
RUN pip install pyaudio
RUN pip install fastapi uvicorn
RUN pip install uvloop

# Install PyTorch and torchaudio
RUN pip install torch==2.2.2+cu121 -f https://download.pytorch.org/whl/cu121/torch_stable.html
RUN pip install torchaudio==2.2.2+cu121 -f https://download.pytorch.org/whl/cu121/torch_stable.html

# Install the requirements
RUN pip install -r requirements.txt

# Clone and install TTS
RUN git clone https://github.com/coqui-ai/TTS/ && \
    cd TTS && \
    make install

# Install Node.js dependencies
RUN cd /app/app && npm install

# Expose the ports
EXPOSE 8000
EXPOSE 3000

# Set the environment variable to indicate running in Docker
ENV IN_DOCKER=True

# Run the FastAPI app and Node.js server
CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port 8000 & /app/wait-for-it.sh --url http://0.0.0.0:8000/health --strict -- node /app/app/server.js"]
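Note on the CMD above: uvicorn is started in the background and the Node server is launched only once the FastAPI /health endpoint responds. wait-for-it.sh itself is not part of this diff, so its flags are taken at face value; the following is only a minimal Python sketch of that readiness gate, with the URL taken from the CMD and the 120-second timeout being an assumption.

import subprocess
import time
import urllib.request

def wait_for_health(url: str = "http://0.0.0.0:8000/health", timeout: float = 120.0) -> None:
    # Poll the health endpoint until it answers 200 or the deadline passes.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=2) as resp:
                if resp.status == 200:
                    return
        except OSError:
            pass  # server not up yet; retry
        time.sleep(1)
    raise TimeoutError(f"{url} not healthy after {timeout}s")

if __name__ == "__main__":
    # Start the API in the background, wait for readiness, then hand off to Node.
    api = subprocess.Popen(["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"])
    wait_for_health()
    subprocess.run(["node", "/app/app/server.js"], check=True)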
__pycache__/app.cpython-311.pyc (ADDED): binary file, 5.74 kB
__pycache__/inference_functions.cpython-311.pyc (ADDED): binary file, 7.07 kB
__pycache__/load_models.cpython-311.pyc (ADDED): binary file, 817 Bytes
__pycache__/server.cpython-311.pyc (ADDED): binary file, 2.21 kB
__pycache__/silence_removal.cpython-311.pyc (ADDED): binary file, 1.6 kB
__pycache__/stream_VAD.cpython-311.pyc (ADDED): binary file, 13 kB
__pycache__/stream_VAD2.cpython-311.pyc (ADDED): binary file, 13.5 kB
__pycache__/stream_prod_main2.cpython-311.pyc (ADDED): binary file, 4.17 kB
app.py (ADDED)
@@ -0,0 +1,79 @@
import fastapi
import uvicorn
from fastapi import File, UploadFile, Form, HTTPException
from fastapi.responses import JSONResponse, FileResponse
from load_models import get_nllb_model_and_tokenizer, get_xtts_model
from inference_functions import translate, just_inference
import os
import torch

# Set GPU memory fraction
torch.cuda.set_per_process_memory_fraction(0.75, 0)

# Load models
model_nllb, tokenizer_nllb = get_nllb_model_and_tokenizer()
model_xtts = get_xtts_model()

app = fastapi.FastAPI()

@app.get("/health")
def health_check():
    return {"status": "ok"}

@app.post("/translate/")
def translate_text(text: str = Form(...), target_lang: str = Form(...)):
    translation = translate(model_nllb, tokenizer_nllb, text, target_lang)
    return {"translation": translation}

@app.post("/inference/")
def inference_audio(original_path: UploadFile = File(...), text: str = Form(...), lang: str = Form(...)):
    # Save the uploaded file
    file_location = f"/tmp/{original_path.filename}"
    with open(file_location, "wb") as file:
        file.write(original_path.file.read())

    output_dir = f"/tmp/generated_audio_{os.path.basename(file_location)}.wav"
    torch.cuda.empty_cache()
    generated_audio = just_inference(model_xtts, file_location, output_dir, text, lang)
    return {"path_to_save": output_dir}

@app.post("/process-audio/")
async def process_audio(original_path: UploadFile = File(...), text: str = Form(...), lang: str = Form(...), target_lang: str = Form(...)):
    print(f"original_path: {original_path.filename}")
    print(f"text: {text}")
    print(f"lang: {lang}")
    print(f"target_lang: {target_lang}")

    # Validate target language
    if target_lang not in ["es", "en"]:  # Use 'es' and 'en' to match the example values
        print("Unsupported language")
        raise HTTPException(status_code=400, detail="Unsupported language. Use 'es' or 'en'.")

    try:
        # Translate the text first
        translated_text = translate(model_nllb, tokenizer_nllb, text, target_lang)
        print(f"translated_text: {translated_text}")

        # Save the uploaded file
        file_location = f"/tmp/{original_path.filename}"
        with open(file_location, "wb") as file:
            file.write(original_path.file.read())

        output_dir = f"/tmp/generated_audio_{os.path.basename(file_location)}.wav"
        torch.cuda.empty_cache()
        generated_audio = just_inference(model_xtts, file_location, output_dir, translated_text, target_lang)

        return JSONResponse(content={"audio_path": output_dir, "translation": translated_text})

    except Exception as e:
        print(f"Error during processing: {e}")
        raise HTTPException(status_code=500, detail="Error during processing")

@app.get("/download-audio/")
def download_audio(file_path: str):
    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(file_path, media_type='audio/wav', filename=os.path.basename(file_path))

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
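To make the request and response shapes concrete, here is a minimal client sketch for the endpoints above. It assumes the service is reachable on localhost:8000 (matching the uvicorn.run call), that a local sample.wav exists, and it uses the third-party requests package, which is not part of this repo.

import requests

BASE = "http://localhost:8000"

# Upload a reference recording plus the text to translate and speak.
with open("sample.wav", "rb") as f:  # sample.wav is an assumed local file
    resp = requests.post(
        f"{BASE}/process-audio/",
        files={"original_path": ("sample.wav", f, "audio/wav")},
        data={"text": "Hello, how are you?", "lang": "en", "target_lang": "es"},
    )
resp.raise_for_status()
payload = resp.json()
print("translation:", payload["translation"])

# Fetch the generated audio via the download endpoint.
audio = requests.get(f"{BASE}/download-audio/", params={"file_path": payload["audio_path"]})
audio.raise_for_status()
with open("generated.wav", "wb") as out:
    out.write(audio.content)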
app/package-lock.json (ADDED)
@@ -0,0 +1,985 @@
{
  "name": "audio-transcription",
  "version": "1.0.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "audio-transcription",
      "version": "1.0.0",
      "license": "ISC",
      "dependencies": {
        "express": "^4.19.2",
        "form-data": "^4.0.0",
        "multer": "^1.4.5-lts.1",
        "node-fetch": "^2.7.0",
        "wav": "^1.0.2"
      }
    },
    ...
  }
}
(The remaining ~960 generated lines pin resolved versions and integrity hashes for the transitive dependencies of these five packages, among them accepts, body-parser, busboy, concat-stream, debug, http-errors, mime-types, qs, readable-stream, safe-buffer, send, serve-static, side-channel, stream-parser, streamsearch, whatwg-url, and xtend.)
app/package.json (ADDED)
@@ -0,0 +1,19 @@
{
  "name": "audio-transcription",
  "version": "1.0.0",
  "description": "",
  "main": "app.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "express": "^4.19.2",
    "form-data": "^4.0.0",
    "multer": "^1.4.5-lts.1",
    "node-fetch": "^2.7.0",
    "wav": "^1.0.2"
  }
}
app/public/app.js
ADDED
@@ -0,0 +1,99 @@
const recordButton = document.getElementById('record');
const status = document.getElementById('status');
const transcriptionElement = document.getElementById('transcription');
const audioElement = document.getElementById('audio');
const translationElement = document.getElementById('translation');

let mediaRecorder;
let audioChunks = [];
let transcript = '';
let sentenceIndex = 0;

const recognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
recognition.continuous = true;
recognition.interimResults = true;

recognition.onresult = (event) => {
    let interimTranscript = '';
    for (let i = event.resultIndex; i < event.results.length; ++i) {
        if (event.results[i].isFinal) {
            transcript += event.results[i][0].transcript + ' ';
            saveAudioAndTranscription(event.results[i][0].transcript, sentenceIndex++);
        } else {
            interimTranscript += event.results[i][0].transcript;
        }
    }
    transcriptionElement.innerHTML = transcript + '<i style="color:red;">' + interimTranscript + '</i>';
};

recognition.onerror = (event) => {
    console.error(event.error);
};

recordButton.onmousedown = async () => {
    status.textContent = "Recording...";
    transcript = '';
    sentenceIndex = 0;

    // Start speech recognition
    recognition.start();

    // Start audio recording
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
    mediaRecorder.start();

    mediaRecorder.ondataavailable = (event) => {
        audioChunks.push(event.data);
    };
};

recordButton.onmouseup = () => {
    status.textContent = "Recording stopped";

    // Stop speech recognition and audio recording
    recognition.stop();
    mediaRecorder.stop();

    // Process the recorded audio
    saveAudioAndTranscription(transcript, sentenceIndex);
};

async function saveAudioAndTranscription(sentence, index) {
    mediaRecorder.stop();
    mediaRecorder.onstop = async () => {
        const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
        const arrayBuffer = await audioBlob.arrayBuffer();
        const audioBuffer = new Uint8Array(arrayBuffer);

        const formData = new FormData();
        formData.append('audio', new Blob([audioBuffer], { type: 'application/octet-stream' }));
        formData.append('transcript', sentence);
        formData.append('sampleRate', mediaRecorder.stream.getAudioTracks()[0].getSettings().sampleRate);
        formData.append('numberOfChannels', 1); // Assuming mono audio

        try {
            const response = await fetch('/save-audio', {
                method: 'POST',
                body: formData
            });

            if (response.ok) {
                const result = await response.json();
                console.log(`Saved sentence ${index}`);

                // Show translation and play audio
                translationElement.textContent = result.translation;
                audioElement.src = `http://localhost:8000/download-audio?file_path=${result.audio_path}`;
                audioElement.play();
            } else {
                console.error('Failed to save the file.');
            }
        } catch (error) {
            console.error('Error saving audio and transcription:', error);
        }

        audioChunks = [];
        mediaRecorder.start();
    };
}
app/public/index.html
ADDED
@@ -0,0 +1,35 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Audio Recording and Translation</title>
    <link rel="stylesheet" href="styles.css">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">
    <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;500;700&display=swap">
</head>
<body>
    <div class="container">
        <header>
            <h1>Seamless Speech-to-Speech Translation with Voice Replication (S3TVR)</h1>
            <p class="description">S3TVR is an advanced AI cascaded framework designed for real-time speech-to-speech translation while maintaining the speaker's voice characteristics in a zero-shot fashion. This project balances latency and output quality, focusing on English and Spanish, and involves multiple open-source models and algorithms. The system is optimized for local execution, allowing for dynamic and efficient voice translation with an average latency of ~3 seconds per sentence. For the optimized model, check the GitHub repo below.</p>
            <p class="description">NOTE: The local execution is streamed and fully optimized (unlike this demo).</p>
            <div class="links">
                <a href="https://github.com/yalsaffar/S3TVR" target="_blank"><i class="fab fa-github"></i></a>
                <a href="https://yousifalsaffar.com/" target="_blank"><i class="fas fa-globe"></i></a>
                <a href="https://www.linkedin.com/in/yousif-alsaffar-7621b5142/" target="_blank"><i class="fab fa-linkedin"></i></a>
                <a href="https://huggingface.co/yalsaffar" target="_blank"><i class="fas fa-robot"></i></a>
            </div>
        </header>
        <div class="circle-button" id="record">
            <i class="fas fa-microphone"></i>
        </div>
        <p id="label">Press and hold until the sentence is no longer red</p>
        <p id="status"> </p>
        <div id="transcription" class="text-output"></div>
        <div id="translation" class="text-output"></div>
        <audio id="audio" controls></audio>
    </div>
    <script src="app.js"></script>
</body>
</html>
app/public/styles.css
ADDED
@@ -0,0 +1,96 @@
body {
    font-family: 'Roboto', sans-serif;
    display: flex;
    justify-content: center;
    align-items: center;
    height: 100vh;
    background-color: #f5f5f5;
    margin: 0;
    padding: 20px;
    box-sizing: border-box;
}

.container {
    text-align: center;
    max-width: 800px;
    width: 100%;
}

header {
    margin-bottom: 20px;
}

header h1 {
    font-size: 2em;
    font-weight: 700;
    margin-bottom: 10px;
}

header .description {
    font-size: 1.1em;
    font-weight: 400;
    color: #555;
    margin-bottom: 20px;
    line-height: 1.6;
}

.links {
    display: flex;
    justify-content: center;
    gap: 20px;
    margin-bottom: 20px;
}

.links a {
    color: #333;
    font-size: 1.5em;
    transition: color 0.3s;
}

.links a:hover {
    color: #ff4757;
}

.circle-button {
    width: 100px;
    height: 100px;
    background-color: #ff4757;
    border-radius: 50%;
    display: flex;
    justify-content: center;
    align-items: center;
    cursor: pointer;
    margin: 20px auto;
    transition: background-color 0.3s ease;
}

.circle-button:hover {
    background-color: #ff6b81;
}

.circle-button:active {
    background-color: #34c759;
}

.circle-button i {
    color: white;
    font-size: 2em;
}

.text-output {
    background-color: white;
    border-radius: 5px;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
    padding: 15px;
    margin: 10px auto;
    width: 80%;
    max-width: 500px;
    text-align: left;
    font-size: 1em;
    line-height: 1.5;
}

#status {
    font-weight: bold;
    margin-top: 10px;
}
app/server.js
ADDED
@@ -0,0 +1,102 @@
const express = require('express');
const multer = require('multer');
const path = require('path');
const fs = require('fs');
const { exec } = require('child_process');
const fetch = require('node-fetch');
const FormData = require('form-data');

const app = express();
const port = 3000;

const uploadsDir = path.join(__dirname, 'uploads');
if (!fs.existsSync(uploadsDir)) {
    fs.mkdirSync(uploadsDir);
}

const storage = multer.memoryStorage();
const upload = multer({ storage: storage });

app.use(express.static(path.join(__dirname, 'public')));
app.use(express.json());

const getNextFolderNumber = () => {
    const folders = fs.readdirSync(uploadsDir).filter(file => fs.statSync(path.join(uploadsDir, file)).isDirectory());
    const folderNumbers = folders.map(folder => parseInt(folder)).filter(num => !isNaN(num));
    return folderNumbers.length > 0 ? Math.max(...folderNumbers) + 1 : 1;
};

let sentenceIndex = 0;
let audioPaths = [];

app.post('/save-audio', upload.single('audio'), async (req, res) => {
    const nextFolderNumber = getNextFolderNumber();
    const folderPath = path.join(uploadsDir, nextFolderNumber.toString());
    if (!fs.existsSync(folderPath)) {
        fs.mkdirSync(folderPath, { recursive: true });
    }

    const rawAudioPath = path.join(folderPath, `audio_${sentenceIndex}.webm`);
    const wavAudioPath = path.join(folderPath, `audio_${sentenceIndex}.wav`);
    const transcriptionPath = path.join(folderPath, `transcription_${sentenceIndex}.txt`);

    fs.writeFileSync(rawAudioPath, req.file.buffer);

    fs.writeFileSync(transcriptionPath, req.body.transcript);

    const ffmpegCommand = `ffmpeg -i ${rawAudioPath} -ar 44100 -ac 1 ${wavAudioPath}`;
    exec(ffmpegCommand, async (error, stdout, stderr) => {
        if (error) {
            console.error(`Error converting audio to WAV: ${stderr}`);
            return res.status(500).send('Error converting audio to WAV');
        }

        fs.unlinkSync(rawAudioPath);

        const formData = new FormData();
        formData.append('original_path', fs.createReadStream(wavAudioPath));
        formData.append('text', req.body.transcript);
        formData.append('lang', 'en');
        formData.append('target_lang', 'es');

        try {
            const response = await fetch('http://localhost:8000/process-audio/', {
                method: 'POST',
                body: formData,
                headers: formData.getHeaders()
            });

            if (response.ok) {
                const result = await response.json();
                console.log(result);
                audioPaths.push(result.audio_path);
                sentenceIndex++;
                res.status(200).json({ audio_path: result.audio_path, translation: result.translation });
            } else {
                console.error('Failed to process the file via FastAPI');
                res.status(500).send('Failed to process the file via FastAPI');
            }
        } catch (error) {
            console.error('Error calling FastAPI:', error);
            res.status(500).send('Error calling FastAPI');
        }
    });
});

app.get('/concatenate-audio', (req, res) => {
    const folderPath = path.join(uploadsDir, getNextFolderNumber().toString());
    const finalAudioPath = path.join(folderPath, 'final_audio.wav');
    const concatCommand = `ffmpeg -y -i "concat:${audioPaths.join('|')}" -acodec copy ${finalAudioPath}`;
    exec(concatCommand, (concatError, concatStdout, concatStderr) => {
        if (concatError) {
            console.error(`Error concatenating audio files: ${concatStderr}`);
            return res.status(500).send('Error concatenating audio files');
        }

        res.status(200).json({ audio_path: finalAudioPath });
    });
});

app.listen(port, () => {
    console.log(`Server running at http://localhost:${port}`);
});
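server.js forwards each sentence to a FastAPI backend on port 8000 and only relies on the response containing `audio_path` and `translation`. A minimal sketch of the endpoint contract it assumes, inferred from the form fields above; the handler body is a placeholder, not the repo's actual app.py:

# Hypothetical sketch of the FastAPI endpoint that server.js posts to.
# Field names mirror the form data built in server.js; the processing is elided.
from fastapi import FastAPI, File, Form, UploadFile

api = FastAPI()

@api.post("/process-audio/")
async def process_audio(
    original_path: UploadFile = File(...),  # the converted WAV recording
    text: str = Form(...),                  # browser transcript of the sentence
    lang: str = Form(...),                  # source language, e.g. "en"
    target_lang: str = Form(...),           # target language, e.g. "es"
):
    # ... translate `text` and synthesize it in the speaker's voice ...
    return {"audio_path": "results/output.wav", "translation": "..."}  # keys server.js reads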
app/temp_wav_files/audio-1718725396714.wav
ADDED
Binary file (278 kB).
app/uploads/1/audio_2.wav
ADDED
Binary file (307 kB).
app/uploads/1/transcription_2.txt
ADDED
@@ -0,0 +1 @@
okay now we're still actually works
audio_segments/readme
ADDED
File without changes
inference_functions.py
ADDED
@@ -0,0 +1,80 @@
import time
import torch
import torchaudio
import noisereduce as nr
import numpy as np
from models.nllb import nllb_translate

def translate(model_nllb, tokenizer_nllb, text, target_lang):
    print("Processing translation...")
    start_time = time.time()
    translation = nllb_translate(model_nllb, tokenizer_nllb, text, target_lang)
    print("Translation:", translation)
    print("Translation time:", time.time() - start_time)
    return translation

def just_inference(model, original_path, output_dir, text, lang):
    print("Inference...")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    path_to_save = output_dir
    t0 = time.time()

    try:
        # Load the audio
        print("Loading audio...")
        wav, sr = torchaudio.load(original_path)
        print(f"Loaded audio with sample rate: {sr}")

        wav = wav.squeeze().numpy()
        print(f"Audio shape after squeezing: {wav.shape}")

        # Apply noise reduction
        print("Applying noise reduction...")
        reduced_noise_audio = nr.reduce_noise(y=wav, sr=sr)
        reduced_noise_audio = torch.tensor(reduced_noise_audio).unsqueeze(0)
        print(f"Reduced noise audio shape: {reduced_noise_audio.shape}")

        # Move the reduced noise audio to the correct device
        reduced_noise_audio = reduced_noise_audio.to(device)

        print("Getting conditioning latents...")
        gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[original_path])
        print("Got conditioning latents.")

        print("Starting inference stream...")
        chunks = model.inference_stream(
            text,
            lang,
            gpt_cond_latent,
            speaker_embedding,
            stream_chunk_size=15,
            speed=0.95
        )
        print("Inference stream started.")

        full_audio = torch.Tensor().to(device)
        for i, chunk in enumerate(chunks):
            try:
                if i == 1:
                    time_to_first_chunk = time.time() - t0
                    print(f"Time to first chunk: {time_to_first_chunk}")
                full_audio = torch.cat((full_audio, chunk.squeeze().to(device)), dim=-1)
                print(f"Processed chunk {i}, chunk shape: {chunk.shape}")
            except Exception as e:
                print(f"Error processing chunk {i}: {e}")
                raise

        # Move full_audio to CPU before saving
        full_audio = full_audio.cpu()

        print(f"Saving full audio to {path_to_save}...")
        torchaudio.save(path_to_save, full_audio.unsqueeze(0), 24000)
        print("Audio saved.")

        print("Inference finished")
        return full_audio

    except Exception as e:
        print(f"Error during processing: {e}")
        raise
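Together, translate and just_inference form the translate-then-synthesize half of the pipeline. A minimal chaining sketch, assuming an XTTS model loaded via models.TTS_utils.load_manual_xtts_v2 and the NLLB pair from models.nllb.nllb(); the paths are illustrative, and the language tags follow the usage seen elsewhere in this repo ("spanish" for NLLB, "es" for XTTS):

# Illustrative only: paths are made up; config paths match load_models.py below.
from models.nllb import nllb
from models.TTS_utils import load_manual_xtts_v2
from inference_functions import translate, just_inference

model_nllb, tokenizer_nllb = nllb()
xtts = load_manual_xtts_v2("test/config.json", "test")

spanish_text = translate(model_nllb, tokenizer_nllb, "hello can you hear me", "spanish")
# Clone the speaker's voice from the source recording and speak the translation:
just_inference(xtts, "app/uploads/1/audio_2.wav", "results/audio_2_es.wav", spanish_text, "es")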
load_models.py
ADDED
@@ -0,0 +1,18 @@
from models.nllb import nllb
#from models.parakeet import parakeet_ctc_model
model_nllb, tokenizer_nllb = nllb()

from models.TTS_utils import load_manual_xtts_v2


config_path = "test/config.json"
model_path = "test"

xtts_v2_model = load_manual_xtts_v2(config_path, model_path)


def get_nllb_model_and_tokenizer():
    return model_nllb, tokenizer_nllb

def get_xtts_model():
    return xtts_v2_model
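load_models.py builds each model once at import time; the getters expose those singletons so other modules reuse them instead of reloading. A usage sketch (the importing module is an assumption):

# Reuse the models loaded when load_models was first imported.
from load_models import get_nllb_model_and_tokenizer, get_xtts_model

model_nllb, tokenizer_nllb = get_nllb_model_and_tokenizer()
xtts_v2_model = get_xtts_model()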
main.ipynb
ADDED
@@ -0,0 +1,395 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\spn\\anaconda3\\envs\\capstone\\Lib\\site-packages\\torchvision\\io\\image.py:13: UserWarning: Failed to load image Python extension: '[WinError 127] The specified procedure could not be found'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n",
" warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-06-10 23:30:49,190] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2024-06-10 23:30:49,544] torch.distributed.elastic.multiprocessing.redirects: [WARNING] NOTE: Redirects are currently not supported in Windows or MacOs.\n",
"[NeMo W 2024-06-10 23:30:52 nemo_logging:393] Could not import NeMo NLP collection which is required for speech translation model.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[NeMo I 2024-06-10 23:31:08 nemo_logging:381] Tokenizer SentencePieceTokenizer initialized with 1024 tokens\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[NeMo W 2024-06-10 23:31:08 nemo_logging:393] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
" Train config : \n",
" manifest_filepath: /disk1/NVIDIA/datasets/LibriSpeech_NeMo/librivox-train-all.json\n",
" sample_rate: 16000\n",
" batch_size: 16\n",
" shuffle: true\n",
" num_workers: 8\n",
" pin_memory: true\n",
" use_start_end_token: false\n",
" trim_silence: false\n",
" max_duration: 16.7\n",
" min_duration: 0.1\n",
" is_tarred: false\n",
" tarred_audio_filepaths: null\n",
" shuffle_n: 2048\n",
" bucketing_strategy: fully_randomized\n",
" bucketing_batch_size: null\n",
" \n",
"[NeMo W 2024-06-10 23:31:08 nemo_logging:393] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). \n",
" Validation config : \n",
" manifest_filepath: /disk1/NVIDIA/datasets/LibriSpeech_NeMo/librivox-dev-clean.json\n",
" sample_rate: 16000\n",
" batch_size: 16\n",
" shuffle: false\n",
" use_start_end_token: false\n",
" num_workers: 8\n",
" pin_memory: true\n",
" \n",
"[NeMo W 2024-06-10 23:31:08 nemo_logging:393] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).\n",
" Test config : \n",
" manifest_filepath: null\n",
" sample_rate: 16000\n",
" batch_size: 16\n",
" shuffle: false\n",
" use_start_end_token: false\n",
" num_workers: 8\n",
" pin_memory: true\n",
" \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[NeMo I 2024-06-10 23:31:08 nemo_logging:381] PADDING: 0\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `method_cfg` is deprecated and will be removed in the future. Please use `measure_cfg` instead.\n",
"[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `measure_cfg` with the value of `method_cfg`.\n",
"[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `temperature` is deprecated and will be removed in the future. Please use `alpha` instead.\n",
"[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `alpha` with the value of `temperature`.\n",
"[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `method_cfg` is deprecated and will be removed in the future. Please use `measure_cfg` instead.\n",
"[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `measure_cfg` with the value of `method_cfg`.\n",
"[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `temperature` is deprecated and will be removed in the future. Please use `alpha` instead.\n",
"[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `alpha` with the value of `temperature`.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[NeMo I 2024-06-10 23:31:16 nemo_logging:381] Model EncDecCTCModelBPE was successfully restored from C:\\Users\\spn\\.cache\\huggingface\\hub\\models--nvidia--parakeet-ctc-0.6b\\snapshots\\097ffc5b027beabc73acb627def2d1d278e774e9\\parakeet-ctc-0.6b.nemo.\n"
]
}
],
"source": [
"from models.nllb import nllb\n",
"#from models.TTS_utils import xtts_v2\n",
"from models.parakeet import parakeet_ctc_model\n",
"from models.es_fastconformer import stt_es_model\n",
"model_nllb, tokenizer_nllb = nllb()\n",
"#xtts_v2_model = xtts_v2()\n",
"parakeet = parakeet_ctc_model()\n",
"#sst = stt_es_model()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing audio_segments\\segment_0.wav...\n",
"Processing segment...\n",
"0.021454915\n",
"Noise reduction done!\n",
"Noise removed. Time: 0.06042814254760742\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6909654da05f4b0a88458139a9b37d6d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Transcribing: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Transcription: hello can you hear me\n",
"Transcription time: 1.3255603313446045\n",
"Translating...\n",
"Processing translation...\n",
"Translation: Hola, ¿ me escuchas?\n",
"Translation time: 0.932790994644165\n",
"Writing audio_segments\\segment_1.wav...\n",
"Processing segment...\n",
"0.010297036\n",
"No speech detected.\n",
"Writing audio_segments\\segment_2.wav...\n",
"Processing segment...\n",
"0.006772096\n",
"No speech detected.\n",
"Writing audio_segments\\segment_3.wav...\n",
"Processing segment...\n",
"0.0034770737\n",
"No speech detected.\n",
"Writing audio_segments\\segment_4.wav...\n",
"Processing segment...\n",
"0.0039069764\n",
"No speech detected.\n",
"Writing audio_segments\\segment_5.wav...\n",
"Processing segment...\n",
"0.0046523036\n",
"No speech detected.\n",
"Writing audio_segments\\segment_6.wav...\n",
"Processing segment...\n",
"0.0040206155\n",
"No speech detected.\n",
"Writing audio_segments\\segment_7.wav...\n",
"Processing segment...\n",
"0.0043495107\n",
"No speech detected.\n",
"Writing audio_segments\\segment_8.wav...\n",
"Processing segment...\n",
"0.00421352\n",
"No speech detected.\n",
"Writing audio_segments\\segment_9.wav...\n",
"Processing segment...\n",
"0.0040656724\n",
"No speech detected.\n",
"Writing audio_segments\\segment_10.wav...\n",
"Processing segment...\n",
"0.0042125704\n",
"No speech detected.\n",
"Writing audio_segments\\segment_11.wav...\n",
"Processing segment...\n",
"0.015398192\n",
"Noise reduction done!\n",
"Noise removed. Time: 0.020929336547851562\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "de3d4b3a7bc14de2afbb01ff82252dc2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Transcribing: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from stream_VAD import stream\n",
"stream(parakeet, model_nllb, tokenizer_nllb, \"english\", \"spanish\", 'record_temp.json', 'record_per.json')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fdc0440dfcaf4c9f814689fc47c10e3e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"(…)tt_es_fastconformer_hybrid_large_pc.nemo: 0%| | 0.00/459M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[NeMo I 2024-04-12 16:10:09 nemo_logging:381] Tokenizer SentencePieceTokenizer initialized with 1024 tokens\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[NeMo W 2024-04-12 16:10:10 nemo_logging:393] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
" Train config : \n",
" manifest_filepath: null\n",
" sample_rate: 16000\n",
" batch_size: 16\n",
" shuffle: true\n",
" num_workers: 8\n",
" pin_memory: true\n",
" use_start_end_token: false\n",
" trim_silence: false\n",
" max_duration: 20\n",
" min_duration: 0.1\n",
" is_tarred: false\n",
" tarred_audio_filepaths: null\n",
" shuffle_n: 2048\n",
" bucketing_strategy: fully_randomized\n",
" bucketing_batch_size: null\n",
" is_concat: false\n",
" concat_sampling_technique: random\n",
" concat_sampling_probabilities: ''\n",
" \n",
"[NeMo W 2024-04-12 16:10:10 nemo_logging:393] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). \n",
" Validation config : \n",
" manifest_filepath: null\n",
" sample_rate: 16000\n",
" batch_size: 32\n",
" shuffle: false\n",
" num_workers: 8\n",
" pin_memory: true\n",
" use_start_end_token: false\n",
" is_concat: true\n",
" concat_sampling_technique: random\n",
" concat_sampling_probabilities:\n",
" - 0.099\n",
" - 0.2771\n",
" - 0.5482\n",
" - 0.0757\n",
" concat_shuffle: false\n",
" concat_sampling_seed: 1234\n",
" max_duration: 20\n",
" \n",
"[NeMo W 2024-04-12 16:10:10 nemo_logging:393] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).\n",
" Test config : \n",
" manifest_filepath: null\n",
" sample_rate: 16000\n",
" batch_size: 16\n",
" shuffle: false\n",
" num_workers: 8\n",
" pin_memory: true\n",
" use_start_end_token: false\n",
" \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[NeMo I 2024-04-12 16:10:10 nemo_logging:381] PADDING: 0\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[NeMo W 2024-04-12 16:10:11 nemo_logging:393] c:\\Users\\spn\\anaconda3\\envs\\capstone\\Lib\\site-packages\\torch\\nn\\modules\\rnn.py:83: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.2 and num_layers=1\n",
" warnings.warn(\"dropout option adds dropout after all but last \"\n",
" \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[NeMo I 2024-04-12 16:10:11 nemo_logging:381] Using RNNT Loss : warprnnt_numba\n",
" Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}\n",
"[NeMo I 2024-04-12 16:10:12 nemo_logging:381] Model EncDecHybridRNNTCTCBPEModel was successfully restored from C:\\Users\\spn\\.cache\\huggingface\\hub\\models--nvidia--stt_es_fastconformer_hybrid_large_pc\\snapshots\\65f775445d5947d6784c3e80d9a14d859571947f\\stt_es_fastconformer_hybrid_large_pc.nemo.\n"
]
}
],
"source": [
"from models.es_fastconformer import stt_es_model\n",
"model = stt_es_model()\n",
"# check how much memory is used by the model\n",
"import torch\n",
"import psutil\n",
"import os\n",
"import time\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model size: 458.86 MB\n"
]
}
],
"source": [
"# get the size of the model in terms of memory in MB\n",
"def get_size(model):\n",
"    torch.save(model.state_dict(), 'temp.p')\n",
"    size = os.path.getsize('temp.p') / 1e6\n",
"    os.remove('temp.p')\n",
"    return size\n",
"size = get_size(model)\n",
"print(f\"Model size: {size:.2f} MB\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "capstone",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
main.py
ADDED
@@ -0,0 +1,79 @@
import streamlit as st
from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
import av
import numpy as np
import pydub
from io import BytesIO
from models.nllb import nllb
from models.parakeet import parakeet_ctc_model
from stream_VAD import stream
from models.es_fastconformer import stt_es_model

RTC_CONFIGURATION = RTCConfiguration({"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]})

# Load models once
model_nllb, tokenizer_nllb = nllb()
parakeet = parakeet_ctc_model()
stt_model = stt_es_model()

def process_audio(audio_chunk, language):
    # Convert audio chunk to pydub.AudioSegment
    audio_segment = pydub.AudioSegment(
        data=audio_chunk.tobytes(),
        sample_width=audio_chunk.format.sample_width,
        frame_rate=audio_chunk.sample_rate,
        channels=len(audio_chunk.layout.channels)
    )

    # Process audio based on selected language
    if language == "en":
        processed_audio = stream(parakeet, model_nllb, tokenizer_nllb, "english", "spanish", audio_segment)
    elif language == "es":
        processed_audio = stream(stt_model, model_nllb, tokenizer_nllb, "spanish", "english", audio_segment)
    else:
        return audio_chunk

    # Convert processed audio back to numpy array
    processed_audio_np = np.array(processed_audio.get_array_of_samples())

    return processed_audio.frame_rate, processed_audio_np

def audio_callback(frame: av.AudioFrame, language):
    audio_data = frame.to_ndarray()
    audio_chunk = av.AudioFrame.from_ndarray(audio_data, format="s16", layout="mono")
    return process_audio(audio_chunk, language)

st.title("Real-Time Audio Processing")

language = st.radio("Select Language", ["en", "es"], index=0)

webrtc_ctx = webrtc_streamer(
    key="audio",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration=RTC_CONFIGURATION,
    media_stream_constraints={"audio": True, "video": False},
    audio_receiver_size=256,
    async_processing=True,
)

if webrtc_ctx.audio_receiver:
    webrtc_ctx.audio_receiver.on("data", lambda frame: audio_callback(frame, language))

if "audio_buffer" not in st.session_state:
    st.session_state["audio_buffer"] = BytesIO()

if webrtc_ctx.audio_receiver:
    audio_frames = webrtc_ctx.audio_receiver.get_frames()

    for frame in audio_frames:
        processed_audio_rate, processed_audio_np = audio_callback(frame, language)

        audio_segment = pydub.AudioSegment(
            data=processed_audio_np.tobytes(),
            sample_width=processed_audio_np.dtype.itemsize,
            frame_rate=processed_audio_rate,
            channels=1
        )
        st.session_state["audio_buffer"].write(audio_segment.export(format="wav").read())

st.audio(st.session_state["audio_buffer"].getvalue(), format="audio/wav")
main_stream.ipynb
ADDED
@@ -0,0 +1,87 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-06-25 20:01:43,998] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2024-06-25 20:01:44,318] torch.distributed.elastic.multiprocessing.redirects: [WARNING] NOTE: Redirects are currently not supported in Windows or MacOs.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model...\n",
"[2024-06-25 20:02:01,663] [INFO] [logging.py:96:log_dist] [Rank -1] DeepSpeed info: version=0.14.0+ce78a63, git-hash=ce78a63, git-branch=master\n",
"[2024-06-25 20:02:01,664] [WARNING] [config_utils.py:69:_process_deprecated_field] Config parameter replace_method is deprecated. This parameter is no longer needed, please remove from your call to DeepSpeed-inference\n",
"[2024-06-25 20:02:01,665] [WARNING] [config_utils.py:69:_process_deprecated_field] Config parameter mp_size is deprecated use tensor_parallel.tp_size instead\n",
"[2024-06-25 20:02:01,666] [INFO] [logging.py:96:log_dist] [Rank -1] quantize_bits = 8 mlp_extra_grouping = False, quantize_groups = 1\n",
"[2024-06-25 20:02:01,900] [INFO] [logging.py:96:log_dist] [Rank -1] DeepSpeed-Inference config: {'layer_id': 0, 'hidden_size': 1024, 'intermediate_size': 4096, 'heads': 16, 'num_hidden_layers': -1, 'dtype': torch.float32, 'pre_layer_norm': True, 'norm_type': <NormType.LayerNorm: 1>, 'local_rank': -1, 'stochastic_mode': False, 'epsilon': 1e-05, 'mp_size': 1, 'scale_attention': True, 'triangular_masking': True, 'local_attention': False, 'window_size': 1, 'rotary_dim': -1, 'rotate_half': False, 'rotate_every_two': True, 'return_tuple': True, 'mlp_after_attn': True, 'mlp_act_func_type': <ActivationFuncType.GELU: 1>, 'specialized_mode': False, 'training_mp_size': 1, 'bigscience_bloom': False, 'max_out_tokens': 1024, 'min_out_tokens': 1, 'scale_attn_by_inverse_layer_idx': False, 'enable_qkv_quantization': False, 'use_mup': False, 'return_single_tuple': False, 'set_empty_params': False, 'transposed_mode': False, 'use_triton': False, 'triton_autotune': False, 'num_kv': -1, 'rope_theta': 10000}\n"
]
}
],
"source": [
"from models.TTS_utils import load_manual_xtts_v2\n",
"config_path = \"test/config.json\"\n",
"model_path = \"test\"\n",
"\n",
"xtts_v2_model = load_manual_xtts_v2(config_path, model_path)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Inference...\n",
"No more text to process\n",
"Inference...\n",
"No more text to process\n"
]
}
],
"source": [
"from models.TTS_utils import stream_prod\n",
"stream_prod(xtts_v2_model, \"record_temp.json\", \"audio_segments/\")\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "capstone",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
models/TTS_utils.py
ADDED
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from TTS.api import TTS
|
3 |
+
import time
|
4 |
+
import torchaudio
|
5 |
+
from TTS.tts.configs.xtts_config import XttsConfig
|
6 |
+
from TTS.tts.models.xtts import Xtts
|
7 |
+
import sounddevice as sd
|
8 |
+
|
9 |
+
|
10 |
+
def xtts_v2():
|
11 |
+
"""
|
12 |
+
Load and return the XTTS v2 model.
|
13 |
+
|
14 |
+
This function initializes the XTTS v2 model from the 🐸TTS library.
|
15 |
+
The model is configured to use a GPU if available, otherwise it defaults to CPU.
|
16 |
+
|
17 |
+
Returns:
|
18 |
+
TTS: The initialized XTTS v2 model.
|
19 |
+
|
20 |
+
Example usage:
|
21 |
+
tts = xtts_v2()
|
22 |
+
"""
|
23 |
+
# Get device
|
24 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
25 |
+
|
26 |
+
# List available 🐸TTS models
|
27 |
+
# print(TTS().list_models())
|
28 |
+
|
29 |
+
# Init TTS
|
30 |
+
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
31 |
+
|
32 |
+
|
33 |
+
return tts
|
34 |
+
|
35 |
+
def load_manual_xtts_v2(config_path, checkpoint_path):
|
36 |
+
"""
|
37 |
+
Load the XTTS v2 model manually with configuration and checkpoint files.
|
38 |
+
|
39 |
+
Args:
|
40 |
+
config_path (str): Path to the configuration file.
|
41 |
+
Example: "path/to/config.json"
|
42 |
+
checkpoint_path (str): Path to the checkpoint directory.
|
43 |
+
Example: "path/to/checkpoint/"
|
44 |
+
|
45 |
+
Returns:
|
46 |
+
Xtts: The loaded XTTS v2 model.
|
47 |
+
|
48 |
+
Example usage:
|
49 |
+
model = load_manual_xtts_v2("config.json", "checkpoint/")
|
50 |
+
"""
|
51 |
+
print("Loading model...")
|
52 |
+
config = XttsConfig()
|
53 |
+
config.load_json(config_path)
|
54 |
+
model = Xtts.init_from_config(config)
|
55 |
+
model.load_checkpoint(config, checkpoint_dir=checkpoint_path, use_deepspeed=True)
|
56 |
+
model.cuda()
|
57 |
+
|
58 |
+
return model
|
59 |
+
|
60 |
+
import json
|
61 |
+
import concurrent.futures
|
62 |
+
|
63 |
+
# ----------------- StreamXTTSV2 -----------------
|
64 |
+
def get_text_order(json_path, num_elements, ):
|
65 |
+
"""
|
66 |
+
Retrieve a specified number of text elements from a JSON file and update the file.
|
67 |
+
|
68 |
+
Args:
|
69 |
+
json_path (str): Path to the JSON file.
|
70 |
+
Example: "path/to/data.json"
|
71 |
+
num_elements (int): Number of elements to retrieve.
|
72 |
+
Example: 3
|
73 |
+
|
74 |
+
Returns:
|
75 |
+
list: A list of tuples containing text, order, original_path, path_to_save, and language.
|
76 |
+
|
77 |
+
Example usage:
|
78 |
+
text_order = get_text_order("data.json", 3)
|
79 |
+
"""
|
80 |
+
with open(json_path) as f:
|
81 |
+
data = json.load(f)
|
82 |
+
# check if the data is empty
|
83 |
+
if not data['text']:
|
84 |
+
return "No more text to process"
|
85 |
+
if len(data['text']) < num_elements:
|
86 |
+
num_elements = len(data['text'])
|
87 |
+
text = data['text'][:num_elements]
|
88 |
+
order = data['order'][:num_elements]
|
89 |
+
original_path = data['original_path'][:num_elements]
|
90 |
+
path_to_save = data['path_to_save'][:num_elements]
|
91 |
+
language = data['language'][:num_elements]
|
92 |
+
# remove the first elements
|
93 |
+
data['text'] = data['text'][num_elements:]
|
94 |
+
data['order'] = data['order'][num_elements:]
|
95 |
+
data['original_path'] = data['original_path'][num_elements:]
|
96 |
+
data['path_to_save'] = data['path_to_save'][num_elements:]
|
97 |
+
data['language'] = data['language'][num_elements:]
|
98 |
+
data['original_text'] = data['original_text'][num_elements:]
|
99 |
+
# write the data back to the file
|
100 |
+
with open(json_path, 'w') as f:
|
101 |
+
json.dump(data, f)
|
102 |
+
# make it return an array of arrays of text and order
|
103 |
+
result = [i for i in zip(text, order, original_path, path_to_save, language)]
|
104 |
+
return result
|
105 |
+
|
106 |
+
def append_text_order(json_path, text, order, original_path, path_to_save, language, original_text=None):
|
107 |
+
"""
|
108 |
+
Append a text order to a JSON file.
|
109 |
+
|
110 |
+
Args:
|
111 |
+
json_path (str): Path to the JSON file.
|
112 |
+
Example: "path/to/data.json"
|
113 |
+
text (str): The text to append.
|
114 |
+
Example: "Hello, world!"
|
115 |
+
order (int): The order index.
|
116 |
+
Example: 1
|
117 |
+
original_path (str): Path to the original file.
|
118 |
+
Example: "path/to/original.wav"
|
119 |
+
path_to_save (str): Path to save the processed file.
|
120 |
+
Example: "path/to/save.wav"
|
121 |
+
language (str): Language of the text.
|
122 |
+
Example: "en"
|
123 |
+
original_text (str, optional): The original text if available.
|
124 |
+
Example: "Hola, mundo!"
|
125 |
+
|
126 |
+
Example usage:
|
127 |
+
append_text_order("data.json", "Hello, world!", 1, "original.wav", "save.wav", "en", "Hola, mundo!")
|
128 |
+
"""
|
129 |
+
with open(json_path) as f:
|
130 |
+
data = json.load(f)
|
131 |
+
data['text'].append(text)
|
132 |
+
data['order'].append(order)
|
133 |
+
data['original_path'].append(original_path)
|
134 |
+
data['path_to_save'].append(path_to_save)
|
135 |
+
data['language'].append(language)
|
136 |
+
data['original_text'].append(original_text)
|
137 |
+
with open(json_path, 'w') as f:
|
138 |
+
json.dump(data, f)
|
139 |
+
# ----------------- StreamXTTSV2 -----------------
|
140 |
+
class StreamXTTSV2:
|
141 |
+
"""
|
142 |
+
A class to handle streaming TTS using XTTS v2 model.
|
143 |
+
|
144 |
+
Args:
|
145 |
+
model (Xtts): The XTTS v2 model.
|
146 |
+
sample_rate (int, optional): The sample rate for audio playback. Default is 24000.
|
147 |
+
buffer_size (int, optional): The buffer size for audio playback. Default is 2.
|
148 |
+
"""
|
149 |
+
def __init__(self, model, sample_rate=24000, buffer_size=2):
|
150 |
+
self.model = model
|
151 |
+
#self.gpt_cond_latent = gpt_cond_latent
|
152 |
+
#self.speaker_embedding = speaker_embedding
|
153 |
+
self.sample_rate = sample_rate
|
154 |
+
self.buffer_size = buffer_size
|
155 |
+
self.speed = 0.95
|
156 |
+
self.stream_chunk_size = 40
|
157 |
+
self.buffer = torch.Tensor().to('cpu')
|
158 |
+
self.chunk_save = torch.Tensor().to('cpu')
|
159 |
+
self.is_playing = False
|
160 |
+
self.tasks_order = []
|
161 |
+
self.order = 0
|
162 |
+
self.initial = True
|
163 |
+
|
164 |
+
def chunk_callback(self, chunk, i, output_dir, order):
|
165 |
+
"""
|
166 |
+
Callback function to handle each chunk of audio during streaming.
|
167 |
+
|
168 |
+
Args:
|
169 |
+
chunk (torch.Tensor): The audio chunk.
|
170 |
+
Example: tensor([0.1, 0.2, 0.3])
|
171 |
+
i (int): The chunk index.
|
172 |
+
Example: 1
|
173 |
+
output_dir (str): Directory to save the chunk.
|
174 |
+
Example: "output/"
|
175 |
+
order (int): The order index.
|
176 |
+
Example: 1
|
177 |
+
"""
|
178 |
+
# Accumulate chunk into buffer
|
179 |
+
self.buffer = torch.cat((self.buffer, chunk.squeeze().to('cpu')), dim=-1)
|
180 |
+
self.chunk_save = torch.cat((self.chunk_save, chunk.squeeze().to('cpu')), dim=-1)
|
181 |
+
chunk_filename = output_dir + f"chunk_{i}_{order}.wav"
|
182 |
+
print(self.sample_rate)
|
183 |
+
torchaudio.save(chunk_filename, self.chunk_save.unsqueeze(0), self.sample_rate)
|
184 |
+
print(f"Chunk saved as {chunk_filename}")
|
185 |
+
self.chunk_save = torch.Tensor().to('cpu')
|
186 |
+
|
187 |
+
# Check if buffer has enough chunks to start playing
|
188 |
+
if not self.is_playing and len(self.buffer) >= self.buffer_size:
|
189 |
+
self.start_playback()
|
190 |
+
|
191 |
+
def start_playback(self):
|
192 |
+
"""Start audio playback."""
|
193 |
+
self.is_playing = True
|
194 |
+
sd.play(self.buffer.numpy(), self.sample_rate, blocking=False)
|
195 |
+
self.buffer = torch.Tensor().to('cpu') # Reset buffer after starting playback
|
196 |
+
|
197 |
+
def play(self, chunks, output_dir, path_to_save, order):
|
198 |
+
"""
|
199 |
+
Play the audio chunks and save the complete audio.
|
200 |
+
|
201 |
+
Args:
|
202 |
+
chunks (list): List of audio chunks.
|
203 |
+
Example: [tensor([0.1, 0.2, 0.3]), tensor([0.4, 0.5, 0.6])]
|
204 |
+
output_dir (str): Directory to save the chunks.
|
205 |
+
Example: "output/"
|
206 |
+
path_to_save (str): Path to save the complete audio file.
|
207 |
+
Example: "output/complete.wav"
|
208 |
+
order (int): The order index.
|
209 |
+
Example: 1
|
210 |
+
"""
|
211 |
+
t0 = time.time()
|
212 |
+
|
213 |
+
|
        for i, chunk in enumerate(chunks):
            #print(chunk)
            if i == 0:
                print(f"Time to first chunk: {time.time() - t0}")
            print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
            self.chunk_callback(chunk, i, output_dir, order)

        # Ensure all remaining audio is played
        while sd.get_stream().active:
            time.sleep(0.1)
        if len(self.buffer) > 0:
            sd.play(self.buffer.numpy(), self.sample_rate, blocking=True)

        # Save the complete audio to a file
        torchaudio.save(path_to_save, self.buffer.unsqueeze(0), self.sample_rate)
        print(f"Total audio length: {self.buffer.shape[-1]}")
        print("Audio playback finished.")
        #self.order += 1


    def inference_and_play(self, json_path, output_dir):
        """
        Perform inference and play the generated audio.

        Args:
            json_path (str): Path to the JSON file containing text orders.
                Example: "path/to/data.json"
            output_dir (str): Directory to save the chunks.
                Example: "output/"
        """
        print("Inference...")

        # Fetch the next batch of text orders (three at a time)
        self.texts = get_text_order(json_path, 3)

        if self.texts == "No more text to process":
            print("No more text to process")
            return
        if self.texts == "Not enough text to process":
            print("Not enough text to process")
            return
        # get_text_order returns a list of (text, order, audio_path, save_path, lang) entries
        if self.texts is not None:
            #print(self.texts)
            self.gpt_cond_latent, self.speaker_embedding = self.model.get_conditioning_latents(audio_path=[self.texts[0][2]])
            path_to_save = self.texts[0][3]
            #print(self.gpt_cond_latent, self.speaker_embedding)

            with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
                futures = []
                print(self.texts)

                for text, i, path_a, path_s, lang in self.texts:
                    print(f"Processing text {i}: {text}")
                    print(f"Processing text {i}: {lang}")
                    future = executor.submit(self.model.inference_stream, text, lang, self.gpt_cond_latent, self.speaker_embedding, stream_chunk_size=self.stream_chunk_size, speed=self.speed)
                    futures.append(future)

                for future, text in zip(futures, self.texts):
                    chunks = future.result()
                    print(text[1])
                    self.play(chunks, output_dir, path_to_save, text[1])
                    self.buffer = torch.Tensor().to('cpu')

        # Keep polling the JSON file for newly appended text orders
        self.inference_and_play(json_path, output_dir)


def stream_prod(model, json_path, directory_path):
    """
    Stream production function for XTTS v2.

    Args:
        model (Xtts): The XTTS v2 model.
            Example: model = load_manual_xtts_v2("config.json", "checkpoint/")
        json_path (str): Path to the JSON file containing text orders.
            Example: "path/to/data.json"
        directory_path (str): Directory to save the chunks.
            Example: "output/"
    """
    streamer = StreamXTTSV2(model, buffer_size=2)
    results = streamer.inference_and_play(json_path, directory_path)
    if results is None:
        # inference_and_play returns None when it runs out of work; pause, then poll again
        time.sleep(3)
        stream_prod(model, json_path, directory_path)
    return "Streaming finished"


def just_inference(model, original_path, output_dir, text, lang, order):
    """
    Perform inference and save the generated audio.

    Args:
        model (Xtts): The XTTS v2 model.
            Example: model = load_manual_xtts_v2("config.json", "checkpoint/")
        original_path (str): Path to the original audio file.
            Example: "path/to/original.wav"
        output_dir (str): Directory to save the generated audio file.
            Example: "output/"
        text (str): The text to be synthesized.
            Example: "Hello, world!"
        lang (str): The language of the text.
            Example: "en"
        order (int): The order index.
            Example: 1

    Returns:
        tuple: A tuple containing the path to the saved audio file and the time to first chunk.
            Example: ("output/complete.wav", 1.23)
    """
    print("Inference...")
    path_to_save = output_dir
    t0 = time.time()
    gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[original_path])

    chunks = model.inference_stream(
        text,
        lang,
        gpt_cond_latent,
        speaker_embedding,
        stream_chunk_size=15,
        speed=0.95
        #temperature=0.1,
        #enable_text_splitting=True,
    )
    full_audio = torch.Tensor().to('cpu')
    wav_chunks = []
    time_to_first_chunk = None  # set once the first chunk arrives
    for i, chunk in enumerate(chunks):
        if i == 0:  # measure latency to the first generated chunk
            time_to_first_chunk = time.time() - t0
            print(f"Time to first chunk: {time_to_first_chunk}")
        print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
        wav_chunks.append(chunk)
        full_audio = torch.cat((full_audio, chunk.squeeze().to('cpu')), dim=-1)

    # Save the complete audio to a file
    torchaudio.save(path_to_save, full_audio.unsqueeze(0), 24000)

    print("Inference finished")
    return path_to_save, time_to_first_chunk

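For a quick sanity check, just_inference can be driven end to end along these lines (a sketch only; the config, checkpoint, and speaker WAV paths are placeholders):

    # Hypothetical smoke test for just_inference; all paths are placeholders.
    from models.TTS_utils import load_manual_xtts_v2, just_inference

    model = load_manual_xtts_v2("config.json", "checkpoint/")
    path, ttfc = just_inference(
        model,
        original_path="speaker.wav",        # reference voice for conditioning
        output_dir="results/complete.wav",  # note: used as the output *file* path
        text="Hello, world!",
        lang="en",
        order=0,
    )
    print(path, ttfc)
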
models/__init__.py
ADDED
File without changes

models/__pycache__/TTS_utils.cpython-311.pyc
ADDED
Binary file (18.2 kB)

models/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (173 Bytes)

models/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (155 Bytes)

models/__pycache__/es_fastconformer.cpython-311.pyc
ADDED
Binary file (1.98 kB)

models/__pycache__/nllb.cpython-311.pyc
ADDED
Binary file (3.84 kB)

models/__pycache__/nllb.cpython-38.pyc
ADDED
Binary file (2.49 kB)

models/__pycache__/noise_red.cpython-311.pyc
ADDED
Binary file (1.3 kB)

models/__pycache__/parakeet.cpython-311.pyc
ADDED
Binary file (2.08 kB)

models/__pycache__/parakeet.cpython-38.pyc
ADDED
Binary file (1.69 kB)
models/es_fastconformer.py
ADDED
@@ -0,0 +1,37 @@
import nemo.collections.asr as nemo_asr
import torch

def stt_es_model():
    """
    Load and return the pre-trained Spanish ASR model.

    This function loads the pre-trained EncDecCTCModelBPE model from NVIDIA's NeMo collection.
    The model is configured to use a GPU if available; otherwise it defaults to CPU.

    Returns:
        nemo_asr.models.EncDecCTCModelBPE: The loaded ASR model.

    Example usage:
        asr_model = stt_es_model()
    """
    # Load the pre-trained model
    asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained("nvidia/stt_es_fastconformer_hybrid_large_pc")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    asr_model = asr_model.to(device)
    return asr_model

def stt_es_process(asr_model, audio_file):
    """
    Transcribe an audio file using the given ASR model.

    Args:
        asr_model (nemo_asr.models.EncDecCTCModelBPE): The ASR model to use for transcription.
            Example: asr_model = stt_es_model()
        audio_file (str): Path to the audio file to be transcribed.
            Example: "path/to/audio_file.wav"

    Returns:
        list: A list containing the transcribed text.
            Example: ["transcribed text"]
    """
    text = asr_model.transcribe(paths2audio_files=[audio_file], batch_size=1)
    return text

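A minimal sketch of the Spanish ASR wrapper in use (the WAV path is a placeholder; the stream below records 16 kHz mono WAVs, which is what this model consumes here):

    # Placeholder path; transcribe() returns a list with one entry per input file.
    from models.es_fastconformer import stt_es_model, stt_es_process

    asr_model = stt_es_model()
    text = stt_es_process(asr_model, "audio_segments/segment_0.wav")
    print(text[0])
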
models/nllb.py
ADDED
@@ -0,0 +1,72 @@
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

def nllb():
    """
    Load and return the NLLB (No Language Left Behind) model and tokenizer.

    This function loads the NLLB-200-distilled-1.3B model and tokenizer from Hugging Face's Transformers library.
    The model is currently pinned to CPU; the CUDA device selection is left commented out.

    Returns:
        tuple: A tuple containing the loaded model and tokenizer.
            - model (transformers.AutoModelForSeq2SeqLM): The loaded NLLB model.
            - tokenizer (transformers.AutoTokenizer): The loaded tokenizer.

    Example usage:
        model, tokenizer = nllb()
    """
    #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = torch.device("cpu")
    # Load the tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-1.3B")
    model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-1.3B").to(device)
    # Signal readiness by writing "done" to status.txt
    with open("status.txt", 'w') as f:
        f.write("done")
    return model, tokenizer

def nllb_translate(model, tokenizer, article, language):
    """
    Translate an article using the NLLB model and tokenizer.

    Args:
        model (transformers.AutoModelForSeq2SeqLM): The NLLB model to use for translation.
            Example: model, tokenizer = nllb()
        tokenizer (transformers.AutoTokenizer): The tokenizer to use with the NLLB model.
            Example: model, tokenizer = nllb()
        article (str): The article text to be translated.
            Example: "This is a sample article."
        language (str): The target language for translation. Must be either 'es' or 'en'.
            Example: "es"

    Returns:
        str: The translated text.
            Example: "Este es un artículo de muestra."
    """
    try:
        # Tokenize the text
        inputs = tokenizer(article, return_tensors="pt")

        # Move the tokenized inputs to the same device as the model
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        if language == "es":
            translated_tokens = model.generate(
                **inputs, forced_bos_token_id=tokenizer.lang_code_to_id["spa_Latn"], max_length=30
            )
        elif language == "en":
            translated_tokens = model.generate(
                **inputs, forced_bos_token_id=tokenizer.lang_code_to_id["eng_Latn"], max_length=30
            )
        else:
            raise ValueError("Unsupported language. Use 'es' or 'en'.")

        # Decode the translation
        text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
        return text

    except Exception as e:
        print(f"Error during translation: {e}")
        return "Translation failed"

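A quick way to exercise the translation helper (the sentence is illustrative; note that max_length=30 will truncate long outputs):

    # Illustrative only: English -> Spanish through nllb_translate.
    from models.nllb import nllb, nllb_translate

    model, tokenizer = nllb()  # loads on CPU and writes "done" to status.txt
    print(nllb_translate(model, tokenizer, "hello, how are you?", "es"))
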
models/noise_red.py
ADDED
@@ -0,0 +1,28 @@
from scipy.io import wavfile
import noisereduce as nr

def noise_reduction(path, new_path):
    """
    Perform noise reduction on an audio file and save the output.

    This function reads an audio file from the given path, performs noise reduction using the noisereduce library,
    and saves the processed audio to a new file.

    Args:
        path (str): Path to the input audio file.
            Example: "path/to/input_audio.wav"
        new_path (str): Path to save the processed audio file.
            Example: "path/to/output_audio.wav"

    Returns:
        None

    Example usage:
        noise_reduction("input.wav", "output.wav")
    """
    rate, data = wavfile.read(path)
    # Perform noise reduction
    reduced_noise = nr.reduce_noise(y=data, sr=rate)
    wavfile.write(new_path, rate, reduced_noise)
    print("Noise reduction done!")

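In stream_VAD.py this helper is called with the same path for input and output, denoising each captured segment in place:

    # In-place denoise of a captured segment (the path is illustrative).
    from models.noise_red import noise_reduction

    noise_reduction("audio_segments/segment_0.wav", "audio_segments/segment_0.wav")
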
models/parakeet.py
ADDED
@@ -0,0 +1,43 @@
import nemo.collections.asr as nemo_asr
import torch


def parakeet_ctc_model():
    """
    Load and return the pre-trained Parakeet CTC model.

    This function loads the pre-trained EncDecCTCModelBPE model from NVIDIA's NeMo collection.
    The model is configured to use a GPU if available; otherwise it defaults to CPU.

    Returns:
        nemo_asr.models.EncDecCTCModelBPE: The loaded ASR model.

    Example usage:
        asr_model = parakeet_ctc_model()
    """
    # Load the pre-trained model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained("nvidia/parakeet-ctc-0.6b")
    asr_model = asr_model.to(device)
    return asr_model

def parakeet_ctc_process(asr_model, audio_file):
    """
    Transcribe an audio file using the given Parakeet CTC ASR model.

    Args:
        asr_model (nemo_asr.models.EncDecCTCModelBPE): The ASR model to use for transcription.
            Example: asr_model = parakeet_ctc_model()
        audio_file (str): Path to the audio file to be transcribed.
            Example: "path/to/audio_file.wav"

    Returns:
        list: A list containing the transcribed text.
            Example: ["transcribed text"]

    Example usage:
        text = parakeet_ctc_process(asr_model, "path/to/audio_file.wav")
    """
    text = asr_model.transcribe(paths2audio_files=[audio_file], batch_size=1)

    return text

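The English wrapper mirrors the Spanish one; a minimal sketch (placeholder path):

    # Placeholder path; transcribe() returns a list with one entry per input file.
    from models.parakeet import parakeet_ctc_model, parakeet_ctc_process

    asr_model = parakeet_ctc_model()
    print(parakeet_ctc_process(asr_model, "audio_segments/segment_0.wav")[0])
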
models/status.txt
ADDED
@@ -0,0 +1 @@
done

record_per.json
ADDED
@@ -0,0 +1 @@
{"text": ["Hola, \u00bf c\u00f3mo est\u00e1s?", "Est\u00e1 bien, intent\u00e9moslo de nuevo.", "As\u00ed que este modelo debe capturar esto y traducir inmediatamente", "As\u00ed que este modelo debe capturar esto y traducir inmediatamente", "Hola, \u00bf qu\u00e9 pasa?", "Hola, \u00bf qu\u00e9 pasa?", "Est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien."], "original_path": ["audio-transcription/uploads/5\\audio.wav", "audio-transcription/uploads/1\\audio.wav", "audio-transcription/uploads/2\\audio.wav", "audio-transcription/uploads/3\\audio.wav", "audio-transcription/uploads/4\\audio.wav", "audio-transcription/uploads/5\\audio.wav", "audio-transcription/uploads/6\\audio.wav"], "order": [0, 0, 0, 0, 0, 0, 0], "path_to_save": ["results", "results", "results", "results", "results/", "results/", "results/"], "language": ["es", "es", "es", "es", "es", "es", "es"], "original_text": ["hello how are you", "okay let's try it again", " so this model should capture this and translate right away", " so this model should capture this and translate right away", "hello", "hello", "okay okay okay okay okay okay okay okay okay okay okay okay okay okay"]}

record_temp.json
ADDED
@@ -0,0 +1 @@
{"text": ["Est\u00e1 bien, intent\u00e9moslo de nuevo.", "As\u00ed que este modelo debe capturar esto y traducir inmediatamente", "As\u00ed que este modelo debe capturar esto y traducir inmediatamente", "Hola, \u00bf qu\u00e9 pasa?", "Hola, \u00bf qu\u00e9 pasa?", "Est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien."], "original_path": ["audio-transcription/uploads/1\\audio.wav", "audio-transcription/uploads/2\\audio.wav", "audio-transcription/uploads/3\\audio.wav", "audio-transcription/uploads/4\\audio.wav", "audio-transcription/uploads/5\\audio.wav", "audio-transcription/uploads/6\\audio.wav"], "order": [0, 0, 0, 0, 0, 0], "path_to_save": ["results/", "results/", "results/", "results/", "results/", "results/"], "language": ["es", "es", "es", "es", "es", "es"], "original_text": ["okay let's try it again", " so this model should capture this and translate right away", " so this model should capture this and translate right away", "hello", "hello", "okay okay okay okay okay okay okay okay okay okay okay okay okay okay"]}

requirements.txt
ADDED
@@ -0,0 +1,25 @@
language_tool_python
noisereduce
numpy
pandas
pydub
#scikit_learn==1.4.0
scipy
speechbrain
webrtcvad==2.0.10
deepspeed==0.14.0
transformers==4.40.2
hydra-core
pytorch_lightning
streamlit
sounddevice
playsound
streamlit-webrtc
pybind11
fasttext
Cython
# nemo_toolkit[all]==1.21
fastapi
uvicorn
pydantic==1.10.9
spacy

results/readme
ADDED
File without changes

run.py
ADDED
@@ -0,0 +1,73 @@
import threading
import argparse
import subprocess
from models.nllb import nllb
from models.parakeet import parakeet_ctc_model
from models.es_fastconformer import stt_es_model
from models.TTS_utils import load_manual_xtts_v2
from stream_VAD import stream

def main(xtts_path, xtts_config_path, language="en", record_temp="record_temp.json", record_per="record_per.json", record_path="audio_segments/", result_dir="results", segments_dir="audio_segments"):
    """
    Main function to run the ASR stream and initiate the TTS stream production.

    Args:
        xtts_path (str): Path to the xtts model file.
            Example: "path/to/xtts_model.pt"
        xtts_config_path (str): Path to the xtts configuration file.
            Example: "path/to/xtts_config.json"
        language (str, optional): Language for the ASR model. Must be either 'en' for English or 'es' for Spanish.
            Default: 'en'
            Example: "en"
        record_temp (str, optional): Path to the temporary record JSON file.
            Default: "record_temp.json"
            Example: "path/to/record_temp.json"
        record_per (str, optional): Path to the periodic record JSON file.
            Default: "record_per.json"
            Example: "path/to/record_per.json"
        record_path (str, optional): Path to the directory where audio segments are recorded.
            Default: "audio_segments/"
            Example: "path/to/audio_segments/"
        result_dir (str, optional): Path to the directory where results are stored.
            Default: "results"
            Example: "path/to/results"
        segments_dir (str, optional): Path to the directory where audio segments are stored.
            Default: "audio_segments"
            Example: "path/to/audio_segments"
    """
    model_nllb, tokenizer_nllb = nllb()

    if language == "en":
        asr = parakeet_ctc_model()
        stream_thread = threading.Thread(target=stream, args=(asr, model_nllb, tokenizer_nllb, "english", "spanish", record_temp, record_per, result_dir, segments_dir))

    elif language == "es":
        asr = stt_es_model()
        stream_thread = threading.Thread(target=stream, args=(asr, model_nllb, tokenizer_nllb, "spanish", "english", record_temp, record_per, result_dir, segments_dir))

    else:
        raise ValueError("Language not supported")

    # Start the stream thread
    stream_thread.start()

    # Call the other script to start stream_prod
    subprocess.Popen(['python', 'stream_prod_main.py', xtts_path, xtts_config_path, record_temp, record_path])

    # Wait for the stream thread to complete
    stream_thread.join()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run stream and initiate stream_prod.")
    parser.add_argument("xtts_path", type=str, help="Path to the xtts model.")
    parser.add_argument("xtts_config_path", type=str, help="Path to the xtts config.")
    parser.add_argument("language", type=str, choices=["en", "es"], help="Language (en or es).")
    parser.add_argument("--record_temp", type=str, default="record_temp.json", help="Path to the record temp file.")
    parser.add_argument("--record_per", type=str, default="record_per.json", help="Path to the record per file.")
    parser.add_argument("--record_path", type=str, default="audio_segments/", help="Path to the record directory.")
    parser.add_argument("--result_dir", type=str, default="results", help="Path to the result directory.")
    parser.add_argument("--segments_dir", type=str, default="audio_segments", help="Path to the segments directory.")

    args = parser.parse_args()

    main(args.xtts_path, args.xtts_config_path, args.language, args.record_temp, args.record_per, args.record_path, args.result_dir, args.segments_dir)

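A typical invocation, following the argparse definitions above (the checkpoint and config paths are placeholders):

    python run.py checkpoint/model.pth config.json en --result_dir results --segments_dir audio_segments
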
setup.sh
ADDED
@@ -0,0 +1,18 @@
#!/bin/bash
git clone https://github.com/coqui-ai/TTS/ && \
cd TTS && \
make install

pip install PyAudio-0.2.11-cp37-cp37m-win_amd64.whl
pip install pybind11
pip install wheel setuptools pip --upgrade
pip install fasttext
apt-get update && apt-get install -y libsndfile1 ffmpeg
pip install Cython
# pip install nemo_toolkit['all']

# show the version of nemo in python
python -c "import nemo; print(nemo.__version__)"
pip install torch==2.2.2+cu121 -f https://download.pytorch.org/whl/cu121/torch_stable.html
pip install torchaudio==2.2.2+cu121 -f https://download.pytorch.org/whl/cu121/torch_stable.html
pip install -r requirements.txt

status.txt
ADDED
@@ -0,0 +1 @@
done

stream_VAD.py
ADDED
@@ -0,0 +1,249 @@
import collections
import contextlib
import wave
import webrtcvad
import pyaudio
import os
import librosa
import numpy as np
from models.nllb import nllb_translate
from models.TTS_utils import append_text_order
from models.parakeet import parakeet_ctc_process
from models.es_fastconformer import stt_es_process
from concurrent.futures import ThreadPoolExecutor
import time
from models.noise_red import noise_reduction

class Frame(object):
    """
    Represents a "frame" of audio data.

    Args:
        bytes (bytes): The audio data.
        timestamp (float): The timestamp of the frame.
        duration (float): The duration of the frame.
    """
    def __init__(self, bytes, timestamp, duration):
        self.bytes = bytes
        self.timestamp = timestamp
        self.duration = duration

def read_audio(stream, frame_duration_ms, rate):
    """
    Generates audio frames from the input stream.

    Args:
        stream (pyaudio.Stream): The audio stream.
        frame_duration_ms (int): Duration of each frame in milliseconds.
        rate (int): The sample rate of the audio.

    Yields:
        bytes: The audio frames.
    """
    frames_per_buffer = int(rate * frame_duration_ms / 1000)
    while True:
        yield stream.read(frames_per_buffer)

def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
    """
    Filters out non-voiced audio frames.

    Args:
        sample_rate (int): The sample rate of the audio.
        frame_duration_ms (int): Duration of each frame in milliseconds.
        padding_duration_ms (int): Duration of padding in milliseconds.
        vad (webrtcvad.Vad): The VAD object.
        frames (generator): A generator yielding audio frames.

    Yields:
        bytes: Voiced audio segments (the concatenated bytes of consecutive voiced frames).
    """
    num_padding_frames = int(padding_duration_ms / frame_duration_ms)
    ring_buffer = collections.deque(maxlen=num_padding_frames)
    triggered = False

    voiced_frames = []
    for frame in frames:
        is_speech = vad.is_speech(frame.bytes, sample_rate)

        if not triggered:
            ring_buffer.append((frame, is_speech))
            num_voiced = len([f for f, speech in ring_buffer if speech])
            # Trigger when 90% of the buffered frames are voiced
            if num_voiced > 0.9 * ring_buffer.maxlen:
                triggered = True
                voiced_frames.extend(f for f, speech in ring_buffer)
                ring_buffer.clear()
        else:
            voiced_frames.append(frame)
            ring_buffer.append((frame, is_speech))
            num_unvoiced = len([f for f, speech in ring_buffer if not speech])
            # Close the segment when 90% of the buffered frames are unvoiced
            if num_unvoiced > 0.9 * ring_buffer.maxlen:
                yield b''.join([f.bytes for f in voiced_frames])
                ring_buffer.clear()
                voiced_frames = []
                triggered = False
    if voiced_frames:
        yield b''.join([f.bytes for f in voiced_frames])


def is_segment_empty(file_path):
    """
    Check if the audio segment is empty.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        bool: True if the segment is empty, False otherwise.
    """
    audio, _ = librosa.load(file_path)
    rms = librosa.feature.rms(y=audio)  # Pass the audio data as an argument
    rms_mean = np.mean(rms)
    print(rms_mean)

    if rms_mean < 0.015:
        return True
    else:
        return False


def process_segment(asr_model, model_nllb, tokenizer_nllb, path_segments, path_results, target_lang, order, json_path_temp, json_path_record):
    """
    Process an audio segment: noise reduction, transcription, translation, and append results.

    Args:
        asr_model: The ASR model for transcription.
        model_nllb: The NLLB model for translation.
        tokenizer_nllb: The tokenizer for the NLLB model.
        path_segments (str): Path to the audio segment.
        path_results (str): Path to save the results.
        target_lang (str): Target language for translation.
        order (int): Order index of the segment.
        json_path_temp (str): Path to the temporary JSON file.
        json_path_record (str): Path to the record JSON file.
    """
    print("Processing segment...")
    if is_segment_empty(path_segments):
        print("No speech detected.")
        # Remove the empty segment
        os.remove(path_segments)
        return
    # Noise reduction (in place)
    start_time = time.time()
    noise_reduction(path_segments, path_segments)
    print("Noise removed. Time:", time.time() - start_time)

    # Transcription
    transcription = transcribe(asr_model, path_segments, target_lang)
    #if not transcription.strip():
    #    print("No speech detected.")
    #    return

    # Translation
    print("Translating...")
    translation = translate(model_nllb, tokenizer_nllb, transcription, target_lang)

    # Text-to-speech is handled downstream; record the text orders instead
    # process_tts(tts_model, translation, path_segments, target_lang, path_results)
    append_text_order(json_path_temp, translation, order, path_segments, path_results, "es" if target_lang == "spanish" else "en", transcription)
    append_text_order(json_path_record, translation, order, path_segments, path_results, "es" if target_lang == "spanish" else "en", transcription)

def transcribe(asr_model, path_segments, target_lang):
    """
    Transcribe an audio segment using the specified ASR model.

    Args:
        asr_model: The ASR model for transcription.
        path_segments (str): Path to the audio segment.
        target_lang (str): Target language of the translation step; used to pick the ASR
            model that matches the *source* language.

    Returns:
        str: The transcription of the audio segment.
    """
    start_time = time.time()
    # target_lang "spanish" means the source audio is English (Parakeet), and vice versa
    transcription_func = {
        "spanish": parakeet_ctc_process,
        "english": stt_es_process
    }[target_lang]
    transcription = transcription_func(asr_model, path_segments)
    print("Transcription:", transcription[0])
    print("Transcription time:", time.time() - start_time)
    return transcription[0]

def translate(model_nllb, tokenizer_nllb, text, target_lang):
    """
    Translate text using the specified NLLB model and tokenizer.

    Args:
        model_nllb: The NLLB model for translation.
        tokenizer_nllb: The tokenizer for the NLLB model.
        text (str): The text to translate.
        target_lang (str): Target language for translation.

    Returns:
        str: The translated text.
    """
    print("Processing translation...")
    start_time = time.time()
    translation = nllb_translate(model_nllb, tokenizer_nllb, text, target_lang)
    print("Translation:", translation)
    print("Translation time:", time.time() - start_time)
    return translation


def stream(asr_model, model_nllb, tokenizer_nllb, source_lang, target_lang, json_file_temp, json_file_record, result_dir="results", segments_dir="audio_segments"):
    """
    Stream audio input, process segments, and save the results.

    Args:
        asr_model: The ASR model for transcription.
        model_nllb: The NLLB model for translation.
        tokenizer_nllb: The tokenizer for the NLLB model.
        source_lang (str): Source language of the audio.
        target_lang (str): Target language for translation.
        json_file_temp (str): Path to the temporary JSON file.
        json_file_record (str): Path to the record JSON file.
        result_dir (str, optional): Directory to save the results. Default is "results".
        segments_dir (str, optional): Directory to save the audio segments. Default is "audio_segments".
    """
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    CHUNK_DURATION_MS = 30  # supports 10, 20 and 30 (ms)
    PADDING_DURATION_MS = 300
    vad = webrtcvad.Vad(1)

    audio = pyaudio.PyAudio()
    stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=160)
    frames = read_audio(stream, CHUNK_DURATION_MS, RATE)
    frames = (Frame(f, None, None) for f in frames)

    if not os.path.exists(segments_dir):
        os.makedirs(segments_dir)
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)

    executor = ThreadPoolExecutor(max_workers=2)  # Adjust the number of workers as needed

    for i, segment in enumerate(vad_collector(RATE, CHUNK_DURATION_MS, PADDING_DURATION_MS, vad, frames)):
        path_segments = os.path.join(segments_dir, f"segment_{i}.wav")
        path_results = os.path.join(result_dir, f"result_{i}.wav")
        print(f"Writing {path_segments}...")
        with contextlib.closing(wave.open(path_segments, 'wb')) as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(audio.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(segment)

        executor.submit(process_segment, asr_model, model_nllb, tokenizer_nllb, path_segments, path_results, target_lang, i, json_file_temp, json_file_record)

    stream.stop_stream()
    stream.close()
    audio.terminate()
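For offline testing, vad_collector can be fed frames sliced from a WAV file instead of the microphone; a minimal sketch, assuming a 16 kHz mono 16-bit PCM input (the path is a placeholder):

    # Hypothetical offline driver for vad_collector.
    import wave
    import webrtcvad
    from stream_VAD import Frame, vad_collector

    with wave.open("sample_16k_mono.wav", "rb") as wf:
        pcm = wf.readframes(wf.getnframes())

    frame_bytes = int(16000 * 30 / 1000) * 2  # 30 ms frames, 2 bytes per sample
    frames = (Frame(pcm[i:i + frame_bytes], None, None)
              for i in range(0, len(pcm) - frame_bytes + 1, frame_bytes))

    vad = webrtcvad.Vad(1)
    for n, segment in enumerate(vad_collector(16000, 30, 300, vad, frames)):
        print(f"voiced segment {n}: {len(segment)} bytes")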