File size: 4,526 Bytes
d98e7f6
 
 
5affd96
 
 
d98e7f6
 
47e60c5
d98e7f6
 
 
19a0758
 
 
d98e7f6
 
19a0758
d98e7f6
 
19a0758
d98e7f6
 
 
 
 
 
 
 
61028a5
 
d98e7f6
61028a5
 
d98e7f6
61028a5
 
5affd96
 
 
61028a5
5affd96
 
 
 
 
 
 
 
 
61028a5
 
 
 
d98e7f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61028a5
d98e7f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61028a5
 
d98e7f6
61028a5
d98e7f6
 
 
61028a5
d98e7f6
 
 
 
 
 
47e60c5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
import time
import uuid
import base64
import tempfile
from flask import Flask, request, render_template, jsonify, session
from dotenv import load_dotenv
from groq import Groq
from deepgram import DeepgramClient, SpeakOptions
# NOTE(review): `time` and `uuid` appear unused in this file — confirm
# they are not needed elsewhere before removing.

# Load environment variables from a local .env file (if present).
load_dotenv()
# Fetch API keys from environment variables
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")

# Set up the Groq client (used for Whisper transcription and chat completions)
client = Groq(api_key=GROQ_API_KEY)

# Set up Deepgram client (used for text-to-speech synthesis)
deepgram = DeepgramClient(DEEPGRAM_API_KEY)

# Flask app
app = Flask(__name__)
# NOTE(review): a random secret key is generated on every process start, so
# all existing sessions are invalidated on restart and sessions cannot be
# shared across multiple workers — confirm this is acceptable.
app.secret_key = os.urandom(24)

# Store conversation history.
# NOTE(review): this module-level list is shared by every client of this
# process, so concurrent users would share one conversation — verify
# whether per-session history is intended.
conversation_history = []

# Synthesize therapist response to speech without permanently storing the file
def synthesize_audio(text):
    """Convert *text* to speech via Deepgram and return it as a data URI.

    Reads the caller's preferred voice from the Flask session (falling back
    to "aura-asteria-en"), synthesizes MP3 audio through the Deepgram SDK,
    and returns a base64 ``data:audio/mpeg`` URI so no audio file needs to
    be served from (or left on) disk.

    Args:
        text: The text to synthesize.

    Returns:
        A ``data:audio/mpeg;base64,...`` URI string.

    Raises:
        ValueError: if speech synthesis fails for any reason.
    """
    try:
        # Retrieve the selected voice or default to "aura-asteria-en"
        model = session.get('selected_voice', 'aura-asteria-en')
        options = SpeakOptions(model=model)

        # The Deepgram SDK writes to a file path, so use a temp file as a
        # scratch buffer. delete=False because the SDK reopens the path.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
            tmp_filename = tmp_file.name

        try:
            # Synthesize the response and save it to the temporary file
            deepgram.speak.v("1").save(tmp_filename, {"text": text}, options)

            # Read the audio data back into memory
            with open(tmp_filename, "rb") as f:
                audio_data = f.read()
        finally:
            # Always remove the scratch file — previously it leaked on disk
            # whenever synthesis or the read raised.
            os.remove(tmp_filename)

        # Encode the audio data as base64 and return a data URI
        audio_base64 = base64.b64encode(audio_data).decode('utf-8')
        return f"data:audio/mpeg;base64,{audio_base64}"
    except Exception as e:
        # Chain the original cause so the traceback stays diagnosable.
        raise ValueError(f"Speech synthesis failed: {str(e)}") from e

@app.route('/')
def choose_voice():
    """Landing page: let the user pick a Deepgram voice before chatting."""
    return render_template('chose voice page.html')

@app.route('/start-chat')
def start_chat():
    """Remember the chosen voice in the session and render the chat UI.

    The voice comes from the ``voice`` query parameter, defaulting to
    "aura-asteria-en" when absent.
    """
    session['selected_voice'] = request.args.get('voice', 'aura-asteria-en')
    return render_template('index.html')

@app.route('/process', methods=['POST'])
def process_audio():
    """Handle one chat turn: transcribe audio, reply, and voice the reply.

    Expects a multipart POST carrying an ``audio_data`` file. On success,
    returns JSON with the transcription, the assistant's text reply, and a
    base64 audio data URI. Bad input yields a 400; any other failure a 500.

    NOTE(review): ``conversation_history`` is a module-level list shared by
    every client of this process — concurrent users would share context.
    Confirm whether per-session history is intended.
    """
    # Step 1: the browser must have uploaded a recording.
    uploaded = request.files.get('audio_data')
    if not uploaded:
        return jsonify({'error': 'No audio file uploaded'}), 400

    try:
        # Step 2: speech-to-text via Groq's hosted Whisper model.
        transcript = client.audio.transcriptions.create(
            file=('recording.wav', uploaded.read()),
            model="whisper-large-v3",
            prompt="Transcribe the audio accurately.",
            response_format="text"
        )
        user_input = transcript.strip()
        if not user_input:
            return jsonify({'error': 'No valid transcription from audio'}), 400

        # Record the user's turn before generating a reply.
        conversation_history.append({"role": "user", "content": user_input})

        # Step 3: chat completion. The system prompt is prepended on every
        # turn so the model keeps its therapist persona.
        fixed_prompt = [
            {"role": "system", "content": """
                You are an AI therapist named Virtual Therapist, designed to provide conversational support and mental health guidance in a clear, concise, and professional manner. Your responses should:
                1. Be short and to the point.
                2. Maintain a professional tone.
                3. Encourage open dialogue.
                4. Provide solutions or suggestions where appropriate.
                5. Stay respectful and non-judgmental.
                6. Avoid lengthy explanations.
            """}
        ]

        completion = client.chat.completions.create(
            messages=fixed_prompt + conversation_history,
            model="llama3-8b-8192"
        )
        assistant_reply = completion.choices[0].message.content

        # Record the assistant's turn as well.
        conversation_history.append({"role": "assistant", "content": assistant_reply})

        # Step 4: synthesize speech in memory — no audio files persist —
        # and hand everything back to the browser as a single JSON payload.
        return jsonify({
            'transcription': user_input,
            'response': assistant_reply,
            'audioUrl': synthesize_audio(assistant_reply)
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    # Development entry point. NOTE(review): debug=True enables the
    # Werkzeug debugger and auto-reloader — never run with this in
    # production.
    app.run(debug=True)