import os
import time
import uuid
import base64
import tempfile
from flask import Flask, request, render_template, jsonify, session
from dotenv import load_dotenv
from groq import Groq
from deepgram import DeepgramClient, SpeakOptions

# Load environment variables
load_dotenv()

# Fetch API keys from environment variables
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")

# Set up the Groq client
client = Groq(api_key=GROQ_API_KEY)

# Set up Deepgram client
deepgram = DeepgramClient(DEEPGRAM_API_KEY)

# Flask app
app = Flask(__name__)
app.secret_key = os.urandom(24)

# Store conversation history
conversation_history = []


# Synthesize therapist response to speech without permanently storing the file
def synthesize_audio(text):
    try:
        # Retrieve the selected voice or default to "aura-asteria-en"
        model = session.get('selected_voice', 'aura-asteria-en')
        options = SpeakOptions(model=model)

        # Use a temporary file to store the synthesized audio
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
            tmp_filename = tmp_file.name

        # Synthesize the response and save to the temporary file
        deepgram.speak.v("1").save(tmp_filename, {"text": text}, options)

        # Read the audio data into memory
        with open(tmp_filename, "rb") as f:
            audio_data = f.read()

        # Remove the temporary file
        os.remove(tmp_filename)

        # Encode the audio data as base64 and return a data URI
        audio_base64 = base64.b64encode(audio_data).decode('utf-8')
        audio_data_uri = f"data:audio/mpeg;base64,{audio_base64}"
        return audio_data_uri
    except Exception as e:
        raise ValueError(f"Speech synthesis failed: {str(e)}")


@app.route('/')
def choose_voice():
    return render_template('chose voice page.html')


@app.route('/start-chat')
def start_chat():
    selected_voice = request.args.get('voice', 'aura-asteria-en')
    session['selected_voice'] = selected_voice
    return render_template('index.html')


@app.route('/process', methods=['POST'])
def process_audio():
    global conversation_history

    # Step 1: Accept audio input
    audio_data = request.files.get('audio_data')
    if not audio_data:
        return jsonify({'error': 'No audio file uploaded'}), 400

    try:
        # Step 2: Transcribe the audio using Groq Whisper
        transcription = client.audio.transcriptions.create(
            file=('recording.wav', audio_data.read()),
            model="whisper-large-v3",
            prompt="Transcribe the audio accurately.",
            response_format="text"
        )
        user_input = transcription.strip()
        if not user_input:
            return jsonify({'error': 'No valid transcription from audio'}), 400

        # Append user input to conversation history
        conversation_history.append({"role": "user", "content": user_input})

        # Step 3: Generate therapist response
        fixed_prompt = [
            {"role": "system", "content": """
                You are an AI therapist named Virtual Therapist, designed to provide
                conversational support and mental health guidance in a clear, concise,
                and professional manner. Your responses should:
                1. Be short and to the point.
                2. Maintain a professional tone.
                3. Encourage open dialogue.
                4. Provide solutions or suggestions where appropriate.
                5. Stay respectful and non-judgmental.
                6. Avoid lengthy explanations.
            """}
        ]
        conversation_history_with_prompt = fixed_prompt + conversation_history

        response = client.chat.completions.create(
            messages=conversation_history_with_prompt,
            model="llama3-8b-8192"
        )
        assistant_reply = response.choices[0].message.content

        # Append assistant reply to conversation history
        conversation_history.append({"role": "assistant", "content": assistant_reply})

        # Step 4: Synthesize therapist response to speech (in memory, no permanent files)
        audio_url = synthesize_audio(assistant_reply)

        # Return data URI instead of file URL
        return jsonify({
            'transcription': user_input,
            'response': assistant_reply,
            'audioUrl': audio_url
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    app.run(debug=True)