import os
import base64
import tempfile
from flask import Flask, request, render_template, jsonify, session
from dotenv import load_dotenv
from groq import Groq
from deepgram import DeepgramClient, SpeakOptions
# Load environment variables
load_dotenv()
# Fetch API keys from environment variables
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
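# Illustrative sanity check (an addition, under the assumption both keys are required):
# fail fast with a clear message instead of erroring on the first API call
if not GROQ_API_KEY or not DEEPGRAM_API_KEY:
    raise RuntimeError("GROQ_API_KEY and DEEPGRAM_API_KEY must be set")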
# Set up the Groq client
client = Groq(api_key=GROQ_API_KEY)
# Set up Deepgram client
deepgram = DeepgramClient(DEEPGRAM_API_KEY)
# Flask app
app = Flask(__name__)
app.secret_key = os.urandom(24)
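# Note: a fresh random secret key on every restart invalidates existing sessions
# (and differs across workers); a fixed key from the environment is more robust
# for deployment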
# Store conversation history
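# Note: this list is module-level state shared by every client of the process;
# it is not scoped to a single browser session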
conversation_history = []
# Synthesize therapist response to speech without permanently storing the file
def synthesize_audio(text):
    try:
        # Retrieve the selected voice or default to "aura-asteria-en"
        model = session.get('selected_voice', 'aura-asteria-en')
        options = SpeakOptions(model=model)

        # Use a temporary file to hold the synthesized audio
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
            tmp_filename = tmp_file.name

        try:
            # Synthesize the response and save it to the temporary file
            deepgram.speak.v("1").save(tmp_filename, {"text": text}, options)

            # Read the audio data into memory
            with open(tmp_filename, "rb") as f:
                audio_data = f.read()
        finally:
            # Remove the temporary file even if synthesis fails
            os.remove(tmp_filename)

        # Encode the audio data as base64 and return it as a data URI
        audio_base64 = base64.b64encode(audio_data).decode('utf-8')
        return f"data:audio/mpeg;base64,{audio_base64}"
    except Exception as e:
        raise ValueError(f"Speech synthesis failed: {e}") from e
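
# Usage sketch: the data URI returned above can be assigned directly to an
# <audio> element's src on the front end (e.g. new Audio(audioUrl).play()),
# so no audio files need to be served or cleaned up by the server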
@app.route('/')
def choose_voice():
    return render_template('chose voice page.html')

@app.route('/start-chat')
def start_chat():
    selected_voice = request.args.get('voice', 'aura-asteria-en')
    session['selected_voice'] = selected_voice
    return render_template('index.html')
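
# Usage sketch: the front end is expected to link here with the chosen voice as
# a query parameter, e.g. /start-chat?voice=aura-asteria-en (the default above)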
@app.route('/process', methods=['POST'])
def process_audio():
    global conversation_history

    # Step 1: Accept audio input
    audio_data = request.files.get('audio_data')
    if not audio_data:
        return jsonify({'error': 'No audio file uploaded'}), 400

    try:
        # Step 2: Transcribe the audio using Groq Whisper
        transcription = client.audio.transcriptions.create(
            file=('recording.wav', audio_data.read()),
            model="whisper-large-v3",
            prompt="Transcribe the audio accurately.",
            response_format="text"
        )
        user_input = transcription.strip()
        if not user_input:
            return jsonify({'error': 'No valid transcription from audio'}), 400

        # Append user input to the conversation history
        conversation_history.append({"role": "user", "content": user_input})

        # Step 3: Generate the therapist response
        fixed_prompt = [
            {"role": "system", "content": """
You are an AI therapist named Virtual Therapist, designed to provide conversational support and mental health guidance in a clear, concise, and professional manner. Your responses should:
1. Be short and to the point.
2. Maintain a professional tone.
3. Encourage open dialogue.
4. Provide solutions or suggestions where appropriate.
5. Stay respectful and non-judgmental.
6. Avoid lengthy explanations.
"""}
        ]
        conversation_history_with_prompt = fixed_prompt + conversation_history
        response = client.chat.completions.create(
            messages=conversation_history_with_prompt,
            model="llama3-8b-8192"
        )
        assistant_reply = response.choices[0].message.content

        # Append the assistant reply to the conversation history
        conversation_history.append({"role": "assistant", "content": assistant_reply})

        # Step 4: Synthesize the therapist response to speech (in memory, no permanent files)
        audio_url = synthesize_audio(assistant_reply)

        # Return a data URI instead of a file URL
        return jsonify({
            'transcription': user_input,
            'response': assistant_reply,
            'audioUrl': audio_url
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
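
# Minimal sketch of an optional helper route (an assumption, not called by the
# original front end): clears the shared history for a fresh conversation
@app.route('/reset', methods=['POST'])
def reset_history():
    conversation_history.clear()
    return jsonify({'status': 'history cleared'})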
if __name__ == '__main__':
    app.run(debug=True)