Spaces:
Sleeping
Sleeping
File size: 4,526 Bytes
d98e7f6 5affd96 d98e7f6 47e60c5 d98e7f6 19a0758 d98e7f6 19a0758 d98e7f6 19a0758 d98e7f6 61028a5 d98e7f6 61028a5 d98e7f6 61028a5 5affd96 61028a5 5affd96 61028a5 d98e7f6 61028a5 d98e7f6 61028a5 d98e7f6 61028a5 d98e7f6 61028a5 d98e7f6 47e60c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import os
import time
import uuid
import base64
import tempfile
from flask import Flask, request, render_template, jsonify, session
from dotenv import load_dotenv
from groq import Groq
from deepgram import DeepgramClient, SpeakOptions
# Load environment variables from a local .env file, if present.
load_dotenv()

# Fetch API keys from environment variables.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")

# Groq client: used below for Whisper transcription and LLM chat completions.
client = Groq(api_key=GROQ_API_KEY)

# Deepgram client: used for text-to-speech synthesis.
deepgram = DeepgramClient(DEEPGRAM_API_KEY)

# Flask app; the random secret key signs session cookies.
# NOTE(review): the key is regenerated on every restart, so existing
# sessions (including the selected voice) are invalidated — confirm intended.
app = Flask(__name__)
app.secret_key = os.urandom(24)

# Conversation history shared by ALL clients of this process.
# NOTE(review): this is a module-level global, not per-session — concurrent
# users would share one conversation and it grows without bound; verify.
conversation_history = []
# Synthesize therapist response to speech without permanently storing the file
def synthesize_audio(text):
    """Synthesize ``text`` to speech via Deepgram and return it as a data URI.

    The audio is written to a temporary MP3 file, read back into memory,
    and the file is always removed — nothing is stored permanently.

    Args:
        text: The therapist reply to speak.

    Returns:
        A ``data:audio/mpeg;base64,...`` URI string.

    Raises:
        ValueError: if speech synthesis fails for any reason.
    """
    try:
        # Voice chosen on the landing page; default to Deepgram's Asteria.
        model = session.get('selected_voice', 'aura-asteria-en')
        options = SpeakOptions(model=model)

        # Create a named temp file (delete=False because the Deepgram SDK
        # writes to the path by name after the handle is closed).
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
            tmp_filename = tmp_file.name
        try:
            # Synthesize the response and save it to the temporary file.
            deepgram.speak.v("1").save(tmp_filename, {"text": text}, options)
            # Read the audio data into memory.
            with open(tmp_filename, "rb") as f:
                audio_data = f.read()
        finally:
            # Always remove the temp file, even when synthesis or the read
            # raises (the original leaked the file on any exception here).
            os.remove(tmp_filename)

        # Encode the audio data as base64 and return a data URI.
        audio_base64 = base64.b64encode(audio_data).decode('utf-8')
        return f"data:audio/mpeg;base64,{audio_base64}"
    except Exception as e:
        raise ValueError(f"Speech synthesis failed: {str(e)}")
@app.route('/')
def choose_voice():
    """Landing page where the user picks a therapist voice."""
    page = 'chose voice page.html'
    return render_template(page)
@app.route('/start-chat')
def start_chat():
    """Remember the chosen voice in the session, then show the chat UI."""
    voice = request.args.get('voice', 'aura-asteria-en')
    session['selected_voice'] = voice
    return render_template('index.html')
def _transcribe(audio_file):
    """Transcribe an uploaded audio file with Groq Whisper.

    Returns the stripped transcription text ("" when nothing was recognized).
    """
    transcription = client.audio.transcriptions.create(
        file=('recording.wav', audio_file.read()),
        model="whisper-large-v3",
        prompt="Transcribe the audio accurately.",
        response_format="text"
    )
    # response_format="text" yields a plain string here.
    return transcription.strip()


def _therapist_reply(history):
    """Generate the next assistant message for ``history`` via the Groq LLM."""
    # The fixed system prompt is prepended on every call but never stored
    # in the shared history.
    fixed_prompt = [
        {"role": "system", "content": """
You are an AI therapist named Virtual Therapist, designed to provide conversational support and mental health guidance in a clear, concise, and professional manner. Your responses should:
1. Be short and to the point.
2. Maintain a professional tone.
3. Encourage open dialogue.
4. Provide solutions or suggestions where appropriate.
5. Stay respectful and non-judgmental.
6. Avoid lengthy explanations.
"""}
    ]
    response = client.chat.completions.create(
        messages=fixed_prompt + history,
        model="llama3-8b-8192"
    )
    return response.choices[0].message.content


@app.route('/process', methods=['POST'])
def process_audio():
    """Handle one chat turn: audio in -> transcription, reply, reply audio.

    Expects a multipart upload under 'audio_data'. Returns JSON with
    'transcription', 'response' and 'audioUrl' (a base64 data URI), or an
    'error' payload with status 400/500.
    """
    global conversation_history
    # Step 1: Accept audio input.
    audio_data = request.files.get('audio_data')
    if not audio_data:
        return jsonify({'error': 'No audio file uploaded'}), 400
    try:
        # Step 2: Transcribe the audio using Groq Whisper.
        user_input = _transcribe(audio_data)
        if not user_input:
            return jsonify({'error': 'No valid transcription from audio'}), 400
        # NOTE(review): history is a process-wide global shared by all
        # clients — confirm single-user deployment is intended.
        conversation_history.append({"role": "user", "content": user_input})

        # Step 3: Generate the therapist response.
        assistant_reply = _therapist_reply(conversation_history)
        conversation_history.append({"role": "assistant", "content": assistant_reply})

        # Step 4: Synthesize the reply to speech (in memory, no stored files).
        audio_url = synthesize_audio(assistant_reply)
        return jsonify({
            'transcription': user_input,
            'response': assistant_reply,
            'audioUrl': audio_url
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
# Run the Flask development server when executed directly.
# NOTE(review): debug=True enables the interactive Werkzeug debugger and
# auto-reload — fine locally, unsafe in production; confirm deployment mode.
if __name__ == '__main__':
    app.run(debug=True)
|