File size: 4,526 Bytes
d98e7f6
 
 
5affd96
 
 
d98e7f6
 
47e60c5
d98e7f6
 
 
19a0758
 
 
d98e7f6
 
19a0758
d98e7f6
 
19a0758
d98e7f6
 
 
 
 
 
 
 
61028a5
 
d98e7f6
61028a5
 
d98e7f6
61028a5
 
5affd96
 
 
61028a5
5affd96
 
 
 
 
 
 
 
 
61028a5
 
 
 
d98e7f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61028a5
d98e7f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61028a5
 
d98e7f6
61028a5
d98e7f6
 
 
61028a5
d98e7f6
 
 
 
 
 
47e60c5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
import time
import uuid
import base64
import tempfile
from flask import Flask, request, render_template, jsonify, session
from dotenv import load_dotenv
from groq import Groq
from deepgram import DeepgramClient, SpeakOptions
# NOTE(review): `time` and `uuid` appear unused in this file — confirm
# they are not needed elsewhere before removing.

# Load environment variables from a local .env file (if present).
load_dotenv()
# Fetch API keys from environment variables
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")

# Set up the Groq client (used for Whisper transcription and chat completions)
client = Groq(api_key=GROQ_API_KEY)

# Set up Deepgram client (used for text-to-speech synthesis)
deepgram = DeepgramClient(DEEPGRAM_API_KEY)

# Flask app
app = Flask(__name__)
# NOTE(review): a random secret key is generated on every process start, so
# all existing sessions are invalidated on restart and sessions cannot be
# shared across multiple workers — confirm this is acceptable.
app.secret_key = os.urandom(24)

# Store conversation history.
# NOTE(review): this module-level list is shared by every client of this
# process, so concurrent users would share one conversation — verify
# whether per-session history is intended.
conversation_history = []

# Synthesize therapist response to speech without permanently storing the file
def synthesize_audio(text):
    """Convert *text* to speech via Deepgram and return it as a data URI.

    Reads the caller's preferred voice from the Flask session (falling back
    to "aura-asteria-en"), synthesizes MP3 audio through the Deepgram SDK,
    and returns a base64 ``data:audio/mpeg`` URI so no audio file needs to
    be served from (or left on) disk.

    Args:
        text: The text to synthesize.

    Returns:
        A ``data:audio/mpeg;base64,...`` URI string.

    Raises:
        ValueError: if speech synthesis fails for any reason.
    """
    try:
        # Retrieve the selected voice or default to "aura-asteria-en"
        model = session.get('selected_voice', 'aura-asteria-en')
        options = SpeakOptions(model=model)

        # The Deepgram SDK writes to a file path, so use a temp file as a
        # scratch buffer. delete=False because the SDK reopens the path.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
            tmp_filename = tmp_file.name

        try:
            # Synthesize the response and save it to the temporary file
            deepgram.speak.v("1").save(tmp_filename, {"text": text}, options)

            # Read the audio data back into memory
            with open(tmp_filename, "rb") as f:
                audio_data = f.read()
        finally:
            # Always remove the scratch file — previously it leaked on disk
            # whenever synthesis or the read raised.
            os.remove(tmp_filename)

        # Encode the audio data as base64 and return a data URI
        audio_base64 = base64.b64encode(audio_data).decode('utf-8')
        return f"data:audio/mpeg;base64,{audio_base64}"
    except Exception as e:
        # Chain the original cause so the traceback stays diagnosable.
        raise ValueError(f"Speech synthesis failed: {str(e)}") from e

@app.route('/')
def choose_voice():
    """Landing page: let the user pick a Deepgram voice before chatting."""
    return render_template('chose voice page.html')

@app.route('/start-chat')
def start_chat():
    """Remember the chosen voice in the session and render the chat UI.

    The voice comes from the ``voice`` query parameter, defaulting to
    "aura-asteria-en" when absent.
    """
    session['selected_voice'] = request.args.get('voice', 'aura-asteria-en')
    return render_template('index.html')

@app.route('/process', methods=['POST'])
def process_audio():
    """Handle one chat turn: transcribe audio, reply, and voice the reply.

    Expects a multipart POST carrying an ``audio_data`` file. On success,
    returns JSON with the transcription, the assistant's text reply, and a
    base64 audio data URI. Bad input yields a 400; any other failure a 500.

    NOTE(review): ``conversation_history`` is a module-level list shared by
    every client of this process — concurrent users would share context.
    Confirm whether per-session history is intended.
    """
    # Step 1: the browser must have uploaded a recording.
    uploaded = request.files.get('audio_data')
    if not uploaded:
        return jsonify({'error': 'No audio file uploaded'}), 400

    try:
        # Step 2: speech-to-text via Groq's hosted Whisper model.
        transcript = client.audio.transcriptions.create(
            file=('recording.wav', uploaded.read()),
            model="whisper-large-v3",
            prompt="Transcribe the audio accurately.",
            response_format="text"
        )
        user_input = transcript.strip()
        if not user_input:
            return jsonify({'error': 'No valid transcription from audio'}), 400

        # Record the user's turn before generating a reply.
        conversation_history.append({"role": "user", "content": user_input})

        # Step 3: chat completion. The system prompt is prepended on every
        # turn so the model keeps its therapist persona.
        fixed_prompt = [
            {"role": "system", "content": """
                You are an AI therapist named Virtual Therapist, designed to provide conversational support and mental health guidance in a clear, concise, and professional manner. Your responses should:
                1. Be short and to the point.
                2. Maintain a professional tone.
                3. Encourage open dialogue.
                4. Provide solutions or suggestions where appropriate.
                5. Stay respectful and non-judgmental.
                6. Avoid lengthy explanations.
            """}
        ]

        completion = client.chat.completions.create(
            messages=fixed_prompt + conversation_history,
            model="llama3-8b-8192"
        )
        assistant_reply = completion.choices[0].message.content

        # Record the assistant's turn as well.
        conversation_history.append({"role": "assistant", "content": assistant_reply})

        # Step 4: synthesize speech in memory — no audio files persist —
        # and hand everything back to the browser as a single JSON payload.
        return jsonify({
            'transcription': user_input,
            'response': assistant_reply,
            'audioUrl': synthesize_audio(assistant_reply)
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    # Development entry point. NOTE(review): debug=True enables the
    # Werkzeug debugger and auto-reloader — never run with this in
    # production.
    app.run(debug=True)