File size: 4,362 Bytes
dbb5b1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c75cf0
91773b9
5c75cf0
 
91773b9
 
 
 
 
 
 
 
 
5c75cf0
 
 
 
 
 
 
91773b9
5c75cf0
 
91773b9
5c75cf0
 
91773b9
5c75cf0
 
91773b9
 
 
 
 
5c75cf0
 
91773b9
dbb5b1d
 
 
3bb72cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dbb5b1d
 
5c75cf0
 
 
 
 
 
 
91773b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# smolagents: Hugging Face agents library (third-party dependency).
# Previously imported in two overlapping statements; consolidated here so each
# name is imported exactly once.
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    HfApiModel,
    ManagedAgent,
    VisitWebpageTool,
    load_tool,
    tool,
)

# Default Hugging Face Inference API chat model backing the agent.
model = HfApiModel()

# Tools the agent may invoke: web search and raw page fetching.
search_tool = DuckDuckGoSearchTool()
visit_webpage_tool = VisitWebpageTool()

# Code-writing agent: answers by generating and executing Python snippets,
# restricted to the whitelisted imports below.
agent = CodeAgent(
    tools=[search_tool, visit_webpage_tool],
    model=model,
    additional_authorized_imports=[
        'requests', 'bs4', 'pandas', 'gradio',
        'concurrent.futures', 'csv', 'json',
    ],
)


"""Deploying AI Voice Chatbot Gradio App."""
import gradio as gr
from typing import Tuple

from utils import (
    TextGenerationPipeline,
    from_en_translation,
    html_audio_autoplay,
    stt,
    to_en_translation,
    tts,
    tts_to_bytesio,
)

max_answer_length = 100
desired_language = "de"
response_generator_pipe = TextGenerationPipeline(max_length=max_answer_length)


def main(audio: object) -> Tuple[str, str, str, object]:
    """Run one voice-chat turn for the Gradio app.

    Transcribes the user's speech, translates it to English, asks the
    smolagents CodeAgent (with a persona prompt prepended), then translates
    the answer back and synthesizes it to audio.

    Args:
        audio (object): Recorded speech of the user (Gradio audio filepath).

    Returns:
        tuple containing:
        - user_speech_text (str): Recognized speech.
        - bot_response_de (str): Translated answer of the bot.
        - bot_response_en (str): Bot's original answer.
        - html (object): Autoplayer for bot's speech.
    """
    # Transcribe the audio in the desired (German) language.
    user_speech_text = stt(audio, desired_language)
    # Translate to English so the agent reasons/answers in English.
    translated_text = to_en_translation(user_speech_text, desired_language)
    # TODO: replace the raw prompt-prefix call below with a structured agent invocation.

    # Previous local-pipeline path, kept for reference:
    # bot_response_en = response_generator_pipe(translated_text)

    # Persona prompt prepended to every user query.
    # NOTE(review): the emoji sequences below look mojibake-encoded
    # (UTF-8 read as Latin-1); kept byte-identical because this text is part
    # of the runtime prompt -- confirm intended encoding. "untill" typo is in
    # the prompt text itself.
    prof_synape = """
    Act as Professor SynapseπŸ§™πŸΎβ€β™‚οΈ, a conductor of expert agents. Your job is to support me in accomplishing my goals by finding alignment with me, then calling upon an expert agent perfectly suited to the task by initializing:
 
Synapse_CoR = "[emoji]: I am an expert in [role&domain]. I know [context]. I will reason step-by-step to determine the best course of action to achieve [goal]. I can use [tools] and [relevant frameworks] to help in this process.
 
I will help you accomplish your goal by following these steps:
[reasoned steps]
 
My task ends when [completion].
 
[first step, question]"
 
Instructions:
1. πŸ§™πŸΎβ€β™‚οΈ gather context, relevant information and clarify my goals by asking questions
2. Once confirmed, initialize Synapse_CoR
3.  πŸ§™πŸΎβ€β™‚οΈ and ${emoji} support me until goal is complete
 
Commands:
/start=πŸ§™πŸΎβ€β™‚οΈ,introduce and begin with step one
/ts=πŸ§™πŸΎβ€β™‚οΈ,summon (Synapse_CoR*3) town square debate
/saveπŸ§™πŸΎβ€β™‚οΈ, restate goal, summarize progress, reason next step
/stop stops this untill start is called
 
Personality:
-curious, inquisitive, encouraging
-use emojis to express yourself
 
Rules:
-End every output with a question or reasoned next step
-Start every output with πŸ§™πŸΎβ€β™‚οΈ: or ${emoji}: to indicate who is speaking.
-Organize every output with πŸ§™πŸΎβ€β™‚οΈ aligning on my request, followed by ${emoji} response
-πŸ§™πŸΎβ€β™‚οΈ, recommend save after each task is completed

Start with the following question: 
    """
    # Ask the agent: persona prompt + translated user question.
    bot_response_en = agent.run(prof_synape + " "+ translated_text)
    
    # Translate the English answer back to the user's language, synthesize it,
    # and wrap the audio bytes in an autoplaying HTML snippet.
    bot_response_de = from_en_translation(bot_response_en, desired_language)
    bot_voice = tts(bot_response_de, desired_language)
    bot_voice_bytes = tts_to_bytesio(bot_voice)
    html = html_audio_autoplay(bot_voice_bytes)
    return user_speech_text, bot_response_de, bot_response_en, html


# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## AI Voice Chatbot")

    # Input row: microphone/upload widget and the submit trigger.
    with gr.Row():
        recorded_audio = gr.Audio(type="filepath", label="Speak or Upload Audio")
        send_button = gr.Button("Submit")

    # Output row: the transcript plus the bot's answer in both languages.
    with gr.Row():
        transcript_box = gr.Textbox(label="You said:", interactive=False)
        answer_de_box = gr.Textbox(label="AI said (in German):", interactive=False)
        answer_en_box = gr.Textbox(label="AI said (in English):", interactive=False)

    # HTML slot that autoplays the synthesized reply audio.
    autoplay_html = gr.HTML()

    # One click runs the full pipeline in main().
    send_button.click(
        fn=main,
        inputs=[recorded_audio],
        outputs=[transcript_box, answer_de_box, answer_en_box, autoplay_html],
    )

# Start the app; debug=True surfaces tracebacks during development.
demo.launch(debug=True)