import gradio as gr
from openai import OpenAI
import os
import requests
from bs4 import BeautifulSoup
import random
import time
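
# Read the Hugging Face token from the environment and talk to the serverless
# Inference API through its OpenAI-compatible endpoint.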
ACCESS_TOKEN = os.getenv("HF_TOKEN")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
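
# Small pool of desktop User-Agent strings; one is picked at random per request
# so the scraping traffic looks less uniform and is less likely to be blocked.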
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'
]

def perform_web_search(query):
    """Scrape DuckDuckGo HTML search results directly."""
    try:
        headers = {
            'User-Agent': random.choice(USER_AGENTS),
            'Accept-Language': 'it-IT,it;q=0.9',
            'Referer': 'https://duckduckgo.com/',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        }
        response = requests.get(
            "https://duckduckgo.com/html/",
            params={'q': query},  # let requests URL-encode the query
            headers=headers,
            timeout=10
        )
        # Detect a CAPTCHA or block page
        if "Please enable cookies" in response.text or "CAPTCHA" in response.text:
            print("Blocked by CAPTCHA or a rendering error!")
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        # New DuckDuckGo HTML structure (updated as of July 2024)
        for result in soup.select('div.result'):
            title_element = result.select_one('h2 a.result__a')
            link_element = result.select_one('a.result__url')
            snippet_element = result.select_one('.result__snippet')
            if not all([title_element, link_element]):
                continue
            # Extract the actual result link
            href = link_element['href']
            snippet = snippet_element.get_text() if snippet_element else ""
            results.append({
                'title': title_element.get_text(),
                'link': href,
                'snippet': snippet
            })
            if len(results) >= 8:
                break
        print(f"Found {len(results)} results")
        if not results:
            with open("debug.html", "w", encoding="utf-8") as f:
                f.write(response.text)
            print("HTML saved to debug.html")
        return results
    except Exception as e:
        print(f"Scraping error: {str(e)}")
        return None
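
# Example (hypothetical query, for local testing only):
#   perform_web_search("gradio streaming chatbot")
#   -> list of {'title', 'link', 'snippet'} dicts, or None when blocked or on error

# Streaming chat handler for gr.ChatInterface: it yields the partial response as it
# grows, so the UI updates while tokens arrive.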
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model,
    enable_web_search
):
    system_message = system_message or "Rispondi sempre in italiano. Sei un assistente utile."
    seed = seed if seed != -1 else None

    web_results = None
    if enable_web_search:
        print("MESS----> ", message)
        web_results = perform_web_search(message)
        print("WEB.RES-> ", web_results)
messages = [{"role": "system", "content": system_message}] | |
if web_results: | |
search_context = "**Risultati ricerca web:**\n" | |
for i, res in enumerate(web_results, 1): | |
search_context += f"{i}. [{res['title']}]({res['link']})\n{res['snippet']}\n\n" | |
messages.append({"role": "user", "content": f"{search_context}\nDomanda: {message}"}) | |
else: | |
messages.append({"role": "user", "content": message}) | |
for user_msg, assistant_msg in history: | |
if user_msg: | |
messages.append({"role": "user", "content": user_msg}) | |
if assistant_msg: | |
messages.append({"role": "assistant", "content": assistant_msg}) | |
    model_to_use = custom_model.strip() or "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"

    response = ""
    for chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        response += chunk.choices[0].delta.content or ""
        yield response

# Interface configuration
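# These components are passed to gr.ChatInterface as additional_inputs; their order
# must match the extra parameters of respond() after message and history.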
parametri_controllo = [
    gr.Textbox(
        value="Rispondi sempre in italiano. Sei un assistente utile.",
        placeholder="Inserisci le istruzioni per l'assistente...",
        label="Prompt di Sistema"
    ),
    gr.Slider(1, 4096, value=2048, step=1, label="Token massimi"),
    gr.Slider(0.1, 4.0, value=0.6, step=0.1, label="Temperature"),
    gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P"),
    gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Penalità frequenza"),
    gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 per casuale)"),
    gr.Textbox(
        value="",
        label="Modello personalizzato",
        placeholder="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
    ),
    gr.Checkbox(
        label="Abilita ricerca web (sperimentale)",
        value=False,
        info="ATTENZIONE: Lo scraping diretto può violare i ToS di DuckDuckGo"
    )
]
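
# Custom Chatbot component so the chat area height, copy button, likes and layout
# can be configured instead of using the ChatInterface defaults.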
chatbot = gr.Chatbot(
    height=400,
    show_copy_button=True,
    likeable=True,
    layout="panel"
)

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=parametri_controllo,
    chatbot=chatbot,
    theme="Nymbo/Alyx_Theme",
    title="🤖 Assistente AI con DuckDuckGo Web Scraping",
    submit_btn="Invia",
    clear_btn="Pulisci"
)
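
# gr.ChatInterface is a Blocks app, so re-entering its context lets extra components
# (the warning banner and the model picker) be added below the chat area.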
with demo:
    gr.Markdown("⚠️ **Avviso importante:** Lo scraping diretto di DuckDuckGo è contro i termini di servizio e può comportare il blocco dell'IP. Usare esclusivamente per scopi educativi.")
    with gr.Accordion("⚙️ Configurazione Modello", open=False):
        barra_ricerca = gr.Textbox(
            label="Cerca modelli",
            placeholder="Digita per filtrare..."
        )
        modelli = [
            "meta-llama/Llama-3.3-70B-Instruct",
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
            "Qwen/Qwen2.5-72B-Instruct",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "microsoft/Phi-3.5-mini-instruct",
            "NousResearch/Hermes-3-Llama-3.1-8B",
            "HuggingFaceH4/zephyr-7b-beta"
        ]
        selezione_modello = gr.Radio(
            label="Modelli disponibili",
            choices=modelli,
            value="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
        )
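
        # Typing in the search box filters the radio choices; picking a model copies
        # its id into the "Modello personalizzato" textbox (parametri_controllo[-2]),
        # which is what respond() actually reads.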
        def filtra_modelli(testo):
            return gr.update(choices=[m for m in modelli if testo.lower() in m.lower()])

        barra_ricerca.change(filtra_modelli, barra_ricerca, selezione_modello)
        selezione_modello.change(lambda x: x, selezione_modello, parametri_controllo[-2])

if __name__ == "__main__":
    demo.launch(show_api=False)