Merge branch 'main' into feature/graph_recommandation
- .gitignore +1 -0
- README.md +1 -1
- app.py +84 -170
- climateqa/engine/chains/answer_ai_impact.py +0 -1
- climateqa/engine/chains/intent_categorization.py +5 -32
- climateqa/engine/chains/query_transformation.py +1 -0
- climateqa/engine/chains/retrieve_documents.py +6 -4
- climateqa/engine/graph.py +1 -141
- climateqa/engine/llm/openai.py +1 -1
- climateqa/engine/reranker.py +1 -2
- front/utils.py +50 -2
- requirements.txt +1 -0
- style.css +65 -13
.gitignore
CHANGED
@@ -10,3 +10,4 @@ notebooks/
 **/.flashrank_cache/
 
 data/
+sandbox/
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: π
 colorFrom: blue
 colorTo: red
 sdk: gradio
-sdk_version:
+sdk_version: 5.0.2
 app_file: app.py
 fullWidth: true
 pinned: false
app.py
CHANGED
@@ -33,7 +33,7 @@ from collections import defaultdict
 # ClimateQ&A imports
 from climateqa.engine.llm import get_llm
 from climateqa.engine.vectorstore import get_pinecone_vectorstore
-from climateqa.knowledge.retriever import ClimateQARetriever
+# from climateqa.knowledge.retriever import ClimateQARetriever
 from climateqa.engine.reranker import get_reranker
 from climateqa.engine.embeddings import get_embeddings_function
 from climateqa.engine.chains.prompts import audience_prompts
@@ -47,6 +47,8 @@ from climateqa.engine.embeddings import get_embeddings_function
 
 from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox,generate_html_graphs
 
+from front.utils import make_html_source, make_html_figure_sources,parse_output_llm_with_sources,serialize_docs,make_toolbox
+
 # Load environment variables in local mode
 try:
     from dotenv import load_dotenv
@@ -88,13 +90,12 @@ share_client = service.get_share_client(file_share_name)
 user_id = create_user_id()
 
 
-embeddings_function = get_embeddings_function()
-llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
-reranker = get_reranker("nano")
 
 # Create vectorstore and retriever
 vectorstore = get_pinecone_vectorstore(embeddings_function)
-
+llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
+reranker = get_reranker("large")
+agent = make_graph_agent(llm,vectorstore,reranker)
 
 # agent = make_graph_agent(llm,vectorstore,reranker)
 agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, reranker=reranker)
@@ -140,6 +141,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
     gallery = []
     updates = []
     start_streaming = False
+    figures = '<div class="figures-container"> <p> Go to the "Figures" tab at the top of the page to see full size images </p> </div>'
 
     steps_display = {
         "categorize_intent":("ποΈ Analyzing user message",True),
@@ -151,11 +153,6 @@ async def chat(query,history,audience,sources,reports,current_graphs):
     answer_message_content = ""
     try:
         async for event in result:
-
-            # if event["event"] == "on_chat_model_stream" and event["metadata"]["langgraph_node"] in ["answer_rag", "answer_rag_no_docs", "answer_chitchat", "answer_ai_impact"]:
-            #     if start_streaming == False:
-            #         start_streaming = True
-            #         history[-1] = (query,"")
             if "langgraph_node" in event["metadata"]:
                 node = event["metadata"]["langgraph_node"]
 
@@ -163,10 +160,12 @@ async def chat(query,history,audience,sources,reports,current_graphs):
                     try:
                         docs = event["data"]["output"]["documents"]
                         docs_html = []
-
-
+                        textual_docs = [d for d in docs if d.metadata["chunk_type"] == "text"]
+                        for i, d in enumerate(textual_docs, 1):
+                            if d.metadata["chunk_type"] == "text":
+                                docs_html.append(make_html_source(d, i))
 
-                        used_documents = used_documents + [d.metadata[
+                        used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
                         history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
 
                         docs_html = "".join(docs_html)
@@ -180,15 +179,15 @@ async def chat(query,history,audience,sources,reports,current_graphs):
                 if not hasattr(history[-1], 'metadata') or history[-1].metadata["title"] != event_description: # if a new step begins
                     history.append(ChatMessage(role="assistant", content = "", metadata={'title' :event_description}))
 
-                elif event["name"] != "transform_query" and event["event"] == "on_chat_model_stream" and node in ["answer_rag", "answer_search"]:# if streaming answer
+                elif event["name"] != "transform_query" and event["event"] == "on_chat_model_stream" and node in ["answer_rag", "answer_search","answer_chitchat"]:# if streaming answer
                     if start_streaming == False:
                         start_streaming = True
                         history.append(ChatMessage(role="assistant", content = ""))
                     answer_message_content += event["data"]["chunk"].content
                     answer_message_content = parse_output_llm_with_sources(answer_message_content)
                     history[-1] = ChatMessage(role="assistant", content = answer_message_content)
-
-
+                    # history.append(ChatMessage(role="assistant", content = new_message_content))
+
                 elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
                     try:
                         recommended_content = event["data"]["output"]["recommended_content"]
@@ -239,116 +238,9 @@ async def chat(query,history,audience,sources,reports,current_graphs):
 
                     except Exception as e:
                         print(f"Error getting graphs: {e}")
-
[… removes ~105 lines of commented-out legacy code: the old graph-recommendation handling (deduplicating embeddings and grouping them by category), the old step-display updates, the old reformulation/keyword parsing, and the old tuple-based history yield …]
 
                 if event["name"] == "transform_query" and event["event"] =="on_chain_end":
                     if hasattr(history[-1],"content"):
                         history[-1].content += "Decompose question into sub-questions: \n\n - " + "\n - ".join([q["question"] for q in event["data"]["output"]["remaining_questions"]])
@@ -356,7 +248,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
                 if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
                     print("X")
 
-                yield history,docs_html,output_query,output_language,gallery, current_graphs #,output_query,output_keywords
+                yield history,docs_html,output_query,output_language,gallery, figures, current_graphs #,output_query,output_keywords
 
         except Exception as e:
             print(event, "has failed")
@@ -368,7 +260,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
         if os.getenv("GRADIO_ENV") != "local":
             timestamp = str(datetime.now().timestamp())
             file = timestamp + ".json"
-            prompt = history[
+            prompt = history[1]["content"]
             logs = {
                 "user_id": str(user_id),
                 "prompt": prompt,
@@ -376,7 +268,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
                 "question":output_query,
                 "sources":sources,
                 "docs":serialize_docs(docs),
-                "answer": history[-1]
+                "answer": history[-1].content,
                 "time": timestamp,
             }
             log_on_azure(file, logs, share_client)
@@ -384,12 +276,49 @@ async def chat(query,history,audience,sources,reports,current_graphs):
         print(f"Error logging on Azure Blob Storage: {e}")
         raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
 
-
-
[… adds ~35 commented-out lines preserving the old image_dict / gallery construction for reference …]
+    docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
+    for i, doc in enumerate(docs_figures):
         if doc.metadata["chunk_type"] == "image":
             try:
                 key = f"Image {i+1}"
+
                 image_path = doc.metadata["image_path"].split("documents/")[1]
                 img = get_image_from_azure_blob_storage(image_path)
 
@@ -397,45 +326,18 @@ async def chat(query,history,audience,sources,reports,current_graphs):
                 buffered = BytesIO()
                 img.save(buffered, format="PNG")
                 img_str = base64.b64encode(buffered.getvalue()).decode()
+
+                figures = figures + make_html_figure_sources(doc, i, img_str)
+
+                gallery.append(img)
-
-                # Embedding the base64 string in Markdown
-                markdown_image = f"![Alt text](data:image/png;base64,{img_str})"
-                image_dict[key] = {"img":img,"md":markdown_image,"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"]}
             except Exception as e:
                 print(f"Skipped adding image {i} because of {e}")
 
-    gallery = [x["img"] for x in list(image_dict.values())]
-    img = list(image_dict.values())[0]
-    img_md = img["md"]
-    img_caption = img["caption"]
-    img_code = img["figure_code"]
-    if img_code != "N/A":
-        img_name = f"{img['key']} - {img['figure_code']}"
-    else:
-        img_name = f"{img['key']}"
-
-    history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
-
[… also removes ~15 commented-out lines of older gallery handling and fallback responses …]
-    yield history,docs_html,output_query,output_language,gallery, current_graphs#,output_query,output_keywords
+    yield history,docs_html,output_query,output_language,gallery, figures#,output_query,output_keywords
 
 
 def save_feedback(feed: str, user_id):
@@ -498,6 +400,10 @@ Hello, I am ClimateQ&A, a conversational assistant designed to help you understa
 ⚠️ Limitations
 *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
 
+π Information
+Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
+
+
 What do you want to learn ?
 """
@@ -517,8 +423,7 @@ def save_graph(saved_graphs_state, embedding, category):
     return saved_graphs_state, gr.Button("Graph Saved")
 
 
-
-# user_id_state = gr.State([user_id])
+with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=theme,elem_id = "main-component") as demo:
 
 # chat_completed_state = gr.State(0)
 # current_graphs = gr.State([])
@@ -532,7 +437,6 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
 
     with gr.Row(elem_id="chatbot-row"):
         with gr.Column(scale=2):
-            # state = gr.State([system_template])
             chatbot = gr.Chatbot(
                 value = [ChatMessage(role="assistant", content=init_prompt)],
                 type = "messages",
@@ -541,6 +445,8 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
                 elem_id="chatbot",
                 layout = "panel",
                 avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
+                max_height="80vh",
+                height="100vh"
             )
 
             # bot.like(vote,None,None)
@@ -585,6 +491,9 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
             with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
                 sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
                 docs_textbox = gr.State("")
+
+
+
 
             # with Modal(visible = False) as config_modal:
             with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
@@ -656,6 +565,10 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
             # )
 
 
+            with gr.Tab("Figures",elem_id = "tab-figures",id = 3):
+                figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
+
+
 
    #---------------------------------------------------------------------------------------
    # OTHER TABS
@@ -752,7 +665,7 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
        # history = history + [(query,None)]
        # history = [tuple(x) for x in history]
        history = history + [ChatMessage(role="user", content=query)]
-        return (gr.update(interactive = False),gr.update(selected=
+        return (gr.update(interactive = False),gr.update(selected=1),history)
 
    def finish_chat():
        return (gr.update(interactive = True,value = ""),gr.update(selected=3))
@@ -765,10 +678,11 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
 
    (textbox
        .submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
-        .then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
+        .then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, figures_cards, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
        .then(finish_chat, None, [textbox,tabs],api_name = "finish_chat_textbox")
        .then(change_completion_status, [chat_completed_state], [chat_completed_state])
        # .then(lambda graphs : generate_html_graphs(graphs), [current_graphs], [graphs_container],)
+
    )
 
    (examples_hidden
@@ -777,7 +691,7 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
        # .then(finish_chat, None, [textbox,tabs],api_name = "finish_chat_examples")
        # .then(change_completion_status, [chat_completed_state], [chat_completed_state])
        .change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
-        .then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, current_graphs],concurrency_limit = 8,api_name = "chat_examples")
+        .then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, figures_cards, current_graphs],concurrency_limit = 8,api_name = "chat_examples")
        .then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
        # .then(lambda graphs : graphs, [current_graphs], [graphs_container])
 
@@ -797,4 +711,4 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
 
 demo.queue()
 
-demo.launch(
+demo.launch(ssr_mode=False)
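The net effect of the app.py changes is that `chat` now yields one extra output, a `figures` HTML string, which is routed into a new "Figures" tab. Below is a minimal, self-contained sketch of that wiring; the `fake_chat` generator and its contents are illustrative stand-ins for the real agent, while the component ids mirror the diff above.

```python
# Minimal sketch of the new figures wiring; fake_chat is purely illustrative,
# component names (figures_cards, tab-figures) follow the diff above.
import gradio as gr

async def fake_chat(query, history):
    figures = '<div class="figures-container"> <p> Go to the "Figures" tab at the top of the page to see full size images </p> </div>'
    history = history + [{"role": "user", "content": query},
                         {"role": "assistant", "content": f"Echo: {query}"}]
    # In the real app, one HTML card per retrieved figure is appended here.
    figures += "<div class='card card-image'>Figure card HTML would go here</div>"
    yield history, figures

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages", elem_id="chatbot")
    textbox = gr.Textbox(label="Ask a question")
    with gr.Tab("Figures", elem_id="tab-figures"):
        figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
    # The generator streams both the chat history and the figures HTML.
    textbox.submit(fake_chat, [textbox, chatbot], [chatbot, figures_cards])

if __name__ == "__main__":
    demo.launch(ssr_mode=False)
```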
climateqa/engine/chains/answer_ai_impact.py
CHANGED
@@ -38,7 +38,6 @@ def make_ai_impact_chain(llm):
 def make_ai_impact_node(llm):
 
     ai_impact_chain = make_ai_impact_chain(llm)
-
 
     async def answer_ai_impact(state,config):
         answer = await ai_impact_chain.ainvoke({"question":state["user_input"]},config)
climateqa/engine/chains/intent_categorization.py
CHANGED
@@ -7,34 +7,6 @@ from langchain_core.utils.function_calling import convert_to_openai_function
 from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
 
 
[… removes a 28-line commented-out earlier draft of the IntentCategorizer class (old "ai" / "search" / "chitchat" enum, old example wording, and the disabled "geo_info" / "esg" categories) …]
 class IntentCategorizer(BaseModel):
     """Analyzing the user message input"""
 
@@ -44,9 +16,9 @@ class IntentCategorizer(BaseModel):
     )
     intent: str = Field(
         enum=[
-            "
+            "ai_impact",
             # "geo_info",
-            # "esg"
+            # "esg",
             "search",
             "chitchat",
         ],
@@ -55,12 +27,13 @@ class IntentCategorizer(BaseModel):
             Any question
 
             Examples:
-
+            - ai_impact = Environmental impacts of AI: "What are the environmental impacts of AI", "How does AI affect the environment"
             - search = Searching for any quesiton about climate change, energy, biodiversity, nature, and everything we can find the IPCC or IPBES reports or scientific papers,
             - chitchat = Any general question that is not related to the environment or climate change or just conversational, or if you don't think searching the IPCC or IPBES reports would be relevant
         """,
         # - geo_info = Geolocated info about climate change: Any question where the user wants to know localized impacts of climate change, eg: "What will be the temperature in Marseille in 2050"
         # - esg = Any question about the ESG regulation, frameworks and standards like the CSRD, TCFD, SASB, GRI, CDP, etc.
+
     )
 
 
@@ -71,7 +44,7 @@ def make_intent_categorization_chain(llm):
     llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"IntentCategorizer"})
 
     prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are a helpful assistant, you will analyze, and categorize the user input message using the function provided. Categorize the user input as ai ONLY if it is related to Artificial Intelligence, search if it is related to the environment, climate change, energy, biodiversity, nature, etc. and chitchat if it is just general conversation."),
+        ("system", "You are a helpful assistant, you will analyze, translate and categorize the user input message using the function provided. Categorize the user input as ai ONLY if it is related to Artificial Intelligence, search if it is related to the environment, climate change, energy, biodiversity, nature, etc. and chitchat if it is just general conversation."),
         ("user", "input: {input}")
     ])
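For reference, the function-calling pattern this chain relies on can be reproduced in isolation. The sketch below mirrors the structure shown in the diff (a Pydantic schema bound as an OpenAI function, parsed with `JsonOutputFunctionsParser`); the Pydantic import, the model name, and the sample input are assumptions, not part of the commit.

```python
# Hedged sketch of the IntentCategorizer chain structure; the BaseModel import,
# model name and example input are assumptions, the chain layout follows the diff.
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_openai import ChatOpenAI

class IntentCategorizer(BaseModel):
    """Analyzing the user message input"""
    language: str = Field(description="Language of the message", default="English")
    intent: str = Field(enum=["ai_impact", "search", "chitchat"],
                        description="Category of the user input")

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)  # assumed model
openai_functions = [convert_to_openai_function(IntentCategorizer)]
llm_with_functions = llm.bind(functions=openai_functions,
                              function_call={"name": "IntentCategorizer"})

prompt = ChatPromptTemplate.from_messages([
    ("system", "Analyze, translate and categorize the user input message."),
    ("user", "input: {input}"),
])
chain = prompt | llm_with_functions | JsonOutputFunctionsParser()

# Returns a dict such as {'language': 'English', 'intent': 'search'}
print(chain.invoke({"input": "How will sea level rise affect the Netherlands?"}))
```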
climateqa/engine/chains/query_transformation.py
CHANGED
@@ -161,6 +161,7 @@ def make_query_transform_node(llm,k_final=15):
     question_state = {"question":question}
     analysis_output = rewriter_chain.invoke({"input":question})
 
+    # TODO WARNING llm should always return smthg
     # The case when the llm does not return any sources
     if not analysis_output["sources"] or not all(source in ["IPCC", "IPBS", "IPOS"] for source in analysis_output["sources"]):
         analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
climateqa/engine/chains/retrieve_documents.py
CHANGED
@@ -84,11 +84,13 @@ def make_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_fina
     # # Option 2 - Get 100/n documents by question and rerank the total
     # if rerank_by_question:
     #     k_by_question = divide_into_parts(k_final,len(questions))
+    if "documents" in state and state["documents"] is not None:
+        docs = state["documents"]
+    else:
+        docs = []
 
-
-
-
-    docs = []
+
+
     k_by_question = k_final // state["n_questions"]
 
     sources = current_question["sources"]
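The added branch makes the retriever node accumulate documents already present in the graph state instead of starting from an empty list on every sub-question. A small sketch of that accumulation pattern, with a simplified state shape and a stub search function standing in for the real vectorstore:

```python
# Sketch of the document-accumulation pattern introduced above: keep any documents
# already in the graph state and extend them rather than overwriting. The State
# shape and fake_search are assumptions, not the real retriever.
from typing import Optional, TypedDict

class State(TypedDict):
    documents: Optional[list]
    n_questions: int

def fake_search(question: str, k: int) -> list:
    return [f"doc for {question!r} #{j}" for j in range(k)]

def retrieve_node(state: State, current_question: str, k_final: int = 6) -> dict:
    # Reuse documents retrieved for earlier sub-questions, if any
    if "documents" in state and state["documents"] is not None:
        docs = state["documents"]
    else:
        docs = []
    k_by_question = k_final // state["n_questions"]
    docs = docs + fake_search(current_question, k_by_question)
    return {"documents": docs}

state: State = {"documents": None, "n_questions": 2}
state.update(retrieve_node(state, "sea level rise"))
state.update(retrieve_node(state, "ocean acidification"))
print(len(state["documents"]))  # 6 documents accumulated across both questions
```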
climateqa/engine/graph.py
CHANGED
@@ -92,10 +92,9 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     transform_query = make_query_transform_node(llm)
     translate_query = make_translation_node(llm)
     answer_chitchat = make_chitchat_node(llm)
-
+    answer_ai_impact = make_ai_impact_node(llm)
     retrieve_documents = make_retriever_node(vectorstore_ipcc, reranker, llm)
     retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
-    # answer_rag_graph = make_rag_graph_node(llm)
     answer_rag = make_rag_node(llm, with_docs=True)
     answer_rag_no_docs = make_rag_node(llm, with_docs=False)
     chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
@@ -190,142 +189,3 @@ def display_graph(app):
         )
     )
 )
-
[… removes ~140 lines of commented-out legacy code: the old imports, an earlier GraphState definition, the route_intent / route_translation / route_based_on_relevant_docs helpers, an earlier make_graph_agent wiring (nodes, conditional edges, edges, compile) and an earlier display_graph helper …]
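With `answer_ai_impact` registered again, the routing follows the wiring that the removed commented-out block used to spell out. A hedged LangGraph sketch of that routing, where the state fields and node bodies are stubs rather than the project's real nodes:

```python
# Sketch of wiring an answer_ai_impact node into a LangGraph StateGraph,
# following the routing shown in this file's removed commented-out wiring.
# GraphState fields and the stub node bodies are assumptions.
from typing import TypedDict
from langgraph.graph import END, StateGraph

class GraphState(TypedDict):
    user_input: str
    intent: str
    answer: str

def categorize_intent(state): return {"intent": "ai_impact"}          # stub
def answer_ai_impact(state):  return {"answer": "AI impact answer"}   # stub
def answer_chitchat(state):   return {"answer": "chitchat answer"}    # stub
def search(state):            return {}                               # stub

def route_intent(state):
    if state["intent"] in ["chitchat", "esg"]:
        return "answer_chitchat"
    elif state["intent"] == "ai_impact":
        return "answer_ai_impact"
    return "search"

workflow = StateGraph(GraphState)
workflow.add_node("categorize_intent", categorize_intent)
workflow.add_node("answer_ai_impact", answer_ai_impact)
workflow.add_node("answer_chitchat", answer_chitchat)
workflow.add_node("search", search)
workflow.set_entry_point("categorize_intent")
workflow.add_conditional_edges("categorize_intent", route_intent,
                               {k: k for k in ["answer_chitchat", "answer_ai_impact", "search"]})
workflow.add_edge("answer_ai_impact", END)
workflow.add_edge("answer_chitchat", END)
workflow.add_edge("search", END)

app = workflow.compile()
print(app.invoke({"user_input": "How does AI affect the environment?"})["answer"])
```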
climateqa/engine/llm/openai.py
CHANGED
@@ -7,7 +7,7 @@ try:
 except Exception:
     pass
 
-def get_llm(model="gpt-
+def get_llm(model="gpt-4o-mini",max_tokens=1024, temperature=0.0, streaming=True,timeout=30, **kwargs):
 
     llm = ChatOpenAI(
         model=model,
climateqa/engine/reranker.py
CHANGED
@@ -6,7 +6,7 @@ from sentence_transformers import CrossEncoder
 
 load_dotenv()
 
-def get_reranker(model = "
+def get_reranker(model = "nano", cohere_api_key = None):
 
     assert model in ["nano","tiny","small","large", "jina"]
 
@@ -34,7 +34,6 @@ def rerank_docs(reranker,docs,query):
     # Get a list of texts from langchain docs
     input_docs = [x.page_content for x in docs]
 
-    print(f"\n\nDOCS:{input_docs}\n\n")
     # Rerank using rerankers library
     results = reranker.rank(query=query, docs=input_docs)
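Both helpers now have complete signatures again, so the app-level setup from the app.py hunk reads as below. This is only a usage sketch; provider credentials come from the environment and nothing here is new API surface.

```python
# Usage sketch of the updated helpers, as called from app.py in this merge;
# the model defaults to gpt-4o-mini in the OpenAI backend.
from climateqa.engine.llm import get_llm
from climateqa.engine.reranker import get_reranker

llm = get_llm(provider="openai", max_tokens=1024, temperature=0.0)
reranker = get_reranker("large")  # one of "nano", "tiny", "small", "large", "jina"
```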
front/utils.py
CHANGED
@@ -134,7 +134,7 @@ def make_html_source(source,i):
     score = meta['reranking_score']
     if score > 0.8:
         color = "score-green"
-    elif score > 0.
+    elif score > 0.5:
         color = "score-orange"
     else:
         color = "score-red"
@@ -170,8 +170,9 @@ def make_html_source(source,i):
         <div class="card card-image">
             <div class="card-content">
                 <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
-                <p>{content}</p>
                 <p class='ai-generated'>AI-generated description</p>
+                <p>{content}</p>
+
                 {relevancy_score}
             </div>
             <div class="card-footer">
@@ -186,6 +187,53 @@ def make_html_source(source,i):
     return card
 
 
+def make_html_figure_sources(source,i,img_str):
+    meta = source.metadata
+    content = source.page_content.strip()
+
+    score = meta['reranking_score']
+    if score > 0.8:
+        color = "score-green"
+    elif score > 0.5:
+        color = "score-orange"
+    else:
+        color = "score-red"
+
+    toc_levels = []
+    if len(toc_levels) > 0:
+        name = f"<b>{toc_levels}</b><br/>{meta['name']}"
+    else:
+        name = meta['name']
+
+    relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{color}'>{score:.1%}</span></p>"
+
+    if meta["figure_code"] != "N/A":
+        title = f"{meta['figure_code']} - {meta['short_name']}"
+    else:
+        title = f"{meta['short_name']}"
+
+    card = f"""
+    <div class="card card-image">
+        <div class="card-content">
+            <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
+            <p class='ai-generated'>AI-generated description</p>
+            <img src="data:image/png;base64, { img_str } alt="Alt text" />
+
+            <p>{content}</p>
+
+            {relevancy_score}
+        </div>
+        <div class="card-footer">
+            <span>{name}</span>
+            <a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">
+                <span role="img" aria-label="Open PDF">π</span>
+            </a>
+        </div>
+    </div>
+    """
+    return card
+
+
 
 def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
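The new `make_html_figure_sources` helper is consumed from the chat loop in app.py (see the hunk above): each image chunk is fetched, base64-encoded, and turned into one HTML card appended to `figures`. A trimmed sketch of that call site, where the `Document` metadata values and the in-memory PIL image are illustrative stand-ins for a real retrieved figure and the Azure blob fetch:

```python
# Sketch of calling make_html_figure_sources; the Document below is a fabricated
# stand-in for a retrieved image chunk, and the PIL image replaces the real
# get_image_from_azure_blob_storage(...) fetch.
import base64
from io import BytesIO
from PIL import Image
from langchain_core.documents import Document
from front.utils import make_html_figure_sources

docs = [Document(
    page_content="AI-generated description of the figure",
    metadata={"chunk_type": "image", "reranking_score": 0.9, "name": "IPCC AR6 WGI",
              "short_name": "IPCC AR6", "figure_code": "Figure SPM.1",
              "page_number": 7, "url": "https://example.org/report.pdf"},  # illustrative values
)]

figures, gallery = "", []
docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
for i, doc in enumerate(docs_figures):
    try:
        img = Image.new("RGB", (32, 32))  # stand-in for the Azure blob image
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        figures += make_html_figure_sources(doc, i, img_str)  # one HTML card per figure
        gallery.append(img)
    except Exception as e:
        print(f"Skipped adding image {i} because of {e}")
```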
requirements.txt
CHANGED
@@ -17,3 +17,4 @@ torch==2.3.0
 nvidia-cudnn-cu12==8.9.2.26
 langchain-community==0.2
 msal==1.31
+matplotlib==3.9.2
style.css
CHANGED
@@ -11,7 +11,23 @@
     margin: 0px;
 }
 
-
+
+/* fix for huggingface infinite growth*/
+main.flex.flex-1.flex-col {
+    max-height: 95vh !important;
+}
+
+
+.avatar-container.svelte-1x5p6hu:not(.thumbnail-item) img {
+    width: 100%;
+    height: 100%;
+    object-fit: cover;
+    border-radius: 50%;
+    padding: 0px;
+    margin: 0px;
+}
+
+.warning-box {
     background-color: #fff3cd;
     border: 1px solid #ffeeba;
     border-radius: 4px;
@@ -194,41 +210,59 @@ label.selected{
     padding:0px !important;
 }
 
-
 @media screen and (min-width: 1024px) {
+    .gradio-container {
+        max-height: calc(100vh - 190px) !important;
+        overflow: hidden;
+    }
+
+    /* div#chatbot{
+        height:calc(100vh - 170px) !important;
+        max-height:calc(100vh - 170px) !important;
+
+    } */
+
     div#tab-examples{
         height:calc(100vh - 190px) !important;
-        overflow-y: auto;
+        /* overflow-y: auto; */
     }
 
     div#sources-textbox{
         height:calc(100vh - 190px) !important;
-        overflow-y: auto !important;
+        /* overflow-y: auto !important; */
+    }
+
+    div#sources-figures{
+        height:calc(100vh - 190px) !important;
+        /* overflow-y: auto !important; */
     }
 
     div#tab-config{
         height:calc(100vh - 190px) !important;
-        overflow-y: auto !important;
+        /* overflow-y: auto !important; */
+    }
+
+    /* Force container to respect height limits */
+    .main-component{
+        contain: size layout;
+        overflow: hidden;
     }
 
+    /*
     div#chatbot-row{
         height:calc(100vh - 90px) !important;
+        max-height:calc(100vh - 90px) !important;
     }
 
-
-        height:calc(100vh - 170px) !important;
-    }
 
     .max-height{
         height:calc(100vh - 90px) !important;
+        max-height:calc(100vh - 90px) !important;
         overflow-y: auto;
     }
+    */
 
-    /* .tabitem:nth-child(n+3) {
-        padding-top:30px;
-        padding-left:40px;
-        padding-right:40px;
-    } */
 }
 
 footer {
@@ -508,4 +542,22 @@ div#tab-saved-graphs {
 }
 .message-buttons-left.panel.message-buttons.with-avatar {
     display: none;
+}
+.score-red{
+    color:red !important;
+}
+.message-buttons-left.panel.message-buttons.with-avatar {
+    display: none;
+}
+
+/* Specific fixes for Hugging Face Space iframe */
+.h-full {
+    height: auto !important;
+    min-height: 0 !important;
+}
+
+.space-content {
+    height: auto !important;
+    max-height: 100vh !important;
+    overflow: hidden;
 }
|