Spaces:
Running
pseudotensor
committed on
Commit • cf9ad1a
1 Parent(s): 1c0f538
Update with h2oGPT hash e7d4914948ac2b9a5a82f1cc82556197b261cb46
Browse files
- app.py +1 -1
- client_test.py +22 -14
- enums.py +16 -1
- evaluate_params.py +47 -0
- gen.py +0 -0
- gpt4all_llm.py +9 -0
- gpt_langchain.py +156 -52
- gradio_runner.py +143 -41
- gradio_utils/__pycache__/grclient.cpython-310.pyc +0 -0
- gradio_utils/__pycache__/prompt_form.cpython-310.pyc +0 -0
- gradio_utils/prompt_form.py +7 -3
- h2oai_pipeline.py +1 -0
- prompter.py +4 -2
app.py
CHANGED
@@ -1 +1 @@
- …
+ …
client_test.py
CHANGED
@@ -12,13 +12,13 @@ Currently, this will force model to be on a single GPU.
 
 Then run this client as:
 
-python client_test.py
+python src/client_test.py
 
 
 
 For HF spaces:
 
-HOST="https://h2oai-h2ogpt-chatbot.hf.space" python client_test.py
+HOST="https://h2oai-h2ogpt-chatbot.hf.space" python src/client_test.py
 
 Result:
 
@@ -28,7 +28,7 @@ Loaded as API: https://h2oai-h2ogpt-chatbot.hf.space ✔
 
 For demo:
 
-HOST="https://gpt.h2o.ai" python client_test.py
+HOST="https://gpt.h2o.ai" python src/client_test.py
 
 Result:
 
@@ -48,7 +48,7 @@ import markdown  # pip install markdown
 import pytest
 from bs4 import BeautifulSoup  # pip install beautifulsoup4
 
-from enums import DocumentChoices
+from enums import DocumentChoices, LangChainAction
 
 debug = False
 
@@ -67,7 +67,9 @@ def get_client(serialize=True):
 def get_args(prompt, prompt_type, chat=False, stream_output=False,
              max_new_tokens=50,
              top_k_docs=3,
-             langchain_mode='Disabled'):
+             langchain_mode='Disabled',
+             langchain_action=LangChainAction.QUERY.value,
+             prompt_dict=None):
     from collections import OrderedDict
     kwargs = OrderedDict(instruction=prompt if chat else '',  # only for chat=True
                          iinput='',  # only for chat=True
@@ -76,7 +78,7 @@ def get_args(prompt, prompt_type, chat=False, stream_output=False,
                          # but leave stream_output=False for simple input/output mode
                          stream_output=stream_output,
                          prompt_type=prompt_type,
-                         prompt_dict= …
+                         prompt_dict=prompt_dict,
                          temperature=0.1,
                          top_p=0.75,
                          top_k=40,
@@ -92,12 +94,13 @@ def get_args(prompt, prompt_type, chat=False, stream_output=False,
                          instruction_nochat=prompt if not chat else '',
                          iinput_nochat='',  # only for chat=False
                          langchain_mode=langchain_mode,
+                         langchain_action=langchain_action,
                          top_k_docs=top_k_docs,
                          chunk=True,
                          chunk_size=512,
                          document_choice=[DocumentChoices.All_Relevant.name],
                          )
-    from …
+    from evaluate_params import eval_func_param_names
     assert len(set(eval_func_param_names).difference(set(list(kwargs.keys())))) == 0
     if chat:
         # add chatbot output on end. Assumes serialize=False
@@ -198,6 +201,7 @@ def run_client_nochat_api_lean_morestuff(prompt, prompt_type='human_bot', max_ne
                   instruction_nochat=prompt,
                   iinput_nochat='',
                   langchain_mode='Disabled',
+                  langchain_action=LangChainAction.QUERY.value,
                   top_k_docs=4,
                   document_choice=['All'],
                   )
@@ -219,21 +223,24 @@ def run_client_nochat_api_lean_morestuff(prompt, prompt_type='human_bot', max_ne
 @pytest.mark.skip(reason="For manual use against some server, no server launched")
 def test_client_chat(prompt_type='human_bot'):
     return run_client_chat(prompt='Who are you?', prompt_type=prompt_type, stream_output=False, max_new_tokens=50,
-                           langchain_mode='Disabled')
+                           langchain_mode='Disabled', langchain_action=LangChainAction.QUERY.value)
 
 
 @pytest.mark.skip(reason="For manual use against some server, no server launched")
 def test_client_chat_stream(prompt_type='human_bot'):
     return run_client_chat(prompt="Tell a very long kid's story about birds.", prompt_type=prompt_type,
                            stream_output=True, max_new_tokens=512,
-                           langchain_mode='Disabled')
+                           langchain_mode='Disabled', langchain_action=LangChainAction.QUERY.value)
 
 
-def run_client_chat(prompt, prompt_type, stream_output, max_new_tokens, langchain_mode):
+def run_client_chat(prompt, prompt_type, stream_output, max_new_tokens, langchain_mode, langchain_action,
+                    prompt_dict=None):
     client = get_client(serialize=False)
 
     kwargs, args = get_args(prompt, prompt_type, chat=True, stream_output=stream_output,
-                            max_new_tokens=max_new_tokens, langchain_mode=langchain_mode)
+                            max_new_tokens=max_new_tokens, langchain_mode=langchain_mode,
+                            langchain_action=langchain_action,
+                            prompt_dict=prompt_dict)
     return run_client(client, prompt, args, kwargs)
 
 
@@ -276,14 +283,15 @@ def run_client(client, prompt, args, kwargs, do_md_to_text=True, verbose=False):
 def test_client_nochat_stream(prompt_type='human_bot'):
     return run_client_nochat_gen(prompt="Tell a very long kid's story about birds.", prompt_type=prompt_type,
                                  stream_output=True, max_new_tokens=512,
-                                 langchain_mode='Disabled')
+                                 langchain_mode='Disabled', langchain_action=LangChainAction.QUERY.value)
 
 
-def run_client_nochat_gen(prompt, prompt_type, stream_output, max_new_tokens, langchain_mode):
+def run_client_nochat_gen(prompt, prompt_type, stream_output, max_new_tokens, langchain_mode, langchain_action):
     client = get_client(serialize=False)
 
     kwargs, args = get_args(prompt, prompt_type, chat=False, stream_output=stream_output,
-                            max_new_tokens=max_new_tokens, langchain_mode=langchain_mode)
+                            max_new_tokens=max_new_tokens, langchain_mode=langchain_mode,
+                            langchain_action=langchain_action)
    return run_client_gen(client, prompt, args, kwargs)
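For orientation only, a minimal sketch of driving the updated chat helper by hand against a running h2oGPT server. The HOST value and the local-server assumption are illustrative and not part of this commit; run it from the repo's src/ directory so the imports resolve, per the docstring change above.

import os
os.environ['HOST'] = "http://localhost:7860"   # assumed local server, as in the HOST examples above

from client_test import run_client_chat
from enums import LangChainAction

# langchain_action is the new required argument added by this commit
res = run_client_chat(prompt='Who are you?', prompt_type='human_bot',
                      stream_output=False, max_new_tokens=50,
                      langchain_mode='Disabled',
                      langchain_action=LangChainAction.QUERY.value)
print(res)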
enums.py
CHANGED
@@ -37,6 +37,9 @@ class DocumentChoices(Enum):
     Just_LLM = 3
 
 
+non_query_commands = [DocumentChoices.All_Relevant_Only_Sources.name, DocumentChoices.Only_All_Sources.name]
+
+
 class LangChainMode(Enum):
     """LangChain mode"""
 
@@ -52,10 +55,22 @@ class LangChainMode(Enum):
     H2O_DAI_DOCS = "DriverlessAI docs"
 
 
+class LangChainAction(Enum):
+    """LangChain action"""
+
+    QUERY = "Query"
+    # WIP:
+    #SUMMARIZE_MAP = "Summarize_map_reduce"
+    SUMMARIZE_MAP = "Summarize"
+    SUMMARIZE_ALL = "Summarize_all"
+    SUMMARIZE_REFINE = "Summarize_refine"
+
+
 no_server_str = no_lora_str = no_model_str = '[None/Remove]'
 
 
-# from site-packages/langchain/llms/openai.py
+# from site-packages/langchain/llms/openai.py
+# but needed since ChatOpenAI doesn't have this information
 model_token_mapping = {
     "gpt-4": 8192,
     "gpt-4-0314": 8192,
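A quick illustration of what the new enum carries (all strings are taken from the diff above); it is the enum values, not the member names, that travel through the API and the Gradio UI:

from enums import LangChainAction, non_query_commands

assert LangChainAction.QUERY.value == "Query"
assert LangChainAction.SUMMARIZE_MAP.value == "Summarize"

# non_query_commands collects the DocumentChoices that only fetch sources and skip the LLM
print(non_query_commands)   # ['All_Relevant_Only_Sources', 'Only_All_Sources']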
evaluate_params.py
ADDED
@@ -0,0 +1,47 @@
+no_default_param_names = [
+    'instruction',
+    'iinput',
+    'context',
+    'instruction_nochat',
+    'iinput_nochat',
+]
+
+gen_hyper = ['temperature',
+             'top_p',
+             'top_k',
+             'num_beams',
+             'max_new_tokens',
+             'min_new_tokens',
+             'early_stopping',
+             'max_time',
+             'repetition_penalty',
+             'num_return_sequences',
+             'do_sample',
+             ]
+
+eval_func_param_names = ['instruction',
+                         'iinput',
+                         'context',
+                         'stream_output',
+                         'prompt_type',
+                         'prompt_dict'] + \
+                        gen_hyper + \
+                        ['chat',
+                         'instruction_nochat',
+                         'iinput_nochat',
+                         'langchain_mode',
+                         'langchain_action',
+                         'top_k_docs',
+                         'chunk',
+                         'chunk_size',
+                         'document_choice',
+                         ]
+
+# form evaluate defaults for submit_nochat_api
+eval_func_param_names_defaults = eval_func_param_names.copy()
+for k in no_default_param_names:
+    if k in eval_func_param_names_defaults:
+        eval_func_param_names_defaults.remove(k)
+
+
+eval_extra_columns = ['prompt', 'response', 'score']
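A small sanity-check sketch of how these lists relate; the commented default_kwargs line mirrors how gradio_runner.py consumes them later in this commit:

from evaluate_params import (eval_func_param_names,
                             eval_func_param_names_defaults,
                             no_default_param_names)

# per-request-only parameters are stripped from the defaults list
assert all(k not in eval_func_param_names_defaults for k in no_default_param_names)
# the new langchain_action parameter is part of the evaluate() argument ordering
assert 'langchain_action' in eval_func_param_names

# gradio_runner.py builds its nochat defaults from this list:
#     default_kwargs = {k: kwargs[k] for k in eval_func_param_names_defaults}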
gen.py
ADDED
The diff for this file is too large to render. See raw diff.
gpt4all_llm.py
CHANGED
@@ -19,6 +19,15 @@ def get_model_tokenizer_gpt4all(base_model, **kwargs):
                         n_ctx=2048 - 256)
     env_gpt4all_file = ".env_gpt4all"
     model_kwargs.update(dotenv_values(env_gpt4all_file))
+    # make int or float if can to satisfy types for class
+    for k, v in model_kwargs.items():
+        try:
+            if float(v) == int(v):
+                model_kwargs[k] = int(v)
+            else:
+                model_kwargs[k] = float(v)
+        except:
+            pass
 
     if base_model == "llama":
         if 'model_path_llama' not in model_kwargs:
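Why the coercion matters: dotenv_values() returns every value as a string, while the llama/gpt4all model classes expect real ints and floats. A standalone illustration of the same idea, with hypothetical .env keys (the explicit exception types are mine; the committed code uses a bare except):

model_kwargs = {"n_ctx": "1792", "model_path_llama": "ggml-model.bin"}
for k, v in model_kwargs.items():
    try:
        if float(v) == int(v):
            model_kwargs[k] = int(v)
        else:
            model_kwargs[k] = float(v)
    except (TypeError, ValueError):
        pass

assert model_kwargs == {"n_ctx": 1792, "model_path_llama": "ggml-model.bin"}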
gpt_langchain.py
CHANGED
@@ -23,8 +23,10 @@ from langchain.callbacks import streaming_stdout
 from langchain.embeddings import HuggingFaceInstructEmbeddings
 from tqdm import tqdm
 
-from enums import DocumentChoices, no_lora_str, model_token_mapping, source_prefix, source_postfix
-
+from enums import DocumentChoices, no_lora_str, model_token_mapping, source_prefix, source_postfix, non_query_commands, \
+    LangChainAction, LangChainMode
+from evaluate_params import gen_hyper
+from gen import get_model, SEED
 from prompter import non_hf_types, PromptType, Prompter
 from utils import wrapped_partial, EThread, import_matplotlib, sanitize_filename, makedirs, get_url, flatten_list, \
     get_device, ProgressParallel, remove, hash_file, clear_torch_cache, NullContext, get_hf_server, FakeTokenizer
@@ -43,7 +45,8 @@ from langchain.chains.qa_with_sources import load_qa_with_sources_chain
 from langchain.document_loaders import PyPDFLoader, TextLoader, CSVLoader, PythonLoader, TomlLoader, \
     UnstructuredURLLoader, UnstructuredHTMLLoader, UnstructuredWordDocumentLoader, UnstructuredMarkdownLoader, \
     EverNoteLoader, UnstructuredEmailLoader, UnstructuredODTLoader, UnstructuredPowerPointLoader, \
-    UnstructuredEPubLoader, UnstructuredImageLoader, UnstructuredRTFLoader, ArxivLoader, UnstructuredPDFLoader
+    UnstructuredEPubLoader, UnstructuredImageLoader, UnstructuredRTFLoader, ArxivLoader, UnstructuredPDFLoader, \
+    UnstructuredExcelLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter, Language
 from langchain.chains.question_answering import load_qa_chain
 from langchain.docstore.document import Document
@@ -351,6 +354,7 @@ class GradioInference(LLM):
         stream_output = self.stream
         gr_client = self.client
         client_langchain_mode = 'Disabled'
+        client_langchain_action = LangChainAction.QUERY.value
         top_k_docs = 1
         chunk = True
         chunk_size = 512
@@ -379,6 +383,7 @@ class GradioInference(LLM):
                       instruction_nochat=prompt if not self.chat_client else '',
                       iinput_nochat='',  # only for chat=False
                       langchain_mode=client_langchain_mode,
+                      langchain_action=client_langchain_action,
                       top_k_docs=top_k_docs,
                       chunk=chunk,
                       chunk_size=chunk_size,
@@ -637,6 +642,7 @@ def get_llm(use_openai_model=False,
         callbacks = [StreamingGradioCallbackHandler()]
         assert prompter is not None
         stop_sequences = list(set(prompter.terminate_response + [prompter.PreResponse]))
+        stop_sequences = [x for x in stop_sequences if x]
 
     if gr_client:
         chat_client = False
@@ -744,7 +750,7 @@
 
     if stream_output:
         skip_prompt = False
-        from …
+        from gen import H2OTextIteratorStreamer
         decoder_kwargs = {}
         streamer = H2OTextIteratorStreamer(tokenizer, skip_prompt=skip_prompt, block=False, **decoder_kwargs)
         gen_kwargs.update(dict(streamer=streamer))
@@ -944,14 +950,16 @@ have_playwright = False
 
 image_types = ["png", "jpg", "jpeg"]
 non_image_types = ["pdf", "txt", "csv", "toml", "py", "rst", "rtf",
-                   "md",
+                   "md",
+                   "html", "mhtml",
                    "enex", "eml", "epub", "odt", "pptx", "ppt",
                    "zip", "urls",
+
                    ]
 # "msg", GPL3
 
 if have_libreoffice:
-    non_image_types.extend(["docx", "doc"])
+    non_image_types.extend(["docx", "doc", "xls", "xlsx"])
 
 file_types = non_image_types + image_types
 
@@ -961,7 +969,7 @@ def add_meta(docs1, file):
     hashid = hash_file(file)
     if not isinstance(docs1, (list, tuple, types.GeneratorType)):
         docs1 = [docs1]
-    [x.metadata.update(dict(input_type=file_extension, date=str(datetime.now), hashid=hashid)) for x in docs1]
+    [x.metadata.update(dict(input_type=file_extension, date=str(datetime.now()), hashid=hashid)) for x in docs1]
 
 
 def file_to_doc(file, base_path=None, verbose=False, fail_any_exception=False,
@@ -1038,6 +1046,10 @@ def file_to_doc(file, base_path=None, verbose=False, fail_any_exception=False,
         docs1 = UnstructuredWordDocumentLoader(file_path=file).load()
         add_meta(docs1, file)
         doc1 = chunk_sources(docs1, chunk=chunk, chunk_size=chunk_size)
+    elif (file.lower().endswith('.xlsx') or file.lower().endswith('.xls')) and have_libreoffice:
+        docs1 = UnstructuredExcelLoader(file_path=file).load()
+        add_meta(docs1, file)
+        doc1 = chunk_sources(docs1, chunk=chunk, chunk_size=chunk_size)
     elif file.lower().endswith('.odt'):
         docs1 = UnstructuredODTLoader(file_path=file).load()
         add_meta(docs1, file)
@@ -1171,7 +1183,7 @@ def file_to_doc(file, base_path=None, verbose=False, fail_any_exception=False,
         # so just extract in path where
         zip_ref.extractall(base_path)
         # recurse
-        doc1 = path_to_docs(base_path, verbose=verbose, fail_any_exception=fail_any_exception)
+        doc1 = path_to_docs(base_path, verbose=verbose, fail_any_exception=fail_any_exception, n_jobs=n_jobs)
     else:
         raise RuntimeError("No file handler for %s" % os.path.basename(file))
 
@@ -1758,6 +1770,8 @@ def run_qa_db(**kwargs):
 
 
 def _run_qa_db(query=None,
+               iinput=None,
+               context=None,
               use_openai_model=False, use_openai_embedding=False,
               first_para=False, text_limit=None, top_k_docs=4, chunk=True, chunk_size=512,
              user_path=None,
@@ -1787,6 +1801,7 @@ def _run_qa_db(query=None,
              repetition_penalty=1.0,
              num_return_sequences=1,
              langchain_mode=None,
+              langchain_action=None,
              document_choice=[DocumentChoices.All_Relevant.name],
              n_jobs=-1,
              verbose=False,
@@ -1803,7 +1818,7 @@ def _run_qa_db(query=None,
     :param use_openai_embedding:
     :param first_para:
     :param text_limit:
-    :param …
+    :param top_k_docs:
     :param chunk:
     :param chunk_size:
     :param user_path: user path to glob recursively from
@@ -1869,12 +1884,28 @@ def _run_qa_db(query=None,
     sim_kwargs = {k: v for k, v in locals().items() if k in func_names}
     missing_kwargs = [x for x in func_names if x not in sim_kwargs]
     assert not missing_kwargs, "Missing: %s" % missing_kwargs
-    docs, chain, scores, use_context = get_similarity_chain(**sim_kwargs)
-    if cmd in …
+    docs, chain, scores, use_context, have_any_docs = get_similarity_chain(**sim_kwargs)
+    if cmd in non_query_commands:
         formatted_doc_chunks = '\n\n'.join([get_url(x) + '\n\n' + x.page_content for x in docs])
         yield formatted_doc_chunks, ''
         return
+    if not docs and langchain_action in [LangChainAction.SUMMARIZE_MAP.value,
+                                         LangChainAction.SUMMARIZE_ALL.value,
+                                         LangChainAction.SUMMARIZE_REFINE.value]:
+        ret = 'No relevant documents to summarize.' if have_any_docs else 'No documents to summarize.'
+        extra = ''
+        yield ret, extra
+        return
+    if not docs and langchain_mode not in [LangChainMode.DISABLED.value,
+                                           LangChainMode.CHAT_LLM.value,
+                                           LangChainMode.LLM.value]:
+        ret = 'No relevant documents to query.' if have_any_docs else 'No documents to query.'
+        extra = ''
+        yield ret, extra
+        return
+
     if chain is None and model_name not in non_hf_types:
+        # here if no docs at all and not HF type
         # can only return if HF type
         return
 
@@ -1933,6 +1964,7 @@ def _run_qa_db(query=None,
 
 
 def get_similarity_chain(query=None,
+                         iinput=None,
                          use_openai_model=False, use_openai_embedding=False,
                          first_para=False, text_limit=None, top_k_docs=4, chunk=True, chunk_size=512,
                         user_path=None,
@@ -1947,6 +1979,7 @@ def get_similarity_chain(query=None,
                         load_db_if_exists=False,
                         db=None,
                         langchain_mode=None,
+                         langchain_action=None,
                         document_choice=[DocumentChoices.All_Relevant.name],
                         n_jobs=-1,
                         # beyond run_db_query:
@@ -1997,25 +2030,56 @@ def get_similarity_chain(query=None,
                     db=db,
                     n_jobs=n_jobs,
                     verbose=verbose)
-    if …
-    …
-    {context}
-    %s
-
+    have_any_docs = db is not None
+    if langchain_action == LangChainAction.QUERY.value:
+        if iinput:
+            query = "%s\n%s" % (query, iinput)
+
+        if 'falcon' in model_name:
+            extra = "According to only the information in the document sources provided within the context above, "
+            prefix = "Pay attention and remember information below, which will help to answer the question or imperative after the context ends."
+        elif inference_server in ['openai', 'openai_chat']:
+            extra = "According to (primarily) the information in the document sources provided within context above, "
+            prefix = "Pay attention and remember information below, which will help to answer the question or imperative after the context ends. If the answer cannot be primarily obtained from information within the context, then respond that the answer does not appear in the context of the documents."
+        else:
+            extra = ""
+            prefix = ""
+        if langchain_mode in ['Disabled', 'ChatLLM', 'LLM'] or not use_context:
+            template_if_no_docs = template = """%s{context}{question}""" % prefix
+        else:
+            template = """%s
+\"\"\"
+{context}
+\"\"\"
+%s{question}""" % (prefix, extra)
+            template_if_no_docs = """%s{context}%s{question}""" % (prefix, extra)
+    elif langchain_action in [LangChainAction.SUMMARIZE_ALL.value, LangChainAction.SUMMARIZE_MAP.value]:
+        none = ['', '\n', None]
+        if query in none and iinput in none:
+            prompt_summary = "Using only the text above, write a condensed and concise summary:\n"
+        elif query not in none:
+            prompt_summary = "Focusing on %s, write a condensed and concise Summary:\n" % query
+        elif iinput not in None:
+            prompt_summary = iinput
+        else:
+            prompt_summary = "Focusing on %s, %s:\n" % (query, iinput)
+        # don't auto reduce
+        auto_reduce_chunks = False
+        if langchain_action == LangChainAction.SUMMARIZE_MAP.value:
+            fstring = '{text}'
+        else:
+            fstring = '{input_documents}'
+        template = """In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text:
+\"\"\"
+%s
+\"\"\"\n%s""" % (fstring, prompt_summary)
+        template_if_no_docs = "Exactly only say: There are no documents to summarize."
+    elif langchain_action in [LangChainAction.SUMMARIZE_REFINE]:
+        template = ''  # unused
+        template_if_no_docs = ''  # unused
+    else:
+        raise RuntimeError("No such langchain_action=%s" % langchain_action)
+
     if not use_openai_model and prompt_type not in ['plain'] or model_name in non_hf_types:
         use_template = True
     else:
@@ -2040,14 +2104,26 @@ def get_similarity_chain(query=None,
     if cmd == DocumentChoices.Just_LLM.name:
         docs = []
         scores = []
-    elif cmd == DocumentChoices.Only_All_Sources.name:
+    elif cmd == DocumentChoices.Only_All_Sources.name or query in [None, '', '\n']:
         db_documents, db_metadatas = get_docs_and_meta(db, top_k_docs, filter_kwargs=filter_kwargs)
         # similar to langchain's chroma's _results_to_docs_and_scores
         docs_with_score = [(Document(page_content=result[0], metadata=result[1] or {}), 0)
-                           for result in zip(db_documents, db_metadatas)]
+                           for result in zip(db_documents, db_metadatas)]
+
+        # order documents
+        doc_hashes = [x['doc_hash'] for x in db_metadatas]
+        doc_chunk_ids = [x['chunk_id'] for x in db_metadatas]
+        docs_with_score = [x for _, _, x in
+                           sorted(zip(doc_hashes, doc_chunk_ids, docs_with_score), key=lambda x: (x[0], x[1]))
+                           ]
+
+        docs_with_score = docs_with_score[:top_k_docs]
         docs = [x[0] for x in docs_with_score]
         scores = [x[1] for x in docs_with_score]
+        have_any_docs |= len(docs) > 0
     else:
+        # FIXME: if langchain_action == LangChainAction.SUMMARIZE_MAP.value
+        # if map_reduce, then no need to auto reduce chunks
         if top_k_docs == -1 or auto_reduce_chunks:
             # docs_with_score = db.similarity_search_with_score(query, k=k_db, **filter_kwargs)[:top_k_docs]
             top_k_docs_tokenize = 100
@@ -2120,6 +2196,7 @@ def get_similarity_chain(query=None,
             if reverse_docs:
                 docs_with_score.reverse()
             # cut off so no high distance docs/sources considered
+            have_any_docs |= len(docs_with_score) > 0  # before cut
             docs = [x[0] for x in docs_with_score if x[1] < cut_distanct]
             scores = [x[1] for x in docs_with_score if x[1] < cut_distanct]
             if len(scores) > 0 and verbose:
@@ -2131,14 +2208,14 @@ def get_similarity_chain(query=None,
 
     if not docs and use_context and model_name not in non_hf_types:
         # if HF type and have no docs, can bail out
-        return docs, None, [], False
+        return docs, None, [], False, have_any_docs
 
-    if cmd in …
+    if cmd in non_query_commands:
         # no LLM use
-        return docs, None, [], False
+        return docs, None, [], False, have_any_docs
 
     common_words_file = "data/NGSL_1.2_stats.csv.zip"
-    if os.path.isfile(common_words_file):
+    if os.path.isfile(common_words_file) and langchain_mode == LangChainAction.QUERY.value:
         df = pd.read_csv("data/NGSL_1.2_stats.csv.zip")
         import string
         reduced_query = query.translate(str.maketrans(string.punctuation, ' ' * len(string.punctuation))).strip()
@@ -2155,25 +2232,47 @@ def get_similarity_chain(query=None,
             use_context = False
             template = template_if_no_docs
 
-    if …
-    …
+    if langchain_action == LangChainAction.QUERY.value:
+        if use_template:
+            # instruct-like, rather than few-shot prompt_type='plain' as default
+            # but then sources confuse the model with how inserted among rest of text, so avoid
+            prompt = PromptTemplate(
+                # input_variables=["summaries", "question"],
+                input_variables=["context", "question"],
+                template=template,
+            )
+            chain = load_qa_chain(llm, prompt=prompt)
+        else:
+            # only if use_openai_model = True, unused normally except in testing
+            chain = load_qa_with_sources_chain(llm)
+        if not use_context:
+            chain_kwargs = dict(input_documents=[], question=query)
+        else:
+            chain_kwargs = dict(input_documents=docs, question=query)
+        target = wrapped_partial(chain, chain_kwargs)
+    elif langchain_action in [LangChainAction.SUMMARIZE_MAP.value,
+                              LangChainAction.SUMMARIZE_REFINE,
+                              LangChainAction.SUMMARIZE_ALL.value]:
+        from langchain.chains.summarize import load_summarize_chain
+        if langchain_action == LangChainAction.SUMMARIZE_MAP.value:
+            prompt = PromptTemplate(input_variables=["text"], template=template)
+            chain = load_summarize_chain(llm, chain_type="map_reduce",
+                                         map_prompt=prompt, combine_prompt=prompt, return_intermediate_steps=True)
+            target = wrapped_partial(chain, {"input_documents": docs})  # , return_only_outputs=True)
+        elif langchain_action == LangChainAction.SUMMARIZE_ALL.value:
+            assert use_template
+            prompt = PromptTemplate(input_variables=["text"], template=template)
+            chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt, return_intermediate_steps=True)
+            target = wrapped_partial(chain)
+        elif langchain_action == LangChainAction.SUMMARIZE_REFINE.value:
+            chain = load_summarize_chain(llm, chain_type="refine", return_intermediate_steps=True)
+            target = wrapped_partial(chain)
+        else:
+            raise RuntimeError("No such langchain_action=%s" % langchain_action)
     else:
-        …
+        raise RuntimeError("No such langchain_action=%s" % langchain_action)
 
-    target …
-    return docs, target, scores, use_context
+    return docs, target, scores, use_context, have_any_docs
 
 
 def get_sources_answer(query, answer, scores, show_rank, answer_with_sources, verbose=False):
@@ -2243,6 +2342,11 @@ def chunk_sources(sources, chunk=True, chunk_size=512, language=None):
     splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, keep_separator=keep_separator,
                                               separators=separators)
     source_chunks = splitter.split_documents(sources)
+
+    # currently in order, but when pull from db won't be, so mark order and document by hash
+    doc_hash = str(uuid.uuid4())[:10]
+    [x.metadata.update(dict(doc_hash=doc_hash, chunk_id=chunk_id)) for chunk_id, x in enumerate(source_chunks)]
+
     return source_chunks
 
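For context, a condensed standalone sketch of the map-reduce summarization path added above. The real code builds llm via get_llm() and docs from the vector db; here a FakeListLLM and two toy Documents are stand-ins so the sketch is self-contained, and the template text is taken from the diff.

from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.llms.fake import FakeListLLM  # stand-in LLM, assumption for this sketch only

docs = [Document(page_content="Birds migrate south in winter."),
        Document(page_content="Some birds, like owls, stay put year-round.")]
llm = FakeListLLM(responses=["Birds either migrate or overwinter in place."] * 4)

template = """In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text:
\"\"\"
{text}
\"\"\"
Using only the text above, write a condensed and concise summary:
"""
prompt = PromptTemplate(input_variables=["text"], template=template)
# same call shape as the SUMMARIZE_MAP branch in get_similarity_chain() above
chain = load_summarize_chain(llm, chain_type="map_reduce",
                             map_prompt=prompt, combine_prompt=prompt,
                             return_intermediate_steps=True)
result = chain({"input_documents": docs})
print(result["output_text"])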
gradio_runner.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import copy
|
2 |
import functools
|
3 |
import inspect
|
@@ -49,16 +50,16 @@ def fix_pydantic_duplicate_validators_error():
|
|
49 |
|
50 |
fix_pydantic_duplicate_validators_error()
|
51 |
|
52 |
-
from enums import DocumentChoices, no_model_str, no_lora_str, no_server_str, LangChainMode
|
53 |
from gradio_themes import H2oTheme, SoftTheme, get_h2o_title, get_simple_title, get_dark_js, spacing_xsm, radius_xsm, \
|
54 |
text_xsm
|
55 |
from prompter import prompt_type_to_model_name, prompt_types_strings, inv_prompt_type_to_model_lower, non_hf_types, \
|
56 |
get_prompt
|
57 |
from utils import get_githash, flatten_list, zip_data, s3up, clear_torch_cache, get_torch_allocated, system_info_print, \
|
58 |
ping, get_short_name, get_url, makedirs, get_kwargs, remove, system_info, ping_gpu
|
59 |
-
from
|
60 |
-
|
61 |
-
|
62 |
|
63 |
from apscheduler.schedulers.background import BackgroundScheduler
|
64 |
|
@@ -99,6 +100,7 @@ def go_gradio(**kwargs):
|
|
99 |
dbs = kwargs['dbs']
|
100 |
db_type = kwargs['db_type']
|
101 |
visible_langchain_modes = kwargs['visible_langchain_modes']
|
|
|
102 |
allow_upload_to_user_data = kwargs['allow_upload_to_user_data']
|
103 |
allow_upload_to_my_data = kwargs['allow_upload_to_my_data']
|
104 |
enable_sources_list = kwargs['enable_sources_list']
|
@@ -213,7 +215,28 @@ def go_gradio(**kwargs):
|
|
213 |
'base_model') else no_model_msg
|
214 |
output_label0_model2 = no_model_msg
|
215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
default_kwargs = {k: kwargs[k] for k in eval_func_param_names_defaults}
|
|
|
|
|
|
|
|
|
217 |
for k in no_default_param_names:
|
218 |
default_kwargs[k] = ''
|
219 |
|
@@ -239,7 +262,8 @@ def go_gradio(**kwargs):
|
|
239 |
model_options_state = gr.State([model_options])
|
240 |
lora_options_state = gr.State([lora_options])
|
241 |
server_options_state = gr.State([server_options])
|
242 |
-
|
|
|
243 |
chat_state = gr.State({})
|
244 |
# make user default first and default choice, dedup
|
245 |
docs_state00 = kwargs['document_choice'] + [x.name for x in list(DocumentChoices)]
|
@@ -283,7 +307,7 @@ def go_gradio(**kwargs):
|
|
283 |
|
284 |
col_chat = gr.Column(visible=kwargs['chat'])
|
285 |
with col_chat:
|
286 |
-
instruction, submit, stop_btn = make_prompt_form(kwargs)
|
287 |
text_output, text_output2, text_outputs = make_chatbots(output_label0, output_label0_model2,
|
288 |
**kwargs)
|
289 |
|
@@ -332,6 +356,12 @@ def go_gradio(**kwargs):
|
|
332 |
value=kwargs['langchain_mode'],
|
333 |
label="Data Collection of Sources",
|
334 |
visible=kwargs['langchain_mode'] != 'Disabled')
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
data_row2 = gr.Row(visible=kwargs['langchain_mode'] != 'Disabled')
|
336 |
with data_row2:
|
337 |
with gr.Column(scale=50):
|
@@ -726,6 +756,7 @@ def go_gradio(**kwargs):
|
|
726 |
caption_loader=caption_loader,
|
727 |
verbose=kwargs['verbose'],
|
728 |
user_path=kwargs['user_path'],
|
|
|
729 |
)
|
730 |
add_file_outputs = [fileup_output, langchain_mode, add_to_shared_db_btn, add_to_my_db_btn]
|
731 |
add_file_kwargs = dict(fn=update_user_db_func,
|
@@ -804,6 +835,7 @@ def go_gradio(**kwargs):
|
|
804 |
caption_loader=caption_loader,
|
805 |
verbose=kwargs['verbose'],
|
806 |
user_path=kwargs['user_path'],
|
|
|
807 |
)
|
808 |
|
809 |
add_my_file_outputs = [fileup_output, langchain_mode, my_db_state, add_to_shared_db_btn, add_to_my_db_btn]
|
@@ -920,19 +952,59 @@ def go_gradio(**kwargs):
|
|
920 |
for k in inputs_kwargs_list:
|
921 |
assert k in kwargs_evaluate, "Missing %s" % k
|
922 |
|
923 |
-
def
|
924 |
-
|
925 |
-
|
926 |
-
|
927 |
-
|
928 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
929 |
|
930 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
931 |
**kwargs_evaluate)
|
932 |
-
fun2 = partial(
|
|
|
|
|
933 |
**kwargs_evaluate)
|
934 |
-
fun_with_dict_str = partial(
|
935 |
-
|
|
|
936 |
**kwargs_evaluate
|
937 |
)
|
938 |
|
@@ -1072,14 +1144,17 @@ def go_gradio(**kwargs):
|
|
1072 |
User that fills history for bot
|
1073 |
:param args:
|
1074 |
:param undo:
|
|
|
1075 |
:param sanitize_user_prompt:
|
1076 |
-
:param model2:
|
1077 |
:return:
|
1078 |
"""
|
1079 |
args_list = list(args)
|
1080 |
user_message = args_list[eval_func_param_names.index('instruction')] # chat only
|
1081 |
input1 = args_list[eval_func_param_names.index('iinput')] # chat only
|
1082 |
prompt_type1 = args_list[eval_func_param_names.index('prompt_type')]
|
|
|
|
|
|
|
1083 |
if not prompt_type1:
|
1084 |
# shouldn't have to specify if CLI launched model
|
1085 |
prompt_type1 = kwargs['prompt_type']
|
@@ -1110,8 +1185,12 @@ def go_gradio(**kwargs):
|
|
1110 |
history[-1][1] = None
|
1111 |
return history
|
1112 |
if user_message1 in ['', None, '\n']:
|
1113 |
-
|
1114 |
-
|
|
|
|
|
|
|
|
|
1115 |
user_message1 = fix_text_for_gradio(user_message1)
|
1116 |
return history + [[user_message1, None]]
|
1117 |
|
@@ -1147,11 +1226,13 @@ def go_gradio(**kwargs):
|
|
1147 |
else:
|
1148 |
return 2000
|
1149 |
|
1150 |
-
def prep_bot(*args, retry=False):
|
1151 |
"""
|
1152 |
|
1153 |
:param args:
|
1154 |
:param retry:
|
|
|
|
|
1155 |
:return: last element is True if should run bot, False if should just yield history
|
1156 |
"""
|
1157 |
# don't deepcopy, can contain model itself
|
@@ -1159,12 +1240,16 @@ def go_gradio(**kwargs):
|
|
1159 |
model_state1 = args_list[-3]
|
1160 |
my_db_state1 = args_list[-2]
|
1161 |
history = args_list[-1]
|
1162 |
-
|
|
|
1163 |
|
1164 |
if model_state1['model'] is None or model_state1['model'] == no_model_str:
|
1165 |
return history, None, None, None
|
1166 |
|
1167 |
args_list = args_list[:-3] # only keep rest needed for evaluate()
|
|
|
|
|
|
|
1168 |
if not history:
|
1169 |
print("No history", flush=True)
|
1170 |
history = []
|
@@ -1175,22 +1260,23 @@ def go_gradio(**kwargs):
|
|
1175 |
instruction1 = history[-1][0]
|
1176 |
history[-1][1] = None
|
1177 |
elif not instruction1:
|
1178 |
-
|
1179 |
-
|
|
|
|
|
|
|
|
|
1180 |
elif len(history) > 0 and history[-1][1] not in [None, '']:
|
1181 |
# reject submit button if already filled and not retrying
|
1182 |
# None when not filling with '' to keep client happy
|
1183 |
return history, None, None, None
|
1184 |
|
1185 |
# shouldn't have to specify in API prompt_type if CLI launched model, so prefer global CLI one if have it
|
1186 |
-
prompt_type1 =
|
1187 |
-
|
1188 |
-
|
1189 |
-
|
1190 |
-
|
1191 |
-
prompt_dict1 = kwargs.get('prompt_dict', args_list[eval_func_param_names.index('prompt_dict')])
|
1192 |
-
args_list[eval_func_param_names.index('prompt_dict')] = prompt_dict1 = \
|
1193 |
-
model_state1.get('prompt_dict', prompt_dict1)
|
1194 |
|
1195 |
chat1 = args_list[eval_func_param_names.index('chat')]
|
1196 |
model_max_length1 = get_model_max_length(model_state1)
|
@@ -1264,6 +1350,7 @@ def go_gradio(**kwargs):
|
|
1264 |
for res in get_response(fun1, history):
|
1265 |
yield res
|
1266 |
finally:
|
|
|
1267 |
clear_embeddings(langchain_mode1, my_db_state1)
|
1268 |
|
1269 |
def all_bot(*args, retry=False, model_states1=None):
|
@@ -1277,7 +1364,7 @@ def go_gradio(**kwargs):
|
|
1277 |
my_db_state1 = None # will be filled below by some bot
|
1278 |
try:
|
1279 |
gen_list = []
|
1280 |
-
for chatbot1, model_state1 in zip(chatbots, model_states1):
|
1281 |
args_list1 = args_list0.copy()
|
1282 |
args_list1.insert(-1, model_state1) # insert at -1 so is at -2
|
1283 |
# if at start, have None in response still, replace with '' so client etc. acts like normal
|
@@ -1289,7 +1376,8 @@ def go_gradio(**kwargs):
|
|
1289 |
# so consistent with prep_bot()
|
1290 |
# with model_state1 at -3, my_db_state1 at -2, and history(chatbot) at -1
|
1291 |
# langchain_mode1 and my_db_state1 should be same for every bot
|
1292 |
-
history, fun1, langchain_mode1, my_db_state1 = prep_bot(*tuple(args_list1), retry=retry
|
|
|
1293 |
gen1 = get_response(fun1, history)
|
1294 |
if stream_output1:
|
1295 |
gen1 = TimeoutIterator(gen1, timeout=0.01, sentinel=None, raise_on_exception=False)
|
@@ -1301,6 +1389,7 @@ def go_gradio(**kwargs):
|
|
1301 |
tgen0 = time.time()
|
1302 |
for res1 in itertools.zip_longest(*gen_list):
|
1303 |
if time.time() - tgen0 > max_time1:
|
|
|
1304 |
break
|
1305 |
|
1306 |
bots = [x[0] if x is not None and not isinstance(x, BaseException) else y for x, y in
|
@@ -1735,6 +1824,9 @@ def go_gradio(**kwargs):
|
|
1735 |
|
1736 |
def load_model(model_name, lora_weights, server_name, model_state_old, prompt_type_old, load_8bit,
|
1737 |
infer_devices, gpu_id):
|
|
|
|
|
|
|
1738 |
# ensure old model removed from GPU memory
|
1739 |
if kwargs['debug']:
|
1740 |
print("Pre-switch pre-del GPU memory: %s" % get_torch_allocated(), flush=True)
|
@@ -2161,6 +2253,15 @@ def update_user_db(file, db1, x, y, *args, dbs=None, langchain_mode='UserData',
|
|
2161 |
clear_torch_cache()
|
2162 |
|
2163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2164 |
def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None, langchain_mode='UserData',
|
2165 |
user_path=None,
|
2166 |
use_openai_embedding=None,
|
@@ -2170,7 +2271,8 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2170 |
captions_model=None,
|
2171 |
enable_ocr=None,
|
2172 |
verbose=None,
|
2173 |
-
is_url=None, is_txt=None
|
|
|
2174 |
assert use_openai_embedding is not None
|
2175 |
assert hf_embedding_model is not None
|
2176 |
assert caption_loader is not None
|
@@ -2211,6 +2313,7 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2211 |
print("Adding %s" % file, flush=True)
|
2212 |
sources = path_to_docs(file if not is_url and not is_txt else None,
|
2213 |
verbose=verbose,
|
|
|
2214 |
chunk=chunk, chunk_size=chunk_size,
|
2215 |
url=file if is_url else None,
|
2216 |
text=file if is_txt else None,
|
@@ -2222,7 +2325,8 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2222 |
exceptions = [x for x in sources if x.metadata.get('exception')]
|
2223 |
sources = [x for x in sources if 'exception' not in x.metadata]
|
2224 |
|
2225 |
-
|
|
|
2226 |
if langchain_mode == 'MyData':
|
2227 |
if db1[0] is not None:
|
2228 |
# then add
|
@@ -2235,18 +2339,14 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2235 |
# for production hit, when user gets clicky:
|
2236 |
assert len(db1) == 2, "Bad MyData db: %s" % db1
|
2237 |
# then create
|
2238 |
-
# assign fresh hash for this user session, so not shared
|
2239 |
# if added has to original state and didn't change, then would be shared db for all users
|
2240 |
-
db1[1] = str(uuid.uuid4())
|
2241 |
persist_directory = os.path.join(scratch_base_dir, 'db_dir_%s_%s' % (langchain_mode, db1[1]))
|
2242 |
db = get_db(sources, use_openai_embedding=use_openai_embedding,
|
2243 |
db_type=db_type,
|
2244 |
persist_directory=persist_directory,
|
2245 |
langchain_mode=langchain_mode,
|
2246 |
hf_embedding_model=hf_embedding_model)
|
2247 |
-
if db is None:
|
2248 |
-
db1[1] = None
|
2249 |
-
else:
|
2250 |
db1[0] = db
|
2251 |
source_files_added = get_source_files(db=db1[0], exceptions=exceptions)
|
2252 |
return None, langchain_mode, db1, x, y, source_files_added
|
@@ -2274,7 +2374,9 @@ def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None,
|
|
2274 |
|
2275 |
|
2276 |
def get_db(db1, langchain_mode, dbs=None):
|
2277 |
-
|
|
|
|
|
2278 |
if langchain_mode in ['wiki_full']:
|
2279 |
# NOTE: avoid showing full wiki. Takes about 30 seconds over about 90k entries, but not useful for now
|
2280 |
db = None
|
|
|
1 |
+
import ast
|
2 |
import copy
|
3 |
import functools
|
4 |
import inspect
|
|
|
50 |
|
51 |
fix_pydantic_duplicate_validators_error()
|
52 |
|
53 |
+
from enums import DocumentChoices, no_model_str, no_lora_str, no_server_str, LangChainAction, LangChainMode
|
54 |
from gradio_themes import H2oTheme, SoftTheme, get_h2o_title, get_simple_title, get_dark_js, spacing_xsm, radius_xsm, \
|
55 |
text_xsm
|
56 |
from prompter import prompt_type_to_model_name, prompt_types_strings, inv_prompt_type_to_model_lower, non_hf_types, \
|
57 |
get_prompt
|
58 |
from utils import get_githash, flatten_list, zip_data, s3up, clear_torch_cache, get_torch_allocated, system_info_print, \
|
59 |
ping, get_short_name, get_url, makedirs, get_kwargs, remove, system_info, ping_gpu
|
60 |
+
from gen import get_model, languages_covered, evaluate, score_qa, langchain_modes, inputs_kwargs_list, scratch_base_dir, \
|
61 |
+
get_max_max_new_tokens, get_minmax_top_k_docs, history_to_context, langchain_actions
|
62 |
+
from evaluate_params import eval_func_param_names, no_default_param_names, eval_func_param_names_defaults
|
63 |
|
64 |
from apscheduler.schedulers.background import BackgroundScheduler
|
65 |
|
|
|
100 |
dbs = kwargs['dbs']
|
101 |
db_type = kwargs['db_type']
|
102 |
visible_langchain_modes = kwargs['visible_langchain_modes']
|
103 |
+
visible_langchain_actions = kwargs['visible_langchain_actions']
|
104 |
allow_upload_to_user_data = kwargs['allow_upload_to_user_data']
|
105 |
allow_upload_to_my_data = kwargs['allow_upload_to_my_data']
|
106 |
enable_sources_list = kwargs['enable_sources_list']
|
|
|
215 |
'base_model') else no_model_msg
|
216 |
output_label0_model2 = no_model_msg
|
217 |
|
218 |
+
def update_prompt(prompt_type1, prompt_dict1, model_state1, which_model=0):
|
219 |
+
if not prompt_type1 or which_model != 0:
|
220 |
+
# keep prompt_type and prompt_dict in sync if possible
|
221 |
+
prompt_type1 = kwargs.get('prompt_type', prompt_type1)
|
222 |
+
prompt_dict1 = kwargs.get('prompt_dict', prompt_dict1)
|
223 |
+
# prefer model specific prompt type instead of global one
|
224 |
+
if not prompt_type1 or which_model != 0:
|
225 |
+
prompt_type1 = model_state1.get('prompt_type', prompt_type1)
|
226 |
+
prompt_dict1 = model_state1.get('prompt_dict', prompt_dict1)
|
227 |
+
|
228 |
+
if not prompt_dict1 or which_model != 0:
|
229 |
+
# if still not defined, try to get
|
230 |
+
prompt_dict1 = kwargs.get('prompt_dict', prompt_dict1)
|
231 |
+
if not prompt_dict1 or which_model != 0:
|
232 |
+
prompt_dict1 = model_state1.get('prompt_dict', prompt_dict1)
|
233 |
+
return prompt_type1, prompt_dict1
|
234 |
+
|
235 |
default_kwargs = {k: kwargs[k] for k in eval_func_param_names_defaults}
|
236 |
+
# ensure prompt_type consistent with prep_bot(), so nochat API works same way
|
237 |
+
default_kwargs['prompt_type'], default_kwargs['prompt_dict'] = \
|
238 |
+
update_prompt(default_kwargs['prompt_type'], default_kwargs['prompt_dict'],
|
239 |
+
model_state1=model_state0, which_model=0)
|
240 |
for k in no_default_param_names:
|
241 |
default_kwargs[k] = ''
|
242 |
|
|
|
262 |
model_options_state = gr.State([model_options])
|
263 |
lora_options_state = gr.State([lora_options])
|
264 |
server_options_state = gr.State([server_options])
|
265 |
+
# uuid in db is used as user ID
|
266 |
+
my_db_state = gr.State([None, str(uuid.uuid4())])
|
267 |
chat_state = gr.State({})
|
268 |
# make user default first and default choice, dedup
|
269 |
docs_state00 = kwargs['document_choice'] + [x.name for x in list(DocumentChoices)]
|
|
|
307 |
|
308 |
col_chat = gr.Column(visible=kwargs['chat'])
|
309 |
with col_chat:
|
310 |
+
instruction, submit, stop_btn = make_prompt_form(kwargs, LangChainMode)
|
311 |
text_output, text_output2, text_outputs = make_chatbots(output_label0, output_label0_model2,
|
312 |
**kwargs)
|
313 |
|
|
|
356 |
value=kwargs['langchain_mode'],
|
357 |
label="Data Collection of Sources",
|
358 |
visible=kwargs['langchain_mode'] != 'Disabled')
|
359 |
+
allowed_actions = [x for x in langchain_actions if x in visible_langchain_actions]
|
360 |
+
langchain_action = gr.Radio(
|
361 |
+
allowed_actions,
|
362 |
+
value=allowed_actions[0] if len(allowed_actions) > 0 else None,
|
363 |
+
label="Data Action",
|
364 |
+
visible=True)
|
365 |
data_row2 = gr.Row(visible=kwargs['langchain_mode'] != 'Disabled')
|
366 |
with data_row2:
|
367 |
with gr.Column(scale=50):
|
|
|
756 |
caption_loader=caption_loader,
|
757 |
verbose=kwargs['verbose'],
|
758 |
user_path=kwargs['user_path'],
|
759 |
+
n_jobs=kwargs['n_jobs'],
|
760 |
)
|
761 |
add_file_outputs = [fileup_output, langchain_mode, add_to_shared_db_btn, add_to_my_db_btn]
|
762 |
add_file_kwargs = dict(fn=update_user_db_func,
|
|
|
835 |
caption_loader=caption_loader,
|
836 |
verbose=kwargs['verbose'],
|
837 |
user_path=kwargs['user_path'],
|
838 |
+
n_jobs=kwargs['n_jobs'],
|
839 |
)
|
840 |
|
841 |
add_my_file_outputs = [fileup_output, langchain_mode, my_db_state, add_to_shared_db_btn, add_to_my_db_btn]
|
|
|
 952 |      for k in inputs_kwargs_list:
 953 |          assert k in kwargs_evaluate, "Missing %s" % k
 954 |
 955 | +    def evaluate_nochat(*args1, default_kwargs1=None, str_api=False, **kwargs1):
 956 | +        args_list = list(args1)
 957 | +        if str_api:
 958 | +            user_kwargs = args_list[2]
 959 | +            assert isinstance(user_kwargs, str)
 960 | +            user_kwargs = ast.literal_eval(user_kwargs)
 961 | +        else:
 962 | +            user_kwargs = {k: v for k, v in zip(eval_func_param_names, args_list[2:])}
 963 | +        # only used for submit_nochat_api
 964 | +        user_kwargs['chat'] = False
 965 | +        if 'stream_output' not in user_kwargs:
 966 | +            user_kwargs['stream_output'] = False
 967 | +        if 'langchain_mode' not in user_kwargs:
 968 | +            # if user doesn't specify, then assume disabled, not use default
 969 | +            user_kwargs['langchain_mode'] = 'Disabled'
 970 | +        if 'langchain_action' not in user_kwargs:
 971 | +            user_kwargs['langchain_action'] = LangChainAction.QUERY.value
 972 | +
 973 | +        set1 = set(list(default_kwargs1.keys()))
 974 | +        set2 = set(eval_func_param_names)
 975 | +        assert set1 == set2, "Set diff: %s %s: %s" % (set1, set2, set1.symmetric_difference(set2))
 976 | +        # correct ordering. Note some things may not be in default_kwargs, so can't be default of user_kwargs.get()
 977 | +        model_state1 = args_list[0]
 978 | +        my_db_state1 = args_list[1]
 979 | +        args_list = [user_kwargs[k] if k in user_kwargs and user_kwargs[k] is not None else default_kwargs1[k] for k
 980 | +                     in eval_func_param_names]
 981 | +        assert len(args_list) == len(eval_func_param_names)
 982 | +        args_list = [model_state1, my_db_state1] + args_list
 983 |
 984 | +        try:
 985 | +            for res_dict in evaluate(*tuple(args_list), **kwargs1):
 986 | +                if str_api:
 987 | +                    # full return of dict
 988 | +                    yield res_dict
 989 | +                elif kwargs['langchain_mode'] == 'Disabled':
 990 | +                    yield fix_text_for_gradio(res_dict['response'])
 991 | +                else:
 992 | +                    yield '<br>' + fix_text_for_gradio(res_dict['response'])
 993 | +        finally:
 994 | +            clear_torch_cache()
 995 | +            clear_embeddings(user_kwargs['langchain_mode'], my_db_state1)
 996 | +
 997 | +    fun = partial(evaluate_nochat,
 998 | +                  default_kwargs1=default_kwargs,
 999 | +                  str_api=False,
1000 |                   **kwargs_evaluate)
1001 | +    fun2 = partial(evaluate_nochat,
1002 | +                   default_kwargs1=default_kwargs,
1003 | +                   str_api=False,
1004 |                    **kwargs_evaluate)
1005 | +    fun_with_dict_str = partial(evaluate_nochat,
1006 | +                                default_kwargs1=default_kwargs,
1007 | +                                str_api=True,
1008 |                                 **kwargs_evaluate
1009 |                                 )
1010 |
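evaluate_nochat now backs three endpoints: fun/fun2 for the two chat columns and fun_with_dict_str for a no-chat API that takes a single Python-literal string of keyword arguments, parsed server-side with ast.literal_eval; unspecified keys fall back to server defaults, except langchain_mode, which defaults to 'Disabled'. A minimal client sketch follows; the endpoint name '/submit_nochat_api' and the shape of the returned value are assumptions based on the comment at line 963 and on client_test.py, not something this hunk shows.

from gradio_client import Client  # pip install gradio_client

client = Client("http://localhost:7860")
# send a dict literal (not JSON): the server parses it with ast.literal_eval
api_kwargs = dict(instruction_nochat="Who are you?",
                  stream_output=False,
                  max_new_tokens=64)
res = client.predict(str(api_kwargs), api_name='/submit_nochat_api')
print(res)  # with str_api=True the server yields the full result dict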
1144 |          User that fills history for bot
1145 |          :param args:
1146 |          :param undo:
1147 | +        :param retry:
1148 |          :param sanitize_user_prompt:
1149 |          :return:
1150 |          """
1151 |          args_list = list(args)
1152 |          user_message = args_list[eval_func_param_names.index('instruction')]  # chat only
1153 |          input1 = args_list[eval_func_param_names.index('iinput')]  # chat only
1154 |          prompt_type1 = args_list[eval_func_param_names.index('prompt_type')]
1155 | +        langchain_mode1 = args_list[eval_func_param_names.index('langchain_mode')]
1156 | +        langchain_action1 = args_list[eval_func_param_names.index('langchain_action')]
1157 | +        document_choice1 = args_list[eval_func_param_names.index('document_choice')]
1158 |          if not prompt_type1:
1159 |              # shouldn't have to specify if CLI launched model
1160 |              prompt_type1 = kwargs['prompt_type']
1185 |              history[-1][1] = None
1186 |              return history
1187 |          if user_message1 in ['', None, '\n']:
1188 | +            if langchain_action1 in LangChainAction.QUERY.value and \
1189 | +                    DocumentChoices.Only_All_Sources.name not in document_choice1 \
1190 | +                    or \
1191 | +                    langchain_mode1 in [LangChainMode.CHAT_LLM.value, LangChainMode.LLM.value]:
1192 | +                # reject non-retry submit/enter
1193 | +                return history
1194 |          user_message1 = fix_text_for_gradio(user_message1)
1195 |          return history + [[user_message1, None]]
1196 |
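Since `and` binds tighter than `or`, the guard added at lines 1188-1191 (and repeated in prep_bot below) reads as: reject an empty submission when it is a Query without the Only_All_Sources document choice, or whenever the mode is just the plain LLM/chat collection. A small helper expressing the same predicate, purely for readability; the function and its name are illustrative, not part of the commit.

from enums import DocumentChoices, LangChainAction, LangChainMode  # assumed exports of enums.py

def reject_empty_submit(langchain_action1, document_choice1, langchain_mode1):
    """True when an empty prompt should be rejected instead of run."""
    query_without_all_sources = (
        langchain_action1 in LangChainAction.QUERY.value  # substring test, as written in the diff
        and DocumentChoices.Only_All_Sources.name not in document_choice1
    )
    plain_llm_chat = langchain_mode1 in [LangChainMode.CHAT_LLM.value, LangChainMode.LLM.value]
    return query_without_all_sources or plain_llm_chat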
1226 |          else:
1227 |              return 2000
1228 |
1229 | +    def prep_bot(*args, retry=False, which_model=0):
1230 |          """
1231 |
1232 |          :param args:
1233 |          :param retry:
1234 | +        :param which_model: identifies which model if doing model_lock
1235 | +             API only called for which_model=0, default for inputs_list, but rest should ignore inputs_list
1236 |          :return: last element is True if should run bot, False if should just yield history
1237 |          """
1238 |          # don't deepcopy, can contain model itself
1240 |          model_state1 = args_list[-3]
1241 |          my_db_state1 = args_list[-2]
1242 |          history = args_list[-1]
1243 | +        prompt_type1 = args_list[eval_func_param_names.index('prompt_type')]
1244 | +        prompt_dict1 = args_list[eval_func_param_names.index('prompt_dict')]
1245 |
1246 |          if model_state1['model'] is None or model_state1['model'] == no_model_str:
1247 |              return history, None, None, None
1248 |
1249 |          args_list = args_list[:-3]  # only keep rest needed for evaluate()
1250 | +        langchain_mode1 = args_list[eval_func_param_names.index('langchain_mode')]
1251 | +        langchain_action1 = args_list[eval_func_param_names.index('langchain_action')]
1252 | +        document_choice1 = args_list[eval_func_param_names.index('document_choice')]
1253 |          if not history:
1254 |              print("No history", flush=True)
1255 |              history = []
1260 |              instruction1 = history[-1][0]
1261 |              history[-1][1] = None
1262 |          elif not instruction1:
1263 | +            if langchain_action1 in LangChainAction.QUERY.value and \
1264 | +                    DocumentChoices.Only_All_Sources.name not in document_choice1 \
1265 | +                    or \
1266 | +                    langchain_mode1 in [LangChainMode.CHAT_LLM.value, LangChainMode.LLM.value]:
1267 | +                # if not retrying, then reject empty query
1268 | +                return history, None, None, None
1269 |          elif len(history) > 0 and history[-1][1] not in [None, '']:
1270 |              # reject submit button if already filled and not retrying
1271 |              # None when not filling with '' to keep client happy
1272 |              return history, None, None, None
1273 |
1274 |          # shouldn't have to specify in API prompt_type if CLI launched model, so prefer global CLI one if have it
1275 | +        prompt_type1, prompt_dict1 = update_prompt(prompt_type1, prompt_dict1, model_state1,
1276 | +                                                   which_model=which_model)
1277 | +        # apply back to args_list for evaluate()
1278 | +        args_list[eval_func_param_names.index('prompt_type')] = prompt_type1
1279 | +        args_list[eval_func_param_names.index('prompt_dict')] = prompt_dict1
1280 |
1281 |          chat1 = args_list[eval_func_param_names.index('chat')]
1282 |          model_max_length1 = get_model_max_length(model_state1)
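update_prompt() itself is not part of this diff; from the surrounding comment, its job is to prefer the prompt_type/prompt_dict the model was launched with over whatever the UI or API passed, and prep_bot then writes the result back into args_list so evaluate() sees it. A rough sketch of that preference logic; the signature mirrors the call above, but the body is an assumption, not the committed implementation.

def update_prompt(prompt_type1, prompt_dict1, model_state1, which_model=0):
    # prefer what the loaded model (CLI or model_lock entry) was configured with,
    # so API callers need not repeat prompt_type for an already-loaded model
    if model_state1.get('prompt_type') not in (None, '', 'None'):
        prompt_type1 = model_state1['prompt_type']
        prompt_dict1 = model_state1.get('prompt_dict', prompt_dict1)
    return prompt_type1, prompt_dict1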
1350 |                  for res in get_response(fun1, history):
1351 |                      yield res
1352 |              finally:
1353 | +                clear_torch_cache()
1354 |                  clear_embeddings(langchain_mode1, my_db_state1)
1355 |
1356 |      def all_bot(*args, retry=False, model_states1=None):
1364 |          my_db_state1 = None  # will be filled below by some bot
1365 |          try:
1366 |              gen_list = []
1367 | +            for chatboti, (chatbot1, model_state1) in enumerate(zip(chatbots, model_states1)):
1368 |                  args_list1 = args_list0.copy()
1369 |                  args_list1.insert(-1, model_state1)  # insert at -1 so is at -2
1370 |                  # if at start, have None in response still, replace with '' so client etc. acts like normal
1376 |                  # so consistent with prep_bot()
1377 |                  # with model_state1 at -3, my_db_state1 at -2, and history(chatbot) at -1
1378 |                  # langchain_mode1 and my_db_state1 should be same for every bot
1379 | +                history, fun1, langchain_mode1, my_db_state1 = prep_bot(*tuple(args_list1), retry=retry,
1380 | +                                                                        which_model=chatboti)
1381 |                  gen1 = get_response(fun1, history)
1382 |                  if stream_output1:
1383 |                      gen1 = TimeoutIterator(gen1, timeout=0.01, sentinel=None, raise_on_exception=False)
1389 |              tgen0 = time.time()
1390 |              for res1 in itertools.zip_longest(*gen_list):
1391 |                  if time.time() - tgen0 > max_time1:
1392 | +                    print("Took too long: %s" % max_time1, flush=True)
1393 |                      break
1394 |
1395 |              bots = [x[0] if x is not None and not isinstance(x, BaseException) else y for x, y in
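When several models are locked in (model_lock), all_bot streams them side by side: each bot's generator is wrapped in a TimeoutIterator so a slow model cannot stall the others, and itertools.zip_longest interleaves whatever each stream has produced, with a wall-clock cutoff. A stripped-down, runnable sketch of that interleaving with plain generators (no Gradio, no models, and ordinary iterators in place of TimeoutIterator):

import itertools
import time

def bot(name, n_tokens, delay):
    # stand-in for one model's streaming generator
    for i in range(n_tokens):
        time.sleep(delay)
        yield "%s: token %d" % (name, i)

gen_list = [bot("fast", 5, 0.01), bot("slow", 3, 0.05)]
tgen0 = time.time()
max_time1 = 2.0
for res1 in itertools.zip_longest(*gen_list):
    if time.time() - tgen0 > max_time1:
        print("Took too long: %s" % max_time1, flush=True)
        break
    # res1 has one entry per bot; exhausted bots contribute None
    print([r for r in res1 if r is not None])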
1824 |
1825 |      def load_model(model_name, lora_weights, server_name, model_state_old, prompt_type_old, load_8bit,
1826 |                     infer_devices, gpu_id):
1827 | +        # ensure no API calls reach here
1828 | +        if is_public:
1829 | +            raise RuntimeError("Illegal access for %s" % model_name)
1830 |          # ensure old model removed from GPU memory
1831 |          if kwargs['debug']:
1832 |              print("Pre-switch pre-del GPU memory: %s" % get_torch_allocated(), flush=True)
2253 |      clear_torch_cache()
2254 |
2255 |
2256 | +def get_lock_file(db1, langchain_mode):
2257 | +    assert len(db1) == 2 and db1[1] is not None and isinstance(db1[1], str)
2258 | +    user_id = db1[1]
2259 | +    base_path = 'locks'
2260 | +    makedirs(base_path)
2261 | +    lock_file = "db_%s_%s.lock" % (langchain_mode.replace(' ', '_'), user_id)
2262 | +    return lock_file
2263 | +
2264 | +
2265 |  def _update_user_db(file, db1, x, y, chunk, chunk_size, dbs=None, db_type=None, langchain_mode='UserData',
2266 |                      user_path=None,
2267 |                      use_openai_embedding=None,
2271 |                      captions_model=None,
2272 |                      enable_ocr=None,
2273 |                      verbose=None,
2274 | +                    is_url=None, is_txt=None,
2275 | +                    n_jobs=-1):
2276 |      assert use_openai_embedding is not None
2277 |      assert hf_embedding_model is not None
2278 |      assert caption_loader is not None
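get_lock_file() keys the lock on the collection name and the per-user scratch id stored in db1[1], so concurrent uploads for the same user and collection are serialized while different users never contend. A self-contained sketch of the same pattern; names and paths are illustrative, and unlike the hunk above it joins the lock name onto the 'locks' directory.

import os
import filelock  # pip install filelock

def get_lock_file(user_id, langchain_mode):
    base_path = 'locks'
    os.makedirs(base_path, exist_ok=True)
    return os.path.join(base_path, "db_%s_%s.lock" % (langchain_mode.replace(' ', '_'), user_id))

lock_file = get_lock_file("user123", "MyData")
with filelock.FileLock(lock_file):
    # read/modify/persist this user's vector DB; a second upload for the same
    # user+collection blocks here until the first one releases the lock
    pass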
2313 |      print("Adding %s" % file, flush=True)
2314 |      sources = path_to_docs(file if not is_url and not is_txt else None,
2315 |                             verbose=verbose,
2316 | +                           n_jobs=n_jobs,
2317 |                             chunk=chunk, chunk_size=chunk_size,
2318 |                             url=file if is_url else None,
2319 |                             text=file if is_txt else None,
exceptions = [x for x in sources if x.metadata.get('exception')]
|
2326 |
sources = [x for x in sources if 'exception' not in x.metadata]
|
2327 |
|
2328 |
+
lock_file = get_lock_file(db1, langchain_mode)
|
2329 |
+
with filelock.FileLock(lock_file):
|
2330 |
if langchain_mode == 'MyData':
|
2331 |
if db1[0] is not None:
|
2332 |
# then add
|
|
|
2339 |
# for production hit, when user gets clicky:
|
2340 |
assert len(db1) == 2, "Bad MyData db: %s" % db1
|
2341 |
# then create
|
|
|
2342 |
# if added has to original state and didn't change, then would be shared db for all users
|
|
|
2343 |
persist_directory = os.path.join(scratch_base_dir, 'db_dir_%s_%s' % (langchain_mode, db1[1]))
|
2344 |
db = get_db(sources, use_openai_embedding=use_openai_embedding,
|
2345 |
db_type=db_type,
|
2346 |
persist_directory=persist_directory,
|
2347 |
langchain_mode=langchain_mode,
|
2348 |
hf_embedding_model=hf_embedding_model)
|
2349 |
+
if db is not None:
|
|
|
|
|
2350 |
db1[0] = db
|
2351 |
source_files_added = get_source_files(db=db1[0], exceptions=exceptions)
|
2352 |
return None, langchain_mode, db1, x, y, source_files_added
|
|
|
2374 |
2375 |
2376 |  def get_db(db1, langchain_mode, dbs=None):
2377 | +    lock_file = get_lock_file(db1, langchain_mode)
2378 | +
2379 | +    with filelock.FileLock(lock_file):
2380 |          if langchain_mode in ['wiki_full']:
2381 |              # NOTE: avoid showing full wiki. Takes about 30 seconds over about 90k entries, but not useful for now
2382 |              db = None
gradio_utils/__pycache__/grclient.cpython-310.pyc
CHANGED
Binary files a/gradio_utils/__pycache__/grclient.cpython-310.pyc and b/gradio_utils/__pycache__/grclient.cpython-310.pyc differ

gradio_utils/__pycache__/prompt_form.cpython-310.pyc
CHANGED
Binary files a/gradio_utils/__pycache__/prompt_form.cpython-310.pyc and b/gradio_utils/__pycache__/prompt_form.cpython-310.pyc differ
gradio_utils/prompt_form.py
CHANGED
@@ -95,11 +95,15 @@ def make_chatbots(output_label0, output_label0_model2, **kwargs):
     return text_output, text_output2, text_outputs
 
 
-def make_prompt_form(kwargs):
+def make_prompt_form(kwargs, LangChainMode):
+    if kwargs['langchain_mode'] != LangChainMode.DISABLED.value:
+        extra_prompt_form = ". For summarization, empty submission uses first top_k_docs documents."
+    else:
+        extra_prompt_form = ""
     if kwargs['input_lines'] > 1:
-        instruction_label = "Shift-Enter to Submit, Enter for more lines"
+        instruction_label = "Shift-Enter to Submit, Enter for more lines%s" % extra_prompt_form
     else:
-        instruction_label = "Enter to Submit, Shift-Enter for more lines"
+        instruction_label = "Enter to Submit, Shift-Enter for more lines%s" % extra_prompt_form
 
     with gr.Row():#elem_id='prompt-form-area'):
         with gr.Column(scale=50):
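Concretely, with a document collection enabled and a single input line, the label logic above produces the longer hint text; a minimal reproduction (the enum import is assumed to come from enums.py):

from enums import LangChainMode  # assumed export

kwargs = {'langchain_mode': 'UserData', 'input_lines': 1}
if kwargs['langchain_mode'] != LangChainMode.DISABLED.value:
    extra_prompt_form = ". For summarization, empty submission uses first top_k_docs documents."
else:
    extra_prompt_form = ""
if kwargs['input_lines'] > 1:
    instruction_label = "Shift-Enter to Submit, Enter for more lines%s" % extra_prompt_form
else:
    instruction_label = "Enter to Submit, Shift-Enter for more lines%s" % extra_prompt_form
print(instruction_label)
# Enter to Submit, Shift-Enter for more lines. For summarization, empty submission uses first top_k_docs documents.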
h2oai_pipeline.py
CHANGED
@@ -136,6 +136,7 @@ class H2OTextGenerationPipeline(TextGenerationPipeline):
             else:
                 outputs = rec['generated_text']
             rec['generated_text'] = outputs
+            print("prompt: %s\noutputs: %s\n\n" % (self.prompt_text, outputs), flush=True)
         return records
 
     def _forward(self, model_inputs, **generate_kwargs):
prompter.py
CHANGED
@@ -120,7 +120,7 @@ def get_prompt(prompt_type, prompt_dict, chat, context, reduced, making_context,
     elif prompt_type in [PromptType.custom.value, str(PromptType.custom.value),
                          PromptType.custom.name]:
         promptA = prompt_dict.get('promptA', '')
-        promptB = prompt_dict('promptB', '')
+        promptB = prompt_dict.get('promptB', '')
         PreInstruct = prompt_dict.get('PreInstruct', '')
         PreInput = prompt_dict.get('PreInput', '')
         PreResponse = prompt_dict.get('PreResponse', '')
@@ -693,7 +693,9 @@ class Prompter(object):
             output = clean_response(output)
         elif prompt is None:
             # then use most basic parsing like pipeline
-            if self.botstr in output:
+            if not self.botstr:
+                pass
+            elif self.botstr in output:
                 if self.humanstr:
                     output = clean_response(output.split(self.botstr)[1].split(self.humanstr)[0])
                 else:
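The first prompter.py hunk fixes a plain bug: the dict was being called instead of read with .get, so any custom prompt_type reaching this branch raised TypeError. The second hunk guards the basic parsing path against an empty or unset botstr, since output.split('') would raise ValueError and '' would trivially match as a substring. A standalone illustration of the .get fix (not from the source):

prompt_dict = {'promptA': 'A:', 'promptB': 'B:'}
try:
    prompt_dict('promptB', '')           # old code: TypeError, 'dict' object is not callable
except TypeError as e:
    print(e)
print(prompt_dict.get('promptB', ''))    # fixed code: 'B:' (or '' if the key were missing)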