Commit 9ae4463
Parent(s): 77bf3e6
Update kto deployment

Files changed:
- README.md (+5 -5)
- app.py (+23 -13)
- chat_interface_preference.py (+106 -102)
- requirements.txt (+8 -6)
README.md CHANGED
@@ -1,15 +1,15 @@
 ---
-title:
+title: Human Feedback Collector | Meta-Llama-3.1-8B-Instruct | (DPO)
 emoji: 🦾💪🏽
 colorFrom: pink
 colorTo: blue
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.39
 app_file: app.py
-pinned:
+pinned: true
 license: mit
 suggested_hardware: t4-small
-short_description: LLM, chatbot
+short_description: LLM, chatbot, human-feedback
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
app.py CHANGED
@@ -1,12 +1,17 @@
 #!/usr/bin/env python
 import os
+import random
+from threading import Thread  # noqa
 from typing import Iterator
 
 import gradio as gr
 import spaces
-import torch
-from transformers import
+import torch  # noqa
+from transformers import (
+    AutoModelForCausalLM,  # noqa
+    AutoTokenizer,  # noqa
+    TextIteratorStreamer,  # noqa
+)
 
 from chat_interface_preference import ChatInterface
 
@@ -18,7 +23,6 @@ if torch.cuda.is_available():
     model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
-    style = "<style>.user-message,.system-message{display:flex;margin:10px}.user-message .message-content{background-color:#c2e3f7;color:#000}.system-message .message-content{background-color:#f5f5f5;color:#000}.message-content{padding:10px;border-radius:10px;max-width:70%;word-wrap:break-word}.container{display:flex;justify-content:space-between}.column{width:48%}</style>"
 
 
 @spaces.GPU
@@ -31,7 +35,8 @@ def generate(
     top_k: int = 40,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
+    system_message = random.choice(["concise", "explicit", "simple", "complex", "usefull", "helpfull"])
+    conversation = [{"role": "system", "content": f"Communicate {system_message}."}]
     for user, assistant in chat_history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
@@ -68,11 +73,8 @@ chat_interface = ChatInterface(
     prefence_techniques="kto",
     min_turns=1,
     max_turns=10,
-    repo_id="llm-human-feedback-collector-chat-interface-
-    chatbot=gr.Chatbot(
-        height=450, label="Meta-Llama-3.1-8B-Instruct", show_share_button=True
-    ),
-    css=style,
+    repo_id="llm-human-feedback-collector-chat-interface-dpo",
+    chatbot=gr.Chatbot(height=450, label="Meta-Llama-3.1-8B-Instruct", show_share_button=True),
     cache_examples=False,
     additional_inputs=[
         gr.Slider(
@@ -87,7 +89,7 @@ chat_interface = ChatInterface(
             minimum=0.05,
             maximum=1.2,
             step=0.05,
-            value=0.
+            value=0.7,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
@@ -117,8 +119,16 @@ chat_interface = ChatInterface(
         ["What are great things cook when getting started with Asian cooking?"],
         ["Who was Anthony Bourdain?"],
     ],
-    title="💪🏽🦾
-    description=""
+    title="💪🏽🦾 Human Feedback Collector | Meta-Llama-3.1-8B-Instruct | (KTO) 🦾💪🏽",
+    description="".join(
+        [
+            "This is an adaptation of the [`gr.ChatInferface`](https://www.gradio.app/docs/gradio/chatinterface) which also uses the [`huggingface_hub.CommitScheduler`](https://huggingface.co/docs/huggingface_hub/main/en/package_reference/hf_api#huggingface_hub.CommitScheduler) to allow for human feedback collection. ",
+            "Another cool tool for capturing Gradio interactions is the [`gr.HuggingFaceDatasetSaver`](https://www.gradio.app/guides/using-flagging#the-hugging-face-dataset-saver-callback). ",
+            "This demo shows how you might capture human feedback directly from applications within Gradio. ",
+            "The captured feedback can directly be used for fine-tuning LLMs within framework like [transformers](https://github.com/huggingface/transformers), [TRL](https://github.com/huggingface/trl) or [AutoTrain](https://huggingface.co/autotrain), ",
+            "however, it might benefit from additional data curation with something like [Argilla](https://github.com/argilla-io/argilla/) for human feedback and/or [distilabel](https://github.com/argilla-io/distilabel/) for AI feedback. Argilla can even be [deployed for free on Hugging Face Spaces](https://argilla-io.github.io/argilla/latest/getting_started/huggingface-spaces/).",
+        ]
+    ),
 )
 
 with gr.Blocks(css="style.css") as demo:
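Note on the app.py changes above: the hunks only show generate() now seeding the conversation with a random system message; the part of the body that actually produces tokens is unchanged and not included in this diff. Given the imports added at the top (Thread, TextIteratorStreamer), it presumably follows the usual transformers streaming pattern. A self-contained sketch of that pattern, with a hypothetical function name (stream_reply) and assumed generation kwargs, not code taken from this Space:

    from threading import Thread
    from typing import Iterator

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

    def stream_reply(conversation: list[dict], max_new_tokens: int = 1024, temperature: float = 0.7) -> Iterator[str]:
        """Stream a reply chunk-by-chunk; mirrors the pattern implied by the imports in app.py."""
        input_ids = tokenizer.apply_chat_template(
            conversation, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
        generate_kwargs = dict(
            input_ids=input_ids,
            streamer=streamer,
            max_new_tokens=max_new_tokens,  # assumed value, not shown in the diff
            do_sample=True,
            temperature=temperature,
        )
        # Run generation on a background thread so the streamer can be consumed as an iterator.
        Thread(target=model.generate, kwargs=generate_kwargs).start()
        partial = []
        for text in streamer:  # yields decoded text chunks as they are produced
            partial.append(text)
            yield "".join(partial)

Running model.generate on a background thread is what lets the function be typed as Iterator[str] and yielded straight into the Gradio chatbot, matching the generate() signature shown in the diff.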
chat_interface_preference.py CHANGED
@@ -144,15 +144,15 @@ class ChatInterface(Blocks):
         submit_btn_bad = None
         stop_btn = "Stop"
         undo_btn = "↩️ Undo"
-        clear_btn = "🗑️
+        clear_btn = "🗑️ Clear"
         if "kto" in prefence_techniques:
-            submit_btn_good = "
-            submit_btn_bad = "
+            submit_btn_good = "The response 👍"
+            submit_btn_bad = "The response 👎"
         if any([technique for technique in ["dpo", "simpo", "rlhf", "orpo"] if technique in self.prefence_techniques]):
-            submit_btn_two =
-            submit_btn_a = "
-            submit_btn_b = "
-            submit_btn_ab = "
+            submit_btn_two = None
+            submit_btn_a = "A is better than B"
+            submit_btn_b = "B is better than A"
+            submit_btn_ab = "A and B are similar"
         super().__init__(
             analytics_enabled=analytics_enabled,
             mode="chat_interface",
@@ -219,14 +219,13 @@ class ChatInterface(Blocks):
         with self:
             if title:
                 Markdown(f"<h1 style='text-align: center; margin-bottom: 1rem'>{self.title}</h1>")
-            if description:
-                Markdown(description)
             if self.commit_scheduler:
                 Markdown(
-                    f
+                    f'<center><h2>Data is being logged to <a href="https://huggingface.co/datasets/{self.commit_scheduler.repo_id}">a dataset on the Hugging Face Hub</a></h2></center>'
                 )
-                Markdown(
+            if description:
+                Markdown(description)
+
             if chatbot:
                 self.chatbot = chatbot.render()
             else:
@@ -387,13 +386,13 @@ class ChatInterface(Blocks):
 
     def _setup_events(self) -> None:
         submit_fn_one = self._stream_fn if self.is_generator else self._submit_fn
-        submit_fn_one_partial = functools.partial(submit_fn_one, n_generations=
+        submit_fn_one_partial = functools.partial(submit_fn_one, n_generations=2)
         submit_triggers_one = (
             [self.textbox.submit, self.submit_btn_one.click] if self.submit_btn_one else [self.textbox.submit]
         )
         submit_tuples = [(submit_fn_one_partial, submit_triggers_one)]
         if self.submit_btn_two:
-            submit_fn_two = functools.partial(submit_fn_one, n_generations=
+            submit_fn_two = functools.partial(submit_fn_one, n_generations=1)
             submit_triggers_two = [self.submit_btn_two.click]
             submit_tuples.append((submit_fn_two, submit_triggers_two))
         for _fn, _triggers in submit_tuples:
@@ -608,7 +607,7 @@ class ChatInterface(Blocks):
             if turn[-1]:
                 conversation += self._get_chat_message(turn[-1], role="user", turn=(idx + 1))
 
-        return
+        return "<body>" + conversation + "</body>"
 
     def _get_conversation_in_openai_format(self, history):
         conversation = []
@@ -622,26 +621,22 @@ class ChatInterface(Blocks):
 
     @staticmethod
     def _get_chat_message(message, role, turn):
-            justify = "right"
-        else:
-            justify = "left"
+        # return f"<p><div class='message-identifier'>{message}</div></p>"
         return (
-            + f"<strong>Turn {turn} - {role.capitalize()}:</strong><br>"
+            '<div class="message-content">'
+            + f"<strong>Option {turn}</strong><br>"
             + f"<em>Length: {len(message)} characters</em><br><br>"
             + f'<div class="message-identifier">{message}</div>'
-            + "</div
+            + "</div>"
         )
 
     def _get_chat_message_comparison(self, content_a, content_b):
         return (
-            '<div class="container">'
-            + '<div class="column">'
+            '<div class="container" style="display: flex; width: 100%;">'
+            + '<div class="column" style="flex: 1; padding: 10px;">'
             + self._get_chat_message(message=content_a, role="system", turn="A")
             + "</div>"
-            + '<div class="column">'
+            + '<div class="column" style="flex: 1; padding: 10px;">'
            + self._get_chat_message(message=content_b, role="system", turn="B")
             + "</div>"
             + "</div>"
@@ -688,30 +683,34 @@ class ChatInterface(Blocks):
 
         self._check_message(message)
         self._check_num_turns(history)
+        if history:
+            _, response = history[-1]
+        else:
+            response = None
         if self._check_if_two_responses(response):
+            Info("Two options detected: provide preference, undo or clear to continue conversation.")
+            return history, history
+        else:
+            inputs, _, _ = special_args(self.fn, inputs=[message, history, *args], request=request)
 
-                response = await self.fn(*inputs)
-        else:
+        async def _get_response():
+            if self.is_async:
+                response = await self.fn(*inputs)
+            else:
+                response = await anyio.to_thread.run_sync(self.fn, *inputs, limiter=self.limiter)
+            return response
 
-        if n_generations == 1:
-            response = await _get_response()
-        else:
-            response_one, response_two = await _get_response(), await _get_response()
-            response = self._get_chat_message_comparison(response_one, response_two)
+        if n_generations == 1:
+            response = await _get_response()
+        else:
+            response_one, response_two = await _get_response(), await _get_response()
+            response = self._get_chat_message_comparison(response_one, response_two)
 
+        if self.multimodal and isinstance(message, dict):
+            self._append_multimodal_history(message, response, history)
+        elif isinstance(message, str):
+            history.append([message, response])
+        return history, history
 
     async def _stream_fn(
         self,
@@ -728,67 +727,35 @@ class ChatInterface(Blocks):
         history = history_with_input[:-1]
         self._check_message(message)
         self._check_num_turns(history)
-        _, response = history_with_input[-1]
-        if self._check_if_two_responses(response):
-            raise Error("Two options detected: undo, log or random pick continuation.")
+        if history:
+            _, response = history[-1]
+        else:
+            response = None
+        if self._check_if_two_responses(response):
+            Info("Two options detected: provide preference, undo or clear to continue conversation.")
+            yield history, history
+        else:
+            inputs, _, _ = special_args(self.fn, inputs=[message, history, *args], request=request)
 
-        if n_generations == 2:
-            first_response_formatted = self._get_chat_message_comparison(first_response, "")
-        else:
-            first_response_formatted = first_response
-        if self.multimodal and isinstance(message, dict):
-            for x in message["files"]:
-                history.append([(x,), None])
-            update = history + [[message["text"], first_response_formatted]]
-            yield update, update
-        else:
-            update = history + [[message, first_response_formatted]]
-            yield update, update
-        except StopIteration:
-            if self.multimodal and isinstance(message, dict):
-                self._append_multimodal_history(message, None, history)
-                yield history, history
-            else:
-                update = history + [[message, None]]
-                yield update, update
-        async for response in generator:
-            if n_generations == 2:
-                response_formatted = self._get_chat_message_comparison(response, "")
-            else:
-                response_formatted = response
-            if self.multimodal and isinstance(message, dict):
-                update = history + [[message["text"], response_formatted]]
-                yield update, update
-            else:
-                update = history + [[message, response_formatted]]
-                yield update, update
 
-        if n_generations == 2:
-            if self.is_async:
-                generator_two = self.fn(*inputs)
-            else:
-                generator_two = await anyio.to_thread.run_sync(self.fn, *inputs, limiter=self.limiter)
-            generator_two = SyncToAsyncIterator(generator_two, self.limiter)
         try:
+            if self.is_async:
+                generator = self.fn(*inputs)
+            else:
+                generator = await anyio.to_thread.run_sync(self.fn, *inputs, limiter=self.limiter)
+            generator = SyncToAsyncIterator(generator, self.limiter)
+            first_response = await async_iteration(generator)
+            if n_generations == 2:
+                first_response_formatted = self._get_chat_message_comparison(first_response, "")
+            else:
+                first_response_formatted = first_response
             if self.multimodal and isinstance(message, dict):
                 for x in message["files"]:
                     history.append([(x,), None])
-                update = history + [[message["text"], first_response_two_formatted]]
+                update = history + [[message["text"], first_response_formatted]]
                 yield update, update
             else:
-                update = history + [[message,
+                update = history + [[message, first_response_formatted]]
                 yield update, update
         except StopIteration:
             if self.multimodal and isinstance(message, dict):
@@ -797,15 +764,52 @@ class ChatInterface(Blocks):
             else:
                 update = history + [[message, None]]
                 yield update, update
-        async for
+        async for response in generator:
+            if n_generations == 2:
+                response_formatted = self._get_chat_message_comparison(response, "")
+            else:
+                response_formatted = response
             if self.multimodal and isinstance(message, dict):
-                update = history + [[message["text"],
+                update = history + [[message["text"], response_formatted]]
                 yield update, update
             else:
-                update = history + [[message,
+                update = history + [[message, response_formatted]]
                 yield update, update
 
+        if n_generations == 2:
+            if self.is_async:
+                generator_two = self.fn(*inputs)
+            else:
+                generator_two = await anyio.to_thread.run_sync(self.fn, *inputs, limiter=self.limiter)
+            generator_two = SyncToAsyncIterator(generator_two, self.limiter)
+            try:
+                first_response_two = await async_iteration(generator_two)
+                first_response_two_formatted = self._get_chat_message_comparison(response, first_response_two)
+                if self.multimodal and isinstance(message, dict):
+                    for x in message["files"]:
+                        history.append([(x,), None])
+
+                    update = history + [[message["text"], first_response_two_formatted]]
+                    yield update, update
+                else:
+                    update = history + [[message, first_response_two_formatted]]
+                    yield update, update
+            except StopIteration:
+                if self.multimodal and isinstance(message, dict):
+                    self._append_multimodal_history(message, None, history)
+                    yield history, history
+                else:
+                    update = history + [[message, None]]
+                    yield update, update
+            async for response_two in generator_two:
+                response_two = self._get_chat_message_comparison(response, response_two)
+                if self.multimodal and isinstance(message, dict):
+                    update = history + [[message["text"], response_two]]
+                    yield update, update
+                else:
+                    update = history + [[message, response_two]]
+                    yield update, update
+
     async def _log_fn(
         self, message: str | dict[str, list], history: list[list[str | tuple | None]], log: str
     ) -> tuple[
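For context on what the buttons and commit scheduler above feed into: the 👍/👎 ("kto") buttons produce unpaired feedback, the A/B buttons produce paired preferences, and self.commit_scheduler is what persists records to a Hub dataset. A minimal, hypothetical sketch of that logging pattern and of the record shapes such feedback naturally maps onto; the repo id, folder name and values are illustrative and not taken from this repo, while the field names follow the common KTO/DPO dataset conventions used by TRL:

    import json
    import uuid
    from pathlib import Path

    from huggingface_hub import CommitScheduler

    # Hypothetical local folder and dataset repo for the collected feedback.
    feedback_folder = Path("feedback_data")
    feedback_folder.mkdir(parents=True, exist_ok=True)
    feedback_file = feedback_folder / f"{uuid.uuid4()}.jsonl"

    # CommitScheduler periodically commits the folder to a Hub dataset repo in the background.
    scheduler = CommitScheduler(
        repo_id="my-user/llm-human-feedback",  # illustrative dataset repo id
        repo_type="dataset",
        folder_path=feedback_folder,
        every=5,  # minutes between commits
    )

    def log_record(record: dict) -> None:
        """Append one feedback record as a JSON line; the lock avoids writing during a commit."""
        with scheduler.lock:
            with feedback_file.open("a") as f:
                f.write(json.dumps(record) + "\n")

    # Unpaired 👍/👎 feedback (KTO-style): one completion plus a boolean label.
    log_record({
        "prompt": "Who was Anthony Bourdain?",
        "completion": "Anthony Bourdain was an American chef, author and travel documentarian.",
        "label": True,  # True for 👍, False for 👎
    })

    # Paired A/B feedback (DPO-style): a chosen and a rejected completion for the same prompt.
    log_record({
        "prompt": "Who was Anthony Bourdain?",
        "chosen": "Anthony Bourdain was an American chef, author and travel documentarian.",
        "rejected": "He was a famous football player.",
    })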
requirements.txt CHANGED
@@ -1,8 +1,10 @@
-bitsandbytes==0.42
-gradio==4.36.1
+gradio==4.39
 scipy==1.13.0
-sentencepiece==0.2.0
 spaces==0.28.3
-torch
+torch
+accelerate
+bitsandbytes
+torch
+transformers>=4.43.2
+einops
+sentencepiece