autocomplete-emails

Runtime error

App Files Files Community

Roblox22r

pszemraj committed on Nov 16, 2022

Commit

f590842

0 Parent(s):

Duplicate from postbot/autocomplete-emails

Browse files

Co-authored-by: Peter Szemraj <[email protected]>

Files changed (6) hide show

.gitattributes +31 -0
.gitignore +21 -0
README.md +19 -0
app.py +297 -0
requirements.txt +3 -0
utils.py +165 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,31 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zstandard filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,21 @@

+# ignore gradio db files
+# sys files
+*__pycache__*
+*__pycache__/
+# data
+*.txt
+*.pkl
+*flagged/
+# ignore log files
+*.log
+*logs/
+# scratch
+*scratch/
+*scratch*
+# notebooks
+*notebooks/

README.md ADDED Viewed

	@@ -0,0 +1,19 @@

+---
+title: Autocomplete Emails
+emoji: 📨
+colorFrom: gray
+colorTo: blue
+sdk: gradio
+sdk_version: 3.1.4
+app_file: app.py
+pinned: true
+license: apache-2.0
+tags:
+- email
+- autocomplete
+- text generation
+- contrastive search
+duplicated_from: postbot/autocomplete-emails
+---
+Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>

app.py ADDED Viewed

	@@ -0,0 +1,297 @@

+import argparse
+import pprint as pp
+import logging
+import time
+import gradio as gr
+import torch
+from transformers import pipeline
+from utils import make_mailto_form, postprocess, clear, make_email_link
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+use_gpu = torch.cuda.is_available()
+def generate_text(
+    prompt: str,
+    gen_length=64,
+    penalty_alpha=0.6,
+    top_k=6,
+    no_repeat_ngram_size=2,
+    length_penalty=1.0,
+    # perma params (not set by user)
+    abs_max_length=512,
+    verbose=False,
+):
+    """
+    generate_text - complete an email prompt using the module-level `generator` pipeline
+    Args:
+        prompt (str): the prompt to generate text from
+        gen_length (int, optional): number of tokens to generate on top of the prompt. Defaults to 64.
+        penalty_alpha (float, optional): forwarded to the pipeline as `penalty_alpha`. Defaults to 0.6.
+        top_k (int, optional): forwarded to the pipeline as `top_k`. Defaults to 6.
+        no_repeat_ngram_size (int, optional): forwarded to the pipeline as `no_repeat_ngram_size`. Defaults to 2.
+        length_penalty (float, optional): forwarded to the pipeline as `length_penalty`. Defaults to 1.0.
+        abs_max_length (int, optional): prompt token count above which a notice is logged. Defaults to 512.
+        verbose (bool, optional): also log the prompt, parameters and raw output. Defaults to False.
+    Returns:
+        tuple: (HTML mailto form built from the generated email, the post-processed email text)
+    """
+    global generator
+    # These values are wired in from UI widgets and are not guaranteed to arrive
+    # as ints; they are used as token counts below, so coerce them once up front.
+    gen_length = int(gen_length)
+    top_k = int(top_k)
+    no_repeat_ngram_size = int(no_repeat_ngram_size)
+    if verbose:
+        logging.info(f"Generating text from prompt:\n\n{prompt}")
+        logging.info(
+            f"params:\tmax_length={gen_length}, penalty_alpha={penalty_alpha}, top_k={top_k}, no_repeat_ngram_size={no_repeat_ngram_size}, length_penalty={length_penalty}"
+        )
+    st = time.perf_counter()
+    # max_length/min_length are passed as prompt length plus an offset,
+    # so the prompt's token count is needed first
+    input_tokens = generator.tokenizer(prompt)
+    input_len = len(input_tokens["input_ids"])
+    if input_len > abs_max_length:
+        logging.info(f"Input too long {input_len} > {abs_max_length}, may cause errors")
+    result = generator(
+        prompt,
+        max_length=gen_length + input_len,
+        min_length=input_len + 4,
+        penalty_alpha=penalty_alpha,
+        top_k=top_k,
+        no_repeat_ngram_size=no_repeat_ngram_size,
+        length_penalty=length_penalty,
+    )  # generate
+    response = result[0]["generated_text"]
+    rt = time.perf_counter() - st
+    if verbose:
+        logging.info(f"Generated text: {response}")
+    rt_string = f"Generation time: {rt:.2f}s"
+    logging.info(rt_string)
+    # scrub dataset-specific names before showing the text to the user
+    formatted_email = postprocess(response)
+    return make_mailto_form(body=formatted_email), formatted_email
+def load_emailgen_model(model_tag: str):
+    """
+    load_emailgen_model - (re)build the module-level text generation pipeline
+    Args:
+        model_tag (str): the huggingface model tag to load
+    Side effects:
+        rebinds the global `generator` to the new pipeline; nothing is returned
+    """
+    global generator
+    # device index 0 when `use_gpu` is set, -1 otherwise
+    target_device = -1 if not use_gpu else 0
+    generator = pipeline("text-generation", model_tag, device=target_device)
+def get_parser():
+    """
+    get_parser - build the command-line parser for the demo
+    Returns:
+        argparse.ArgumentParser: parser exposing --model, --verbose, --penalty_alpha and --top_k
+    """
+    cli = argparse.ArgumentParser(description="Text Generation demo for postbot")
+    # (flags, keyword options) pairs, registered in declaration order
+    option_specs = [
+        (
+            ("-m", "--model"),
+            {
+                "required": False,
+                "type": str,
+                "default": "postbot/distilgpt2-emailgen-V2",
+                "help": "Pass an different huggingface model tag to use a custom model",
+            },
+        ),
+        (
+            ("-v", "--verbose"),
+            {
+                "required": False,
+                "action": "store_true",
+                "help": "Verbose output",
+            },
+        ),
+        (
+            ("-a", "--penalty_alpha"),
+            {
+                "type": float,
+                "default": 0.6,
+                "help": "The penalty alpha for the text generation pipeline (contrastive search) - default 0.6",
+            },
+        ),
+        (
+            ("-k", "--top_k"),
+            {
+                "type": int,
+                "default": 6,
+                "help": "The top k for the text generation pipeline (contrastive search) - default 6",
+            },
+        ),
+    ]
+    for flags, options in option_specs:
+        cli.add_argument(*flags, **options)
+    return cli
+# Text pre-filled into the "Email Prompt" textbox when the UI loads.
+default_prompt = """
+Hello,
+Following up on last week's bubblegum shipment, I"""
+# Model tags offered in the "Choose a model" dropdown.
+available_models = [
+    "postbot/distilgpt2-emailgen-V2",
+    "postbot/distilgpt2-emailgen",
+    "postbot/gpt2-medium-emailgen",
+]
+if __name__ == "__main__":
+    # Script entry point: parse CLI args, load the model, build and launch the Gradio UI.
+    logging.info("\n\n\nStarting new instance of app.py")
+    parser = get_parser()
+    args = parser.parse_args()
+    logging.info(f"received args:\t{args}")
+    model_tag = args.model
+    # NOTE(review): `verbose` is parsed here but is not forwarded to generate_text
+    # by the click handler below, so the flag currently has no effect on generation.
+    verbose = args.verbose
+    top_k = args.top_k
+    alpha = args.penalty_alpha
+    # Validate explicitly instead of with `assert`, which is stripped under `python -O`.
+    if top_k <= 0:
+        parser.error("top_k must be greater than 0")
+    if not 0.0 <= alpha <= 1.0:
+        parser.error("penalty_alpha must be between 0 and 1")
+    logging.info(f"Loading model: {model_tag}, use GPU = {use_gpu}")
+    # Reuse the same loader the "Load Model" button calls so both paths stay in sync.
+    load_emailgen_model(model_tag)
+    demo = gr.Blocks()
+    logging.info("launching interface...")
+    with demo:
+        gr.Markdown("# Auto-Complete Emails - Demo")
+        gr.Markdown(
+            "Enter part of an email, and a text-gen model will complete it! See details below. "
+        )
+        gr.Markdown("---")
+        with gr.Column():
+            # --- prompt input and primary controls ---
+            gr.Markdown("## Generate Text")
+            gr.Markdown("Edit the prompt and parameters and press **Generate**!")
+            prompt_text = gr.Textbox(
+                lines=4,
+                label="Email Prompt",
+                value=default_prompt,
+            )
+            with gr.Row():
+                clear_button = gr.Button(
+                    value="Clear Prompt",
+                )
+                num_gen_tokens = gr.Slider(
+                    label="Generation Tokens",
+                    value=32,
+                    maximum=96,
+                    minimum=16,
+                    step=8,
+                )
+            generate_button = gr.Button(
+                value="Generate!",
+                variant="primary",
+            )
+            gr.Markdown("---")
+            # --- outputs ---
+            gr.Markdown("### Results")
+            # put a large HTML placeholder here
+            generated_email = gr.Textbox(
+                label="Generated Text",
+                placeholder="This is where the generated text will appear",
+                interactive=False,
+            )
+            email_mailto_button = gr.HTML(
+                "<i>a clickable email button will appear here</i>"
+            )
+            gr.Markdown("---")
+            # --- decoding parameters and model selection ---
+            gr.Markdown("## Advanced Options")
+            gr.Markdown(
+                "This demo generates text via the new [constrastive search](https://huggingface.co/blog/introducing-csearch). See details on the csearch blog post for the methods' parameters or [here](https://huggingface.co/blog/how-to-generate), for general decoding."
+            )
+            with gr.Row():
+                model_name = gr.Dropdown(
+                    choices=available_models,
+                    label="Choose a model",
+                    value=model_tag,
+                )
+                load_model_button = gr.Button(
+                    "Load Model",
+                    variant="secondary",
+                )
+                no_repeat_ngram_size = gr.Radio(
+                    choices=[1, 2, 3, 4],
+                    label="no repeat ngram size",
+                    value=3,
+                )
+            with gr.Row():
+                contrastive_top_k = gr.Radio(
+                    choices=[2, 4, 6, 8],
+                    label="Top K",
+                    value=top_k,
+                )
+                penalty_alpha = gr.Slider(
+                    label="Penalty Alpha",
+                    value=alpha,
+                    maximum=1.0,
+                    minimum=0.0,
+                    step=0.1,
+                )
+                length_penalty = gr.Slider(
+                    minimum=0.5,
+                    maximum=1.0,
+                    label="Length Penalty",
+                    value=1.0,
+                    step=0.1,
+                )
+            gr.Markdown("---")
+        with gr.Column():
+            gr.Markdown("## About")
+            gr.Markdown(
+                "[This model](https://huggingface.co/postbot/distilgpt2-emailgen) is a fine-tuned version of distilgpt2 on a dataset of 100k emails sourced from the internet, including the classic `aeslc` dataset.\n\nCheck out the model card for details on notebook & command line usage."
+            )
+            gr.Markdown(
+                "The intended use of this model is to provide suggestions to _auto-complete_ the rest of your email. Said another way, it should serve as a **tool to write predictable emails faster**. It is not intended to write entire emails from scratch; at least **some input** is required to guide the direction of the model.\n\nPlease verify any suggestions by the model for A) False claims and B) negation statements **before** accepting/sending something."
+            )
+            gr.Markdown("---")
+        # --- event wiring ---
+        clear_button.click(
+            fn=clear,
+            inputs=[prompt_text],
+            outputs=[prompt_text],
+        )
+        # input order must match generate_text's positional parameters
+        generate_button.click(
+            fn=generate_text,
+            inputs=[
+                prompt_text,
+                num_gen_tokens,
+                penalty_alpha,
+                contrastive_top_k,
+                no_repeat_ngram_size,
+                length_penalty,
+            ],
+            outputs=[email_mailto_button, generated_email],
+        )
+        load_model_button.click(
+            fn=load_emailgen_model,
+            inputs=[model_name],
+            outputs=[],
+        )
+    demo.launch(
+        enable_queue=True,
+        share=True,  # for local testing
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+torch
+transformers>=4.24.0

utils.py ADDED Viewed

	@@ -0,0 +1,165 @@

+"""
+    utils.py - Utility functions for the project.
+"""
+import logging
+import re
+def postprocess(text: str):
+    """
+    postprocess - replace names that are common in the scraped training data
+    Each key is matched as a whole word, either exactly as written or fully
+    lower-cased. Longer keys take precedence over their own prefixes, so
+    "Enron Corporation" becomes "COMPANY" and not "COMPANY Corporation".
+    Args:
+        text (str): the text to postprocess
+    Returns:
+        str: the text with the known names substituted
+    """
+    replacements = {
+        "ENA": "COMPANY",
+        "Enron": "COMPANY",
+        "Enron Corporation": "COMPANY",
+        "Sony Pictures Entertainment": "COMPANY",
+        "Columbia Pictures": "COMPANY",
+        "Sony": "COMPANY",
+        "Columbia": "COMPANY",
+        "Hillary": "Jane",
+        "Clinton": "Smith",
+        "Amy": "Jane",
+        "Sara": "Jane",
+        "Harambe": "Jane",
+        "Pascal": "PERSON",
+    }
+    # accept each key as written and in lowercase
+    variants = {}
+    for key, value in replacements.items():
+        variants[key] = value
+        variants.setdefault(key.lower(), value)
+    # Longest alternative first so multi-word names win over their prefixes;
+    # \b keeps "ena" from matching inside ordinary words such as "enable".
+    ordered = sorted(variants, key=len, reverse=True)
+    pattern = re.compile(r"\b(?:" + "|".join(re.escape(k) for k in ordered) + r")\b")
+    return pattern.sub(lambda match: variants[match.group(0)], text)
+def clear(text, verbose=False, **kwargs):
+    """Button callback: discard `text` and return an empty string (logs the old text when verbose)."""
+    if not verbose:
+        return ""
+    logging.info(f"Clearing text: {text}")
+    return ""
+def make_email_link(
+    subject: str = "Email subject - This was generated by Postbot",
+    link_text: str = "click to open in your email client",
+    body: str = None,
+    tag_placeholder: str = "PLACEHOLDER",
+):
+    """
+    email_link - generate an HTML button that opens a pre-filled mailto: link
+    Args:
+        subject (str, optional): the subject of the email. Defaults to "Email subject - This was generated by Postbot".
+        link_text (str, optional): the text shown on the button. Defaults to "click to open in your email client".
+        body (str, optional): the body of the email. Defaults to None.
+        tag_placeholder (str, optional): text substituted for anything that looks like an HTML tag in the body. Defaults to "PLACEHOLDER".
+    Returns:
+        str: an HTML document containing the button
+    """
+    # local imports: these names are not imported at module level
+    from html import escape
+    from urllib.parse import quote
+    if body is None:
+        body = "hmm - no body. replace me"
+    # strip brackets and other HTML-tag characters from body with regex
+    body = re.sub(r"<[^>]*>", tag_placeholder, body)
+    # replace all newline chars with a whitespace
+    body = body.replace("\n", " ")
+    # Percent-encode the mailto parameters: a raw quote would terminate the inline
+    # JS string in `onclick`, and a raw "&" would end the parameter early.
+    mailto_subject = quote(subject, safe="")
+    mailto_body = quote(body, safe="")
+    button_label = escape(link_text)
+    nice_html_button = f"""<!DOCTYPE html>
+    <html>
+    <head>
+        <title>Generated Email</title>
+    <style>
+        body {{
+            font-family: sans-serif;
+            font-size: 1.2em;
+        }}
+        .button {{
+            background-color: #6CCEC6;
+            border: none;
+            color: white;
+            padding: 15px 32px;
+            text-align: center;
+            text-decoration: none;
+            display: inline-block;
+            font-size: 16px;
+            margin: 4px 2px;
+            cursor: pointer;
+            value: "Send Email";
+        }}
+    </style>
+    </head>
+    <body>
+    <button class="button" onclick="window.location.href='mailto:?subject={mailto_subject}&body={mailto_body}'">{button_label}</button>
+    </body>
+    </html>"""
+    return nice_html_button
+def make_mailto_form(
+    body: str = None,
+    subject: str = "This email was generated by Postbot with AI!",
+    cc_email: str = "",
+):
+    """
+    make_mailto_form - build an HTML form that opens the email in the user's mail client
+    The three values are HTML-escaped before being placed in the page, so text
+    such as "</textarea>" or "<script>" in the body is shown literally instead
+    of being interpreted as markup.
+    Args:
+        body (str, optional): the body of the email. Defaults to None, which inserts a placeholder message.
+        subject (str, optional): the pre-filled subject line. Defaults to "This email was generated by Postbot with AI!".
+        cc_email (str, optional): the pre-filled CC address. Defaults to "".
+    Returns:
+        str: an HTML document containing the form
+    """
+    # local import: `html` is not imported at module level
+    import html
+    if body is None:
+        body = "hmm - no body. Replace me or try rerunning the model."
+    # escape quotes as well, since subject and cc land inside attribute values
+    safe_body = html.escape(body, quote=True)
+    safe_subject = html.escape(subject, quote=True)
+    safe_cc = html.escape(cc_email, quote=True)
+    template = f"""<!DOCTYPE html>
+    <html>
+    <head>
+        <title>Generated Email</title>
+    <style>
+        body {{
+            font-family: sans-serif;
+            font-size: 1.2em;
+        }}
+        .button {{
+            background-color: #6CCEC6;
+            border: none;
+            color: white;
+            padding: 15px 32px;
+            text-align: center;
+            text-decoration: none;
+            display: inline-block;
+            font-size: 16px;
+            margin: 4px 2px;
+            cursor: pointer;
+            value: "Send Email";
+        }}
+    </style>
+    </head>
+    <body>
+        <h1>Adjust and Open in your mail client:</h1>
+        <form action="mailto:" method="get" enctype="text/plain">
+        <div>
+            <label for="cc">CC Email:
+            <input type="text" name="cc" id="cc" value="{safe_cc}"/>
+            </label>
+        </div>
+        <div>
+            <label for="subject">Subject:
+            <input type="text" name="subject" id="subject" value="{safe_subject}"/>
+            </label>
+        </div>
+        <div>
+            <label>Email Body:</label>
+            <br />
+            <textarea name="body" id="body" rows="12" cols="35">{safe_body}</textarea>
+        </div>
+        <div>
+            <input type="submit" name="submit" value="Open in Email App" class="button"/>
+        </div>
+        </form>
+    </body>
+    </html>"""
+    return template