File size: 4,583 Bytes
709d394
 
486a2f6
38fedf1
709d394
 
a13c01c
 
29437cc
a13c01c
 
 
38fedf1
 
709d394
 
38fedf1
709d394
 
 
 
 
dc0acc6
709d394
ef2fea2
600a2a9
709d394
 
 
 
29437cc
 
 
 
709d394
 
1cdad52
4f6966f
b5aae38
 
 
 
 
6e1661f
cd0aa02
6e1661f
 
 
cd0aa02
a13c01c
6e1661f
a13c01c
 
 
b5aae38
acf224c
7fc9307
acf224c
 
7fc9307
a13c01c
 
 
 
 
8325138
1cdad52
4f6966f
 
 
 
 
1cdad52
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import spaces
import gradio as gr
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Markdown/HTML banner for the demo page (passed to gr.Markdown below).
# Links point at the canonical "huggingface.co" host; the previous
# "huggingface.co." (trailing dot) form is a different origin as far as the
# browser is concerned. A stray closing </h3> with no opening tag was dropped.
title = """
# Welcome to 🌟Tonic's🫡Command-R
🫡Command-R is a Large Language Model optimized for conversational interaction and long context tasks. It targets the “scalable” category of models that balance high performance with strong accuracy, enabling companies to move beyond proof of concept, and into production. 🫡Command-R boasts high precision on retrieval augmented generation (RAG) and tool use tasks, low latency and high throughput, a long 128k context, and strong capabilities across 10 key languages. You can build with this endpoint using🫡Command-R available here : [CohereForAI/c4ai-command-r-v01](https://huggingface.co/CohereForAI/c4ai-command-r-v01). You can also use 🫡Command-R by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/Command-R?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) Math 🔍 [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Torchon](https://github.com/Tonic-AI/Torchon)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""

# Checkpoint identifier; both the tokenizer and the model are loaded from it.
model_id = "CohereForAI/c4ai-command-r-v01"

# Quantization settings handed to from_pretrained: load the weights in 8-bit.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
)

@spaces.GPU
def generate_response(user_input, max_new_tokens, temperature):
    """Generate a single-turn chat reply for ``user_input``.

    The message is wrapped in the tokenizer's chat template as one user
    turn, sampled from the model, and only the newly generated tokens are
    decoded and returned.

    Args:
        user_input: Text of the user's message.
        max_new_tokens: Upper bound on the number of tokens to generate.
            Coerced to ``int`` so a float-valued UI reading is accepted.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        The decoded reply with special tokens removed and leading
        whitespace stripped; the prompt is not included.
    """
    messages = [{"role": "user", "content": user_input}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    input_ids = input_ids.to(model.device)
    gen_tokens = model.generate(
        input_ids=input_ids,
        max_new_tokens=int(max_new_tokens),
        do_sample=True,
        temperature=temperature,
    )

    # The output sequence starts with the prompt tokens. Drop them by length
    # rather than by comparing decoded text with user_input, which silently
    # fails whenever decoding does not reproduce the input character for
    # character (whitespace or unicode normalization, template text, ...).
    prompt_length = input_ids.shape[-1]
    reply_tokens = gen_tokens[0][prompt_length:]
    return tokenizer.decode(reply_tokens, skip_special_tokens=True).lstrip()


    
# Canned prompts, each paired with the generation settings to load with it.
examples = [
    {"message": "What is the weather like today?", "max_new_tokens": 250, "temperature": 0.5},
    {"message": "Tell me a joke.", "max_new_tokens": 650, "temperature": 0.7},
    {"message": "Explain the concept of machine learning.", "max_new_tokens": 980, "temperature": 0.4},
]
# Dropdown labels "Example 1", "Example 2", ... in the same order as `examples`.
example_choices = [f"Example {number}" for number, _ in enumerate(examples, start=1)]


def load_example(choice):
    """Return the message and generation settings for a dropdown label.

    Args:
        choice: One of the labels in ``example_choices``.

    Returns:
        A ``(message, max_new_tokens, temperature)`` tuple taken from the
        entry of ``examples`` at the same position as ``choice``.

    Raises:
        gr.Error: If ``choice`` is not a known label (for example ``None``
            when nothing has been selected), so the UI can show a readable
            message instead of an unhandled ``ValueError``.
    """
    if choice not in example_choices:
        raise gr.Error("Please select an example to load.")
    example = examples[example_choices.index(choice)]
    return example["message"], example["max_new_tokens"], example["temperature"]


# Build the demo UI: intro text, generation controls, a prompt box with its
# output, and a small "load example" helper.
# NOTE(review): statement order is kept as-is on the assumption that Blocks
# lays components out in creation order - confirm before reordering.
with gr.Blocks() as demo:
    # Intro banner defined in `title`.
    gr.Markdown(title)
    # Generation settings, grouped in a single gr.Row.
    with gr.Row():
        max_new_tokens_slider = gr.Slider(minimum=100, maximum=4000, value=980, label="Max New Tokens")
        temperature_slider = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.3, label="Temperature")
    message_box = gr.Textbox(lines=2, label="Your Message")
    generate_button = gr.Button("Try🫡Command-R")
    output_box = gr.Textbox(label="🫡Command-R")

    # Clicking the button calls generate_response(message, max_new_tokens,
    # temperature) and writes its return value into the output box.
    generate_button.click(
        fn=generate_response,
        inputs=[message_box, max_new_tokens_slider, temperature_slider],
        outputs=output_box
    )
    # Example loader: the selected label is passed to load_example, whose
    # three return values fill the message box and both sliders.
    example_dropdown = gr.Dropdown(label="🫡Load Example", choices=example_choices)
    example_button = gr.Button("🫡Load")
    example_button.click(
        fn=load_example,
        inputs=example_dropdown,
        outputs=[message_box, max_new_tokens_slider, temperature_slider]
    )

# Runs at module import/execution time (no __main__ guard).
demo.launch()