|
import gradio as gr |
|
from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM |
|
import torch |
|
import spaces |
|
|
|
# Hugging Face Hub repository id of the model served by this app.
MODEL_PATH = "benhaotang/phi4-qwq-sky-t1"

# Public model-card URL, shown as a link in the interface description.
# The host is the canonical "huggingface.co" (no trailing dot).
MODEL_URL = f"https://huggingface.co/{MODEL_PATH}"
|
|
|
def load_model():
    """Build the text-generation pipeline for ``MODEL_PATH``.

    Loads the causal language model in float16 with automatic device
    placement and an on-disk offload folder, then loads the matching
    tokenizer and wraps both in a ``"text-generation"`` pipeline.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    # NOTE(review): ``load_in_8bit`` is False here, so it is unclear whether
    # this config quantizes anything; only the fp32 CPU-offload flag is set.
    # Confirm the intended quantization mode.
    quant_settings = BitsAndBytesConfig(
        load_in_8bit=False,
        llm_int8_enable_fp32_cpu_offload=True,
    )

    model_kwargs = {
        "device_map": "auto",
        "torch_dtype": torch.float16,
        "offload_folder": "offload_folder",
        "quantization_config": quant_settings,
    }
    causal_lm = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **model_kwargs)
    tok = AutoTokenizer.from_pretrained(MODEL_PATH)

    return pipeline(
        "text-generation",
        model=causal_lm,
        tokenizer=tok,
        device_map="auto",
    )


# Built once at import time and shared by every request handler call.
pipe = load_model()
|
|
|
# presumably requests a GPU slot for up to 110 s per call -- confirm against
# the `spaces` package documentation.
@spaces.GPU(duration=110)
def generate_response(prompt, max_length=1024):
    """Generate the assistant's reply to a single user prompt.

    The prompt is sent to the module-level ``pipe`` as a two-message chat
    (a fixed system instruction followed by the user's text).

    Args:
        prompt: The user's question, used as the content of the user message.
        max_length: Passed to the pipeline as ``max_new_tokens``.

    Returns:
        The ``"content"`` of the last generated message when that message has
        role ``"assistant"``; otherwise the whole ``generated_text`` value
        converted to ``str``.
    """
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant. You always think step by step."},
        {"role": "user", "content": prompt},
    ]

    outputs = pipe(messages, max_new_tokens=max_length)
    generated = outputs[0]["generated_text"]

    # With chat input the pipeline is expected to return the conversation as
    # a list of message dicts whose last entry is the new assistant turn.
    # Anything else (plain string, empty list, missing keys) falls back to
    # the stringified raw output so the caller always receives text.
    try:
        last_message = generated[-1]
        role = last_message["role"]
        content = last_message["content"]
    except (IndexError, KeyError, TypeError):
        return str(generated)

    if role == "assistant":
        return content
    return str(generated)
|
|
|
|
|
# Sample question offered as a clickable example in the UI.  A raw string is
# used because the LaTeX backslashes (\mathcal, \phi, \lambda, \epsilon) are
# not valid Python escape sequences and would otherwise trigger warnings.
example_prompt = r"""For a scalar field theory with interaction Lagrangian $\mathcal{L}_{int} = g\phi^3 + \lambda\phi^4$:

1. Enumerate all possible 1-loop Feynman diagrams contributing to the scalar propagator

2. For each diagram, write down its loop contribution

3. Provide Mathematica code to calculate these loop amplitudes with dimensional regularization at $d=4-\epsilon$

Please explain your reasoning step by step."""
|
|
|
# Input widget: a multi-line box for the user's question.
prompt_box = gr.Textbox(
    label="Enter your question",
    placeholder="Ask me anything...",
    lines=5,
)

# Output widget: shows the text returned by generate_response.
response_box = gr.Textbox(label="Response", lines=10)

# Description shown under the title; includes a link to the model page.
description_md = f""" To achieve CoT and science reasoning on small scale with a merge of CoT finetuned phi4 model.

Model: [benhaotang/phi4-qwq-sky-t1]({MODEL_URL})"""

# Single-function interface: one prompt in, one response out.
demo = gr.Interface(
    fn=generate_response,
    inputs=[prompt_box],
    outputs=response_box,
    title="benhaotang/phi4-qwq-sky-t1",
    description=description_md,
    examples=[[example_prompt]],
)

# Start the web app.
demo.launch()