Spaces: Running on Zero
File size: 2,906 Bytes
import gradio as gr
from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM
import torch
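# `spaces` provides the @spaces.GPU decorator used on Hugging Face ZeroGPU
# Spaces to request a GPU for the duration of a decorated call.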
import spaces

MODEL_PATH = "benhaotang/phi4-qwq-sky-t1"
MODEL_URL = f"https://huggingface.co./{MODEL_PATH}"

def load_model():
    # Quantization config: 8-bit loading is disabled, but fp32 CPU offload is
    # enabled so layers that do not fit on the GPU can spill to system RAM.
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=False,
        llm_int8_enable_fp32_cpu_offload=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="auto",  # spread layers across available devices
        torch_dtype=torch.float16,
        offload_folder="offload_folder",  # disk staging area for offloaded weights
        quantization_config=bnb_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map="auto",
    )
    return pipe
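
# Load the model and pipeline once at startup; every request below reuses it.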
pipe = load_model()

@spaces.GPU(duration=110)
def generate_response(prompt, max_length=1024):
    # Build the chat with a system prompt that encourages step-by-step reasoning
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant. You always think step by step."},
        {"role": "user", "content": prompt},
    ]
    outputs = pipe(messages, max_new_tokens=max_length)
    # Extract just the assistant's response
    try:
        # outputs[0]["generated_text"] is already a list of chat messages
        message_list = outputs[0]["generated_text"]
        # The last message is the assistant's reply
        assistant_message = message_list[-1]
        if assistant_message["role"] == "assistant":
            return assistant_message["content"]
    except Exception:
        # If extraction fails, fall back to the raw output
        return str(outputs[0]["generated_text"])
    return outputs[0]["generated_text"]
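
# For chat-style input, the transformers text-generation pipeline returns the
# full transcript; a sketch of the expected shape of `outputs`:
# [{"generated_text": [
#     {"role": "system", "content": "..."},
#     {"role": "user", "content": "..."},
#     {"role": "assistant", "content": "..."},
# ]}]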

# Example prompt with explicit line breaks; a raw string so the LaTeX
# backslashes are not treated as escape sequences.
example_prompt = r"""For a scalar field theory with interaction Lagrangian $\mathcal{L}_{int} = g\phi^3 + \lambda\phi^4$:
1. Enumerate all possible 1-loop Feynman diagrams contributing to the scalar propagator
2. For each diagram, write down its loop contribution
3. Provide Mathematica code to calculate these loop amplitudes with dimensional regularization at $d=4-\epsilon$
Please explain your reasoning step by step."""

demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(
            label="Enter your question",
            placeholder="Ask me anything...",
            lines=5,
        ),
    ],
    outputs=gr.Textbox(label="Response", lines=10),
    title="benhaotang/phi4-qwq-sky-t1",
    description=f"""Small-scale chain-of-thought (CoT) and science reasoning with a merge of CoT-finetuned Phi-4 models.
Model: [benhaotang/phi4-qwq-sky-t1]({MODEL_URL})""",
    examples=[
        [example_prompt],  # the formatted example prompt defined above
    ],
)

demo.launch()
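
# A minimal client-side sketch for querying this demo remotely. It assumes the
# Space is public, that its id matches MODEL_PATH, and that the gradio_client
# package is installed; "/predict" is Gradio's default endpoint name for a
# single-function Interface. Kept as a comment since demo.launch() blocks.
#
#   from gradio_client import Client
#
#   client = Client("benhaotang/phi4-qwq-sky-t1")  # assumed Space id
#   answer = client.predict("Explain dimensional regularization.", api_name="/predict")
#   print(answer)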