Spaces: wuhp · Running on Zero

wuhp committed (verified) · Commit 5755412 · Parent: eccd8f6

Update app.py

Files changed (1): app.py (+30 -218)
app.py CHANGED
@@ -1,240 +1,52 @@
-import os
+import gradio as gr
+import spaces
 import torch
-from torch.utils.data import Dataset
 from transformers import (
     AutoConfig,
     AutoTokenizer,
     AutoModelForCausalLM,
-    Trainer,
-    TrainingArguments,
-    GenerationConfig,
     pipeline
 )
-import gradio as gr
-
-
-# ---------------------------
-# A) Dummy training dataset
-# ---------------------------
-class MyTextDataset(Dataset):
-    """
-    Very simple dataset example. In reality:
-    - Use real text data,
-    - Possibly use HF 'datasets' library,
-    - Tokenize in chunks, etc.
-    """
-    def __init__(self, tokenizer, texts, block_size=128):
-        self.examples = []
-        for txt in texts:
-            # Tokenize each text
-            tokens = tokenizer(txt, truncation=True, max_length=block_size)
-            self.examples.append(tokens["input_ids"])
-
-    def __len__(self):
-        return len(self.examples)
-
-    def __getitem__(self, idx):
-        return torch.tensor(self.examples[idx], dtype=torch.long)
-
-
-# ---------------------------
-# B) Training routine
-# ---------------------------
-def train_model(
-    model_name_or_path="wuhp/myr1",
-    subfolder="myr1",
-    output_dir="finetuned_myr1",
-    epochs=1
-):
-    """
-    Demonstrates how to load your custom model from HF, and run a
-    quick 'Trainer' to finetune it on some mock texts.
-
-    - model_name_or_path: huggingface repo ID (or local folder).
-    - subfolder: if your model config/weights live in a subfolder
-      within that repo, specify it here.
-    - output_dir: where to save final trained model.
-    - epochs: how many epochs for this mock training example.
-    """
-
-    # 1) Load config (trust_remote_code=True so we can import custom .py from your repo)
-    config = AutoConfig.from_pretrained(
-        model_name_or_path,
-        subfolder=subfolder,
-        trust_remote_code=True
-    )
-
-    # 2) Load tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_name_or_path,
-        subfolder=subfolder,
-        trust_remote_code=True
-    )
-
-    # 3) Load model
-    #    AutoModelForCausalLM will detect your custom architecture from modeling_deepseek.py
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name_or_path,
-        subfolder=subfolder,
-        config=config,
-        torch_dtype=torch.float16,  # or "auto", or float32
-        device_map="auto",  # If you have enough GPU memory, or "cpu"
-        trust_remote_code=True
-    )
-
-    # 4) Create a tiny training dataset
-    train_texts = [
-        "Hello from DeepSeek!",
-        "The sky is blue.",
-        "Large language models can do amazing things."
-    ]
-    eval_texts = [
-        "Testing is essential for robust code.",
-        "Generative AI is fun."
-    ]
-    train_dataset = MyTextDataset(tokenizer, train_texts)
-    eval_dataset = MyTextDataset(tokenizer, eval_texts)
-
-    # 5) Trainer hyperparams
-    training_args = TrainingArguments(
-        output_dir=output_dir,
-        overwrite_output_dir=True,
-        num_train_epochs=epochs,
-        per_device_train_batch_size=1,
-        per_device_eval_batch_size=1,
-        evaluation_strategy="epoch",
-        save_strategy="epoch",
-        logging_steps=1,
-        gradient_accumulation_steps=1,
-        fp16=True if torch.cuda.is_available() else False,
-        # If you have limited VRAM and can't do FP16, set fp16=False above
-    )
-
-    # 6) Define data collator for causal LM. Typically:
-    from transformers import DataCollatorForLanguageModeling
-    data_collator = DataCollatorForLanguageModeling(
-        tokenizer=tokenizer, mlm=False
-    )
-
-    # 7) Build trainer
-    trainer = Trainer(
-        model=model,
-        args=training_args,
-        data_collator=data_collator,
-        train_dataset=train_dataset,
-        eval_dataset=eval_dataset
-    )
-
-    # 8) Train
-    trainer.train()
-
-    # 9) Save model & tokenizer
-    trainer.save_model(output_dir)
-    tokenizer.save_pretrained(output_dir)
-
-    return trainer
-
-
-# ---------------------------
-# C) Gradio app function
-# ---------------------------
-def create_gradio_demo(
-    model_name_or_path="finetuned_myr1",
-    generation_config_path=None
-):
-    """
-    Loads a (fine-tuned) model from local or HF, sets up
-    a text-generation pipeline, and returns a Gradio interface.
-    """
 
-    # 1) Load config
-    config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
-
-    # 2) Load model & tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
+# 1) Decorate your GPU-dependent function(s)
+@spaces.GPU(duration=60)  # default is 60s, can increase if needed
+def load_pipeline():
+    # -- load config & model from wuhp/myr1 --
+    config = AutoConfig.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
-        model_name_or_path,
+        "wuhp/myr1",
+        subfolder="myr1",
         config=config,
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        torch_dtype=torch.float16,  # half precision
         device_map="auto",
         trust_remote_code=True
     )
-
-    # 3) (Optional) load generation config if present
-    #    e.g. custom top_k, top_p, temperature, etc.
-    #    If your repo has "generation_config.json" in subfolder="myr1",
-    #    you could also do:
-    #    GenerationConfig.from_pretrained("wuhp/myr1", subfolder="myr1", ...)
-    #    Or from local path if downloaded.
-    if generation_config_path:
-        gen_config = GenerationConfig.from_json_file(generation_config_path)
-    else:
-        # fallback to default or config
-        gen_config = GenerationConfig.from_model_config(config)
-
-    # 4) Build a text-generation pipeline
+    # optional: load generation config if you have generation_config.json
     text_pipeline = pipeline(
         "text-generation",
         model=model,
-        tokenizer=tokenizer,
-        generation_config=gen_config,
+        tokenizer=tokenizer
     )
+    return text_pipeline
 
-    # 5) Define Gradio predict function
-    def predict(prompt, max_new_tokens=64, temperature=0.7, top_p=0.95):
-        """
-        Generates text from the model given a user prompt.
-        """
-        outputs = text_pipeline(
-            prompt,
-            max_new_tokens=int(max_new_tokens),
-            temperature=float(temperature),
-            top_p=float(top_p)
-        )
-        # The pipeline returns a list of dicts like [{'generated_text': '...'}]
-        return outputs[0]["generated_text"]
-
-    # 6) Create the Gradio Interface
-    with gr.Blocks() as demo:
-        gr.Markdown("## DeepSeek LLM Demo")
-        prompt = gr.Textbox(label="Enter your prompt:")
-        max_new_tokens = gr.Slider(1, 512, step=1, value=64, label="Max New Tokens")
-        temperature = gr.Slider(0.0, 1.5, step=0.1, value=0.7, label="Temperature")
-        top_p = gr.Slider(0.0, 1.0, step=0.05, value=0.95, label="Top-p")
-        output = gr.Textbox(label="Generated Text")
+# We'll load it once and store globally
+text_pipeline = load_pipeline()
 
-        generate_btn = gr.Button("Generate")
-        generate_btn.click(
-            fn=predict,
-            inputs=[prompt, max_new_tokens, temperature, top_p],
-            outputs=output
-        )
-    return demo
-
-
-# ---------------------------
-# D) Main: train + launch
-# ---------------------------
-if __name__ == "__main__":
-    # 1) TRAIN (mock demonstration).
-    #    If you just want to *load* your existing model, skip this step.
-    print("Starting mock training on wuhp/myr1 (subfolder myr1)...")
-    trainer = train_model(
-        model_name_or_path="wuhp/myr1",
-        subfolder="myr1",
-        output_dir="finetuned_myr1",
-        epochs=1
+def predict(prompt, max_new_tokens=64):
+    outputs = text_pipeline(
+        prompt, max_new_tokens=int(max_new_tokens), do_sample=True, temperature=0.7
     )
-    print("Training complete.")
+    return outputs[0]["generated_text"]
 
-    # 2) Build Gradio app from the newly saved model in 'finetuned_myr1'
-    #    If you want to load the original (un-finetuned) weights, just pass
-    #    model_name_or_path="wuhp/myr1" and subfolder="myr1" again.
-    demo = create_gradio_demo(
-        model_name_or_path="finetuned_myr1",
-        generation_config_path=None  # or "finetuned_myr1/generation_config.json"
-    )
+# 2) Build your Gradio app
+with gr.Blocks() as demo:
+    gr.Markdown("## My LLM Inference (ZeroGPU)")
+    prompt = gr.Textbox(label="Prompt")
+    max_nt = gr.Slider(1, 200, value=64, step=1, label="Max New Tokens")
+    output = gr.Textbox(label="Generated Text")
+
+    btn = gr.Button("Generate")
+    btn.click(fn=predict, inputs=[prompt, max_nt], outputs=output)
 
-    # 3) Launch
-    print("Launching Gradio demo on http://127.0.0.1:7860 ...")
-    demo.launch()
+demo.launch()
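
For reference, the pattern the updated app.py adopts is Hugging Face ZeroGPU's `spaces.GPU` decorator: the Space only gets a GPU attached while a decorated function is running, so GPU-dependent work is wrapped in such a function and everything else stays on CPU. The listing below is a minimal sketch of that pattern in isolation, not this Space's code; the model id `gpt2`, the slider range, and the helper name `generate` are placeholder assumptions (the real Space loads `wuhp/myr1` from the `myr1` subfolder with `trust_remote_code=True`).

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "gpt2"  # placeholder; this Space actually uses wuhp/myr1 (subfolder "myr1")

# Load once at startup; ZeroGPU lets the .to("cuda") call succeed even though
# no GPU is attached yet, and only binds a real device while a decorated
# function is executing.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16)
model.to("cuda")

@spaces.GPU(duration=60)  # request roughly 60 s of GPU time per call
def generate(prompt, max_new_tokens=64):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    ids = model.generate(**inputs, max_new_tokens=int(max_new_tokens), do_sample=True)
    return tokenizer.decode(ids[0], skip_special_tokens=True)

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    max_nt = gr.Slider(1, 200, value=64, step=1, label="Max New Tokens")
    output = gr.Textbox(label="Generated Text")
    gr.Button("Generate").click(fn=generate, inputs=[prompt, max_nt], outputs=output)

demo.launch()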