Spaces: wuhp · Running on Zero

wuhp committed (verified) · Commit 5755412 · Parent: eccd8f6

Update app.py

Files changed (1): app.py (+30 -218)
app.py CHANGED
@@ -1,240 +1,52 @@
-import os
+import gradio as gr
+import spaces
 import torch
-from torch.utils.data import Dataset
 from transformers import (
     AutoConfig,
     AutoTokenizer,
     AutoModelForCausalLM,
-    Trainer,
-    TrainingArguments,
-    GenerationConfig,
     pipeline
 )
-import gradio as gr
-
-
-# ---------------------------
-# A) Dummy training dataset
-# ---------------------------
-class MyTextDataset(Dataset):
-    """
-    Very simple dataset example. In reality:
-    - Use real text data,
-    - Possibly use HF 'datasets' library,
-    - Tokenize in chunks, etc.
-    """
-    def __init__(self, tokenizer, texts, block_size=128):
-        self.examples = []
-        for txt in texts:
-            # Tokenize each text
-            tokens = tokenizer(txt, truncation=True, max_length=block_size)
-            self.examples.append(tokens["input_ids"])
-
-    def __len__(self):
-        return len(self.examples)
-
-    def __getitem__(self, idx):
-        return torch.tensor(self.examples[idx], dtype=torch.long)
-
-
-# ---------------------------
-# B) Training routine
-# ---------------------------
-def train_model(
-    model_name_or_path="wuhp/myr1",
-    subfolder="myr1",
-    output_dir="finetuned_myr1",
-    epochs=1
-):
-    """
-    Demonstrates how to load your custom model from HF, and run a
-    quick 'Trainer' to finetune it on some mock texts.
-
-    - model_name_or_path: huggingface repo ID (or local folder).
-    - subfolder: if your model config/weights live in a subfolder
-      within that repo, specify it here.
-    - output_dir: where to save final trained model.
-    - epochs: how many epochs for this mock training example.
-    """
-
-    # 1) Load config (trust_remote_code=True so we can import custom .py from your repo)
-    config = AutoConfig.from_pretrained(
-        model_name_or_path,
-        subfolder=subfolder,
-        trust_remote_code=True
-    )
-
-    # 2) Load tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_name_or_path,
-        subfolder=subfolder,
-        trust_remote_code=True
-    )
-
-    # 3) Load model
-    #    AutoModelForCausalLM will detect your custom architecture from modeling_deepseek.py
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name_or_path,
-        subfolder=subfolder,
-        config=config,
-        torch_dtype=torch.float16,  # or "auto", or float32
-        device_map="auto",  # If you have enough GPU memory, or "cpu"
-        trust_remote_code=True
-    )
-
-    # 4) Create a tiny training dataset
-    train_texts = [
-        "Hello from DeepSeek!",
-        "The sky is blue.",
-        "Large language models can do amazing things."
-    ]
-    eval_texts = [
-        "Testing is essential for robust code.",
-        "Generative AI is fun."
-    ]
-    train_dataset = MyTextDataset(tokenizer, train_texts)
-    eval_dataset = MyTextDataset(tokenizer, eval_texts)
-
-    # 5) Trainer hyperparams
-    training_args = TrainingArguments(
-        output_dir=output_dir,
-        overwrite_output_dir=True,
-        num_train_epochs=epochs,
-        per_device_train_batch_size=1,
-        per_device_eval_batch_size=1,
-        evaluation_strategy="epoch",
-        save_strategy="epoch",
-        logging_steps=1,
-        gradient_accumulation_steps=1,
-        fp16=True if torch.cuda.is_available() else False,
-        # If you have limited VRAM and can't do FP16, set fp16=False above
-    )
-
-    # 6) Define data collator for causal LM. Typically:
-    from transformers import DataCollatorForLanguageModeling
-    data_collator = DataCollatorForLanguageModeling(
-        tokenizer=tokenizer, mlm=False
-    )
-
-    # 7) Build trainer
-    trainer = Trainer(
-        model=model,
-        args=training_args,
-        data_collator=data_collator,
-        train_dataset=train_dataset,
-        eval_dataset=eval_dataset
-    )
-
-    # 8) Train
-    trainer.train()
-
-    # 9) Save model & tokenizer
-    trainer.save_model(output_dir)
-    tokenizer.save_pretrained(output_dir)
-
-    return trainer
-
-
-# ---------------------------
-# C) Gradio app function
-# ---------------------------
-def create_gradio_demo(
-    model_name_or_path="finetuned_myr1",
-    generation_config_path=None
-):
-    """
-    Loads a (fine-tuned) model from local or HF, sets up
-    a text-generation pipeline, and returns a Gradio interface.
-    """
 
-    # 1) Load config
-    config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
-
-    # 2) Load model & tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
+# 1) Decorate your GPU-dependent function(s)
+@spaces.GPU(duration=60)  # default is 60s, can increase if needed
+def load_pipeline():
+    # -- load config & model from wuhp/myr1 --
+    config = AutoConfig.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
-        model_name_or_path,
+        "wuhp/myr1",
+        subfolder="myr1",
         config=config,
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        torch_dtype=torch.float16,  # half precision
         device_map="auto",
         trust_remote_code=True
     )
-
-    # 3) (Optional) load generation config if present
-    #    e.g. custom top_k, top_p, temperature, etc.
-    #    If your repo has "generation_config.json" in subfolder="myr1",
-    #    you could also do:
-    #    GenerationConfig.from_pretrained("wuhp/myr1", subfolder="myr1", ...)
-    #    Or from local path if downloaded.
-    if generation_config_path:
-        gen_config = GenerationConfig.from_json_file(generation_config_path)
-    else:
-        # fallback to default or config
-        gen_config = GenerationConfig.from_model_config(config)
-
-    # 4) Build a text-generation pipeline
+    # optional: load generation config if you have generation_config.json
     text_pipeline = pipeline(
         "text-generation",
         model=model,
-        tokenizer=tokenizer,
-        generation_config=gen_config,
+        tokenizer=tokenizer
     )
+    return text_pipeline
 
-    # 5) Define Gradio predict function
-    def predict(prompt, max_new_tokens=64, temperature=0.7, top_p=0.95):
-        """
-        Generates text from the model given a user prompt.
-        """
-        outputs = text_pipeline(
-            prompt,
-            max_new_tokens=int(max_new_tokens),
-            temperature=float(temperature),
-            top_p=float(top_p)
-        )
-        # The pipeline returns a list of dicts like [{'generated_text': '...'}]
-        return outputs[0]["generated_text"]
-
-    # 6) Create the Gradio Interface
-    with gr.Blocks() as demo:
-        gr.Markdown("## DeepSeek LLM Demo")
-        prompt = gr.Textbox(label="Enter your prompt:")
-        max_new_tokens = gr.Slider(1, 512, step=1, value=64, label="Max New Tokens")
-        temperature = gr.Slider(0.0, 1.5, step=0.1, value=0.7, label="Temperature")
-        top_p = gr.Slider(0.0, 1.0, step=0.05, value=0.95, label="Top-p")
-        output = gr.Textbox(label="Generated Text")
+# We'll load it once and store globally
+text_pipeline = load_pipeline()
 
-        generate_btn = gr.Button("Generate")
-        generate_btn.click(
-            fn=predict,
-            inputs=[prompt, max_new_tokens, temperature, top_p],
-            outputs=output
-        )
-    return demo
-
-
-# ---------------------------
-# D) Main: train + launch
-# ---------------------------
-if __name__ == "__main__":
-    # 1) TRAIN (mock demonstration).
-    #    If you just want to *load* your existing model, skip this step.
-    print("Starting mock training on wuhp/myr1 (subfolder myr1)...")
-    trainer = train_model(
-        model_name_or_path="wuhp/myr1",
-        subfolder="myr1",
-        output_dir="finetuned_myr1",
-        epochs=1
+def predict(prompt, max_new_tokens=64):
+    outputs = text_pipeline(
+        prompt, max_new_tokens=int(max_new_tokens), do_sample=True, temperature=0.7
     )
-    print("Training complete.")
+    return outputs[0]["generated_text"]
 
-    # 2) Build Gradio app from the newly saved model in 'finetuned_myr1'
-    #    If you want to load the original (un-finetuned) weights, just pass
-    #    model_name_or_path="wuhp/myr1" and subfolder="myr1" again.
-    demo = create_gradio_demo(
-        model_name_or_path="finetuned_myr1",
-        generation_config_path=None  # or "finetuned_myr1/generation_config.json"
-    )
+# 2) Build your Gradio app
+with gr.Blocks() as demo:
+    gr.Markdown("## My LLM Inference (ZeroGPU)")
+    prompt = gr.Textbox(label="Prompt")
+    max_nt = gr.Slider(1, 200, value=64, step=1, label="Max New Tokens")
+    output = gr.Textbox(label="Generated Text")
+
+    btn = gr.Button("Generate")
+    btn.click(fn=predict, inputs=[prompt, max_nt], outputs=output)
 
-    # 3) Launch
-    print("Launching Gradio demo on http://127.0.0.1:7860 ...")
-    demo.launch()
+demo.launch()
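
For reference, the pattern the updated app.py adopts is Hugging Face ZeroGPU's `spaces.GPU` decorator: the Space only gets a GPU attached while a decorated function is running, so GPU-dependent work is wrapped in such a function and everything else stays on CPU. The listing below is a minimal sketch of that pattern in isolation, not this Space's code; the model id `gpt2`, the slider range, and the helper name `generate` are placeholder assumptions (the real Space loads `wuhp/myr1` from the `myr1` subfolder with `trust_remote_code=True`).

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "gpt2"  # placeholder; this Space actually uses wuhp/myr1 (subfolder "myr1")

# Load once at startup; ZeroGPU lets the .to("cuda") call succeed even though
# no GPU is attached yet, and only binds a real device while a decorated
# function is executing.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16)
model.to("cuda")

@spaces.GPU(duration=60)  # request roughly 60 s of GPU time per call
def generate(prompt, max_new_tokens=64):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    ids = model.generate(**inputs, max_new_tokens=int(max_new_tokens), do_sample=True)
    return tokenizer.decode(ids[0], skip_special_tokens=True)

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    max_nt = gr.Slider(1, 200, value=64, step=1, label="Max New Tokens")
    output = gr.Textbox(label="Generated Text")
    gr.Button("Generate").click(fn=generate, inputs=[prompt, max_nt], outputs=output)

demo.launch()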