import os

# GRADIO_TEMP_DIR must be set before gradio is imported so gradio picks it
# up at import time; same for the CUDA allocator config before torch loads.
os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.getcwd(), ".tmp_outputs")
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import uuid

import gradio as gr
import spaces
from videosys import CogVideoXConfig, CogVideoXPABConfig, VideoSysEngine

# Default prompt pre-filled in the UI textbox.
PROMPT = "A modern living room with a minimalist design, featuring a large window, a white ceiling, and a wooden floor. The room is furnished with a white sofa, a gray ottoman, a wooden table, and a hanging light. The space is well-lit and has a clean, contemporary aesthetic."


def load_model(model_name, enable_video_sys=False, pab_threshold=(100, 850), pab_range=2):
    """Build a VideoSysEngine for a CogVideoX checkpoint.

    Args:
        model_name: Hugging Face model id of the CogVideoX checkpoint.
        enable_video_sys: Enable Pyramid Attention Broadcast (PAB) if True.
        pab_threshold: ``(end, start)`` spatial PAB timestep thresholds.
            An immutable tuple default replaces the original mutable list
            default (shared-mutable-default pitfall).
        pab_range: Attention broadcast range.

    Returns:
        A configured ``VideoSysEngine``.
    """
    pab_config = CogVideoXPABConfig(spatial_threshold=list(pab_threshold), spatial_range=pab_range)
    config = CogVideoXConfig(model_name, enable_pab=enable_video_sys, pab_config=pab_config)
    engine = VideoSysEngine(config)
    return engine


def generate(engine, prompt, num_inference_steps=50, guidance_scale=6.0):
    """Generate one video and save it under ``./.tmp_outputs``.

    Args:
        engine: A ``VideoSysEngine`` from :func:`load_model`.
        prompt: Text prompt to condition the generation on.
        num_inference_steps: Diffusion denoising steps.
        guidance_scale: Classifier-free guidance scale.

    Returns:
        Path of the saved ``.mp4`` file (unique per call).
    """
    video = engine.generate(
        prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).video[0]
    unique_filename = f"{uuid.uuid4().hex}.mp4"
    # Ensure the output directory exists; a fresh container may not have it.
    os.makedirs("./.tmp_outputs", exist_ok=True)
    output_path = os.path.join("./.tmp_outputs", unique_filename)
    engine.save_video(video, output_path)
    return output_path


@spaces.GPU(duration=240)
def generate_vs(
    model_name,
    prompt,
    num_inference_steps,
    guidance_scale,
    threshold_start,
    threshold_end,
    gap,
    progress=gr.Progress(track_tqdm=True),
):
    """Gradio click handler: generate a video with VideoSys PAB enabled.

    Args:
        model_name: Checkpoint id selected in the UI.
        prompt: Text prompt from the UI.
        num_inference_steps: Diffusion steps slider value.
        guidance_scale: Guidance scale slider value.
        threshold_start: PAB broadcast start timestep (1000 is the first).
        threshold_end: PAB broadcast end timestep (0 is the last).
        gap: Attention broadcast range.
        progress: Gradio progress tracker (mirrors tqdm output).

    Returns:
        Path of the generated video file, fed to the ``gr.Video`` output.
    """
    # PAB expects the threshold pair ordered [end, start].
    threshold = [int(threshold_end), int(threshold_start)]
    gap = int(gap)
    engine = load_model(model_name, enable_video_sys=True, pab_threshold=threshold, pab_range=gap)
    video_path = generate(engine, prompt, num_inference_steps, guidance_scale)
    return video_path


css = """
body {
    font-family: Arial, sans-serif;
    line-height: 1.6;
    color: #333;
    margin: 0 auto;
    padding: 20px;
}
.container {
    display: flex;
    flex-direction: column;
    gap: 10px;
}
.row {
    display: flex;
    flex-wrap: wrap;
    gap: 10px;
}
.column {
    flex: 1;
    min-width: 0;
}
.video-output {
    width: 100%;
    max-width: 720px;
    height: auto;
    margin: 0 auto;
}
.server-status {
    margin-top: 5px;
    padding: 5px;
    font-size: 0.8em;
}
.server-status h4 {
    margin: 0 0 3px 0;
    font-size: 0.9em;
}
.server-status .row {
    margin-bottom: 2px;
}
.server-status .textbox {
    min-height: unset !important;
}
.server-status .textbox input {
    padding: 1px 5px !important;
    height: 20px !important;
    font-size: 0.9em !important;
}
.server-status .textbox label {
    margin-bottom: 0 !important;
    font-size: 0.9em !important;
    line-height: 1.2 !important;
}
.server-status .textbox {
    gap: 0 !important;
}
.server-status .textbox input {
    margin-top: -2px !important;
}
@media (max-width: 768px) {
    .row {
        flex-direction: column;
    }
    .column {
        width: 100%;
    }
    /* NOTE(review): the original had this rule after the media block with a
       stray extra closing brace; placing it inside the media query matches
       the evident intent and makes the CSS well-formed. */
    .video-output {
        width: 100%;
        height: auto;
    }
}
"""

with gr.Blocks(css=css) as demo:
    gr.HTML(
        """
KoolCogVideoX Huggingface Space🤗
KoolCogVideoX is fine-tuned on CogVideoX specifically for interior design scenarios.
The demo is powered by https://github.com/NUS-HPC-AI-Lab/VideoSys.
⚠️ This demo is for academic research and experiential use only. Users should strictly adhere to local laws and ethics.
Due to limited GPU quota, the 5B model cannot be run using GPU ZERO. You can duplicate this space and utilize your own resources to run the 5B model.
"""
    )
    with gr.Row():
        # Left column: prompt and generation controls.
        with gr.Column():
            prompt = gr.Textbox(label="Prompt (Less than 200 Words)", value=PROMPT, lines=2)
            with gr.Column():
                gr.Markdown("**Generation Parameters**\n")
                with gr.Row():
                    model_name = gr.Radio(
                        ["bertjiazheng/KoolCogVideoX-2b", "bertjiazheng/KoolCogVideoX-5b"],
                        label="Model Type",
                        value="bertjiazheng/KoolCogVideoX-2b",
                    )
                with gr.Row():
                    num_inference_steps = gr.Slider(label="Inference Steps", maximum=50, value=50)
                    guidance_scale = gr.Slider(label="Guidance Scale", value=6.0, maximum=15.0)
                gr.Markdown("**Pyramid Attention Broadcast Parameters**\n")
                with gr.Row():
                    pab_range = gr.Slider(
                        label="Broadcast Range",
                        value=2,
                        step=1,
                        minimum=1,
                        maximum=4,
                        info="Attention broadcast range.",
                    )
                    pab_threshold_start = gr.Slider(
                        label="Start Timestep",
                        minimum=500,
                        maximum=1000,
                        value=850,
                        step=1,
                        # Typo fix: "fisrt" -> "first".
                        info="Broadcast start timestep (1000 is the first).",
                    )
                    pab_threshold_end = gr.Slider(
                        label="End Timestep",
                        minimum=0,
                        maximum=500,
                        step=1,
                        value=100,
                        info="Broadcast end timestep (0 is the last).",
                    )
                with gr.Row():
                    generate_button_vs = gr.Button("⚡️ Generate Video with VideoSys")
        # Right column: generated video and a static gallery of examples.
        with gr.Column():
            with gr.Row():
                video_output_vs = gr.Video(label="CogVideoX with VideoSys", width=720, height=480)
            gr.Markdown("""
🎥 Video Gallery
These videos are generated by KoolCogVideoX-5b.

A modern living room with a minimalist design, featuring a white sofa, a marble coffee table, a geometric painting, and a chandelier hanging from the ceiling. The room is well-lit with natural light, and the color scheme is neutral with accents of gold and black. The furniture is arranged in a way that creates a comfortable and inviting space.

A modern living room with a minimalist design, featuring a large window, a white ceiling, and a wooden floor. The room is furnished with a white sofa, a gray ottoman, a wooden table, and a hanging light. The space is well-lit and has a clean, contemporary aesthetic.

A modern bedroom with a minimalist design, featuring a large bed with a gray comforter and a blue blanket, a white dresser with a mirror, and a white closet. The room is decorated with framed artwork and a black and white poster on the wall. The floor is made of light wood, and the room has a clean and contemporary feel.

A modern kitchen with a sleek design, featuring a marble countertop, stainless steel appliances, and a variety of bottles and glasses. The kitchen is well-lit with recessed lighting and has a contemporary aesthetic.

""")

    generate_button_vs.click(
        generate_vs,
        inputs=[
            model_name,
            prompt,
            num_inference_steps,
            guidance_scale,
            pab_threshold_start,
            pab_threshold_end,
            pab_range,
        ],
        outputs=[video_output_vs],
        concurrency_id="gen",
        concurrency_limit=1,
    )

if __name__ == "__main__":
    # Serialize generations: one job at a time, up to 10 queued requests.
    demo.queue(max_size=10, default_concurrency_limit=1)
    demo.launch()