svjack/Star_Rail_Tribbie_HunyuanVideo_lora

Introduction

A HunyuanVideo LoRA trained on MMD video clips of Tribbie (缇宝) from Honkai: Star Rail.

Installation

sudo apt-get update && sudo apt-get install ffmpeg git-lfs 
pip install torch torchvision diffusers transformers moviepy==1.0.3 peft safetensors
git clone https://huggingface.co/svjack/Star_Rail_Tribbie_HunyuanVideo_lora && cd Star_Rail_Tribbie_HunyuanVideo_lora

Inference

import torch
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
from diffusers.utils import export_to_video
from safetensors.torch import load_file
import os

def infer_video(
    pretrained_model,
    prompt,
    height,
    width,
    num_frames,
    num_inference_steps,
    seed,
    output_dir,
    use_lora=False,
    lora_path=None,
    alpha=None,
):
    """
    Generate a video with HunyuanVideo, optionally applying a LoRA adapter.

    The transformer is loaded in bfloat16 while the remaining pipeline
    components are loaded in float16. The result is written to
    ``output_lora.mp4`` (LoRA enabled) or ``output_base.mp4`` (LoRA disabled)
    inside ``output_dir`` at 15 fps; repeated calls with the same
    ``output_dir`` therefore overwrite the previous file.

    Args:
        pretrained_model (str): Path or hub id of the pretrained model.
        prompt (str): Text prompt describing the video.
        height (int): Height of the generated video.
        width (int): Width of the generated video.
        num_frames (int): Number of frames to generate.
        num_inference_steps (int): Number of inference steps.
        seed (int): Seed for the CPU random generator.
        output_dir (str): Directory the video is written to. It is created
            if it does not exist.
        use_lora (bool): Whether to apply a LoRA adapter. Defaults to False.
        lora_path (str): Path to the LoRA ``.safetensors`` file. Only used
            when ``use_lora`` is True.
        alpha (int): LoRA alpha. Only used when ``use_lora`` is True;
            treated as 1 when None. The adapter weight is ``alpha / rank``.

    Returns:
        str: Path of the exported video file.

    Raises:
        ValueError: If ``use_lora`` is True and ``lora_path`` is None, or if
            the LoRA file contains no ``.lora_A.weight`` tensor from which
            the rank can be read.
    """
    # Validate the arguments first so a bad call fails before the (large)
    # model is loaded.
    if use_lora and lora_path is None:
        raise ValueError("lora_path must be provided when use_lora is True")

    # Load the transformer.
    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
        pretrained_model,
        subfolder="transformer",
        torch_dtype=torch.bfloat16,
    )

    if use_lora:
        # Load the LoRA weights and read the rank from the lora_A matrices.
        lora_sd = load_file(lora_path)
        rank = 0
        for key, tensor in lora_sd.items():
            if ".lora_A.weight" in key:
                rank = tensor.shape[0]

        # Without a rank the scale below would be a division by zero.
        if rank == 0:
            raise ValueError(
                f"no '.lora_A.weight' tensor with a non-zero rank found in {lora_path}"
            )

        alpha = 1 if alpha is None else alpha
        lora_weight = alpha / rank

        print(f"lora rank = {rank}")
        print(f"alpha = {alpha}")
        print(f"lora weight = {lora_weight}")

        # Apply the LoRA adapter with the computed weight.
        transformer.load_lora_adapter(lora_sd, adapter_name="default_lora")
        transformer.set_adapters(adapter_names="default_lora", weights=lora_weight)

    # Passing transformer= already installs it on the pipeline.
    pipe = HunyuanVideoPipeline.from_pretrained(
        pretrained_model,
        transformer=transformer,
        torch_dtype=torch.float16,
    )

    pipe.vae.enable_tiling(
        tile_sample_min_height=256,
        tile_sample_min_width=256,
        tile_sample_min_num_frames=64,
        tile_sample_stride_height=192,
        tile_sample_stride_width=192,
        tile_sample_stride_num_frames=16,
    )
    pipe.enable_sequential_cpu_offload()

    # Run inference; the first element of .frames is the generated video.
    output = pipe(
        prompt=prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        num_inference_steps=num_inference_steps,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    ).frames[0]

    # Export the video. An empty output_dir means the current directory,
    # which os.makedirs cannot be asked to create.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_filename = "output_lora.mp4" if use_lora else "output_base.mp4"
    video_path = os.path.join(output_dir, output_filename)
    export_to_video(output, video_path, fps=15)

    return video_path

# NOTE: infer_video always writes "output_lora.mp4" (or "output_base.mp4")
# into output_dir, so every example below uses its own directory; sharing one
# directory would let each run overwrite the previous example's video.
# The directories are created here before each call.

### Eat Hamburger

prompt = '''
In the style of Tribbie ,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character is enjoying a large, juicy hamburger, taking slow, deliberate bites as she savors the flavors.
Her movements are relaxed and unhurried, occasionally pausing to wipe her hands with a napkin.
The background remains calm and inviting, with the faint chatter of other patrons and the occasional clink of dishes adding to the ambiance.
The focus remains on the character as she enjoys her meal, her expressive gestures and contented smile drawing the viewer into the moment.
'''

output_dir = "./eat_hamburger"
os.makedirs(output_dir, exist_ok=True)

infer_video(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=512,
    width=512,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir=output_dir,
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
)

### Eat Ice Cream

prompt = '''
In the style of Tribbie,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character sits comfortably, enjoying a scoop of creamy ice cream on a cone. She takes slow, deliberate licks, savoring the sweet flavors with a contented smile. Occasionally, she pauses to wipe a stray drip with a napkin, her movements relaxed and unhurried.
The background remains calm and inviting, with faint chatter and the occasional clink of dishes adding to the ambiance.
The focus stays on her as she enjoys the treat, her expressive gestures and joyful demeanor drawing the viewer into the simple, delightful moment.
'''

output_dir = "./eat_ice_cream"
os.makedirs(output_dir, exist_ok=True)

infer_video(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=512,
    width=512,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir=output_dir,
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
)

### Play with Cat

prompt = '''
In the style of Tribbie,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character sits comfortably, gently cradling a small white kitten in her arms. With soft, deliberate strokes,
she pets the kitten as it purrs and nuzzles against her hand. The background is calm and inviting,
with faint chatter and the occasional clink of dishes adding to the ambiance.
The focus remains on her tender interaction with the kitten, her expressive gestures and contented
smile drawing the viewer into the heartwarming moment.
'''

output_dir = "./play_with_cat"
os.makedirs(output_dir, exist_ok=True)

infer_video(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=768,
    width=1024,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir=output_dir,
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
)

Eat Hamburger

Eat Ice Cream

Play with Cat

STG Inference

import os
import torch
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
from pipeline_stg_hunyuan_video import HunyuanVideoSTGPipeline
from diffusers.utils import export_to_video
from safetensors.torch import load_file

def infer_video_with_stg(
    pretrained_model,
    prompt,
    height,
    width,
    num_frames,
    num_inference_steps,
    seed,
    output_dir,
    use_lora=False,
    lora_path=None,
    alpha=None,
    stg_mode="STG",
    stg_applied_layers_idx=None,
    stg_scale=0.7,
    do_rescaling=False,
):
    """
    Generate a video with the HunyuanVideo STG pipeline, optionally with LoRA.

    The transformer is loaded in bfloat16 while the remaining pipeline
    components are loaded in float16. The video is exported at 15 fps into
    ``output_dir``. The file name is derived from the STG settings only
    (``CFG_rescale_<do_rescaling>.mp4`` when ``stg_scale`` is 0, otherwise
    ``<stg_mode>_scale_<stg_scale>_layers_<idx>_rescale_<do_rescaling>.mp4``),
    so calls with identical STG settings and the same ``output_dir``
    overwrite each other.

    Args:
        pretrained_model (str): Path or hub id of the pretrained model.
        prompt (str): Text prompt describing the video.
        height (int): Height of the generated video.
        width (int): Width of the generated video.
        num_frames (int): Number of frames to generate.
        num_inference_steps (int): Number of inference steps.
        seed (int): Seed for the CPU random generator.
        output_dir (str): Directory the video is written to. It is created
            if it does not exist.
        use_lora (bool): Whether to apply a LoRA adapter. Defaults to False.
        lora_path (str): Path to the LoRA ``.safetensors`` file. Only used
            when ``use_lora`` is True.
        alpha (int): LoRA alpha. Only used when ``use_lora`` is True;
            treated as 1 when None. The adapter weight is ``alpha / rank``.
        stg_mode (str): STG mode label, "STG" by default. It is only used
            to build the output file name; it is not passed to the pipeline.
        stg_applied_layers_idx (list): Layer indices STG is applied to.
            Defaults to ``[2]`` when None.
        stg_scale (float): STG scale passed to the pipeline. Defaults to 0.7.
        do_rescaling (bool): Rescaling flag passed to the pipeline.
            Defaults to False.

    Returns:
        str: Path of the exported video file.

    Raises:
        ValueError: If ``use_lora`` is True and ``lora_path`` is None, or if
            the LoRA file contains no ``.lora_A.weight`` tensor from which
            the rank can be read.
    """
    # Validate the arguments first so a bad call fails before the (large)
    # model is loaded.
    if use_lora and lora_path is None:
        raise ValueError("lora_path must be provided when use_lora is True")

    # A None default avoids sharing one mutable list between calls.
    if stg_applied_layers_idx is None:
        stg_applied_layers_idx = [2]

    # Load the transformer.
    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
        pretrained_model,
        subfolder="transformer",
        torch_dtype=torch.bfloat16,
    )

    if use_lora:
        # Load the LoRA weights and read the rank from the lora_A matrices.
        lora_sd = load_file(lora_path)
        rank = 0
        for key, tensor in lora_sd.items():
            if ".lora_A.weight" in key:
                rank = tensor.shape[0]

        # Without a rank the scale below would be a division by zero.
        if rank == 0:
            raise ValueError(
                f"no '.lora_A.weight' tensor with a non-zero rank found in {lora_path}"
            )

        alpha = 1 if alpha is None else alpha
        lora_weight = alpha / rank

        print(f"lora rank = {rank}")
        print(f"alpha = {alpha}")
        print(f"lora weight = {lora_weight}")

        # Apply the LoRA adapter with the computed weight.
        transformer.load_lora_adapter(lora_sd, adapter_name="default_lora")
        transformer.set_adapters(adapter_names="default_lora", weights=lora_weight)

    # Passing transformer= already installs it on the pipeline.
    pipe = HunyuanVideoSTGPipeline.from_pretrained(
        pretrained_model,
        transformer=transformer,
        torch_dtype=torch.float16,
    )

    pipe.vae.enable_tiling(
        tile_sample_min_height=256,
        tile_sample_min_width=256,
        tile_sample_min_num_frames=64,
        tile_sample_stride_height=192,
        tile_sample_stride_width=192,
        tile_sample_stride_num_frames=16,
    )
    pipe.enable_sequential_cpu_offload()

    # Run inference; the first element of .frames is the generated video.
    output = pipe(
        prompt=prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        num_inference_steps=num_inference_steps,
        stg_applied_layers_idx=stg_applied_layers_idx,
        stg_scale=stg_scale,
        do_rescaling=do_rescaling,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    ).frames[0]

    # Build the file name from the STG settings.
    if stg_scale == 0:
        video_name = f"CFG_rescale_{do_rescaling}.mp4"
    else:
        layers_str = "_".join(map(str, stg_applied_layers_idx))
        video_name = f"{stg_mode}_scale_{stg_scale}_layers_{layers_str}_rescale_{do_rescaling}.mp4"

    # An empty output_dir means the current directory, which os.makedirs
    # cannot be asked to create.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    video_path = os.path.join(output_dir, video_name)
    export_to_video(output, video_path, fps=15)

    print(f"Video saved to {video_path}")
    return video_path

# NOTE: infer_video_with_stg builds the output file name from the STG
# settings only, and those are identical for all three examples below, so
# each example writes into its own directory instead of sharing "." (where
# every run would overwrite the previous video).

### Eat Hamburger

prompt = '''
In the style of Tribbie ,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character is enjoying a large, juicy hamburger, taking slow, deliberate bites as she savors the flavors.
Her movements are relaxed and unhurried, occasionally pausing to wipe her hands with a napkin.
The background remains calm and inviting, with the faint chatter of other patrons and the occasional clink of dishes adding to the ambiance.
The focus remains on the character as she enjoys her meal, her expressive gestures and contented smile drawing the viewer into the moment.
'''

infer_video_with_stg(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=512,
    width=512,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir="./stg_eat_hamburger",
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
    stg_mode="STG",
    stg_applied_layers_idx=[2],
    stg_scale=0.7,
    do_rescaling=False,
)

### Eat Ice Cream

prompt = '''
In the style of Tribbie,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character sits comfortably, enjoying a scoop of creamy ice cream on a cone. She takes slow, deliberate licks, savoring the sweet flavors with a contented smile. Occasionally, she pauses to wipe a stray drip with a napkin, her movements relaxed and unhurried.
The background remains calm and inviting, with faint chatter and the occasional clink of dishes adding to the ambiance.
The focus stays on her as she enjoys the treat, her expressive gestures and joyful demeanor drawing the viewer into the simple, delightful moment.
'''

infer_video_with_stg(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=512,
    width=512,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir="./stg_eat_ice_cream",
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
    stg_mode="STG",
    stg_applied_layers_idx=[2],
    stg_scale=0.7,
    do_rescaling=False,
)

### Play with Cat

prompt = '''
In the style of Tribbie,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character sits comfortably, gently cradling a small white kitten in her arms. With soft, deliberate strokes,
she pets the kitten as it purrs and nuzzles against her hand. The background is calm and inviting,
with faint chatter and the occasional clink of dishes adding to the ambiance.
The focus remains on her tender interaction with the kitten, her expressive gestures and contented
smile drawing the viewer into the heartwarming moment.
'''

infer_video_with_stg(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=512,
    width=512,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir="./stg_play_with_cat",
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
    stg_mode="STG",
    stg_applied_layers_idx=[2],
    stg_scale=0.7,
    do_rescaling=False,
)