YAML Metadata
Warning:
empty or missing yaml metadata in repo card
(https://huggingface.co/docs/hub/model-cards#model-card-metadata)
Introduction
Trained on MMD video clips of Tribbie (缇宝) from Honkai: Star Rail.
Installation
sudo apt-get update && sudo apt-get install ffmpeg git-lfs
pip install torch torchvision diffusers transformers moviepy==1.0.3 peft safetensors
git clone https://huggingface.co/svjack/Star_Rail_Tribbie_HunyuanVideo_lora && cd Star_Rail_Tribbie_HunyuanVideo_lora
Inference
import torch
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
from diffusers.utils import export_to_video
from safetensors.torch import load_file
import os
def infer_video(
    pretrained_model,
    prompt,
    height,
    width,
    num_frames,
    num_inference_steps,
    seed,
    output_dir,
    use_lora=False,
    lora_path=None,
    alpha=None,
):
    """Generate a video with HunyuanVideo, with or without a LoRA adapter.

    The result is written to ``output_dir`` as ``output_lora.mp4`` when
    ``use_lora`` is true and as ``output_base.mp4`` otherwise, at 15 fps.

    Args:
        pretrained_model (str): Path or hub id of the pretrained model.
        prompt (str): Text prompt describing the video.
        height (int): Height of the generated video.
        width (int): Width of the generated video.
        num_frames (int): Number of frames to generate.
        num_inference_steps (int): Number of inference steps.
        seed (int): Seed for the CPU random generator.
        output_dir (str): Directory the video is written to; created if
            it does not exist.
        use_lora (bool): Whether to apply a LoRA adapter. Defaults to False.
        lora_path (str): Path to the LoRA safetensors file. Only used when
            ``use_lora`` is true.
        alpha (int): LoRA alpha. Only used when ``use_lora`` is true;
            treated as 1 when omitted.

    Raises:
        ValueError: If ``use_lora`` is true and ``lora_path`` is None, or if
            the LoRA file contains no ``.lora_A.weight`` tensor from which
            the rank can be read.
    """
    # Check the arguments before the transformer weights are loaded, so a
    # bad call fails immediately.
    if use_lora and lora_path is None:
        raise ValueError("lora_path must be provided when use_lora is True")

    # Load the transformer on its own so the LoRA can be attached to it
    # before the pipeline is assembled.
    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
        pretrained_model,
        subfolder="transformer",
        torch_dtype=torch.bfloat16,
    )

    if use_lora:
        lora_sd = load_file(lora_path)

        # The rank is the first dimension of a lora_A weight. When several
        # are present the last one wins.
        ranks = [
            weight.shape[0]
            for key, weight in lora_sd.items()
            if ".lora_A.weight" in key
        ]
        if not ranks:
            # Without a rank the scale below would divide by zero.
            raise ValueError(
                f"no '.lora_A.weight' tensors found in {lora_path}; "
                "cannot determine the LoRA rank"
            )
        rank = ranks[-1]

        alpha = 1 if alpha is None else alpha
        lora_weight = alpha / rank
        print(f"lora rank = {rank}")
        print(f"alpha = {alpha}")
        print(f"lora weight = {lora_weight}")

        # Attach the adapter and scale it by alpha / rank.
        transformer.load_lora_adapter(lora_sd, adapter_name="default_lora")
        transformer.set_adapters(adapter_names="default_lora", weights=lora_weight)

    pipe = HunyuanVideoPipeline.from_pretrained(
        pretrained_model,
        transformer=transformer,
        torch_dtype=torch.float16,
    )
    pipe.vae.enable_tiling(
        tile_sample_min_height=256,
        tile_sample_min_width=256,
        tile_sample_min_num_frames=64,
        tile_sample_stride_height=192,
        tile_sample_stride_width=192,
        tile_sample_stride_num_frames=16,
    )
    pipe.enable_sequential_cpu_offload()

    # Run inference; the generator is seeded for reproducibility.
    output = pipe(
        prompt=prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        num_inference_steps=num_inference_steps,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    ).frames[0]

    # Export the video, creating the target directory first if needed.
    output_filename = "output_lora.mp4" if use_lora else "output_base.mp4"
    os.makedirs(output_dir, exist_ok=True)
    export_to_video(
        output,
        os.path.join(output_dir, output_filename),
        fps=15,
    )
# Every run below saves its video as "output_lora.mp4", so each example
# writes into its own directory to avoid overwriting the previous result.

### Eat Hamburger
prompt = '''
In the style of Tribbie ,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character is enjoying a large, juicy hamburger, taking slow, deliberate bites as she savors the flavors.
Her movements are relaxed and unhurried, occasionally pausing to wipe her hands with a napkin.
The background remains calm and inviting, with the faint chatter of other patrons and the occasional clink of dishes adding to the ambiance.
The focus remains on the character as she enjoys her meal, her expressive gestures and contented smile drawing the viewer into the moment.
'''
os.makedirs("./eat_hamburger", exist_ok=True)
infer_video(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=512,
    width=512,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir="./eat_hamburger",
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
)

### Eat Ice Cream
prompt = '''
In the style of Tribbie,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character sits comfortably, enjoying a scoop of creamy ice cream on a cone. She takes slow, deliberate licks, savoring the sweet flavors with a contented smile. Occasionally, she pauses to wipe a stray drip with a napkin, her movements relaxed and unhurried.
The background remains calm and inviting, with faint chatter and the occasional clink of dishes adding to the ambiance.
The focus stays on her as she enjoys the treat, her expressive gestures and joyful demeanor drawing the viewer into the simple, delightful moment.
'''
os.makedirs("./eat_ice_cream", exist_ok=True)
infer_video(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=512,
    width=512,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir="./eat_ice_cream",
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
)

### Play with Cat
prompt = '''
In the style of Tribbie,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character sits comfortably, gently cradling a small white kitten in her arms. With soft, deliberate strokes,
she pets the kitten as it purrs and nuzzles against her hand. The background is calm and inviting,
with faint chatter and the occasional clink of dishes adding to the ambiance.
The focus remains on her tender interaction with the kitten, her expressive gestures and contented
smile drawing the viewer into the heartwarming moment.
'''
os.makedirs("./play_with_cat", exist_ok=True)
infer_video(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=768,
    width=1024,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir="./play_with_cat",
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
)
Eat Hamburger
Eat Ice Cream
Play with Cat
STG Inference
import os
import torch
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
from pipeline_stg_hunyuan_video import HunyuanVideoSTGPipeline
from diffusers.utils import export_to_video
from safetensors.torch import load_file
def infer_video_with_stg(
    pretrained_model,
    prompt,
    height,
    width,
    num_frames,
    num_inference_steps,
    seed,
    output_dir,
    use_lora=False,
    lora_path=None,
    alpha=None,
    stg_mode="STG",
    stg_applied_layers_idx=None,
    stg_scale=0.7,
    do_rescaling=False,
):
    """Generate a video with the HunyuanVideo STG pipeline, with or without a LoRA.

    The video is written to ``output_dir`` at 15 fps. Its file name is
    derived from the STG settings: ``CFG_rescale_<do_rescaling>.mp4`` when
    ``stg_scale`` is 0, otherwise
    ``<stg_mode>_scale_<stg_scale>_layers_<idx>_rescale_<do_rescaling>.mp4``.

    Args:
        pretrained_model (str): Path or hub id of the pretrained model.
        prompt (str): Text prompt describing the video.
        height (int): Height of the generated video.
        width (int): Width of the generated video.
        num_frames (int): Number of frames to generate.
        num_inference_steps (int): Number of inference steps.
        seed (int): Seed for the CPU random generator.
        output_dir (str): Directory the video is written to; created if
            it does not exist.
        use_lora (bool): Whether to apply a LoRA adapter. Defaults to False.
        lora_path (str): Path to the LoRA safetensors file. Only used when
            ``use_lora`` is true.
        alpha (int): LoRA alpha. Only used when ``use_lora`` is true;
            treated as 1 when omitted.
        stg_mode (str): STG mode label, default "STG". Here it is only used
            in the output file name; it is not passed to the pipeline.
        stg_applied_layers_idx (list): Layer indices STG is applied to.
            Defaults to ``[2]`` when omitted.
        stg_scale (float): STG scale. Defaults to 0.7.
        do_rescaling (bool): Whether to rescale. Defaults to False.

    Raises:
        ValueError: If ``use_lora`` is true and ``lora_path`` is None, or if
            the LoRA file contains no ``.lora_A.weight`` tensor from which
            the rank can be read.
    """
    # Check the arguments before the transformer weights are loaded, so a
    # bad call fails immediately.
    if use_lora and lora_path is None:
        raise ValueError("lora_path must be provided when use_lora is True")

    # A None sentinel replaces the former mutable list default; the
    # effective default is still [2].
    if stg_applied_layers_idx is None:
        stg_applied_layers_idx = [2]

    # Load the transformer on its own so the LoRA can be attached to it
    # before the pipeline is assembled.
    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
        pretrained_model,
        subfolder="transformer",
        torch_dtype=torch.bfloat16,
    )

    if use_lora:
        lora_sd = load_file(lora_path)

        # The rank is the first dimension of a lora_A weight. When several
        # are present the last one wins.
        ranks = [
            weight.shape[0]
            for key, weight in lora_sd.items()
            if ".lora_A.weight" in key
        ]
        if not ranks:
            # Without a rank the scale below would divide by zero.
            raise ValueError(
                f"no '.lora_A.weight' tensors found in {lora_path}; "
                "cannot determine the LoRA rank"
            )
        rank = ranks[-1]

        alpha = 1 if alpha is None else alpha
        lora_weight = alpha / rank
        print(f"lora rank = {rank}")
        print(f"alpha = {alpha}")
        print(f"lora weight = {lora_weight}")

        # Attach the adapter and scale it by alpha / rank.
        transformer.load_lora_adapter(lora_sd, adapter_name="default_lora")
        transformer.set_adapters(adapter_names="default_lora", weights=lora_weight)

    pipe = HunyuanVideoSTGPipeline.from_pretrained(
        pretrained_model,
        transformer=transformer,
        torch_dtype=torch.float16,
    )
    pipe.vae.enable_tiling(
        tile_sample_min_height=256,
        tile_sample_min_width=256,
        tile_sample_min_num_frames=64,
        tile_sample_stride_height=192,
        tile_sample_stride_width=192,
        tile_sample_stride_num_frames=16,
    )
    pipe.enable_sequential_cpu_offload()

    # Run inference; the generator is seeded for reproducibility.
    output = pipe(
        prompt=prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        num_inference_steps=num_inference_steps,
        stg_applied_layers_idx=stg_applied_layers_idx,
        stg_scale=stg_scale,
        do_rescaling=do_rescaling,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    ).frames[0]

    # Name the file after the STG settings, then export it.
    if stg_scale == 0:
        video_name = f"CFG_rescale_{do_rescaling}.mp4"
    else:
        layers_str = "_".join(map(str, stg_applied_layers_idx))
        video_name = f"{stg_mode}_scale_{stg_scale}_layers_{layers_str}_rescale_{do_rescaling}.mp4"

    os.makedirs(output_dir, exist_ok=True)
    video_path = os.path.join(output_dir, video_name)
    export_to_video(output, video_path, fps=15)
    print(f"Video saved to {video_path}")
# All three runs use the same STG settings and therefore the same output
# file name, so each example writes into its own directory to avoid
# overwriting the previous result.

### Eat Hamburger
prompt = '''
In the style of Tribbie ,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character is enjoying a large, juicy hamburger, taking slow, deliberate bites as she savors the flavors.
Her movements are relaxed and unhurried, occasionally pausing to wipe her hands with a napkin.
The background remains calm and inviting, with the faint chatter of other patrons and the occasional clink of dishes adding to the ambiance.
The focus remains on the character as she enjoys her meal, her expressive gestures and contented smile drawing the viewer into the moment.
'''
infer_video_with_stg(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=512,
    width=512,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir="./eat_hamburger",
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
    stg_mode="STG",
    stg_applied_layers_idx=[2],
    stg_scale=0.7,
    do_rescaling=False,
)

### Eat Ice Cream
prompt = '''
In the style of Tribbie,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character sits comfortably, enjoying a scoop of creamy ice cream on a cone. She takes slow, deliberate licks, savoring the sweet flavors with a contented smile. Occasionally, she pauses to wipe a stray drip with a napkin, her movements relaxed and unhurried.
The background remains calm and inviting, with faint chatter and the occasional clink of dishes adding to the ambiance.
The focus stays on her as she enjoys the treat, her expressive gestures and joyful demeanor drawing the viewer into the simple, delightful moment.
'''
infer_video_with_stg(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=512,
    width=512,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir="./eat_ice_cream",
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
    stg_mode="STG",
    stg_applied_layers_idx=[2],
    stg_scale=0.7,
    do_rescaling=False,
)

### Play with Cat
prompt = '''
In the style of Tribbie,
The video features an animated character with red hair and a white dress adorned with floral patterns.
The character sits comfortably, gently cradling a small white kitten in her arms. With soft, deliberate strokes,
she pets the kitten as it purrs and nuzzles against her hand. The background is calm and inviting,
with faint chatter and the occasional clink of dishes adding to the ambiance.
The focus remains on her tender interaction with the kitten, her expressive gestures and contented
smile drawing the viewer into the heartwarming moment.
'''
infer_video_with_stg(
    pretrained_model="hunyuanvideo-community/HunyuanVideo",
    prompt=prompt,
    height=512,
    width=512,
    num_frames=33,
    num_inference_steps=20,
    seed=42,
    output_dir="./play_with_cat",
    use_lora=True,
    lora_path="checkpoints/hyv-lora-00025500.safetensors",
    alpha=16,
    stg_mode="STG",
    stg_applied_layers_idx=[2],
    stg_scale=0.7,
    do_rescaling=False,
)
Eat Hamburger
Eat Ice Cream
Play with Cat
Inference Providers
NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API:
The model has no library tag.