Spaces:
Sleeping
Sleeping
Fix ViT-H builder
Browse files
- app.py +1 -1
- tokenize_anything/models/easy_build.py +2 -0
app.py
CHANGED
@@ -31,7 +31,7 @@ from tokenize_anything.utils.image import im_vstack
|
|
31 |
def parse_args():
|
32 |
"""Parse arguments."""
|
33 |
parser = argparse.ArgumentParser(description="Launch gradio application")
|
34 |
-
parser.add_argument("--model-type", type=str, default="
|
35 |
parser.add_argument("--checkpoint", type=str, default="models/tap_vit_h_v1_1.pkl")
|
36 |
parser.add_argument("--concept", type=str, default="concepts/merged_2560.pkl")
|
37 |
parser.add_argument("--device", nargs="+", type=int, default=[0], help="Index of devices")
|
|
|
31 |
def parse_args():
|
32 |
"""Parse arguments."""
|
33 |
parser = argparse.ArgumentParser(description="Launch gradio application")
|
34 |
+
parser.add_argument("--model-type", type=str, default="tap_vit_h")
|
35 |
parser.add_argument("--checkpoint", type=str, default="models/tap_vit_h_v1_1.pkl")
|
36 |
parser.add_argument("--concept", type=str, default="concepts/merged_2560.pkl")
|
37 |
parser.add_argument("--device", nargs="+", type=int, default=[0], help="Index of devices")
|
tokenize_anything/models/easy_build.py
CHANGED
@@ -106,8 +106,10 @@ def image_tokenizer(image_encoder, checkpoint=None, device=0, dtype="float16", *
|
|
106 |
|
107 |
vit_b_encoder = partial(vit_encoder, depth=12, embed_dim=768, num_heads=12)
|
108 |
vit_l_encoder = partial(vit_encoder, depth=24, embed_dim=1024, num_heads=16)
|
|
|
109 |
|
110 |
model_registry = {
|
111 |
"tap_vit_b": partial(image_tokenizer, image_encoder=vit_b_encoder),
|
112 |
"tap_vit_l": partial(image_tokenizer, image_encoder=vit_l_encoder),
|
|
|
113 |
}
|
|
|
106 |
|
107 |
vit_b_encoder = partial(vit_encoder, depth=12, embed_dim=768, num_heads=12)
|
108 |
vit_l_encoder = partial(vit_encoder, depth=24, embed_dim=1024, num_heads=16)
|
109 |
+
vit_h_encoder = partial(vit_encoder, depth=32, embed_dim=1280, num_heads=16)
|
110 |
|
111 |
model_registry = {
|
112 |
"tap_vit_b": partial(image_tokenizer, image_encoder=vit_b_encoder),
|
113 |
"tap_vit_l": partial(image_tokenizer, image_encoder=vit_l_encoder),
|
114 |
+
"tap_vit_h": partial(image_tokenizer, image_encoder=vit_h_encoder),
|
115 |
}
|