DataHammer
committed on
Upload folder using huggingface_hub
Browse files- README.md +40 -0
- adapter_config.json +26 -0
- adapter_model.safetensors +3 -0
- additional_config.json +1 -0
- generation_config.json +10 -0
README.md
CHANGED
@@ -1,3 +1,43 @@
|
|
1 |
---
|
2 |
license: mit
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: mit
|
3 |
---
|
4 |
+
|
5 |
+
This is the fine-tuned MiniCPM-V-2_6 adapter for paper [**Automatic Evaluation for Text-to-Image Generation: Fine-grained Framework,
|
6 |
+
Distilled Evaluation Model and Meta-Evaluation Benchmark**]()
|
7 |
+
|
8 |
+
## Performance
|
9 |
+
|
10 |
+
| Methods | Manual-1 ($\rho$) | Manual-1 ($\tau$) | Manual-2 ($\rho$) | Manual-2 ($\tau$) | Manual-3 ($\rho$) | Manual-3 ($\tau$) | Manual-Avg. ($\rho$) | Manual-Avg. ($\tau$) |
|
11 |
+
| ------------------------------------ | ----------------- | ----------------- | ----------------- | ----------------- | ----------------- | ----------------- | -------------------- | -------------------- |
|
12 |
+
| Average Score of Human Annotators |
|
13 |
+
| Manual-Avg. | 0.9511 | 0.8807 | 0.9452 | 0.8686 | 0.9513 | 0.8793 | - | - |
|
14 |
+
| Traditional Methods |
|
15 |
+
| FID | -0.1183 | -0.0871 | -0.1000 | -0.0724 | -0.0897 | -0.0685 | -0.1231 | -0.0862 |
|
16 |
+
| LPIPS | -0.1206 | -0.0898 | -0.0882 | -0.0644 | -0.1025 | -0.0732 | -0.1244 | -0.0856 |
|
17 |
+
| DreamSim | -0.1284 | -0.0953 | -0.1230 | -0.0897 | -0.1308 | -0.0973 | -0.1382 | -0.0968 |
|
18 |
+
| CLIPScore | 0.1532 | 0.1078 | 0.1725 | 0.1210 | 0.1227 | 0.0855 | 0.1505 | 0.1016 |
|
19 |
+
| BLIPv2Score | 0.2278 | 0.1588 | 0.2280 | 0.1617 | 0.2134 | 0.1477 | 0.2152 | 0.1423 |
|
20 |
+
| ImageReward | 0.4171 | 0.3065 | 0.3712 | 0.2690 | 0.4134 | 0.3030 | 0.4046 | 0.2839 |
|
21 |
+
| LLM/MLLM-Based Methods |
|
22 |
+
| LLMScore$_{GPT-4}$ | 0.3009 | 0.2212 | 0.2697 | 0.2012 | 0.3299 | 0.2497 | 0.3096 | 0.2228 |
|
23 |
+
| DSG$_{Dependent}$ | 0.4742 | 0.3790 | 0.4204 | 0.3339 | 0.4562 | 0.3652 | 0.4582 | 0.3512 |
|
24 |
+
| DSG$_{Independent}$ | 0.4815 | 0.3891 | 0.4382 | 0.3502 | 0.4721 | 0.3827 | 0.4704 | 0.3655 |
|
25 |
+
| VQAScore$_{CLIP-FlanT5}$ | 0.4984 | 0.3768 | 0.4864 | 0.3619 | 0.5118 | 0.3854 | 0.5116 | 0.3712 |
|
26 |
+
| VIEScore$_{MiniCPM-V-2.6}$ | 0.2834 | 0.2251 | 0.2814 | 0.2231 | 0.3016 | 0.2422 | 0.2941 | 0.2250 |
|
27 |
+
| VIEScore$_{MiniCPM-V-2.6}$ | 0.4906 | 0.3878 | 0.4869 | 0.3836 | 0.4889 | 0.3899 | 0.5101 | 0.3897 |
|
28 |
+
| VIEScore$_{GPT-4o}$ | **0.5522** | **0.4283** | 0.5306 | 0.4101 | 0.5170 | 0.4024 | 0.5545 | 0.4170 |
|
29 |
+
| Closed-Source MLLM with Our Pipeline |
|
30 |
+
| Ours$_{GPT-4o}$ | 0.5437 | 0.4302 | 0.5355 | 0.4214 | 0.5138 | 0.4061 | 0.5566 | 0.4285 |
|
31 |
+
| Our Fine-tuned Open-source MLLM |
|
32 |
+
| Ours$_{MiniCPM-V-2.6}$ | 0.5306 | 0.4214 | **0.6067** | **0.4769** | **0.5744** | **0.4563** | **0.5938** | **0.4566** |
|
33 |
+
|
34 |
+
|
35 |
+
## Quick Start
|
36 |
+
* Merge the adapter with original MiniCPM-V-2_6 checkpoint with [SWIFT](https://github.com/modelscope/ms-swift):
|
37 |
+
```shell
|
38 |
+
swift export \
|
39 |
+
--model_type minicpm-v-v2_6-chat \
|
40 |
+
--model_id_or_path openbmb/MiniCPM-V-2_6 \
|
41 |
+
--ckpt_dir /path/to/the/downloaded/adapter \
|
42 |
+
--merge_lora true
|
43 |
+
```
|
adapter_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "/disk/maziao/model-zoo/hf-repo/models--openbmb--MiniCPM-V-2_6",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 256,
|
14 |
+
"lora_dropout": 0.05,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": [],
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 128,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": "^(llm|resampler)(?!.*(lm_head|output|emb|wte|shared)).*",
|
23 |
+
"task_type": "CAUSAL_LM",
|
24 |
+
"use_dora": false,
|
25 |
+
"use_rslora": false
|
26 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46b8ddad79ad9f3cfbdfd95ff726d23c416d6be8c587c87e971e33b22b4aa850
|
3 |
+
size 649025984
|
additional_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06}
|
generation_config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_sample": true,
|
3 |
+
"eos_token_id": 151645,
|
4 |
+
"max_new_tokens": 2048,
|
5 |
+
"pad_token_id": 151643,
|
6 |
+
"temperature": 0.3,
|
7 |
+
"top_k": 20,
|
8 |
+
"top_p": 0.7,
|
9 |
+
"transformers_version": "4.37.2"
|
10 |
+
}
|