tousin23 committed on
Commit 6551065 · verified · 1 Parent(s): fabd620

Upload 41 files

.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ evalcap/meteor/meteor-1.5.jar filter=lfs diff=lfs merge=lfs -text
37
+ evalcap/tokenizer/stanford-corenlp-3.4.1.jar filter=lfs diff=lfs merge=lfs -text
38
+ images/align.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,160 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
LICENSE ADDED
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2023, zhanyuwang
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
README.md CHANGED
@@ -1,12 +1,77 @@
1
- ---
2
- title: X RayDemo
3
- emoji: 🏃
4
- colorFrom: green
5
- colorTo: green
6
- sdk: streamlit
7
- sdk_version: 1.35.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ # R2GenGPT: Radiology Report Generation with Frozen LLMs
2
+
3
+ ## Introduction
4
+ ![overview](https://github.com/wang-zhanyu/R2GenGPT/blob/main/images/align.png)
5
+
6
+ ## Getting Started
7
+ ### Installation
8
+
9
+ **1. Prepare the code and the environment**
10
+
11
+ Git clone our repository and install the requirements.
12
+
13
+ ```bash
14
+ git clone https://github.com/wang-zhanyu/R2GenGPT.git
15
+ cd R2GenGPT
16
+ pip install -r requirements.txt
17
+ ```
18
+
19
+
20
+ **2. Prepare the training dataset**
21
+
22
+ IU-xray: download the dataset from [here](https://drive.google.com/file/d/1c0BXEuDy8Cmm2jfN0YYGkQxFZd2ZIoLg/view)
23
+
24
+ MIMIC-CXR: you can download our preprocessed annotation file from [here](https://drive.google.com/file/d/14689ztodTtrQJYs--ihB_hgsPMMNHX-H/view?usp=sharing) and download the images from the [official website](https://physionet.org/content/mimic-cxr-jpg/2.0.0/)
25
+
26
+ After downloading the data, place it under the ./data folder so that it matches the --annotation and --base_dir defaults in configs/config.py (e.g. ./data/mimic_cxr/annotation.json and ./data/mimic_cxr/images for MIMIC-CXR).
27
+
28
+ ### Training
29
+
30
+ For shallow alignment
31
+
32
+ ```bash
33
+ bash scripts/4-1.shallow_run.sh
34
+ ```
35
+
36
+ For delta alignment
37
+
38
+ ```bash
39
+ bash scripts/5-1.delta_run.sh
40
+ ```
41
+
42
+ For deep alignment
43
+
44
+ ```bash
45
+ bash scripts/6-1.deep_run.sh
46
+ ```
47
+
48
+ ### Testing (For MIMIC-CXR)
49
+ You can download our pretrained Delta checkpoints from [here](https://drive.google.com/drive/folders/1ywEITWfYIAAYy0VY1IZ24Ec_GoNmkqIY?usp=sharing).
50
+
51
+ For shallow alignment
52
+
53
+ ```bash
54
+ bash scripts/4-2.shallow_test.sh
55
+ ```
56
+
57
+ For delta alignment
58
+
59
+ ```bash
60
+ bash scripts/5-2.delta_test.sh
61
+ ```
62
+
63
+ For deep alignment
64
+
65
+ ```bash
66
+ bash scripts/6-2.shallow_test.sh
67
+ ```
68
+
69
+
70
+ ## Acknowledgement
71
+
72
+ + [MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4) Some of the code in this repo is based on MiniGPT-4.
73
+ + [Llama2](https://github.com/facebookresearch/llama) The language ability of Llama-2 with only 7B parameters is remarkable.
74
+
75
+
76
+ ## License
77
+ This repository is released under the [BSD 3-Clause License](LICENSE).
configs/config.py ADDED
@@ -0,0 +1,68 @@
1
+ import argparse
2
+
3
+ parser = argparse.ArgumentParser(description="hyper-parameter for R2GenGPT")
4
+ # ========================= Dataset Configs ==========================
5
+ parser.add_argument('--test', action='store_true', help="only run test set")
6
+ parser.add_argument('--validate', action='store_true', help="only run validation set")
7
+ parser.add_argument('--dataset', type=str, default='mimic_cxr', help="iu-xray or mimic-cxr")
8
+ parser.add_argument('--annotation', type=str, default=r'./data/mimic_cxr/annotation.json', help="annotation file of the dataset")
9
+ parser.add_argument('--base_dir', type=str, default=r'./data/mimic_cxr/images', help="base dir to help find images")
10
+ parser.add_argument('--batch_size', default=6, type=int, help="training batch size per worker")
11
+ parser.add_argument('--val_batch_size', default=16, type=int, help="validation batch size per worker")
12
+ parser.add_argument('--test_batch_size', default=16, type=int, help="test batch size per worker")
13
+ parser.add_argument('--prefetch_factor', default=4, type=int, help="number of batches prefetched by each dataloader worker")
14
+ parser.add_argument('--num_workers', default=8, type=int, help="number of CPU workers for the dataloaders")
15
+
16
+ # ========================= Model Settings ============================
17
+ parser.add_argument('--vision_model', default='microsoft/swin-base-patch4-window7-224', type=str, help="vision model to use")
18
+ parser.add_argument('--llama_model', default='meta-llama/Llama-2-7b-chat-hf', type=str, help="LLM model to use")
19
+ parser.add_argument('--freeze_vm', default=True, type=lambda x: (str(x).lower() == 'true'), help='freeze vision model')
20
+ parser.add_argument('--llm_use_lora', default=False, type=lambda x: (str(x).lower() == 'true'), help="whether use lora for LLM model")
21
+ parser.add_argument('--llm_r', default=16, type=int, help='The dimension used by the LoRA update matrices')
22
+ parser.add_argument('--llm_alpha', default=16, type=int, help='Scaling factor.')
23
+ parser.add_argument('--vis_use_lora', default=False, type=lambda x: (str(x).lower() == 'true'), help="whether use lora for vision model")
24
+ parser.add_argument('--vis_r', default=16, type=int, help='The dimension used by the LoRA update matrices')
25
+ parser.add_argument('--vis_alpha', default=16, type=int, help='Scaling factor.')
26
+ parser.add_argument('--lora_dropout', default=0.1, type=float, help='lora dropout')
27
+ parser.add_argument('--global_only', default=False, type=lambda x: (str(x).lower() == 'true'), help='use global embedding only')
28
+ parser.add_argument('--low_resource', default=False, type=bool)
29
+ parser.add_argument('--end_sym', default='</s>', type=str)
30
+
31
+ # ======================== SavedModel Configs ===========================
32
+ parser.add_argument('--savedmodel_path', type=str, default='save/mimic/v1')
33
+ parser.add_argument('--ckpt_file', type=str, default=None, help='the checkpoint file to load')
34
+ parser.add_argument('--delta_file', type=str, default=None, help='the delta file to load')
35
+ parser.add_argument('--weights', type=list, default=[0.5, 0.5])
36
+ parser.add_argument('--scorer_types', type=list, default=['Bleu_4', 'CIDEr'])
37
+
38
+ # ========================= Learning Configs ==========================
39
+ parser.add_argument('--learning_rate', default=1e-4, type=float, help='initial learning rate')
40
+ parser.add_argument('--gradient_clip_val', default=None, type=int, help='gradient clip value')
41
+
42
+ # ========================= Decoding Settings ==========================
43
+ parser.add_argument('--beam_size', type=int, default=3)
44
+ parser.add_argument('--do_sample', type=bool, default=False)
45
+ parser.add_argument('--no_repeat_ngram_size', type=int, default=2)
46
+ parser.add_argument('--num_beam_groups', type=int, default=1)
47
+ parser.add_argument('--min_new_tokens', type=int, default=80)
48
+ parser.add_argument('--max_new_tokens', type=int, default=120)
49
+ parser.add_argument('--max_length', type=int, default=100)
50
+ parser.add_argument('--repetition_penalty', type=float, default=2.0)
51
+ parser.add_argument('--length_penalty', type=float, default=2.0)
52
+ parser.add_argument('--diversity_penalty', type=float, default=0)
53
+ parser.add_argument('--temperature', type=float, default=0)
54
+
55
+ # ====================== Pytorch Lightning ===========================
56
+ parser.add_argument('--devices', type=int, default=2, help='how many gpus to use')
57
+ parser.add_argument('--num_nodes', type=int, default=1, help='Number of GPU nodes for distributed training.')
58
+ parser.add_argument('--accelerator', type=str, default="gpu", choices=["cpu", "gpu", "tpu", "ipu", "hpu", "mps"], help='accelerator types')
59
+ parser.add_argument('--strategy', type=str, default="ddp", help='default ddp for multi-gpus')
60
+ parser.add_argument('--precision', type=str, default='bf16-mixed', help='16, 32, or bf16-mixed; used for PyTorch AMP autocast')
61
+ parser.add_argument('--limit_val_batches', type=float, default=1.0, help='How much of validation dataset to check (float = fraction, int = num_batches).')
62
+ parser.add_argument('--limit_test_batches', type=float, default=1.0, help='How much of test dataset to check (float = fraction, int = num_batches).')
63
+ parser.add_argument('--limit_train_batches', type=float, default=1.0, help='How much of training dataset to check (float = fraction, int = num_batches)')
64
+ parser.add_argument('--max_epochs', type=int, default=3, help='Stop training once this number of epochs is reached')
65
+ parser.add_argument('--every_n_train_steps', type=int, default=0, help='How many training steps to save a checkpoint')
66
+ parser.add_argument('--val_check_interval', type=float, default=1.0, help='How often to check the validation set')
67
+ parser.add_argument('--accumulate_grad_batches', type=int, default=1, help='Accumulates gradients over k batches before stepping the optimizer')
68
+ parser.add_argument("--num_sanity_val_steps", type=int, default=2, help='Sanity check runs n validation batches before starting the training routine')
dataset/data_helper.py ADDED
@@ -0,0 +1,98 @@
1
+
2
+ import os
3
+ import json
4
+ import re
5
+ import numpy as np
6
+ from PIL import Image
7
+ import torch.utils.data as data
8
+ from transformers import BertTokenizer, AutoImageProcessor
9
+
10
+
11
+ class FieldParser:
12
+ def __init__(
13
+ self,
14
+ args
15
+ ):
16
+ super().__init__()
17
+ self.args = args
18
+ self.dataset = args.dataset
19
+ self.vit_feature_extractor = AutoImageProcessor.from_pretrained(args.vision_model)
20
+
21
+
22
+ def _parse_image(self, img):
23
+ pixel_values = self.vit_feature_extractor(img, return_tensors="pt").pixel_values
24
+ return pixel_values[0]
25
+
26
+ # from https://github.com/cuhksz-nlp/R2Gen/blob/main/modules/tokenizers.py
27
+ def clean_report(self, report):
28
+ # clean Iu-xray reports
29
+ if self.dataset == "iu_xray":
30
+ report_cleaner = lambda t: t.replace('..', '.').replace('..', '.').replace('..', '.').replace('1. ', '') \
31
+ .replace('. 2. ', '. ').replace('. 3. ', '. ').replace('. 4. ', '. ').replace('. 5. ', '. ') \
32
+ .replace(' 2. ', '. ').replace(' 3. ', '. ').replace(' 4. ', '. ').replace(' 5. ', '. ') \
33
+ .strip().lower().split('. ')
34
+ sent_cleaner = lambda t: re.sub('[.,?;*!%^&_+():-\[\]{}]', '', t.replace('"', '').replace('/', '').
35
+ replace('\\', '').replace("'", '').strip().lower())
36
+ tokens = [sent_cleaner(sent) for sent in report_cleaner(report) if sent_cleaner(sent) != []]
37
+ report = ' . '.join(tokens) + ' .'
38
+ # clean MIMIC-CXR reports
39
+ else:
40
+ report_cleaner = lambda t: t.replace('\n', ' ').replace('__', '_').replace('__', '_').replace('__', '_') \
41
+ .replace('__', '_').replace('__', '_').replace('__', '_').replace('__', '_').replace(' ', ' ') \
42
+ .replace(' ', ' ').replace(' ', ' ').replace(' ', ' ').replace(' ', ' ').replace(' ', ' ') \
43
+ .replace('..', '.').replace('..', '.').replace('..', '.').replace('..', '.').replace('..', '.') \
44
+ .replace('..', '.').replace('..', '.').replace('..', '.').replace('1. ', '').replace('. 2. ', '. ') \
45
+ .replace('. 3. ', '. ').replace('. 4. ', '. ').replace('. 5. ', '. ').replace(' 2. ', '. ') \
46
+ .replace(' 3. ', '. ').replace(' 4. ', '. ').replace(' 5. ', '. ').replace(':', ' :') \
47
+ .strip().lower().split('. ')
48
+ sent_cleaner = lambda t: re.sub('[.,?;*!%^&_+()\[\]{}]', '', t.replace('"', '').replace('/', '')
49
+ .replace('\\', '').replace("'", '').strip().lower())
50
+ tokens = [sent_cleaner(sent) for sent in report_cleaner(report) if sent_cleaner(sent) != []]
51
+ report = ' . '.join(tokens) + ' .'
52
+ # report = ' '.join(report.split()[:self.args.max_txt_len])
53
+ return report
54
+
55
+
56
+ def parse(self, features):
57
+ to_return = {'id': features['id']}
58
+ report = features.get("report", "")
59
+ report = self.clean_report(report)
60
+ to_return['input_text'] = report
61
+ # chest x-ray images
62
+ images = []
63
+ for image_path in features['image_path']:
64
+ with Image.open(os.path.join(self.args.base_dir, image_path)) as pil:
65
+ array = np.array(pil, dtype=np.uint8)
66
+ if array.shape[-1] != 3 or len(array.shape) != 3:
67
+ array = np.array(pil.convert("RGB"), dtype=np.uint8)
68
+ image = self._parse_image(array)
69
+ images.append(image)
70
+ to_return["image"] = images
71
+ return to_return
72
+
73
+
74
+ def transform_with_parse(self, inputs):
75
+ return self.parse(inputs)
76
+
77
+
78
+ class ParseDataset(data.Dataset):
79
+ def __init__(self, args, split='train'):
80
+ self.args = args
81
+ self.meta = json.load(open(args.annotation, 'r'))
82
+ self.meta = self.meta[split]
83
+ self.parser = FieldParser(args)
84
+
85
+ def __len__(self):
86
+ return len(self.meta)
87
+
88
+ def __getitem__(self, index):
89
+ return self.parser.transform_with_parse(self.meta[index])
90
+
91
+
92
+ def create_datasets(args):
93
+ train_dataset = ParseDataset(args, 'train')
94
+ dev_dataset = ParseDataset(args, 'val')
95
+ test_dataset = ParseDataset(args, 'test')
96
+ return train_dataset, dev_dataset, test_dataset
97
+
98
+
dataset/data_module.py ADDED
@@ -0,0 +1,73 @@
1
+ from lightning.pytorch import LightningDataModule
2
+ from torch.utils.data import DataLoader
3
+ from dataset.data_helper import create_datasets
4
+
5
+
6
+
7
+ class DataModule(LightningDataModule):
8
+
9
+ def __init__(
10
+ self,
11
+ args
12
+ ):
13
+ super().__init__()
14
+ self.args = args
15
+
16
+ def prepare_data(self):
17
+ """
18
+ Use this method to do things that might write to disk or that need to be done only from a single process in distributed settings.
19
+
20
+ download
21
+
22
+ tokenize
23
+
24
+ etc…
25
+ :return:
26
+ """
27
+
28
+ def setup(self, stage: str):
29
+ """
30
+ There are also data operations you might want to perform on every GPU. Use setup to do things like:
31
+
32
+ count number of classes
33
+
34
+ build vocabulary
35
+
36
+ perform train/val/test splits
37
+
38
+ apply transforms (defined explicitly in your datamodule or assigned in init)
39
+
40
+ etc…
41
+ :param stage:
42
+ :return:
43
+ """
44
+ train_dataset, dev_dataset, test_dataset = create_datasets(self.args)
45
+ self.dataset = {
46
+ "train": train_dataset, "validation": dev_dataset, "test": test_dataset
47
+ }
48
+
49
+
50
+ def train_dataloader(self):
51
+ """
52
+ Use this method to generate the train dataloader. Usually you just wrap the dataset you defined in setup.
53
+ :return:
54
+ """
55
+ loader = DataLoader(self.dataset["train"], batch_size=self.args.batch_size, drop_last=True, pin_memory=True,
56
+ num_workers=self.args.num_workers, prefetch_factor=self.args.prefetch_factor)
57
+ return loader
58
+
59
+
60
+ def val_dataloader(self):
61
+ """
62
+ Use this method to generate the val dataloader. Usually you just wrap the dataset you defined in setup.
63
+ :return:
64
+ """
65
+ loader = DataLoader(self.dataset["validation"], batch_size=self.args.val_batch_size, drop_last=False, pin_memory=True,
66
+ num_workers=self.args.num_workers, prefetch_factor=self.args.prefetch_factor)
67
+ return loader
68
+
69
+
70
+ def test_dataloader(self):
71
+ loader = DataLoader(self.dataset["test"], batch_size=self.args.test_batch_size, drop_last=False, pin_memory=False,
72
+ num_workers=self.args.num_workers, prefetch_factor=self.args.prefetch_factor)
73
+ return loader
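A minimal sketch of exercising this DataModule outside of a Trainer run; it assumes the dataset has already been placed under ./data as described in the README, and that the defaults from configs/config.py apply.

```python
# Illustrative sketch only: build the datasets and look at one parsed sample.
from configs.config import parser
from dataset.data_module import DataModule

args = parser.parse_args([])
dm = DataModule(args)
dm.setup(stage="fit")                     # creates the train/validation/test ParseDataset splits
sample = dm.dataset["train"][0]           # ParseDataset.__getitem__ -> FieldParser.parse
print(sample["id"])
print(sample["input_text"][:100])         # cleaned report text
print(len(sample["image"]), sample["image"][0].shape)  # list of preprocessed image tensors
```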
evalcap/__init__.py ADDED
File without changes
evalcap/bleu/LICENSE ADDED
@@ -0,0 +1,23 @@
1
+ Copyright (c) 2015 Xinlei Chen, Hao Fang, Tsung-Yi Lin, and Ramakrishna Vedantam
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
20
+
21
+ Python 2 to Python 3 conversion notes:
22
+ Python 2 dict: iteritems()
23
+ Python 3 dict: items()
evalcap/bleu/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __author__ = 'tylin'
evalcap/bleu/bleu.py ADDED
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env python
2
+ #
3
+ # File Name : bleu.py
4
+ #
5
+ # Description : Wrapper for BLEU scorer.
6
+ #
7
+ # Creation Date : 06-01-2015
8
+ # Last Modified : Thu 19 Mar 2015 09:13:28 PM PDT
9
+ # Authors : Hao Fang <[email protected]> and Tsung-Yi Lin <[email protected]>
10
+ import os
11
+ dir_path = os.path.dirname(os.path.abspath(__file__))
12
+ import sys
13
+ sys.path.append(dir_path)
14
+ from bleu_scorer import BleuScorer
15
+
16
+
17
+ class Bleu:
18
+ def __init__(self, n=4):
19
+ # by default, compute BLEU scores up to 4-grams
20
+ self._n = n
21
+ self._hypo_for_image = {}
22
+ self.ref_for_image = {}
23
+
24
+ def compute_score(self, gts, res, verbose=0):
25
+
26
+ assert(gts.keys() == res.keys())
27
+ imgIds = gts.keys()
28
+
29
+ bleu_scorer = BleuScorer(n=self._n)
30
+ for id in imgIds:
31
+ hypo = res[id]
32
+ ref = gts[id]
33
+
34
+ # Sanity check.
35
+ assert(type(hypo) is list)
36
+ assert(len(hypo) == 1)
37
+ assert(type(ref) is list)
38
+ assert(len(ref) >= 1)
39
+
40
+ bleu_scorer += (hypo[0], ref)
41
+
42
+ #score, scores = bleu_scorer.compute_score(option='shortest')
43
+ score, scores = bleu_scorer.compute_score(option='closest', verbose=verbose)
44
+ # score, scores = bleu_scorer.compute_score(option='average', verbose=1)
45
+
46
+ # return (bleu, bleu_info)
47
+ return score, scores
48
+
49
+ def method(self):
50
+ return "Bleu"
evalcap/bleu/bleu_scorer.py ADDED
@@ -0,0 +1,264 @@
1
+ #!/usr/bin/env python
2
+
3
+ # bleu_scorer.py
4
+ # David Chiang <[email protected]>
5
+
6
+ # Copyright (c) 2004-2006 University of Maryland. All rights
7
+ # reserved. Do not redistribute without permission from the
8
+ # author. Not for commercial use.
9
+
10
+ # Modified by:
11
+ # Hao Fang <[email protected]>
12
+ # Tsung-Yi Lin <[email protected]>
13
+
14
+ '''Provides:
15
+ cook_refs(refs, n=4): Transform a list of reference sentences as strings into a form usable by cook_test().
16
+ cook_test(test, refs, n=4): Transform a test sentence as a string (together with the cooked reference sentences) into a form usable by score_cooked().
17
+ '''
18
+
19
+ import copy
20
+ import sys, math, re
21
+ from collections import defaultdict
22
+
23
+ def precook(s, n=4, out=False):
24
+ """Takes a string as input and returns an object that can be given to
25
+ either cook_refs or cook_test. This is optional: cook_refs and cook_test
26
+ can take string arguments as well."""
27
+ words = s.split()
28
+ counts = defaultdict(int)
29
+ for k in range(1,n+1):
30
+ for i in range(len(words)-k+1):
31
+ ngram = tuple(words[i:i+k])
32
+ counts[ngram] += 1
33
+ return (len(words), counts)
34
+
35
+ def cook_refs(refs, eff=None, n=4): ## lhuang: oracle will call with "average"
36
+ '''Takes a list of reference sentences for a single segment
37
+ and returns an object that encapsulates everything that BLEU
38
+ needs to know about them.'''
39
+
40
+ reflen = []
41
+ maxcounts = {}
42
+ for ref in refs:
43
+ rl, counts = precook(ref, n)
44
+ reflen.append(rl)
45
+ for (ngram,count) in counts.items():
46
+ maxcounts[ngram] = max(maxcounts.get(ngram,0), count)
47
+
48
+ # Calculate effective reference sentence length.
49
+ if eff == "shortest":
50
+ reflen = min(reflen)
51
+ elif eff == "average":
52
+ reflen = float(sum(reflen))/len(reflen)
53
+
54
+ ## lhuang: N.B.: leave reflen computation to the very end!!
55
+
56
+ ## lhuang: N.B.: in case of "closest", keep a list of reflens!! (bad design)
57
+
58
+ return (reflen, maxcounts)
59
+
60
+ def cook_test(test, crefs, eff=None, n=4):
61
+ '''Takes a test sentence and returns an object that
62
+ encapsulates everything that BLEU needs to know about it.'''
63
+ reflen, refmaxcounts = crefs[0], crefs[1]
64
+
65
+ testlen, counts = precook(test, n, True)
66
+
67
+ result = {}
68
+
69
+ # Calculate effective reference sentence length.
70
+
71
+ if eff == "closest":
72
+ result["reflen"] = min((abs(l-testlen), l) for l in reflen)[1]
73
+ else: ## i.e., "average" or "shortest" or None
74
+ result["reflen"] = reflen
75
+
76
+ result["testlen"] = testlen
77
+
78
+ result["guess"] = [max(0,testlen-k+1) for k in range(1,n+1)]
79
+
80
+ result['correct'] = [0]*n
81
+ for (ngram, count) in counts.items():
82
+ result["correct"][len(ngram)-1] += min(refmaxcounts.get(ngram,0), count)
83
+
84
+ return result
85
+
86
+ class BleuScorer(object):
87
+ """Bleu scorer.
88
+ """
89
+
90
+ __slots__ = "n", "crefs", "ctest", "_score", "_ratio", "_testlen", "_reflen", "special_reflen"
91
+ # special_reflen is used in oracle (proportional effective ref len for a node).
92
+
93
+ def copy(self):
94
+ ''' copy the refs.'''
95
+ new = BleuScorer(n=self.n)
96
+ new.ctest = copy.copy(self.ctest)
97
+ new.crefs = copy.copy(self.crefs)
98
+ new._score = None
99
+ return new
100
+
101
+ def __init__(self, test=None, refs=None, n=4, special_reflen=None):
102
+ ''' singular instance '''
103
+
104
+ self.n = n
105
+ self.crefs = []
106
+ self.ctest = []
107
+ self.cook_append(test, refs)
108
+ self.special_reflen = special_reflen
109
+
110
+ def cook_append(self, test, refs):
111
+ '''called by constructor and __iadd__ to avoid creating new instances.'''
112
+
113
+ if refs is not None:
114
+ self.crefs.append(cook_refs(refs))
115
+ if test is not None:
116
+ cooked_test = cook_test(test, self.crefs[-1])
117
+ self.ctest.append(cooked_test) ## N.B.: -1
118
+ else:
119
+ self.ctest.append(None) # lens of crefs and ctest have to match
120
+
121
+ self._score = None ## need to recompute
122
+
123
+ def ratio(self, option=None):
124
+ self.compute_score(option=option)
125
+ return self._ratio
126
+
127
+ def score_ratio(self, option=None):
128
+ '''return (bleu, len_ratio) pair'''
129
+ return (self.fscore(option=option), self.ratio(option=option))
130
+
131
+ def score_ratio_str(self, option=None):
132
+ return "%.4f (%.2f)" % self.score_ratio(option)
133
+
134
+ def reflen(self, option=None):
135
+ self.compute_score(option=option)
136
+ return self._reflen
137
+
138
+ def testlen(self, option=None):
139
+ self.compute_score(option=option)
140
+ return self._testlen
141
+
142
+ def retest(self, new_test):
143
+ if type(new_test) is str:
144
+ new_test = [new_test]
145
+ assert len(new_test) == len(self.crefs), new_test
146
+ self.ctest = []
147
+ for t, rs in zip(new_test, self.crefs):
148
+ self.ctest.append(cook_test(t, rs))
149
+ self._score = None
150
+
151
+ return self
152
+
153
+ def rescore(self, new_test):
154
+ ''' replace test(s) with new test(s), and returns the new score.'''
155
+
156
+ return self.retest(new_test).compute_score()
157
+
158
+ def size(self):
159
+ assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest))
160
+ return len(self.crefs)
161
+
162
+ def __iadd__(self, other):
163
+ '''add an instance (e.g., from another sentence).'''
164
+
165
+ if type(other) is tuple:
166
+ ## avoid creating new BleuScorer instances
167
+ self.cook_append(other[0], other[1])
168
+ else:
169
+ assert self.compatible(other), "incompatible BLEUs."
170
+ self.ctest.extend(other.ctest)
171
+ self.crefs.extend(other.crefs)
172
+ self._score = None ## need to recompute
173
+
174
+ return self
175
+
176
+ def compatible(self, other):
177
+ return isinstance(other, BleuScorer) and self.n == other.n
178
+
179
+ def single_reflen(self, option="average"):
180
+ return self._single_reflen(self.crefs[0][0], option)
181
+
182
+ def _single_reflen(self, reflens, option=None, testlen=None):
183
+
184
+ if option == "shortest":
185
+ reflen = min(reflens)
186
+ elif option == "average":
187
+ reflen = float(sum(reflens))/len(reflens)
188
+ elif option == "closest":
189
+ reflen = min((abs(l-testlen), l) for l in reflens)[1]
190
+ else:
191
+ assert False, "unsupported reflen option %s" % option
192
+
193
+ return reflen
194
+
195
+ def recompute_score(self, option=None, verbose=0):
196
+ self._score = None
197
+ return self.compute_score(option, verbose)
198
+
199
+ def compute_score(self, option=None, verbose=0):
200
+ n = self.n
201
+ small = 1e-9
202
+ tiny = 1e-15 ## so that if guess is 0 still return 0
203
+ bleu_list = [[] for _ in range(n)]
204
+
205
+ if self._score is not None:
206
+ return self._score
207
+
208
+ if option is None:
209
+ option = "average" if len(self.crefs) == 1 else "closest"
210
+
211
+ self._testlen = 0
212
+ self._reflen = 0
213
+ totalcomps = {'testlen':0, 'reflen':0, 'guess':[0]*n, 'correct':[0]*n}
214
+
215
+ # for each sentence
216
+ for comps in self.ctest:
217
+ testlen = comps['testlen']
218
+ self._testlen += testlen
219
+
220
+ if self.special_reflen is None: ## need computation
221
+ reflen = self._single_reflen(comps['reflen'], option, testlen)
222
+ else:
223
+ reflen = self.special_reflen
224
+
225
+ self._reflen += reflen
226
+
227
+ for key in ['guess','correct']:
228
+ for k in range(n):
229
+ totalcomps[key][k] += comps[key][k]
230
+
231
+ # append per image bleu score
232
+ bleu = 1.
233
+ for k in range(n):
234
+ bleu *= (float(comps['correct'][k]) + tiny) \
235
+ /(float(comps['guess'][k]) + small)
236
+ bleu_list[k].append(bleu ** (1./(k+1)))
237
+ ratio = (testlen + tiny) / (reflen + small) ## N.B.: avoid zero division
238
+ if ratio < 1:
239
+ for k in range(n):
240
+ bleu_list[k][-1] *= math.exp(1 - 1/ratio)
241
+
242
+ if verbose > 1:
243
+ print(comps, reflen)
244
+
245
+ totalcomps['reflen'] = self._reflen
246
+ totalcomps['testlen'] = self._testlen
247
+
248
+ bleus = []
249
+ bleu = 1.
250
+ for k in range(n):
251
+ bleu *= float(totalcomps['correct'][k] + tiny) \
252
+ / (totalcomps['guess'][k] + small)
253
+ bleus.append(bleu ** (1./(k+1)))
254
+ ratio = (self._testlen + tiny) / (self._reflen + small) ## N.B.: avoid zero division
255
+ if ratio < 1:
256
+ for k in range(n):
257
+ bleus[k] *= math.exp(1 - 1/ratio)
258
+
259
+ if verbose > 0:
260
+ print(totalcomps)
261
+ print("ratio:", ratio)
262
+
263
+ self._score = bleus
264
+ return self._score, bleu_list
evalcap/cider/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __author__ = 'tylin'
evalcap/cider/cider.py ADDED
@@ -0,0 +1,57 @@
1
+ # Filename: cider.py
2
+ #
3
+ # Description: Describes the class to compute the CIDEr (Consensus-Based Image Description Evaluation) Metric
4
+ # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
5
+ #
6
+ # Creation Date: Sun Feb 8 14:16:54 2015
7
+ #
8
+ # Authors: Ramakrishna Vedantam <[email protected]> and Tsung-Yi Lin <[email protected]>
9
+ import os
10
+ dir_path = os.path.dirname(os.path.abspath(__file__))
11
+ import sys
12
+ sys.path.append(dir_path)
13
+ from cider_scorer import CiderScorer
14
+ import pdb
15
+
16
+ class Cider:
17
+ """
18
+ Main Class to compute the CIDEr metric
19
+
20
+ """
21
+ def __init__(self, test=None, refs=None, n=4, sigma=6.0):
22
+ # set cider to sum over 1 to 4-grams
23
+ self._n = n
24
+ # set the standard deviation parameter for gaussian penalty
25
+ self._sigma = sigma
26
+
27
+ def compute_score(self, gts, res):
28
+ """
29
+ Main function to compute CIDEr score
30
+ :param hypo_for_image (dict) : dictionary with key <image> and value <tokenized hypothesis / candidate sentence>
31
+ ref_for_image (dict) : dictionary with key <image> and value <tokenized reference sentence>
32
+ :return: cider (float) : computed CIDEr score for the corpus
33
+ """
34
+
35
+ assert(gts.keys() == res.keys())
36
+ imgIds = gts.keys()
37
+
38
+ cider_scorer = CiderScorer(n=self._n, sigma=self._sigma)
39
+
40
+ for id in imgIds:
41
+ hypo = res[id]
42
+ ref = gts[id]
43
+
44
+ # Sanity check.
45
+ assert(type(hypo) is list)
46
+ assert(len(hypo) == 1)
47
+ assert(type(ref) is list)
48
+ assert(len(ref) > 0)
49
+
50
+ cider_scorer += (hypo[0], ref)
51
+
52
+ (score, scores) = cider_scorer.compute_score()
53
+
54
+ return score, scores
55
+
56
+ def method(self):
57
+ return "CIDEr"
evalcap/cider/cider_scorer.py ADDED
@@ -0,0 +1,193 @@
1
+ #!/usr/bin/env python
2
+ # Tsung-Yi Lin <[email protected]>
3
+ # Ramakrishna Vedantam <[email protected]>
4
+
5
+ import copy
6
+ from collections import defaultdict
7
+ import numpy as np
8
+ import pdb
9
+ import math
10
+
11
+ def precook(s, n=4, out=False):
12
+ """
13
+ Takes a string as input and returns an object that can be given to
14
+ either cook_refs or cook_test. This is optional: cook_refs and cook_test
15
+ can take string arguments as well.
16
+ :param s: string : sentence to be converted into ngrams
17
+ :param n: int : number of ngrams for which representation is calculated
18
+ :return: term frequency vector for occurring ngrams
19
+ """
20
+ words = s.split()
21
+ counts = defaultdict(int)
22
+ for k in range(1,n+1):
23
+ for i in range(len(words)-k+1):
24
+ ngram = tuple(words[i:i+k])
25
+ counts[ngram] += 1
26
+ return counts
27
+
28
+ def cook_refs(refs, n=4): ## lhuang: oracle will call with "average"
29
+ '''Takes a list of reference sentences for a single segment
30
+ and returns an object that encapsulates everything that BLEU
31
+ needs to know about them.
32
+ :param refs: list of string : reference sentences for some image
33
+ :param n: int : number of ngrams for which (ngram) representation is calculated
34
+ :return: result (list of dict)
35
+ '''
36
+ return [precook(ref, n) for ref in refs]
37
+
38
+ def cook_test(test, n=4):
39
+ '''Takes a test sentence and returns an object that
40
+ encapsulates everything that BLEU needs to know about it.
41
+ :param test: list of string : hypothesis sentence for some image
42
+ :param n: int : number of ngrams for which (ngram) representation is calculated
43
+ :return: result (dict)
44
+ '''
45
+ return precook(test, n, True)
46
+
47
+ class CiderScorer(object):
48
+ """CIDEr scorer.
49
+ """
50
+
51
+ def copy(self):
52
+ ''' copy the refs.'''
53
+ new = CiderScorer(n=self.n)
54
+ new.ctest = copy.copy(self.ctest)
55
+ new.crefs = copy.copy(self.crefs)
56
+ return new
57
+
58
+ def __init__(self, test=None, refs=None, n=4, sigma=6.0):
59
+ ''' singular instance '''
60
+ self.n = n
61
+ self.sigma = sigma
62
+ self.crefs = []
63
+ self.ctest = []
64
+ self.document_frequency = defaultdict(float)
65
+ self.cook_append(test, refs)
66
+ self.ref_len = None
67
+
68
+ def cook_append(self, test, refs):
69
+ '''called by constructor and __iadd__ to avoid creating new instances.'''
70
+
71
+ if refs is not None:
72
+ self.crefs.append(cook_refs(refs))
73
+ if test is not None:
74
+ self.ctest.append(cook_test(test)) ## N.B.: -1
75
+ else:
76
+ self.ctest.append(None) # lens of crefs and ctest have to match
77
+
78
+ def size(self):
79
+ assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest))
80
+ return len(self.crefs)
81
+
82
+ def __iadd__(self, other):
83
+ '''add an instance (e.g., from another sentence).'''
84
+
85
+ if type(other) is tuple:
86
+ ## avoid creating new CiderScorer instances
87
+ self.cook_append(other[0], other[1])
88
+ else:
89
+ self.ctest.extend(other.ctest)
90
+ self.crefs.extend(other.crefs)
91
+
92
+ return self
93
+ def compute_doc_freq(self):
94
+ '''
95
+ Compute term frequency for reference data.
96
+ This will be used to compute idf (inverse document frequency later)
97
+ The term frequency is stored in the object
98
+ :return: None
99
+ '''
100
+ for refs in self.crefs:
101
+ # refs, k ref captions of one image
102
+ for ngram in set([ngram for ref in refs for (ngram,count) in ref.items()]):
103
+ self.document_frequency[ngram] += 1
104
+ # maxcounts[ngram] = max(maxcounts.get(ngram,0), count)
105
+
106
+ def compute_cider(self):
107
+ def counts2vec(cnts):
108
+ """
109
+ Function maps counts of ngram to vector of tfidf weights.
110
+ The function returns vec, an array of dictionary that store mapping of n-gram and tf-idf weights.
111
+ The n-th entry of array denotes length of n-grams.
112
+ :param cnts:
113
+ :return: vec (array of dict), norm (array of float), length (int)
114
+ """
115
+ vec = [defaultdict(float) for _ in range(self.n)]
116
+ length = 0
117
+ norm = [0.0 for _ in range(self.n)]
118
+ for (ngram, term_freq) in cnts.items():
119
+ # give word count 1 if it doesn't appear in reference corpus
120
+ df = np.log(max(1.0, self.document_frequency[ngram]))
121
+ # ngram index
122
+ n = len(ngram)-1
123
+ # tf (term_freq) * idf (precomputed idf) for n-grams
124
+ vec[n][ngram] = float(term_freq)*(self.ref_len - df)
125
+ # compute norm for the vector. the norm will be used for computing similarity
126
+ norm[n] += pow(vec[n][ngram], 2)
127
+
128
+ if n == 1:
129
+ length += term_freq
130
+ norm = [np.sqrt(n) for n in norm]
131
+ return vec, norm, length
132
+
133
+ def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref):
134
+ '''
135
+ Compute the cosine similarity of two vectors.
136
+ :param vec_hyp: array of dictionary for vector corresponding to hypothesis
137
+ :param vec_ref: array of dictionary for vector corresponding to reference
138
+ :param norm_hyp: array of float for vector corresponding to hypothesis
139
+ :param norm_ref: array of float for vector corresponding to reference
140
+ :param length_hyp: int containing length of hypothesis
141
+ :param length_ref: int containing length of reference
142
+ :return: array of score for each n-grams cosine similarity
143
+ '''
144
+ delta = float(length_hyp - length_ref)
145
+ # measure cosine similarity
146
+ val = np.array([0.0 for _ in range(self.n)])
147
+ for n in range(self.n):
148
+ # ngram
149
+ for (ngram,count) in vec_hyp[n].items():
150
+ # vrama91 : added clipping
151
+ val[n] += min(vec_hyp[n][ngram], vec_ref[n][ngram]) * vec_ref[n][ngram]
152
+
153
+ if (norm_hyp[n] != 0) and (norm_ref[n] != 0):
154
+ val[n] /= (norm_hyp[n]*norm_ref[n])
155
+
156
+ assert(not math.isnan(val[n]))
157
+ # vrama91: added a length based gaussian penalty
158
+ val[n] *= np.e**(-(delta**2)/(2*self.sigma**2))
159
+ return val
160
+
161
+ # compute log reference length
162
+ self.ref_len = np.log(float(len(self.crefs)))
163
+ if len(self.crefs) == 1:
164
+ self.ref_len = 1
165
+ scores = []
166
+ for test, refs in zip(self.ctest, self.crefs):
167
+ # compute vector for test captions
168
+ vec, norm, length = counts2vec(test)
169
+ # compute vector for ref captions
170
+ score = np.array([0.0 for _ in range(self.n)])
171
+ for ref in refs:
172
+ vec_ref, norm_ref, length_ref = counts2vec(ref)
173
+ score += sim(vec, vec_ref, norm, norm_ref, length, length_ref)
174
+ # change by vrama91 - mean of ngram scores, instead of sum
175
+ score_avg = np.mean(score)
176
+ # divide by number of references
177
+ score_avg /= len(refs)
178
+ # multiply score by 10
179
+ score_avg *= 10.0
180
+ # append score of an image to the score list
181
+ scores.append(score_avg)
182
+ return scores
183
+
184
+ def compute_score(self, option=None, verbose=0):
185
+ # compute idf
186
+ self.compute_doc_freq()
187
+ # assert to check document frequency
188
+ assert(len(self.ctest) >= max(self.document_frequency.values()))
189
+ # compute cider score
190
+ score = self.compute_cider()
191
+ # debug
192
+ # print score
193
+ return np.mean(np.array(score)), np.array(score)
evalcap/meteor/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __author__ = 'tylin'
evalcap/meteor/meteor-1.5.jar ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e57b4c72c0830ebe68558f1c799a624e96cbc1b6045c9f6330e26dcff6eafc2
3
+ size 6318693
evalcap/meteor/meteor.py ADDED
@@ -0,0 +1,130 @@
1
+ #!/usr/bin/env python
2
+
3
+ # Python wrapper for METEOR implementation, by Xinlei Chen
4
+ # Acknowledge Michael Denkowski for the generous discussion and help
5
+ from __future__ import division
6
+
7
+ import atexit
8
+ import logging
9
+ import os
10
+ import re
11
+ import subprocess
12
+ import sys
13
+ import threading
14
+
15
+ import psutil
16
+
17
+ # Assumes meteor-1.5.jar is in the same directory as meteor.py. Change as needed.
18
+ METEOR_JAR = 'meteor-1.5.jar'
19
+
20
+
21
+ def enc(s):
22
+ return s.encode('utf-8')
23
+
24
+
25
+ def dec(s):
26
+ return s.decode('utf-8')
27
+
28
+
29
+ class Meteor:
30
+
31
+ def __init__(self):
32
+ # Used to guarantee thread safety
33
+ self.lock = threading.Lock()
34
+
35
+ mem = '1G'
36
+ mem_available_G = psutil.virtual_memory().available / 1E9
37
+ if mem_available_G < 2:
38
+ logging.warning("There is less than 2GB of available memory.\n"
39
+ "Will try with limiting Meteor to 1GB of memory but this might cause issues.\n"
40
+ "If you have problems using Meteor, "
41
+ "then you can try to lower the `mem` variable in meteor.py")
42
+ mem = '1G'
43
+
44
+ meteor_cmd = ['java', '-jar', '-Xmx{}'.format(mem), METEOR_JAR,
45
+ '-', '-', '-stdio', '-l', 'en', '-norm']
46
+ env = os.environ.copy()
47
+ env['LC_ALL'] = "C"
48
+ self.meteor_p = subprocess.Popen(meteor_cmd,
49
+ cwd=os.path.dirname(os.path.abspath(__file__)),
50
+ env=env,
51
+ stdin=subprocess.PIPE,
52
+ stdout=subprocess.PIPE,
53
+ stderr=subprocess.PIPE)
54
+
55
+ atexit.register(self.close)
56
+
57
+ def close(self):
58
+ with self.lock:
59
+ if self.meteor_p:
60
+ self.meteor_p.kill()
61
+ self.meteor_p.wait()
62
+ self.meteor_p = None
63
+ # if the user calls close() manually, remove the
64
+ # reference from atexit so the object can be garbage-collected.
65
+ if atexit is not None and atexit.unregister is not None:
66
+ atexit.unregister(self.close)
67
+
68
+ def compute_score(self, gts, res):
69
+ assert (gts.keys() == res.keys())
70
+ imgIds = gts.keys()
71
+ scores = []
72
+
73
+ eval_line = 'EVAL'
74
+ with self.lock:
75
+ for i in imgIds:
76
+ assert (len(res[i]) == 1)
77
+ stat = self._stat(res[i][0], gts[i])
78
+ eval_line += ' ||| {}'.format(stat)
79
+
80
+ self.meteor_p.stdin.write(enc('{}\n'.format(eval_line)))
81
+ self.meteor_p.stdin.flush()
82
+ for i in range(0, len(imgIds)):
83
+ v = self.meteor_p.stdout.readline()
84
+ try:
85
+ scores.append(float(dec(v.strip())))
86
+ except:
87
+ sys.stderr.write("Error handling value: {}\n".format(v))
88
+ sys.stderr.write("Decoded value: {}\n".format(dec(v.strip())))
89
+ sys.stderr.write("eval_line: {}\n".format(eval_line))
90
+ # You can try uncommenting the next code line to show stderr from the Meteor JAR.
91
+ # If the Meteor JAR is not writing to stderr, then the line will just hang.
92
+ # sys.stderr.write("Error from Meteor:\n{}".format(self.meteor_p.stderr.read()))
93
+ raise
94
+ score = float(dec(self.meteor_p.stdout.readline()).strip())
95
+ self.close()
96
+ return score, scores
97
+
98
+ def method(self):
99
+ return "METEOR"
100
+
101
+ def _stat(self, hypothesis_str, reference_list):
102
+ # SCORE ||| reference 1 words ||| reference n words ||| hypothesis words
103
+ hypothesis_str = hypothesis_str.replace('|||', '')
104
+ score_line = ' ||| '.join(('SCORE', ' ||| '.join(reference_list), hypothesis_str))
105
+ score_line = re.sub(r'\s+', ' ', score_line)
106
+ self.meteor_p.stdin.write(enc(score_line))
107
+ self.meteor_p.stdin.write(enc('\n'))
108
+ self.meteor_p.stdin.flush()
109
+ return dec(self.meteor_p.stdout.readline()).strip()
110
+
111
+ def _score(self, hypothesis_str, reference_list):
112
+ with self.lock:
113
+ # SCORE ||| reference 1 words ||| reference n words ||| hypothesis words
114
+ hypothesis_str = hypothesis_str.replace('|||', '').replace(' ', ' ')
115
+ score_line = ' ||| '.join(('SCORE', ' ||| '.join(reference_list), hypothesis_str))
116
+ self.meteor_p.stdin.write(enc('{}\n'.format(score_line)))
117
+ self.meteor_p.stdin.flush()
118
+ stats = dec(self.meteor_p.stdout.readline()).strip()
119
+ eval_line = 'EVAL ||| {}'.format(stats)
120
+ # EVAL ||| stats
121
+ self.meteor_p.stdin.write(enc('{}\n'.format(eval_line)))
122
+ self.meteor_p.stdin.flush()
123
+ score = float(dec(self.meteor_p.stdout.readline()).strip())
124
+ # bug fix: there are two values returned by the jar file, one average, and one all, so do it twice
125
+ # thanks for Andrej for pointing this out
126
+ score = float(dec(self.meteor_p.stdout.readline()).strip())
127
+ return score
128
+
129
+ def __del__(self):
130
+ self.close()
evalcap/meteor/test_meteor.py ADDED
@@ -0,0 +1,10 @@
1
+ import meteor
2
+
3
+ hypo = ['this is the model generated sentence1 which seems good enough']
4
+ ref = ['this is one reference sentence for sentence1',
5
+ 'this is a reference sentence for sentence2 which was generated by your model']
6
+
7
+ m = meteor.Meteor()
8
+
9
+ score = m._score(hypo[0], ref)
10
+ print(score)
evalcap/rouge/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __author__ = 'vrama91'
evalcap/rouge/rouge.py ADDED
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env python
2
+ #
3
+ # File Name : rouge.py
4
+ #
5
+ # Description : Computes ROUGE-L metric as described by Lin and Hovy (2004)
6
+ #
7
+ # Creation Date : 2015-01-07 06:03
8
+ # Author : Ramakrishna Vedantam <[email protected]>
9
+
10
+ import numpy as np
11
+ import pdb
12
+
13
+ def my_lcs(string, sub):
14
+ """
15
+ Calculates longest common subsequence for a pair of tokenized strings
16
+ :param string : list of str : tokens from a string split using whitespace
17
+ :param sub : list of str : shorter string, also split using whitespace
18
+ :returns: length (list of int): length of the longest common subsequence between the two strings
19
+
20
+ Note: my_lcs only gives length of the longest common subsequence, not the actual LCS
21
+ """
22
+ if(len(string)< len(sub)):
23
+ sub, string = string, sub
24
+
25
+ lengths = [[0 for i in range(0,len(sub)+1)] for j in range(0,len(string)+1)]
26
+
27
+ for j in range(1,len(sub)+1):
28
+ for i in range(1,len(string)+1):
29
+ if(string[i-1] == sub[j-1]):
30
+ lengths[i][j] = lengths[i-1][j-1] + 1
31
+ else:
32
+ lengths[i][j] = max(lengths[i-1][j] , lengths[i][j-1])
33
+
34
+ return lengths[len(string)][len(sub)]
35
+
36
+ class Rouge():
37
+ '''
38
+ Class for computing ROUGE-L score for a set of candidate sentences for the MS COCO test set
39
+
40
+ '''
41
+ def __init__(self):
42
+ # vrama91: updated the value below based on discussion with Hovy
43
+ self.beta = 1.2
44
+
45
+ def calc_score(self, candidate, refs):
46
+ """
47
+ Compute ROUGE-L score given one candidate and references for an image
48
+ :param candidate: str : candidate sentence to be evaluated
49
+ :param refs: list of str : COCO reference sentences for the particular image to be evaluated
50
+ :returns score: int (ROUGE-L score for the candidate evaluated against references)
51
+ """
52
+ # assert(len(candidate)==1)
53
+ # assert(len(refs)>0)
54
+ prec = []
55
+ rec = []
56
+
57
+ # split into tokens
58
+ token_c = candidate[0].split(" ")
59
+
60
+ for reference in refs:
61
+ # split into tokens
62
+ token_r = reference.split(" ")
63
+ # compute the longest common subsequence
64
+ lcs = my_lcs(token_r, token_c)
65
+ prec.append(lcs/float(len(token_c)))
66
+ rec.append(lcs/float(len(token_r)))
67
+
68
+ prec_max = max(prec)
69
+ rec_max = max(rec)
70
+
71
+ if(prec_max!=0 and rec_max !=0):
72
+ score = ((1 + self.beta**2)*prec_max*rec_max)/float(rec_max + self.beta**2*prec_max)
73
+ else:
74
+ score = 0.0
75
+ return score
76
+
77
+ def compute_score(self, gts, res):
78
+ """
79
+ Computes Rouge-L score given a set of reference and candidate sentences for the dataset
80
+ Invoked by evaluate_captions.py
81
+ :param hypo_for_image: dict : candidate / test sentences with "image name" key and "tokenized sentences" as values
82
+ :param ref_for_image: dict : reference MS-COCO sentences with "image name" key and "tokenized sentences" as values
83
+ :returns: average_score: float (mean ROUGE-L score computed by averaging scores for all the images)
84
+ """
85
+ assert(gts.keys() == res.keys())
86
+ imgIds = gts.keys()
87
+
88
+ score = []
89
+ for id in imgIds:
90
+ hypo = res[id]
91
+ ref = gts[id]
92
+
93
+ score.append(self.calc_score(hypo, ref))
94
+
95
+ # Sanity check.
96
+ assert(type(hypo) is list)
97
+ assert(len(hypo) == 1)
98
+ assert(type(ref) is list)
99
+ assert(len(ref) > 0)
100
+
101
+ average_score = np.mean(np.array(score))
102
+ return average_score, np.array(score)
103
+
104
+ def method(self):
105
+ return "Rouge"
evalcap/tokenizer/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __author__ = 'hfang'
evalcap/tokenizer/ptbtokenizer.py ADDED
@@ -0,0 +1,68 @@
1
+ #!/usr/bin/env python
2
+ #
3
+ # File Name : ptbtokenizer.py
4
+ #
5
+ # Description : Do the PTB Tokenization and remove punctuations.
6
+ #
7
+ # Creation Date : 29-12-2014
8
+ # Last Modified : Thu Mar 19 09:53:35 2015
9
+ # Authors : Hao Fang <[email protected]> and Tsung-Yi Lin <[email protected]>
10
+
11
+ import os
12
+ import sys
13
+ import subprocess
14
+ import tempfile
15
+ import itertools
16
+
17
+ # path to the stanford corenlp jar
18
+ STANFORD_CORENLP_3_4_1_JAR = 'stanford-corenlp-3.4.1.jar'
19
+
20
+ # punctuations to be removed from the sentences
21
+ PUNCTUATIONS = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-", \
22
+ ".", "?", "!", ",", ":", "-", "--", "...", ";"]
23
+
24
+ class PTBTokenizer:
25
+ """Python wrapper of Stanford PTBTokenizer"""
26
+
27
+ def tokenize(self, captions_for_image):
28
+ cmd = ['java', '-cp', STANFORD_CORENLP_3_4_1_JAR, \
29
+ 'edu.stanford.nlp.process.PTBTokenizer', \
30
+ '-preserveLines', '-lowerCase']
31
+
32
+ # ======================================================
33
+ # prepare data for PTB Tokenizer
34
+ # ======================================================
35
+ final_tokenized_captions_for_image = {}
36
+ image_id = [k for k, v in captions_for_image.items() for _ in range(len(v))]
37
+ sentences = '\n'.join([c['caption'].replace('\n', ' ') for k, v in captions_for_image.items() for c in v])
38
+
39
+ # ======================================================
40
+ # save sentences to temporary file
41
+ # ======================================================
42
+ path_to_jar_dirname=os.path.dirname(os.path.abspath(__file__))
43
+ tmp_file = tempfile.NamedTemporaryFile(delete=False, dir=path_to_jar_dirname, mode='w', encoding='utf-8')
44
+ tmp_file.write(sentences)
45
+ tmp_file.close()
46
+
47
+ # ======================================================
48
+ # tokenize sentence
49
+ # ======================================================
50
+ cmd.append(os.path.basename(tmp_file.name))
51
+ p_tokenizer = subprocess.Popen(cmd, cwd=path_to_jar_dirname, \
52
+ stdout=subprocess.PIPE)
53
+ token_lines = p_tokenizer.communicate(input=sentences.rstrip())[0]
54
+ lines = token_lines.decode().split('\n')
55
+ # remove temp file
56
+ os.remove(tmp_file.name)
57
+
58
+ # ======================================================
59
+ # create dictionary for tokenized captions
60
+ # ======================================================
61
+ for k, line in zip(image_id, lines):
62
+ if not k in final_tokenized_captions_for_image:
63
+ final_tokenized_captions_for_image[k] = []
64
+ tokenized_caption = ' '.join([w for w in line.rstrip().split(' ') \
65
+ if w not in PUNCTUATIONS])
66
+ final_tokenized_captions_for_image[k].append(tokenized_caption)
67
+
68
+ return final_tokenized_captions_for_image
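A small sketch of the input format `PTBTokenizer.tokenize` expects: a dict mapping each image id to a list of `{"caption": ...}` entries. It shells out to Java, so a JRE and the stanford-corenlp-3.4.1.jar added below must be available next to ptbtokenizer.py; the caption text here is a placeholder.

```python
# Illustrative sketch only: tokenize and lowercase captions, stripping punctuation.
from evalcap.tokenizer.ptbtokenizer import PTBTokenizer

captions_for_image = {"img1": [{"caption": "The heart size is normal."}]}
tokenized = PTBTokenizer().tokenize(captions_for_image)
print(tokenized)    # {'img1': ['the heart size is normal']}
```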
evalcap/tokenizer/stanford-corenlp-3.4.1.jar ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fcb91bb7a111f93d71e264f4ee0e3afd19ba0dde6d21b38605088df9e940399
3
+ size 5921410
images/align.png ADDED

Git LFS Details

  • SHA256: 01abc0814362789759e8bdb363f06891343e0d98f2dd2d18b311ec2de5a51ba2
  • Pointer size: 132 Bytes
  • Size of remote file: 1.51 MB
lightning_tools/callbacks.py ADDED
@@ -0,0 +1,30 @@
1
+ import os
2
+ from lightning.pytorch.loggers import CSVLogger
3
+ from lightning.pytorch import loggers as pl_loggers
4
+ from lightning.pytorch.callbacks import LearningRateMonitor
5
+ from lightning.pytorch.callbacks import ModelCheckpoint
6
+
7
+
8
+ def add_callbacks(args):
9
+ log_dir = args.savedmodel_path
10
+ os.makedirs(log_dir, exist_ok=True)
11
+
12
+ # --------- Add Callbacks
13
+ checkpoint_callback = ModelCheckpoint(
14
+ dirpath=os.path.join(log_dir, "checkpoints"),
15
+ filename="{epoch}-{step}",
16
+ save_top_k=-1,
17
+ every_n_train_steps=args.every_n_train_steps,
18
+ save_last=False,
19
+ save_weights_only=False
20
+ )
21
+
22
+ lr_monitor_callback = LearningRateMonitor(logging_interval='step')
23
+ tb_logger = pl_loggers.TensorBoardLogger(save_dir=os.path.join(log_dir, "logs"), name="tensorboard")
24
+ csv_logger = CSVLogger(save_dir=os.path.join(log_dir, "logs"), name="csvlog")
25
+
26
+ to_returns = {
27
+ "callbacks": [checkpoint_callback, lr_monitor_callback],
28
+ "loggers": [csv_logger, tb_logger]
29
+ }
30
+ return to_returns
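add_callbacks only reads args.savedmodel_path and args.every_n_train_steps, so it can be exercised on its own before wiring it into training. A minimal sketch, using an argparse Namespace as a stand-in for the project's parsed arguments; the path and step count are illustrative:

from argparse import Namespace
import lightning.pytorch as pl
from lightning_tools.callbacks import add_callbacks

# hypothetical args; only the two attributes used by add_callbacks are set here
args = Namespace(savedmodel_path='./save/demo', every_n_train_steps=1000)

cbs = add_callbacks(args)
trainer = pl.Trainer(
    max_epochs=1,
    callbacks=cbs["callbacks"],  # ModelCheckpoint + LearningRateMonitor
    logger=cbs["loggers"],       # CSVLogger + TensorBoardLogger
)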
lightning_tools/optim.py ADDED
@@ -0,0 +1,59 @@
1
+ from transformers import AdamW
2
+ import functools
3
+ from torch.optim.lr_scheduler import LambdaLR
4
+
5
+
6
+ def lr_lambda(current_step, num_warmup_steps, num_training_steps):
7
+ if current_step < num_warmup_steps:
8
+ return float(current_step) / float(max(1, num_warmup_steps))
9
+ return max(
10
+ 0.0, float(num_training_steps - current_step) / float(max(1, num_training_steps - num_warmup_steps))
11
+ )
12
+
13
+
14
+ def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, last_epoch=-1):
15
+ """
16
+ Create a schedule with a learning rate that decreases linearly from the initial lr set in the optimizer to 0, after
17
+ a warmup period during which it increases linearly from 0 to the initial lr set in the optimizer.
18
+
19
+ Args:
20
+ optimizer (:class:`~torch.optim.Optimizer`):
21
+ The optimizer for which to schedule the learning rate.
22
+ num_warmup_steps (:obj:`int`):
23
+ The number of steps for the warmup phase.
24
+ num_training_steps (:obj:`int`):
25
+ The total number of training steps.
26
+ last_epoch (:obj:`int`, `optional`, defaults to -1):
27
+ The index of the last epoch when resuming training.
28
+
29
+ Return:
30
+ :obj:`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
31
+ """
32
+
33
+ return LambdaLR(optimizer, functools.partial(lr_lambda, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps), last_epoch)
34
+
35
+
36
+ def config_optimizer(parameters, init_lr, warmup_steps, max_steps, name='lr'):
37
+ """
38
+ The original BERT optimizer does not apply weight decay to bias and LayerNorm parameters.
39
+ Args:
40
+ parameters:
41
+ init_lr:
42
+ warmup_steps:
43
+ max_steps:
44
+ name:
45
+ weight_decay:
46
+
47
+ Returns:
48
+
49
+ """
50
+ optimizer = AdamW(
51
+ parameters, lr=init_lr, eps=1e-8, correct_bias=False
52
+ )
53
+
54
+ scheduler = get_linear_schedule_with_warmup(
55
+ optimizer, num_warmup_steps=warmup_steps, num_training_steps=max_steps,
56
+ )
57
+ scheduler = {'scheduler': scheduler, 'name': name, 'interval': 'step', 'frequency': 1}
58
+
59
+ return optimizer, scheduler
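config_optimizer returns the (optimizer, scheduler-dict) pair in the shape expected by LightningModule.configure_optimizers, with the scheduler stepped every training step. A minimal sketch of wiring it into a LightningModule; the module and the hyperparameter values are illustrative, not taken from this repository:

import torch.nn as nn
import lightning.pytorch as pl
from lightning_tools.optim import config_optimizer


class TinyModule(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(8, 2)

    def configure_optimizers(self):
        # illustrative schedule: 500 warmup steps, then linear decay to 0 over 10000 steps
        optimizer, scheduler = config_optimizer(
            self.parameters(), init_lr=1e-4, warmup_steps=500, max_steps=10000
        )
        return {"optimizer": optimizer, "lr_scheduler": scheduler}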
models/R2GenGPT.py ADDED
@@ -0,0 +1,379 @@
1
+ import os
2
+ import json
3
+ import torch
4
+ import torch.nn as nn
5
+ import lightning.pytorch as pl
6
+ from transformers import LlamaForCausalLM, LlamaTokenizer
7
+ from evalcap.bleu.bleu import Bleu
8
+ from evalcap.rouge.rouge import Rouge
9
+ from evalcap.cider.cider import Cider
10
+ from evalcap.meteor.meteor import Meteor
11
+ from transformers import SwinModel
12
+ from lightning_tools.optim import config_optimizer
13
+ from peft import get_peft_model, LoraConfig, TaskType
14
+ import pdb
15
+
16
+
17
+
18
+ class R2GenGPT(pl.LightningModule):
19
+ """
20
+ R2GenGPT model.
21
+ """
22
+ def __init__(self, args):
23
+ super().__init__()
24
+ self.args = args
25
+ self.save_hyperparameters(args)
26
+
27
+ print(f'Loading vision encoder:{args.vision_model}')
28
+ self.visual_encoder = SwinModel.from_pretrained(args.vision_model)
29
+ if args.vis_use_lora:
30
+ peft_config_visual = LoraConfig(
31
+ r=args.vis_r,
32
+ lora_alpha=args.vis_alpha,
33
+ target_modules=["query", "value"],
34
+ lora_dropout=args.lora_dropout,
35
+ bias="none",
36
+ modules_to_save=["classifier"],
37
+ )
38
+ self.visual_encoder = get_peft_model(self.visual_encoder, peft_config_visual)
39
+ self.visual_encoder.print_trainable_parameters()
40
+ print('Loading vision encoder with LoRA -- Done')
41
+ elif args.freeze_vm:
42
+ for name, param in self.visual_encoder.named_parameters():
43
+ param.requires_grad = False
44
+ print(f'Loading Frozen vision encoder:{args.vision_model} -- Done')
45
+ else:
46
+ print(f'Loading Trainable vision encoder:{args.vision_model} -- Done')
47
+
48
+ print('Loading LLAMA')
49
+ self.llama_tokenizer = LlamaTokenizer.from_pretrained(args.llama_model, use_fast=False)
50
+ self.llama_tokenizer.pad_token_id = 0
51
+ if args.low_resource:
52
+ self.llama_model = LlamaForCausalLM.from_pretrained(
53
+ args.llama_model,
54
+ torch_dtype=torch.float16,
55
+ load_in_8bit=True,
56
+ device_map="auto"
57
+ )
58
+ else:
59
+ self.llama_model = LlamaForCausalLM.from_pretrained(
60
+ args.llama_model,
61
+ torch_dtype=torch.float16,
62
+ )
63
+
64
+ if args.llm_use_lora:
65
+ self.embed_tokens = self.llama_model.get_input_embeddings()
66
+ peft_config = LoraConfig(
67
+ task_type=TaskType.CAUSAL_LM, inference_mode=False, r=args.llm_r, lora_alpha=args.llm_alpha, lora_dropout=args.lora_dropout
68
+ )
69
+ self.llama_model = get_peft_model(self.llama_model, peft_config)
70
+ self.llama_model.print_trainable_parameters()
71
+ print('Loading LLAMA LoRA Done')
72
+ else:
73
+ self.embed_tokens = self.llama_model.get_input_embeddings()
74
+ for name, param in self.llama_model.named_parameters():
75
+ param.requires_grad = False
76
+ print('Loading LLAMA Done')
77
+
78
+ self.llama_proj = nn.Linear(self.visual_encoder.num_features, self.llama_model.config.hidden_size)
79
+ self.layer_norm = nn.LayerNorm(self.llama_model.config.hidden_size)
80
+ self.end_sym = args.end_sym
81
+ self.prompt = 'Generate a comprehensive and detailed diagnosis report for this chest xray image.'
82
+ self.val_step_outputs = []
83
+ self.test_step_outputs = []
84
+ self.val_score = 0.0
85
+
86
+ if args.delta_file is not None:
87
+ state_dict = torch.load(args.delta_file, map_location=torch.device(f'cuda:{torch.cuda.current_device()}'))['model']
88
+ self.load_state_dict(state_dict=state_dict, strict=False)
89
+ print(f'Load checkpoint from {args.delta_file}')
90
+
91
+
92
+ def score(self, ref, hypo):
93
+ """
94
+ ref, dictionary of reference sentences (id, sentence)
95
+ hypo, dictionary of hypothesis sentences (id, sentence)
96
+ score, dictionary of scores
97
+ """
98
+ scorers = [
99
+ (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
100
+ (Rouge(), "ROUGE_L"),
101
+ (Meteor(), "METEOR"),
102
+ (Cider(), "CIDEr")
103
+ ]
104
+ final_scores = {}
105
+ for scorer, method in scorers:
106
+ score, scores = scorer.compute_score(ref, hypo)
107
+ if type(score) == list:
108
+ for m, s in zip(method, score):
109
+ final_scores[m] = s
110
+ else:
111
+ final_scores[method] = score
112
+ return final_scores
113
+
114
+
115
+ def encode_img(self, images):
116
+ image_embeds = []
117
+ for image in images:
118
+ device = image.device
119
+ if self.hparams.global_only:
120
+ image_embed = self.visual_encoder(image)['pooler_output'].unsqueeze(1).to(device)
121
+ else:
122
+ image_embed = self.visual_encoder(image)['last_hidden_state'].to(device)
123
+ image_embeds.append(image_embed)
124
+
125
+ image_embeds = torch.stack(image_embeds).mean(0)
126
+ inputs_llama = self.llama_proj(image_embeds)
127
+ atts_llama = torch.ones(inputs_llama.size()[:-1], dtype=torch.long).to(image.device)
128
+ return inputs_llama, atts_llama
129
+
130
+
131
+ def prompt_wrap(self, img_embeds, atts_img):
132
+ prompt=f'Human: <Img><ImageHere></Img> {self.prompt} \nAssistant:'
133
+ batch_size = img_embeds.shape[0]
134
+ p_before, p_after = prompt.split('<ImageHere>')
135
+ p_before_tokens = self.llama_tokenizer(
136
+ p_before, return_tensors="pt", add_special_tokens=False).to(img_embeds.device)
137
+ p_after_tokens = self.llama_tokenizer(
138
+ p_after, return_tensors="pt", add_special_tokens=False).to(img_embeds.device)
139
+ p_before_embeds = self.embed_tokens(p_before_tokens.input_ids).expand(batch_size, -1, -1)
140
+ p_after_embeds = self.embed_tokens(p_after_tokens.input_ids).expand(batch_size, -1, -1)
141
+ wrapped_img_embeds = torch.cat([p_before_embeds, img_embeds, p_after_embeds], dim=1)
142
+ wrapped_atts_img = atts_img[:, :1].expand(-1, wrapped_img_embeds.shape[1])
143
+ return wrapped_img_embeds, wrapped_atts_img
144
+
145
+
146
+ def forward(self, samples):
147
+ image = samples["image"]
148
+ img_embeds, atts_img = self.encode_img(image)
149
+ img_embeds = self.layer_norm(img_embeds)
150
+
151
+ img_embeds, atts_img = self.prompt_wrap(img_embeds, atts_img)
152
+
153
+ self.llama_tokenizer.padding_side = "right"
154
+ text = [t + self.end_sym for t in samples["input_text"]]
155
+
156
+ to_regress_tokens = self.llama_tokenizer(
157
+ text,
158
+ return_tensors="pt",
159
+ padding="max_length",
160
+ truncation=True,
161
+ max_length=self.hparams.max_length,
162
+ add_special_tokens=False
163
+ ).to(image[0].device)
164
+
165
+ targets = to_regress_tokens.input_ids.masked_fill(
166
+ to_regress_tokens.input_ids == 0, -100
167
+ )
168
+
169
+ empty_targets = (
170
+ torch.ones([atts_img.shape[0], atts_img.shape[1]+1],
171
+ dtype=torch.long).to(image[0].device).fill_(-100) # plus one for bos
172
+ )
173
+ targets = torch.cat([empty_targets, targets], dim=1)
174
+
175
+ batch_size = img_embeds.shape[0]
176
+ bos = torch.ones([batch_size, 1],
177
+ dtype=to_regress_tokens.input_ids.dtype,
178
+ device=to_regress_tokens.input_ids.device) * self.llama_tokenizer.bos_token_id
179
+ bos_embeds = self.embed_tokens(bos)
180
+ atts_bos = atts_img[:, :1]
181
+
182
+ to_regress_embeds = self.embed_tokens(to_regress_tokens.input_ids)
183
+ inputs_embeds = torch.cat([bos_embeds, img_embeds, to_regress_embeds], dim=1)
184
+ attention_mask = torch.cat([atts_bos, atts_img, to_regress_tokens.attention_mask], dim=1)
185
+
186
+ outputs = self.llama_model(
187
+ inputs_embeds=inputs_embeds,
188
+ attention_mask=attention_mask,
189
+ return_dict=True,
190
+ labels=targets,
191
+ )
192
+ loss = outputs.loss
193
+ return {"loss": loss}
194
+
195
+ def training_step(self, batch, batch_idx):
196
+ result = self(batch)
197
+ self.log_dict(result, prog_bar=True)
198
+ return result
199
+
200
+ def save_checkpoint(self, eval_res):
201
+ current_epoch, global_step = self.trainer.current_epoch, self.trainer.global_step
202
+ param_grad_dic = {
203
+ k: v.requires_grad for (k, v) in self.named_parameters() if v.requires_grad
204
+ }
205
+ state_dict = self.state_dict()
206
+ for k in list(state_dict.keys()):
207
+ if k not in param_grad_dic.keys():
208
+ del state_dict[k]
209
+ save_obj = {
210
+ "model": state_dict,
211
+ "config": self.hparams,
212
+ "epoch": current_epoch,
213
+ "step":global_step
214
+ }
215
+ os.makedirs(os.path.join(self.hparams.savedmodel_path, 'checkpoints'), exist_ok=True)
216
+ save_to = os.path.join(
217
+ self.hparams.savedmodel_path, 'checkpoints',
218
+ "checkpoint_epoch{}_step{}_bleu{:3f}_cider{:3f}.pth".format(current_epoch, global_step, eval_res['Bleu_4'], eval_res['CIDEr']),
219
+ )
220
+ self.print("Saving checkpoint at step {} to {}.".format(global_step, save_to))
221
+ torch.save(save_obj, save_to)
222
+
223
+ def validation_step(self, samples, batch_idx):
224
+ self.llama_tokenizer.padding_side = "right"
225
+ to_regress_tokens = self.llama_tokenizer(
226
+ samples['input_text'],
227
+ return_tensors="pt",
228
+ padding="max_length",
229
+ truncation=True,
230
+ max_length=self.hparams.max_length,
231
+ add_special_tokens=False
232
+ )
233
+
234
+ image = samples["image"]
235
+ img_embeds, atts_img = self.encode_img(image)
236
+ img_embeds = self.layer_norm(img_embeds)
237
+ img_embeds, atts_img = self.prompt_wrap(img_embeds, atts_img)
238
+
239
+ batch_size = img_embeds.shape[0]
240
+ bos = torch.ones([batch_size, 1],
241
+ dtype=atts_img.dtype,
242
+ device=atts_img.device) * self.llama_tokenizer.bos_token_id
243
+ bos_embeds = self.embed_tokens(bos)
244
+ atts_bos = atts_img[:, :1]
245
+
246
+ inputs_embeds = torch.cat([bos_embeds, img_embeds], dim=1)
247
+ attention_mask = torch.cat([atts_bos, atts_img], dim=1)
248
+
249
+ outputs = self.llama_model.generate(
250
+ inputs_embeds=inputs_embeds,
251
+ num_beams=self.hparams.beam_size,
252
+ do_sample=self.hparams.do_sample,
253
+ min_new_tokens=self.hparams.min_new_tokens,
254
+ max_new_tokens=self.hparams.max_new_tokens,
255
+ repetition_penalty=self.hparams.repetition_penalty,
256
+ length_penalty=self.hparams.length_penalty,
257
+ temperature=self.hparams.temperature,
258
+ )
259
+ hypo = [self.decode(i) for i in outputs]
260
+ ref = [self.decode(i) for i in to_regress_tokens['input_ids']]
261
+ self.val_step_outputs.append({"hypo": hypo, "ref": ref, "id": samples["id"]})
262
+ return hypo, ref
263
+
264
+ def decode(self, output_token):
265
+ if output_token[0] == 0: # the model might output an unknown token <unk> at the beginning; remove it
266
+ output_token = output_token[1:]
267
+ if output_token[0] == 1: # there may also be a start token <s> at the beginning; remove it
268
+ output_token = output_token[1:]
269
+ output_text = self.llama_tokenizer.decode(output_token, add_special_tokens=False)
270
+ output_text = output_text.split('</s>')[0].strip()
271
+ output_text = output_text.replace('<unk>', '')
272
+ return output_text
273
+
274
+ def on_validation_epoch_end(self):
275
+ ref, hypo, ids = [], [], []
276
+ for i in self.val_step_outputs:
277
+ ref.extend(i['ref'])
278
+ hypo.extend(i['hypo'])
279
+ ids.extend(i['id'])
280
+
281
+ ref = {k:[v] for k, v in zip(ids, ref)}
282
+ hypo = {k:[v] for k, v in zip(ids, hypo)}
283
+ eval_res = self.score(ref=ref,hypo=hypo)
284
+ self.log_dict(eval_res, sync_dist=True, logger=True)
285
+
286
+ result_folder = os.path.join(self.hparams.savedmodel_path, 'result')
287
+ os.makedirs(result_folder, exist_ok=True)
288
+ current_epoch, global_step = self.trainer.current_epoch, self.trainer.global_step
289
+ json.dump(hypo, open(os.path.join(result_folder, f"result_{current_epoch}_{global_step}" + '.json'), 'w'))
290
+ json.dump(ref, open(os.path.join(result_folder, 'refs.json'), 'w'))
291
+ self.print(eval_res)
292
+
293
+ val_score = 0
294
+ for score_type, weight in zip(self.hparams.scorer_types, self.hparams.weights):
295
+ val_score += eval_res[score_type] * weight
296
+
297
+ if self.trainer.local_rank == 0:
298
+ if val_score > self.val_score:
299
+ self.save_checkpoint(eval_res)
300
+ self.val_score = val_score
301
+ self.val_step_outputs.clear()
302
+
303
+
304
+ def test_step(self, samples, batch_idx):
305
+ self.llama_tokenizer.padding_side = "right"
306
+ to_regress_tokens = self.llama_tokenizer(
307
+ samples['input_text'],
308
+ return_tensors="pt",
309
+ padding="max_length",
310
+ truncation=True,
311
+ max_length=self.hparams.max_length,
312
+ add_special_tokens=False
313
+ )
314
+
315
+ image = samples["image"]
316
+ img_embeds, atts_img = self.encode_img(image)
317
+ img_embeds = self.layer_norm(img_embeds)
318
+ img_embeds, atts_img = self.prompt_wrap(img_embeds, atts_img)
319
+
320
+ batch_size = img_embeds.shape[0]
321
+ bos = torch.ones([batch_size, 1],
322
+ dtype=atts_img.dtype,
323
+ device=atts_img.device) * self.llama_tokenizer.bos_token_id
324
+ bos_embeds = self.embed_tokens(bos)
325
+ atts_bos = atts_img[:, :1]
326
+
327
+ inputs_embeds = torch.cat([bos_embeds, img_embeds], dim=1)
328
+ attention_mask = torch.cat([atts_bos, atts_img], dim=1)
329
+
330
+ outputs = self.llama_model.generate(
331
+ inputs_embeds=inputs_embeds,
332
+ num_beams=self.hparams.beam_size,
333
+ do_sample=self.hparams.do_sample,
334
+ min_new_tokens=self.hparams.min_new_tokens,
335
+ max_new_tokens=self.hparams.max_new_tokens,
336
+ repetition_penalty=self.hparams.repetition_penalty,
337
+ length_penalty=self.hparams.length_penalty,
338
+ temperature=self.hparams.temperature,
339
+ )
340
+ hypo = [self.decode(i) for i in outputs]
341
+ ref = [self.decode(i) for i in to_regress_tokens['input_ids']]
342
+ self.test_step_outputs.append({"hypo": hypo, "ref": ref, "id": samples["id"]})
343
+ return hypo, ref
344
+
345
+
346
+ def on_test_epoch_end(self):
347
+ """
348
+ This function is called at the end of the test epoch.
349
+ It is recommended to test on a single device so that each sample/batch is evaluated exactly once, which matters when benchmarking results for papers. In a multi-device setting (e.g. strategy="ddp"), DistributedSampler replicates some samples across devices to keep batch sizes equal when the inputs are uneven, so some samples may be evaluated more than once.
350
+ """
351
+ ref, hypo, ids = [], [], []
352
+ for i in self.test_step_outputs:
353
+ ref.extend(i['ref'])
354
+ hypo.extend(i['hypo'])
355
+ ids.extend(i['id'])
356
+
357
+ ref = {k:[v] for k, v in zip(ids, ref)}
358
+ hypo = {k:[v] for k, v in zip(ids, hypo)}
359
+ eval_res = self.score(ref=ref,hypo=hypo)
360
+
361
+ result_folder = os.path.join(self.hparams.savedmodel_path, 'result')
362
+ os.makedirs(result_folder, exist_ok=True)
363
+ json.dump(hypo, open(os.path.join(result_folder, f"test_result.json"), 'w'))
364
+ json.dump(ref, open(os.path.join(result_folder, 'test_refs.json'), 'w'))
365
+ self.print(f"Test result of {self.hparams.delta_file}: {eval_res}")
366
+
367
+ def configure_optimizers(self):
368
+ optimizer = torch.optim.AdamW(self.parameters(), lr=self.hparams.learning_rate)
369
+ scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=self.hparams.max_epochs, eta_min=1e-6)
370
+ return {"optimizer": optimizer, "lr_scheduler": scheduler}
371
+
372
+ def get_progress_bar_dict(self):
373
+ # don't show the version number
374
+ items = super().get_progress_bar_dict()
375
+ items.pop("v_num", None)
376
+ return items
377
+
378
+ def optimizer_zero_grad(self, epoch, batch_idx, optimizer):
379
+ optimizer.zero_grad()
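For reference, score() follows the coco-caption convention: ref and hypo each map a sample id to a single-element list of report strings, which is exactly the layout that on_validation_epoch_end and on_test_epoch_end assemble from the decoded outputs. A minimal sketch of that layout with invented study ids and texts (actually computing the metrics also needs the evalcap scorers available, and METEOR relies on the bundled meteor-1.5.jar, hence Java):

# hypothetical study ids and report texts in the {id: [text]} layout consumed by score()
ref = {
    'study_0001': ['the heart is normal in size . the lungs are clear .'],
    'study_0002': ['no acute cardiopulmonary abnormality .'],
}
hypo = {
    'study_0001': ['heart size is within normal limits . lungs are clear .'],
    'study_0002': ['no acute cardiopulmonary process .'],
}

# with an instantiated model, the metric dict would be obtained as:
# eval_res = model.score(ref=ref, hypo=hypo)
# e.g. {'Bleu_1': ..., 'Bleu_4': ..., 'ROUGE_L': ..., 'METEOR': ..., 'CIDEr': ...}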
requirements.txt CHANGED
@@ -1,5 +1,8 @@
1
- streamlit
2
  torch
3
- torchvision
4
- requests
5
- Pillow
 
 
 
 
 
 
1
  torch
2
+ peft
3
+ tensorboardX
4
+ transformers==4.30.2
5
+ lightning==2.0.5
6
+ Pillow
7
+ numpy
8
+ gradio
scripts/1-1.shallow_run_iuxray.sh ADDED
@@ -0,0 +1,30 @@
1
+ #!/bin/bash
2
+
3
+ dataset="iu_xray"
4
+ annotation="data/iu_xray/annotation.json"
5
+ base_dir="./data/iu_xray/images"
6
+
7
+ version="v1_shallow"
8
+ savepath="./save/$dataset/$version"
9
+
10
+ python -u train.py \
11
+ --dataset ${dataset} \
12
+ --annotation ${annotation} \
13
+ --base_dir ${base_dir} \
14
+ --batch_size 8 \
15
+ --val_batch_size 12 \
16
+ --freeze_vm True \
17
+ --vis_use_lora False \
18
+ --savedmodel_path ${savepath} \
19
+ --max_length 60 \
20
+ --min_new_tokens 40 \
21
+ --max_new_tokens 100 \
22
+ --repetition_penalty 2.0 \
23
+ --length_penalty 2.0 \
24
+ --num_workers 8 \
25
+ --devices 2 \
26
+ --max_epochs 15 \
27
+ --limit_val_batches 1.0 \
28
+ --val_check_interval 1.0 \
29
+ --num_sanity_val_steps 0 \
30
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/1-2.shallow_test_iuxray.sh ADDED
@@ -0,0 +1,28 @@
1
+ #!/bin/bash
2
+
3
+ dataset="iu_xray"
4
+ annotation="data/iu_xray/annotation.json"
5
+ base_dir="./data/iu_xray/images"
6
+ delta_file="/apdcephfs/share_733425/vinnylywang/zhanyuwang/Code/R2GenGPT/save/iu_xray/v1_shallow/checkpoints/checkpoint_epoch11_step1548_bleu0.155866_cider0.450477.pth"
7
+
8
+ version="v1_shallow"
9
+ savepath="./save/$dataset/$version"
10
+
11
+ python -u train.py \
12
+ --test \
13
+ --dataset ${dataset} \
14
+ --annotation ${annotation} \
15
+ --base_dir ${base_dir} \
16
+ --delta_file ${delta_file} \
17
+ --test_batch_size 16 \
18
+ --freeze_vm True \
19
+ --vis_use_lora False \
20
+ --savedmodel_path ${savepath} \
21
+ --max_length 60 \
22
+ --min_new_tokens 40 \
23
+ --max_new_tokens 100 \
24
+ --repetition_penalty 2.0 \
25
+ --length_penalty 2.0 \
26
+ --num_workers 8 \
27
+ --devices 1 \
28
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/2-1.delta_run_iuxray.sh ADDED
@@ -0,0 +1,30 @@
1
+ #!/bin/bash
2
+
3
+ dataset="iu_xray"
4
+ annotation="data/iu_xray/annotation.json"
5
+ base_dir="./data/iu_xray/images"
6
+
7
+ version="v1_delta"
8
+ savepath="./save/$dataset/$version"
9
+
10
+ python -u train.py \
11
+ --dataset ${dataset} \
12
+ --annotation ${annotation} \
13
+ --base_dir ${base_dir} \
14
+ --batch_size 8 \
15
+ --val_batch_size 12 \
16
+ --freeze_vm True \
17
+ --vis_use_lora True \
18
+ --savedmodel_path ${savepath} \
19
+ --max_length 60 \
20
+ --min_new_tokens 40 \
21
+ --max_new_tokens 100 \
22
+ --repetition_penalty 2.0 \
23
+ --length_penalty 2.0 \
24
+ --num_workers 8 \
25
+ --devices 2 \
26
+ --max_epochs 15 \
27
+ --limit_val_batches 1.0 \
28
+ --val_check_interval 1.0 \
29
+ --num_sanity_val_steps 2 \
30
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/2-2.delta_test_iuxray.sh ADDED
@@ -0,0 +1,28 @@
1
+ #!/bin/bash
2
+
3
+ dataset="iu_xray"
4
+ annotation="data/iu_xray/annotation.json"
5
+ base_dir="./data/iu_xray/images"
6
+ delta_file="/apdcephfs/share_733425/vinnylywang/zhanyuwang/Code/R2GenGPT/save/iu_xray/v1_delta/checkpoints/checkpoint_epoch13_step1806_bleu0.161532_cider0.530213.pth"
7
+
8
+ version="v1_delta"
9
+ savepath="./save/$dataset/$version"
10
+
11
+ python -u train.py \
12
+ --test \
13
+ --dataset ${dataset} \
14
+ --annotation ${annotation} \
15
+ --base_dir ${base_dir} \
16
+ --delta_file ${delta_file} \
17
+ --test_batch_size 16 \
18
+ --freeze_vm True \
19
+ --vis_use_lora True \
20
+ --savedmodel_path ${savepath} \
21
+ --max_length 60 \
22
+ --min_new_tokens 40 \
23
+ --max_new_tokens 100 \
24
+ --repetition_penalty 2.0 \
25
+ --length_penalty 2.0 \
26
+ --num_workers 8 \
27
+ --devices 1 \
28
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/3-1.deep_run_iuxray.sh ADDED
@@ -0,0 +1,30 @@
1
+ #!/bin/bash
2
+
3
+ dataset="iu_xray"
4
+ annotation="data/iu_xray/annotation.json"
5
+ base_dir="./data/iu_xray/images"
6
+
7
+ version="v1_deep"
8
+ savepath="./save/$dataset/$version"
9
+
10
+ python -u train.py \
11
+ --dataset ${dataset} \
12
+ --annotation ${annotation} \
13
+ --base_dir ${base_dir} \
14
+ --batch_size 8 \
15
+ --val_batch_size 12 \
16
+ --freeze_vm False \
17
+ --vis_use_lora False \
18
+ --savedmodel_path ${savepath} \
19
+ --max_length 60 \
20
+ --min_new_tokens 40 \
21
+ --max_new_tokens 100 \
22
+ --repetition_penalty 2.0 \
23
+ --length_penalty 2.0 \
24
+ --num_workers 8 \
25
+ --devices 2 \
26
+ --max_epochs 15 \
27
+ --limit_val_batches 1.0 \
28
+ --val_check_interval 1.0 \
29
+ --num_sanity_val_steps 2 \
30
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/3-2.deep_test_iuxray.sh ADDED
@@ -0,0 +1,28 @@
1
+ #!/bin/bash
2
+
3
+ dataset="iu_xray"
4
+ annotation="data/iu_xray/annotation.json"
5
+ base_dir="./data/iu_xray/images"
6
+ delta_file="/apdcephfs/share_733425/vinnylywang/zhanyuwang/Code/R2GenGPT/save/iu_xray/v1_deep/checkpoints/checkpoint_epoch12_step1677_bleu0.185560_cider0.678231.pth"
7
+
8
+ version="v1_deep"
9
+ savepath="./save/$dataset/$version"
10
+
11
+ python -u train.py \
12
+ --test \
13
+ --dataset ${dataset} \
14
+ --annotation ${annotation} \
15
+ --base_dir ${base_dir} \
16
+ --delta_file ${delta_file} \
17
+ --test_batch_size 16 \
18
+ --freeze_vm False \
19
+ --vis_use_lora False \
20
+ --savedmodel_path ${savepath} \
21
+ --max_length 60 \
22
+ --min_new_tokens 40 \
23
+ --max_new_tokens 100 \
24
+ --repetition_penalty 2.0 \
25
+ --length_penalty 2.0 \
26
+ --num_workers 8 \
27
+ --devices 1 \
28
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/4-1.shallow_run.sh ADDED
@@ -0,0 +1,39 @@
1
+ #!/bin/bash
2
+
3
+ dataset="mimic_cxr"
4
+ annotation="./data/mimic_cxr/annotation.json"
5
+ base_dir="./data/mimic_cxr/images"
6
+
7
+ version="v1_shallow"
8
+ savepath="./save/$dataset/$version"
9
+
10
+ if [ ! -d "$savepath" ]; then
11
+ mkdir -p "$savepath"
12
+ echo "Folder '$savepath' created."
13
+ else
14
+ echo "Folder '$savepath' already exists."
15
+ fi
16
+
17
+ python -u train.py \
18
+ --dataset ${dataset} \
19
+ --annotation ${annotation} \
20
+ --base_dir ${base_dir} \
21
+ --batch_size 8 \
22
+ --val_batch_size 12 \
23
+ --freeze_vm True \
24
+ --vis_use_lora False \
25
+ --savedmodel_path ${savepath} \
26
+ --learning_rate 1e-4 \
27
+ --gradient_clip_val 1 \
28
+ --max_length 100 \
29
+ --min_new_tokens 80 \
30
+ --max_new_tokens 120 \
31
+ --repetition_penalty 2.0 \
32
+ --length_penalty 2.0 \
33
+ --num_workers 8 \
34
+ --devices 4 \
35
+ --max_epochs 5 \
36
+ --limit_val_batches 0.5 \
37
+ --val_check_interval 0.5 \
38
+ --num_sanity_val_steps 2 \
39
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/4-2.shallow_test.sh ADDED
@@ -0,0 +1,28 @@
1
+ #!/bin/bash
2
+
3
+ dataset="mimic_cxr"
4
+ annotation="data/mimic_cxr/my_mimic_anno.json"
5
+ base_dir="./data/mimic_cxr/images"
6
+ delta_file="path/to/pretrained/delta_file"
7
+
8
+ version="v1_shallow"
9
+ savepath="./save/$dataset/$version"
10
+
11
+ python -u train.py \
12
+ --test \
13
+ --dataset ${dataset} \
14
+ --annotation ${annotation} \
15
+ --base_dir ${base_dir} \
16
+ --delta_file ${delta_file} \
17
+ --test_batch_size 16 \
18
+ --freeze_vm True \
19
+ --vis_use_lora False \
20
+ --savedmodel_path ${savepath} \
21
+ --max_length 100 \
22
+ --min_new_tokens 80 \
23
+ --max_new_tokens 120 \
24
+ --repetition_penalty 2.0 \
25
+ --length_penalty 2.0 \
26
+ --num_workers 12 \
27
+ --devices 1 \
28
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/5-1.delta_run.sh ADDED
@@ -0,0 +1,39 @@
1
+ #!/bin/bash
2
+
3
+ dataset="mimic_cxr"
4
+ annotation="data/mimic_cxr/my_mimic_anno.json"
5
+ base_dir="./data/mimic_cxr/images"
6
+
7
+ version="v1_delta"
8
+ savepath="./save/$dataset/$version"
9
+
10
+ if [ ! -d "$savepath" ]; then
11
+ mkdir -p "$savepath"
12
+ echo "Folder '$savepath' created."
13
+ else
14
+ echo "Folder '$savepath' already exists."
15
+ fi
16
+
17
+ python -u train.py \
18
+ --dataset ${dataset} \
19
+ --annotation ${annotation} \
20
+ --base_dir ${base_dir} \
21
+ --batch_size 8 \
22
+ --val_batch_size 16 \
23
+ --freeze_vm True \
24
+ --vis_use_lora True \
25
+ --vis_r 16 \
26
+ --vis_alpha 16 \
27
+ --savedmodel_path ${savepath} \
28
+ --max_length 100 \
29
+ --min_new_tokens 80 \
30
+ --max_new_tokens 120 \
31
+ --repetition_penalty 2.0 \
32
+ --length_penalty 2.0 \
33
+ --num_workers 16 \
34
+ --devices 4 \
35
+ --max_epochs 5 \
36
+ --limit_val_batches 0.5 \
37
+ --val_check_interval 0.5 \
38
+ --num_sanity_val_steps 2 \
39
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/5-2.delta_test.sh ADDED
@@ -0,0 +1,30 @@
1
+ #!/bin/bash
2
+
3
+ dataset="mimic_cxr"
4
+ annotation="data/mimic_cxr/my_mimic_anno.json"
5
+ base_dir="./data/mimic_cxr/images"
6
+ delta_file="path/to/pretrained/delta_file"
7
+
8
+ version="v1_delta"
9
+ savepath="./save/$dataset/$version"
10
+
11
+ python -u train.py \
12
+ --test \
13
+ --dataset ${dataset} \
14
+ --annotation ${annotation} \
15
+ --base_dir ${base_dir} \
16
+ --delta_file ${delta_file} \
17
+ --max_length 100 \
18
+ --min_new_tokens 80 \
19
+ --max_new_tokens 120 \
20
+ --repetition_penalty 2.0 \
21
+ --length_penalty 2.0 \
22
+ --test_batch_size 16 \
23
+ --freeze_vm True \
24
+ --vis_use_lora True \
25
+ --vis_r 16 \
26
+ --vis_alpha 16 \
27
+ --savedmodel_path ${savepath} \
28
+ --num_workers 12 \
29
+ --devices 1 \
30
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/6-1.deep_run.sh ADDED
@@ -0,0 +1,38 @@
1
+ #!/bin/bash
2
+
3
+ dataset="mimic_cxr"
4
+ annotation="data/mimic_cxr/my_mimic_anno.json"
5
+ base_dir="./data/mimic_cxr/images"
6
+
7
+ version="v1_deep"
8
+ savepath="./save/$dataset/$version"
9
+
10
+ if [ ! -d "$savepath" ]; then
11
+ mkdir -p "$savepath"
12
+ echo "Folder '$savepath' created."
13
+ else
14
+ echo "Folder '$savepath' already exists."
15
+ fi
16
+
17
+ python -u train.py \
18
+ --dataset ${dataset} \
19
+ --annotation ${annotation} \
20
+ --base_dir ${base_dir} \
21
+ --batch_size 6 \
22
+ --val_batch_size 12 \
23
+ --freeze_vm False \
24
+ --vis_use_lora False \
25
+ --llm_use_lora False \
26
+ --savedmodel_path ${savepath} \
27
+ --max_length 100 \
28
+ --min_new_tokens 80 \
29
+ --max_new_tokens 120 \
30
+ --repetition_penalty 2.0 \
31
+ --length_penalty 2.0 \
32
+ --num_workers 12 \
33
+ --devices 4 \
34
+ --max_epochs 5 \
35
+ --limit_val_batches 0.5 \
36
+ --val_check_interval 0.5 \
37
+ --num_sanity_val_steps 2 \
38
+ 2>&1 |tee -a ${savepath}/log.txt
scripts/6-2.deep_test.sh ADDED
@@ -0,0 +1,28 @@
1
+ #!/bin/bash
2
+
3
+ dataset="mimic_cxr"
4
+ annotation="data/mimic_cxr/my_mimic_anno.json"
5
+ base_dir="./data/mimic_cxr/images"
6
+ delta_file="path/to/pretrained/delta_file"
7
+
8
+ version="v1_deep"
9
+ savepath="./save/$dataset/$version"
10
+
11
+ python -u train.py \
12
+ --test \
13
+ --dataset ${dataset} \
14
+ --annotation ${annotation} \
15
+ --base_dir ${base_dir} \
16
+ --delta_file ${delta_file} \
17
+ --test_batch_size 16 \
18
+ --max_length 100 \
19
+ --min_new_tokens 80 \
20
+ --max_new_tokens 120 \
21
+ --repetition_penalty 2.0 \
22
+ --length_penalty 2.0 \
23
+ --freeze_vm False \
24
+ --vis_use_lora False \
25
+ --savedmodel_path ${savepath} \
26
+ --num_workers 12 \
27
+ --devices 1 \
28
+ 2>&1 |tee -a ${savepath}/log.txt
train.py ADDED
@@ -0,0 +1,51 @@
1
+ import os
2
+ from pprint import pprint
3
+ from configs.config import parser
4
+ from dataset.data_module import DataModule
5
+ from lightning_tools.callbacks import add_callbacks
6
+ from models.R2GenGPT import R2GenGPT
7
+ from lightning.pytorch import seed_everything
8
+ import lightning.pytorch as pl
9
+
10
+
11
+ def train(args):
12
+ dm = DataModule(args)
13
+ callbacks = add_callbacks(args)
14
+
15
+ trainer = pl.Trainer(
16
+ devices=args.devices,
17
+ num_nodes=args.num_nodes,
18
+ strategy=args.strategy,
19
+ accelerator=args.accelerator,
20
+ precision=args.precision,
21
+ val_check_interval = args.val_check_interval,
22
+ limit_val_batches = args.limit_val_batches,
23
+ max_epochs = args.max_epochs,
24
+ num_sanity_val_steps = args.num_sanity_val_steps,
25
+ accumulate_grad_batches=args.accumulate_grad_batches,
26
+ callbacks=callbacks["callbacks"],
27
+ logger=callbacks["loggers"]
28
+ )
29
+
30
+ if args.ckpt_file is not None:
31
+ model = R2GenGPT.load_from_checkpoint(args.ckpt_file, strict=False)
32
+ else:
33
+ model = R2GenGPT(args)
34
+
35
+ if args.test:
36
+ trainer.test(model, datamodule=dm)
37
+ elif args.validate:
38
+ trainer.validate(model, datamodule=dm)
39
+ else:
40
+ trainer.fit(model, datamodule=dm)
41
+
42
+ def main():
43
+ args = parser.parse_args()
44
+ os.makedirs(args.savedmodel_path, exist_ok=True)
45
+ pprint(vars(args))
46
+ seed_everything(42, workers=True)
47
+ train(args)
48
+
49
+
50
+ if __name__ == '__main__':
51
+ main()