Spaces:

jdwh08s
/

Autodoc-Lifter

Paused

File size: 10,278 Bytes

89cbc4d

#####################################################
### DOCUMENT PROCESSOR [Summarizer]
#####################################################
### Jonathan Wang

# ABOUT: 
# This creates an app to chat with PDFs.

# This is the Summarizer
# Which creates summaries based on documents.
#####################################################
### TODO Board:
# Summary Index for document?

# https://docs.llamaindex.ai/en/stable/examples/response_synthesizers/tree_summarize/
# https://sourajit16-02-93.medium.com/text-summarization-unleashed-novice-to-maestro-with-llms-and-instant-code-solutions-8d26747689c4

#####################################################
### PROGRAM SETTINGS


#####################################################
### PROGRAM IMPORTS
import logging

from typing import Optional, Sequence, Any, Callable, cast
from llama_index.core.bridge.pydantic import Field, PrivateAttr

from llama_index.core.settings import Settings
from llama_index.core.base.llms.base import BaseLLM
from llama_index.core.multi_modal_llms import MultiModalLLM
from llama_index.core.schema import BaseNode, TextNode, ImageDocument
from llama_index.core.callbacks.base import CallbackManager

from llama_index.core.response_synthesizers import TreeSummarize

# Own Modules
from metadata_adder import ModelMetadataAdder

#####################################################
### CONSTANTS
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default prompt for TextSummaryMetadataAdder.
# `{context_str}` is replaced with the node's text via `str.format`.
DEFAULT_SUMMARY_TEMPLATE = """You are an expert summarizer of information. You are given some information from a document. Summarize the information, and then provide the key information that can be drawn from it. The information is below:
{context_str}
"""

# Prompt asking for a short (at most three sentence) description of a document summary.
# NOTE: Contains no `{context_str}` placeholder and is not referenced in the visible part of this file.
DEFAULT_ONELINE_SUMMARY_TEMPLATE = """You are an expert summarizer of information. You are given a summary of a document. In no more than three sentences, describe the subject of the document, the main ideas of the document, and what types of questions can be answered from it."""

# Passed as the `query_str` to the tree summarizer in `get_tree_summary`.
DEFAULT_TREE_SUMMARY_TEMPLATE = """You are an expert summarizer of information. You are given some text from a document.
Please provide a comprehensive summary of the text.
Include the main subject of the text, the key points or topics, and the most important conclusions if there are any.
The summary should be detailed yet concise."""

# Default prompt for TableSummaryMetadataAdder.
# `{context_str}` is replaced with the table node's text via `str.format`.
DEFAULT_TABLE_SUMMARY_TEMPLATE = """You are an expert summarizer of tables. You are given a table or part of a table in HTML format. The table is below:
{context_str}
----------------
Summarize the table, and then provide the key insights that can be drawn directly from the table. If this is not actually an HTML table or part of an HTML table, please do not respond.
"""

# Default prompt for ImageSummaryMetadataAdder.
# Sent to the model unformatted; the image itself is supplied separately.
DEFAULT_IMAGE_SUMMARY_TEMPLATE = """You are an expert image summarizer. You are given an image. Summarize the image, and then provide the key insights that can be drawn directly from the image, if there are any.
"""

#####################################################
### SCRIPT

class TextSummaryMetadataAdder(ModelMetadataAdder):
    """Adds text summary metadata to nodes using a language model.

    Args:
        metadata_name: The name of the metadata to add to the node.
        llm: The LLM used to generate the summary. Defaults to Settings llm.
        prompt_template: The prompt template used to generate the summary.
            It is formatted with `context_str` set to the node's text.
            Defaults to DEFAULT_SUMMARY_TEMPLATE.
    """

    _llm: BaseLLM = PrivateAttr()

    def __init__(
        self,
        metadata_name: str,
        llm: Optional[BaseLLM] = None,
        prompt_template: Optional[str] = DEFAULT_SUMMARY_TEMPLATE,
        **kwargs: Any
    ) -> None:
        """Init params."""
        llm = llm or Settings.llm
        prompt_template = prompt_template if prompt_template is not None else DEFAULT_SUMMARY_TEMPLATE
        super().__init__(metadata_name=metadata_name, prompt_template=prompt_template, **kwargs)
        # Store the resolved LLM once the base initializer has run.
        # Without this assignment `get_node_metadata` has no model to call.
        self._llm = llm

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this class."""
        return "TextSummaryMetadataAdder"

    def get_node_metadata(self, node: BaseNode) -> Optional[str]:
        """Given a node, get the summary of its text using the language model.

        Args:
            node: The node to summarize.

        Returns:
            The summary text produced by the LLM, or None if the node has no text.
        """
        if (getattr(node, 'text', None) is None):
            return None

        response = self._llm.complete(prompt=self.prompt_template.format(context_str=node.text))
        return response.text


class TableSummaryMetadataAdder(ModelMetadataAdder):
    """Attach an LLM-generated summary of a table to table nodes.

    Args:
        metadata_name: Key under which the summary is stored. Defaults to 'table_summary'.
        llm: Language model that writes the summary. Falls back to the Settings llm.
        prompt_template: Template formatted with `context_str` set to the node's text.
            Falls back to DEFAULT_TABLE_SUMMARY_TEMPLATE when empty or None.
    """
    _llm: BaseLLM = PrivateAttr()

    def __init__(
        self,
        metadata_name: str = "table_summary",  ## TODO: Avoid hardcoding this default string here.
        llm: Optional[BaseLLM] = None,
        prompt_template: Optional[str] = DEFAULT_TABLE_SUMMARY_TEMPLATE,
        **kwargs: Any,
    ) -> None:
        """Resolve defaults, initialise the base class, then store the LLM."""
        resolved_llm = llm if llm else Settings.llm
        resolved_template = prompt_template if prompt_template else DEFAULT_TABLE_SUMMARY_TEMPLATE
        super().__init__(
            metadata_name=metadata_name,
            prompt_template=resolved_template,
            **kwargs,
        )
        self._llm = resolved_llm

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this class."""
        return "TableSummaryMetadataAdder"

    def get_node_metadata(self, node: BaseNode) -> Optional[str]:
        """Summarise a table node with the language model.

        Returns None unless the node is a TextNode that carries both the
        'orignal_table_text' metadata entry and a non-None `text`.
        """
        ## NOTE: The PDF reader marks table nodes with the 'orignal_table_text' metadata key;
        ## that is how tables are told apart from plain text nodes here.
        ## BUG (future): the key name should not be hardcoded.
        is_summarizable_table = (
            isinstance(node, TextNode)
            and node.metadata.get('orignal_table_text') is not None
            and getattr(node, 'text', None) is not None
        )
        if not is_summarizable_table:
            return None

        table_prompt = self.prompt_template.format(context_str=node.text)
        completion = self._llm.complete(table_prompt)
        return completion.text


class ImageSummaryMetadataAdder(ModelMetadataAdder):
    """Adds image summary metadata to a document.

    Args:
        llm: The multi-modal LLM used to generate the image summary.
        prompt_template: The prompt sent alongside the image. Defaults to DEFAULT_IMAGE_SUMMARY_TEMPLATE.
        metadata_name: The name of the metadata to add to the document. Defaults to 'image_summary'.
    """
    _llm: MultiModalLLM = PrivateAttr()

    def __init__(
        self,
        llm: MultiModalLLM,
        prompt_template: str = DEFAULT_IMAGE_SUMMARY_TEMPLATE,
        metadata_name: str = 'image_summary',
        **kwargs: Any,
    ) -> None:
        """Init params."""
        super().__init__(metadata_name=metadata_name, prompt_template=prompt_template, **kwargs)
        self._llm = llm

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this class."""
        return "ImageSummaryMetadataAdder"

    def _get_image_node_metadata(self, node: BaseNode) -> Optional[str]:
        """Handles getting images from image nodes.

        Args:
            node (BaseNode): The image node to get the image summary for. NOTE: This can technically be any type of node so long as it has an image stored.

        Returns:
            Optional[str]: The image summary if it exists. If not, return None.
        """
        has_image_data = (
            (getattr(node, 'image', None) is not None)
            or (getattr(node, 'image_path', None) is not None)
        )
        # `resolve_image` is the method used to convert the node to a PIL image for the model.
        can_resolve_image = callable(getattr(node, "resolve_image", None))
        if (not has_image_data) or (not can_resolve_image):
            # Not a valid image node with image attributes and image conversion.
            return None

        # Check whether the image is of text or not.
        ### TODO: Replace this with a text-overlap thing.
        image = node.resolve_image()  # type: ignore  # callable check above.
        im_width, _ = image.size
        if (im_width < 70):  # TODO: this really should be based on the average text width / whether this is overlapping text.
            return None

        ## NOTE: We're assuming that the llm complete function has a parameter `image_documents` to send image node(s) as input.
        ## This is NOT necessarily true if the end user decides to create their own implementation of a MultiModalLLM.
        response = self._llm.complete(
            prompt=self.prompt_template,
            image_documents=[
                cast(ImageDocument, node)  # NOTE: This is a hack. Technically, node should be an ImageNode, a parent of ImageDocument; but I don't think we'll be using the Document features so this should be okay.
            ],
        )
        return response.text

    def _get_composite_node_metadata(self, node: BaseNode) -> Optional[str]:
        """Handles getting images from composite nodes (i.e., where an image is stored as an original node inside a composite node).

        Args:
            node (BaseNode): The composite node to get the image summary for.

        Returns:
            Optional[str]: The concatenated image summaries of the original nodes. If there are none, return None.
        """
        orig_nodes = node.metadata.get('orig_nodes')
        if (not orig_nodes):
            return None  # no image nodes in the composite node.

        summaries = (self._get_image_node_metadata(orig_node) for orig_node in orig_nodes)
        # Summaries are concatenated directly; nodes without a summary contribute nothing.
        output = "".join(str(summary) for summary in summaries if summary)
        return output or None

    def get_node_metadata(self, node: BaseNode) -> Optional[str]:
        """Get the image summary for a node (or subnodes).

        Nodes whose 'type' metadata starts with 'Composite' are searched for
        image-bearing original nodes; every other node (including nodes with
        no 'type' metadata) is treated as a potential image node itself.
        """
        # Use .get so that nodes without a 'type' entry are skipped gracefully instead of raising KeyError.
        node_type = node.metadata.get('type')
        if (isinstance(node_type, str) and node_type.startswith('Composite')):
            return self._get_composite_node_metadata(node)
        return self._get_image_node_metadata(node)


def get_tree_summarizer(
    llm: Optional[BaseLLM] = None,
    callback_manager: Optional[CallbackManager] = None,
):
    """Build a TreeSummarize response synthesizer.

    Args:
        llm: The LLM to summarize with. Falls back to the Settings llm when not given.
        callback_manager: Optional callback manager handed to the synthesizer.

    Returns:
        The configured TreeSummarize instance.
    """
    chosen_llm = llm if llm else Settings.llm
    return TreeSummarize(llm=chosen_llm, callback_manager=callback_manager)


def get_tree_summary(tree_summarizer: TreeSummarize, text_chunks: Sequence[BaseNode]) -> str:
    """Summarize the text nodes using a tree summarizer.

    Args:
        tree_summarizer (TreeSummarize): The tree summarizer to use.
        text_chunks (Sequence[BaseNode]): The text nodes to summarize. Nodes without usable text are skipped.

    Returns:
        str: The summarized text. Empty string if there is no text to summarize.
    """
    # Keep only nodes that actually carry text; the summarizer expects plain strings.
    texts = [
        chunk_text
        for chunk_text in (getattr(chunk, 'text', None) for chunk in text_chunks)
        if chunk_text
    ]
    if not texts:
        return ""

    # Use the synchronous API: `aget_response` is a coroutine function and would
    # return an un-awaited coroutine here. The result is the summary text itself,
    # not a Response object, so there is no `.response` attribute to read.
    response = tree_summarizer.get_response(
        query_str=DEFAULT_TREE_SUMMARY_TEMPLATE,
        text_chunks=texts,
    )
    return str(response)