Usage

Installation

git clone https://github.com/panuthept/OKEAN.git
cd OKEAN

conda create -n okean python==3.11.4
conda activate okean

# Run only ONE of the following install commands, chosen according to your CUDA version (or CPU only)
# CUDA 11.8
conda install pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 pytorch-cuda=11.8 -c pytorch -c nvidia
# CUDA 12.1
conda install pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 pytorch-cuda=12.1 -c pytorch -c nvidia
# CPU Only
conda install pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 cpuonly -c pytorch

pip install -e .

Entity Linking

from okean.modules.entity_linking.elq import ELQ

model = ELQ.from_pretrained(
  model_name_or_path="panuthept/okean-elq-wikipedia",
)

texts = [
  "Barack Obama is the former president of the United States.",
  "The Eiffel Tower is located in Paris.",
]
response = model(texts=texts, return_metadata=["id"])
print(response.passages)
>> [
  Passage(
    text='Barack Obama is the former president of the United States.',
    spans=[
      Span(start=0, end=12, surface_form='Barack Obama', confident=0.7972, entity=Entity(identifier=213062, confident=1.0, metadata={'id': {'wikipedia': '534366', 'wikidata': 'Q76'}})),
      Span(start=27, end=57, surface_form='president of the United States', confident=0.5499, entity=Entity(identifier=11887, confident=0.9999, metadata={'id': {'wikipedia': '24113', 'wikidata': 'Q11696'}}))
    ]
  ),
  Passage(
    text='The Eiffel Tower is located in Paris.',
    spans=[
      Span(start=4, end=16, surface_form='Eiffel Tower', confident=0.5214, entity=Entity(identifier=4276, confident=0.9999, metadata={'id': {'wikipedia': '9232', 'wikidata': 'Q243'}})),
      Span(start=31, end=36, surface_form='Paris', confident=0.6658, entity=Entity(identifier=11245, confident=0.9999, metadata={'id': {'wikipedia': '22989', 'wikidata': 'Q90'}}))
    ]
  )
]

Entity Disambiguation

from okean.modules.entity_linking.elq import ELQ
from okean.data_types.basic_types import Passage, Span

model = ELQ.from_pretrained(
  model_name_or_path="panuthept/okean-elq-wikipedia",
)

passages = [
  Passage(
    text="Barack Obama is the former president of the United States.", 
    spans=[
      Span(start=0, end=12, surface_form="Barack Obama"),
      Span(start=27, end=57, surface_form="president of the United States"),
    ]
  ),
  Passage(
    text="The Eiffel Tower is located in Paris.",
    spans=[
      Span(start=4, end=16, surface_form="Eiffel Tower"),
      Span(start=31, end=36, surface_form="Paris"),
    ]
  ),
]
response = model(passages=passages, return_metadata=["id"])
print(response.passages)
>> [
  Passage(
    text='Barack Obama is the former president of the United States.',
    spans=[
      Span(start=0, end=12, surface_form='Barack Obama', confident=1.0, entity=Entity(identifier=213062, confident=1.0, metadata={'id': {'wikipedia': '534366', 'wikidata': 'Q76'}})),
      Span(start=27, end=57, surface_form='president of the United States', confident=1.0, entity=Entity(identifier=11887, confident=0.9999, metadata={'id': {'wikipedia': '24113', 'wikidata': 'Q11696'}}))
    ]
  ),
  Passage(
    text='The Eiffel Tower is located in Paris.',
    spans=[
      Span(start=4, end=16, surface_form='Eiffel Tower', confident=1.0, entity=Entity(identifier=4276, confident=0.9999, metadata={'id': {'wikipedia': '9232', 'wikidata': 'Q243'}})),
      Span(start=31, end=36, surface_form='Paris', confident=1.0, entity=Entity(identifier=11245, confident=0.9999, metadata={'id': {'wikipedia': '22989', 'wikidata': 'Q90'}}))
    ]
  )
]
Downloads last month
12
Inference API
Unable to determine this model’s pipeline type. Check the docs.