Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Minseok Bae
committed on
Commit
·
b46b972
1
Parent(s):
dbcffd4
Added citations
Browse files
- main_backend.py +5 -5
- src/backend/model_operations.py +3 -0
- src/display/about.py +30 -0
main_backend.py
CHANGED
@@ -75,12 +75,12 @@ def run_auto_eval(args):
|
|
75 |
else:
|
76 |
eval_request = manage_requests.EvalRequest(
|
77 |
model=args.model,
|
78 |
-
status=PENDING_STATUS,
|
79 |
precision=args.precision
|
80 |
)
|
81 |
pp.pprint(eval_request)
|
82 |
logging.info("Running reproducibility eval")
|
83 |
-
|
84 |
run_eval_suite.run_evaluation(
|
85 |
eval_request=eval_request,
|
86 |
local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
|
@@ -93,14 +93,14 @@ def run_auto_eval(args):
|
|
93 |
|
94 |
def main():
|
95 |
parser = argparse.ArgumentParser(description="Run auto evaluation with optional reproducibility feature")
|
96 |
-
|
97 |
# Optional arguments
|
98 |
parser.add_argument("--reproduce", type=bool, default=False, help="Reproduce the evaluation results")
|
99 |
parser.add_argument("--model", type=str, default=None, help="Your Model ID")
|
100 |
parser.add_argument("--precision", type=str, default="float16", help="Precision of your model")
|
101 |
-
|
102 |
args = parser.parse_args()
|
103 |
-
|
104 |
run_auto_eval(args)
|
105 |
|
106 |
|
|
|
75 |
else:
|
76 |
eval_request = manage_requests.EvalRequest(
|
77 |
model=args.model,
|
78 |
+
status=PENDING_STATUS,
|
79 |
precision=args.precision
|
80 |
)
|
81 |
pp.pprint(eval_request)
|
82 |
logging.info("Running reproducibility eval")
|
83 |
+
|
84 |
run_eval_suite.run_evaluation(
|
85 |
eval_request=eval_request,
|
86 |
local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
|
|
|
93 |
|
94 |
def main():
|
95 |
parser = argparse.ArgumentParser(description="Run auto evaluation with optional reproducibility feature")
|
96 |
+
|
97 |
# Optional arguments
|
98 |
parser.add_argument("--reproduce", type=bool, default=False, help="Reproduce the evaluation results")
|
99 |
parser.add_argument("--model", type=str, default=None, help="Your Model ID")
|
100 |
parser.add_argument("--precision", type=str, default="float16", help="Precision of your model")
|
101 |
+
|
102 |
args = parser.parse_args()
|
103 |
+
|
104 |
run_auto_eval(args)
|
105 |
|
106 |
|
src/backend/model_operations.py
CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
|
|
8 |
import spacy
|
9 |
# from transformers import AutoModelForCausalLM, AutoTokenizer
|
10 |
from sentence_transformers import CrossEncoder
|
|
|
11 |
from litellm import completion
|
12 |
|
13 |
import src.backend.util as util
|
@@ -22,6 +23,8 @@ nlp = spacy.load("en_core_web_sm")
|
|
22 |
|
23 |
os.environ["HUGGINGFACE_API_KEY"] = envs.TOKEN
|
24 |
|
|
|
|
|
25 |
|
26 |
def load_evaluation_model(model_path):
|
27 |
"""Load the evaluation model from the given path
|
|
|
8 |
import spacy
|
9 |
# from transformers import AutoModelForCausalLM, AutoTokenizer
|
10 |
from sentence_transformers import CrossEncoder
|
11 |
+
import litellm
|
12 |
from litellm import completion
|
13 |
|
14 |
import src.backend.util as util
|
|
|
23 |
|
24 |
os.environ["HUGGINGFACE_API_KEY"] = envs.TOKEN
|
25 |
|
26 |
+
litellm.set_verbose=True
|
27 |
+
|
28 |
|
29 |
def load_evaluation_model(model_path):
|
30 |
"""Load the evaluation model from the given path
|
src/display/about.py
CHANGED
@@ -147,4 +147,34 @@ Make sure you have followed the above steps first.
|
|
147 |
|
148 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
149 |
CITATION_BUTTON_TEXT = r"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
"""
|
|
|
147 |
|
148 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
149 |
CITATION_BUTTON_TEXT = r"""
|
150 |
+
# This CITATION.cff file was generated with cffinit.
|
151 |
+
# Visit https://bit.ly/cffinit to generate yours today!
|
152 |
+
|
153 |
+
cff-version: 1.2.0
|
154 |
+
title: Vectara Hallucination Leaderboard
|
155 |
+
message: >-
|
156 |
+
If you use this dataset, please cite it using the metadata
|
157 |
+
from this file.
|
158 |
+
type: dataset
|
159 |
+
authors:
|
160 |
+
- email: [email protected]
|
161 |
+
given-names: Simon
|
162 |
+
family-names: Hughes
|
163 |
+
- given-names: Minseok
|
164 |
+
family-names: Bae
|
165 |
+
email: [email protected]
|
166 |
+
repository-code: 'https://github.com/vectara/hallucination-leaderboard'
|
167 |
+
url: >-
|
168 |
+
https://github.com/vectara/hallucination-leaderboard/blob/main/README.md
|
169 |
+
abstract: >-
|
170 |
+
A leaderboard comparing LLM performance at maintaining
|
171 |
+
factual consistency when summarizing a set of facts.
|
172 |
+
keywords:
|
173 |
+
- nlp
|
174 |
+
- llm
|
175 |
+
- hallucination
|
176 |
+
- nli
|
177 |
+
- machine learning
|
178 |
+
license: Apache-2.0
|
179 |
+
date-released: '2023-11-01'
|
180 |
"""
|