Spaces:
Running
Running
Update
Browse files- metrics.py +17 -8
- results.duckdb +2 -2
metrics.py
CHANGED
@@ -1,21 +1,28 @@
|
|
1 |
import re
|
2 |
import duckdb
|
3 |
import textwrap
|
|
|
4 |
|
5 |
-
def _parse_answer(text: str) -> str:
|
6 |
"""
|
7 |
-
Converts text to lowercase.
|
8 |
-
|
|
|
9 |
and returns that set.
|
10 |
|
|
|
11 |
Another way to describe this is that we interpret adjacent words as
|
12 |
phrases that must be present literally. However, comma and arrow separate
|
13 |
distinct phrases that may be present in any order. All other characters
|
14 |
are dropped.
|
15 |
"""
|
16 |
text = text.lower()
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def _answer_without_thoughts(completion: str) -> str:
|
21 |
if "<think>" not in completion[:200]:
|
@@ -33,9 +40,11 @@ def _check_answer(completion: str, answer: str) -> bool:
|
|
33 |
completion. We ignore "thoughts", capitalization, and punctuation.
|
34 |
"""
|
35 |
completion = _answer_without_thoughts(completion).lower()
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
39 |
|
40 |
|
41 |
def _clip_text(text: str, width: int) -> str:
|
|
|
1 |
import re
|
2 |
import duckdb
|
3 |
import textwrap
|
4 |
+
from typing import List, Tuple
|
5 |
|
6 |
+
def _parse_answer(text: str) -> List[List[str]]:
|
7 |
"""
|
8 |
+
Converts text to lowercase. Then interprets ";" as a separator between
|
9 |
+
alternatives. Within each alternative, interprets "," and "-->" as separators
|
10 |
+
for elements of a set. Within each set, drops all non-alphanumeric characters
|
11 |
and returns that set.
|
12 |
|
13 |
+
|
14 |
Another way to describe this is that we interpret adjacent words as
|
15 |
phrases that must be present literally. However, comma and arrow separate
|
16 |
distinct phrases that may be present in any order. All other characters
|
17 |
are dropped.
|
18 |
"""
|
19 |
text = text.lower()
|
20 |
+
alternatives = re.split(r';', text)
|
21 |
+
result = [ ]
|
22 |
+
for alternative in alternatives:
|
23 |
+
groups = re.split(r'-->|,', alternative)
|
24 |
+
result.append([" ".join(re.findall(r'\b\w+\b', group)) for group in groups])
|
25 |
+
return result
|
26 |
|
27 |
def _answer_without_thoughts(completion: str) -> str:
|
28 |
if "<think>" not in completion[:200]:
|
|
|
40 |
completion. We ignore "thoughts", capitalization, and punctuation.
|
41 |
"""
|
42 |
completion = _answer_without_thoughts(completion).lower()
|
43 |
+
alternative_answers = _parse_answer(answer)
|
44 |
+
for answer_phrases in alternative_answers:
|
45 |
+
if all(phrase in completion for phrase in answer_phrases):
|
46 |
+
return True
|
47 |
+
return False
|
48 |
|
49 |
|
50 |
def _clip_text(text: str, width: int) -> str:
|
results.duckdb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7d3a9c4a997e0b6741249ba973be3c145b6f660381633d35e5eaa94353ea30f
|
3 |
+
size 39333888
|