arjunguha committed on
Commit
8fcea9a
·
unverified ·
1 Parent(s): 0f62e55
Files changed (2) hide show
  1. metrics.py +17 -8
  2. results.duckdb +2 -2
metrics.py CHANGED
@@ -1,21 +1,28 @@
1
  import re
2
  import duckdb
3
  import textwrap
 
4
 
5
- def _parse_answer(text: str) -> str:
6
  """
7
- Converts text to lowercase. Interprets "," and "-->" as separators for
8
- elements of a set. Within each set, drops all non-alphanumeric characters
 
9
  and returns that set.
10
 
 
11
  Another way to describe this is that we interpret adjacent words as
12
  phrases that must be present literally. However, comma and arrow separate
13
  distinct phrases that may be present in any order. All other characters
14
  are dropped.
15
  """
16
  text = text.lower()
17
- groups = re.split(r'-->|,', text)
18
- return [" ".join(re.findall(r'\b\w+\b', group)) for group in groups]
 
 
 
 
19
 
20
  def _answer_without_thoughts(completion: str) -> str:
21
  if "<think>" not in completion[:200]:
@@ -33,9 +40,11 @@ def _check_answer(completion: str, answer: str) -> bool:
33
  completion. We ignore "thoughts", capitalization, and punctuation.
34
  """
35
  completion = _answer_without_thoughts(completion).lower()
36
- answer_phrases = _parse_answer(answer)
37
- r = all(phrase in completion for phrase in answer_phrases)
38
- return r
 
 
39
 
40
 
41
  def _clip_text(text: str, width: int) -> str:
 
1
  import re
2
  import duckdb
3
  import textwrap
4
+ from typing import List, Tuple
5
 
6
+ def _parse_answer(text: str) -> List[List[str]]:
7
  """
8
+ Converts text to lowercase. Then interprets ";" as a separator between
9
+ alternatives. Within each alternative, interprets "," and "-->" as separators
10
+ for elements of a set. Within each set, drops all non-alphanumeric characters
11
  and returns that set.
12
 
13
+
14
  Another way to describe this is that we interpret adjacent words as
15
  phrases that must be present literally. However, comma and arrow separate
16
  distinct phrases that may be present in any order. All other characters
17
  are dropped.
18
  """
19
  text = text.lower()
20
+ alternatives = re.split(r';', text)
21
+ result = [ ]
22
+ for alternative in alternatives:
23
+ groups = re.split(r'-->|,', alternative)
24
+ result.append([" ".join(re.findall(r'\b\w+\b', group)) for group in groups])
25
+ return result
26
 
27
  def _answer_without_thoughts(completion: str) -> str:
28
  if "<think>" not in completion[:200]:
 
40
  completion. We ignore "thoughts", capitalization, and punctuation.
41
  """
42
  completion = _answer_without_thoughts(completion).lower()
43
+ alternative_answers = _parse_answer(answer)
44
+ for answer_phrases in alternative_answers:
45
+ if all(phrase in completion for phrase in answer_phrases):
46
+ return True
47
+ return False
48
 
49
 
50
  def _clip_text(text: str, width: int) -> str:
results.duckdb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa7c7911a1ecf7fe4223995e3d393dd78cf8d4023409197854bf471fd8ab7c48
3
- size 32518144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d3a9c4a997e0b6741249ba973be3c145b6f660381633d35e5eaa94353ea30f
3
+ size 39333888