alozowski HF staff committed on
Commit
d1ea1bf
1 Parent(s): 37b898a

check_chat_template (#984)

Browse files

- Add a chat_template check (6705d9a783fe38e9a78e222e1d14487e01c7fc5d)

src/submission/check_validity.py CHANGED
@@ -6,7 +6,7 @@ from collections import defaultdict
6
  from datetime import datetime, timedelta, timezone
7
 
8
  import huggingface_hub
9
- from huggingface_hub import ModelCard
10
  from huggingface_hub.hf_api import ModelInfo, get_safetensors_metadata, parse_safetensors_file_metadata
11
  from transformers import AutoConfig, AutoTokenizer
12
 
@@ -179,7 +179,28 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
179
 
180
  return set(file_names), users_to_submission_dates
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
 
 
 
 
 
 
 
 
183
  def get_model_tags(model_card, model: str):
184
  is_merge_from_metadata = False
185
  is_moe_from_metadata = False
 
6
  from datetime import datetime, timedelta, timezone
7
 
8
  import huggingface_hub
9
+ from huggingface_hub import ModelCard, hf_hub_download
10
  from huggingface_hub.hf_api import ModelInfo, get_safetensors_metadata, parse_safetensors_file_metadata
11
  from transformers import AutoConfig, AutoTokenizer
12
 
 
179
 
180
  return set(file_names), users_to_submission_dates
181
 
182
def check_chat_template(model: str, revision: str) -> tuple[bool, str]:
    """Check whether a model repo's tokenizer defines a chat template.

    Downloads only ``tokenizer_config.json`` from the Hub (avoiding a full
    tokenizer/model download) and looks for a ``chat_template`` key.

    Args:
        model: Hub repo id, e.g. ``"org/model-name"``.
        revision: Git revision (branch, tag, or commit sha) to inspect.

    Returns:
        ``(True, "")`` when a chat_template is present, otherwise
        ``(False, <error message>)``. Any failure (missing file, network
        error, invalid JSON) is reported as a message rather than raised.
    """
    # NOTE(review): relies on a module-level `import json`; the diff does not
    # show it being added — confirm it exists in this file's import block.
    try:
        # Attempt to download only the tokenizer_config.json file.
        config_file = hf_hub_download(
            repo_id=model,
            filename="tokenizer_config.json",
            revision=revision,
            repo_type="model",
        )

        # Read and parse the tokenizer_config.json file. Encoding is pinned
        # to UTF-8 so parsing does not depend on the platform default locale.
        with open(config_file, "r", encoding="utf-8") as f:
            tokenizer_config = json.load(f)

        # Check if chat_template exists in the tokenizer configuration.
        if 'chat_template' not in tokenizer_config:
            return False, f"The model {model} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."

        return True, ""
    except Exception as e:
        # Deliberate best-effort: surface any failure as a validation-error
        # string instead of crashing the submission flow.
        return False, f"Error checking chat_template for model {model}: {str(e)}"
204
  def get_model_tags(model_card, model: str):
205
  is_merge_from_metadata = False
206
  is_moe_from_metadata = False
src/submission/submit.py CHANGED
@@ -24,6 +24,7 @@ from src.submission.check_validity import (
24
  get_model_size,
25
  is_model_on_hub,
26
  user_submission_permission,
 
27
  )
28
 
29
  from src.voting.vote_system import VoteManager
@@ -114,6 +115,7 @@ def add_new_eval(
114
  except Exception as e:
115
  return styled_error("Could not get your model information. Please fill it up properly.")
116
 
 
117
  model_key = f"{model}_{model_info.sha}_{precision}"
118
  if model_key in requested_models:
119
  return styled_error(f"The model '{model}' with revision '{model_info.sha}' and precision '{precision}' has already been submitted.")
@@ -123,12 +125,12 @@ def add_new_eval(
123
  if model_size is None:
124
  return styled_error(error_text)
125
 
126
- # First check: Absolute size limit for float16 and bfloat16
127
  if precision in ["float16", "bfloat16"] and model_size > 100:
128
  return styled_error(f"Sadly, models larger than 100B parameters cannot be submitted in {precision} precision at this time. "
129
  f"Your model size: {model_size:.2f}B parameters.")
130
 
131
- # Second check: Precision-adjusted size limit for 8bit, 4bit, and GPTQ
132
  if precision in ["8bit", "4bit", "GPTQ"]:
133
  size_checker = ModelSizeChecker(model=model, precision=precision, model_size_in_b=model_size)
134
 
@@ -163,6 +165,12 @@ def add_new_eval(
163
  modelcard_OK, error_msg, model_card = check_model_card(model)
164
  if not modelcard_OK:
165
  return styled_error(error_msg)
 
 
 
 
 
 
166
 
167
  # Seems good, creating the eval
168
  print("Adding new eval")
 
24
  get_model_size,
25
  is_model_on_hub,
26
  user_submission_permission,
27
+ check_chat_template,
28
  )
29
 
30
  from src.voting.vote_system import VoteManager
 
115
  except Exception as e:
116
  return styled_error("Could not get your model information. Please fill it up properly.")
117
 
118
+ # Has it been submitted already?
119
  model_key = f"{model}_{model_info.sha}_{precision}"
120
  if model_key in requested_models:
121
  return styled_error(f"The model '{model}' with revision '{model_info.sha}' and precision '{precision}' has already been submitted.")
 
125
  if model_size is None:
126
  return styled_error(error_text)
127
 
128
+ # Absolute size limit for float16 and bfloat16
129
  if precision in ["float16", "bfloat16"] and model_size > 100:
130
  return styled_error(f"Sadly, models larger than 100B parameters cannot be submitted in {precision} precision at this time. "
131
  f"Your model size: {model_size:.2f}B parameters.")
132
 
133
+ # Precision-adjusted size limit for 8bit, 4bit, and GPTQ
134
  if precision in ["8bit", "4bit", "GPTQ"]:
135
  size_checker = ModelSizeChecker(model=model, precision=precision, model_size_in_b=model_size)
136
 
 
165
  modelcard_OK, error_msg, model_card = check_model_card(model)
166
  if not modelcard_OK:
167
  return styled_error(error_msg)
168
+
169
+ # Check the chat template submission
170
+ if use_chat_template:
171
+ chat_template_valid, chat_template_error = check_chat_template(model, revision)
172
+ if not chat_template_valid:
173
+ return styled_error(chat_template_error)
174
 
175
  # Seems good, creating the eval
176
  print("Adding new eval")