Husnain committed on
Commit
abbb658
1 Parent(s): 0bf2285

💥 [Fix] slow tokenizer issue, and finish by stop_sequences

Browse files
Files changed (1) hide show
  1. messagers/message_composer.py +6 -3
messagers/message_composer.py CHANGED
@@ -150,9 +150,12 @@ class MessageComposer:
150
  self.merged_str = "<bos>" + "\n".join(self.merged_str_list)
151
  # https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO#prompt-format
152
  # https://huggingface.co/openchat/openchat-3.5-0106
153
- # elif self.model in ["openchat-3.5", "nous-mixtral-8x7b"]:
154
- elif self.model in ["openchat-3.5", "command-r-plus", "gemma-7b"]:
155
- tokenizer = AutoTokenizer.from_pretrained(self.model_fullname)
 
 
 
156
  self.merged_str = tokenizer.apply_chat_template(
157
  messages, tokenize=False, add_generation_prompt=True
158
  )
 
150
  self.merged_str = "<bos>" + "\n".join(self.merged_str_list)
151
  # https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO#prompt-format
152
  # https://huggingface.co/openchat/openchat-3.5-0106
153
+ # https://huggingface.co/01-ai/Yi-1.5-34B-Chat
154
+ elif self.model in ["openchat-3.5", "command-r-plus", "gemma-7b", "yi-1.5-34b"]:
155
+ # https://discuss.huggingface.co/t/error-with-new-tokenizers-urgent/2847/5
156
+ tokenizer = AutoTokenizer.from_pretrained(
157
+ self.model_fullname, use_fast=False
158
+ )
159
  self.merged_str = tokenizer.apply_chat_template(
160
  messages, tokenize=False, add_generation_prompt=True
161
  )