mfromm committed · verified
Commit 85ef05e · 1 Parent(s): 074b5e7

Update gptx_tokenizer.py

Files changed (1):
  gptx_tokenizer.py +5 -1
gptx_tokenizer.py CHANGED

@@ -245,7 +245,10 @@ class HFGPTXTokenizer(PreTrainedTokenizer):
         """
         output = self.tok.decode(input=token_ids, num_threads=num_threads)
         if skip_special_tokens:
-            output = [token for token in output if token not in self.additional_special_tokens]
+            warnings.warn(
+                "skip_special_tokens currently not implemented"
+            )
+
         if clean_up_tokenization_spaces:
             warnings.warn(
                 "when cleaning up tokenization spaces, this will not behave "
@@ -253,6 +256,7 @@ class HFGPTXTokenizer(PreTrainedTokenizer):
                 "`clean_up_tokenization_spaces=False` for decoding."
             )
             output = self.clean_up_tokenization(output)
+
         return output

     def _convert_id_to_token(self, index: int) -> str:
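
For context on the behavioral change, here is a minimal sketch of exercising the changed decode path after this commit. The repo id below is a placeholder, not the actual repository, and loading the custom HFGPTXTokenizer class requires trust_remote_code=True; after this change, passing skip_special_tokens=True emits a warning instead of filtering special tokens out of the output.

import warnings

from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual model repository that
# ships gptx_tokenizer.py. trust_remote_code=True is required so that
# the custom tokenizer code in the repo is loaded.
tokenizer = AutoTokenizer.from_pretrained(
    "org/gptx-model",  # hypothetical, for illustration only
    trust_remote_code=True,
)

token_ids = tokenizer.encode("Hello world")

# After this commit, skip_special_tokens=True no longer removes special
# tokens from the decoded text; it only warns that the option is not
# implemented and returns the text unchanged.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    text = tokenizer.decode(token_ids, skip_special_tokens=True)

print(text)
# Expect "skip_special_tokens currently not implemented" among the warnings.
print([str(w.message) for w in caught])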