Update gptx_tokenizer.py
gptx_tokenizer.py  +5 -1

@@ -245,7 +245,10 @@ class HFGPTXTokenizer(PreTrainedTokenizer):
         """
         output = self.tok.decode(input=token_ids, num_threads=num_threads)
         if skip_special_tokens:
-
+            warnings.warn(
+                "skip_special_tokens currently not implemented"
+            )
+
         if clean_up_tokenization_spaces:
             warnings.warn(
                 "when cleaning up tokenization spaces, this will not behave "
@@ -253,6 +256,7 @@ class HFGPTXTokenizer(PreTrainedTokenizer):
                 "`clean_up_tokenization_spaces=False` for decoding."
             )
             output = self.clean_up_tokenization(output)
+
         return output

     def _convert_id_to_token(self, index: int) -> str:
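Note that with this change, passing `skip_special_tokens=True` only emits a `UserWarning`; special-token ids are still decoded into the output. Below is a minimal sketch of one way the flag could later be honored by filtering ids before decoding. It assumes the standard `all_special_ids` property inherited from `PreTrainedTokenizer` and the `self.tok.decode(input=..., num_threads=...)` call shown in the diff; the helper name is hypothetical and not part of this commit.

```python
def decode_skipping_specials(tokenizer, token_ids, num_threads=None):
    """Hypothetical helper (not in this commit): approximate
    skip_special_tokens=True by dropping special-token ids before decoding.

    Assumes `tokenizer.all_special_ids` (a standard PreTrainedTokenizer
    property) lists every special token id, and that `tokenizer.tok.decode`
    accepts `input` and `num_threads` keywords as in the diff above.
    """
    special_ids = set(tokenizer.all_special_ids)
    kept = [t for t in token_ids if t not in special_ids]
    return tokenizer.tok.decode(input=kept, num_threads=num_threads)
```

Whether this is the right semantics depends on how the underlying tokenizer merges adjacent pieces, so the warning is a reasonable stopgap until the behavior is decided.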