lkhl committed on
Commit
fd2ad62
·
verified ·
1 Parent(s): edde2a5

Update processing_videollama3.py

Browse files
Files changed (1) hide show
  1. processing_videollama3.py +9 -8
processing_videollama3.py CHANGED
@@ -681,14 +681,15 @@ class Videollama3Qwen2Processor(ProcessorMixin):
681
  kwargs.pop("padding")
682
  kwargs.pop("padding_side")
683
 
684
- image_idx = 0
685
- while DEFAULT_IMAGE_TOKEN in text:
686
- num_tokens = self._get_visual_seq_len(grid_sizes[image_idx])
687
- text = text.replace(DEFAULT_IMAGE_TOKEN, "<placeholder>" * num_tokens, 1)
688
- image_idx += 1
689
- text = text.replace("<placeholder>", DEFAULT_IMAGE_TOKEN)
690
-
691
- assert len(grid_sizes) == image_idx, "Number of images does not match the number of image tokens in the text."
 
692
 
693
  text_inputs = self.tokenizer(text, **kwargs)
694
  return text_inputs
 
681
  kwargs.pop("padding")
682
  kwargs.pop("padding_side")
683
 
684
+ if len(grid_sizes) > 0:
685
+ image_idx = 0
686
+ while DEFAULT_IMAGE_TOKEN in text:
687
+ num_tokens = self._get_visual_seq_len(grid_sizes[image_idx])
688
+ text = text.replace(DEFAULT_IMAGE_TOKEN, "<placeholder>" * num_tokens, 1)
689
+ image_idx += 1
690
+ text = text.replace("<placeholder>", DEFAULT_IMAGE_TOKEN)
691
+
692
+ assert len(grid_sizes) == image_idx, "Number of images does not match the number of image tokens in the text."
693
 
694
  text_inputs = self.tokenizer(text, **kwargs)
695
  return text_inputs