fffiloni committed on
Commit
678631b
·
verified ·
1 Parent(s): 5672cc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -3
app.py CHANGED
@@ -31,12 +31,24 @@ tokenizer = AutoTokenizer.from_pretrained(
31
  from third_parts import VideoReader
32
  def read_video(video_path, video_interval):
33
  vid_frames = VideoReader(video_path)[::video_interval]
 
 
 
 
 
34
  for frame_idx in range(len(vid_frames)):
35
  frame_image = vid_frames[frame_idx]
36
  frame_image = frame_image[..., ::-1] # BGR (opencv system) to RGB (numpy system)
37
  frame_image = Image.fromarray(frame_image)
38
  vid_frames[frame_idx] = frame_image
39
- return vid_frames
 
 
 
 
 
 
 
40
 
41
  def visualize(pred_mask, image_path, work_dir):
42
  visualizer = Visualizer()
@@ -77,7 +89,7 @@ def image_vision(image_input_path, prompt):
77
  return answer, None
78
 
79
  def video_vision(video_input_path, prompt):
80
- vid_frames = read_video(video_input_path, video_interval=6)
81
  # create a question (<image> is a placeholder for the video frames)
82
  question = f"<image>{prompt}"
83
  result = model.predict_forward(
@@ -88,7 +100,21 @@ def video_vision(video_input_path, prompt):
88
  prediction = result['prediction']
89
  print(prediction)
90
 
91
- return result['prediction'], None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
 
94
 
 
31
  from third_parts import VideoReader
32
  def read_video(video_path, video_interval):
33
  vid_frames = VideoReader(video_path)[::video_interval]
34
+
35
+ temp_dir = tempfile.mkdtemp()
36
+ os.makedirs(temp_dir, exist_ok=True)
37
+ image_paths = [] # List to store paths of saved images
38
+
39
  for frame_idx in range(len(vid_frames)):
40
  frame_image = vid_frames[frame_idx]
41
  frame_image = frame_image[..., ::-1] # BGR (opencv system) to RGB (numpy system)
42
  frame_image = Image.fromarray(frame_image)
43
  vid_frames[frame_idx] = frame_image
44
+
45
+ # Save the frame as a .jpg file in the temporary folder
46
+ image_path = os.path.join(temp_dir.name, f"frame_{frame_idx:04d}.jpg")
47
+ frame_image.save(image_path, format="JPEG")
48
+
49
+ # Append the image path to the list
50
+ image_paths.append(image_path)
51
+ return vid_frames, image_paths
52
 
53
  def visualize(pred_mask, image_path, work_dir):
54
  visualizer = Visualizer()
 
89
  return answer, None
90
 
91
  def video_vision(video_input_path, prompt):
92
+ vid_frames, image_paths = read_video(video_input_path, video_interval=6)
93
  # create a question (<image> is a placeholder for the video frames)
94
  question = f"<image>{prompt}"
95
  result = model.predict_forward(
 
100
  prediction = result['prediction']
101
  print(prediction)
102
 
103
+ if '[SEG]' in prediction and Visualizer is not None:
104
+ _seg_idx = 0
105
+ pred_masks = result['prediction_masks'][_seg_idx]
106
+ seg_frames = []
107
+ for frame_idx in range(len(vid_frames)):
108
+ pred_mask = pred_masks[frame_idx]
109
+ temp_dir = tempfile.mkdtemp()
110
+ os.makedirs(temp_dir, exist_ok=True)
111
+ seg_frame = visualize(pred_mask, image_paths[frame_idx], temp_dir)
112
+ seg_frames.append(seg_frame)
113
+
114
+ return result['prediction'], seg_frames
115
+
116
+ else:
117
+ return result['prediction'], None
118
 
119
 
120