Spaces:

fffiloni
/

Sa2VA-simple-demo

Running on Zero

App Files Files Community

fffiloni committed 1 day ago

Commit

678631b

verified ·

1 Parent(s): 5672cc2

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -3

app.py CHANGED Viewed

@@ -31,12 +31,24 @@ tokenizer = AutoTokenizer.from_pretrained(
 from third_parts import VideoReader
 def read_video(video_path, video_interval):
     vid_frames = VideoReader(video_path)[::video_interval]
     for frame_idx in range(len(vid_frames)):
         frame_image = vid_frames[frame_idx]
         frame_image = frame_image[..., ::-1]  # BGR (opencv system) to RGB (numpy system)
         frame_image = Image.fromarray(frame_image)
         vid_frames[frame_idx] = frame_image
-    return vid_frames
 def visualize(pred_mask, image_path, work_dir):
     visualizer = Visualizer()
@@ -77,7 +89,7 @@ def image_vision(image_input_path, prompt):
         return answer, None
 def video_vision(video_input_path, prompt):
-    vid_frames = read_video(video_input_path, video_interval=6)
     # create a question (<image> is a placeholder for the video frames)
     question = f"<image>{prompt}"
     result = model.predict_forward(
@@ -88,7 +100,21 @@ def video_vision(video_input_path, prompt):
     prediction = result['prediction']
     print(prediction)
-    return result['prediction'], None

 from third_parts import VideoReader
 def read_video(video_path, video_interval):
     vid_frames = VideoReader(video_path)[::video_interval]
+    temp_dir = tempfile.mkdtemp()
+    os.makedirs(temp_dir, exist_ok=True)
+    image_paths = []  # List to store paths of saved images
     for frame_idx in range(len(vid_frames)):
         frame_image = vid_frames[frame_idx]
         frame_image = frame_image[..., ::-1]  # BGR (opencv system) to RGB (numpy system)
         frame_image = Image.fromarray(frame_image)
         vid_frames[frame_idx] = frame_image
+        # Save the frame as a .jpg file in the temporary folder
+        image_path = os.path.join(temp_dir, f"frame_{frame_idx:04d}.jpg")
+        frame_image.save(image_path, format="JPEG")
+        # Append the image path to the list
+        image_paths.append(image_path)
+    return vid_frames, image_paths
 def visualize(pred_mask, image_path, work_dir):
     visualizer = Visualizer()
         return answer, None
 def video_vision(video_input_path, prompt):
+    vid_frames, image_paths = read_video(video_input_path, video_interval=6)
     # create a question (<image> is a placeholder for the video frames)
     question = f"<image>{prompt}"
     result = model.predict_forward(
     prediction = result['prediction']
     print(prediction)
+    if '[SEG]' in prediction and Visualizer is not None:
+        _seg_idx = 0
+        pred_masks = result['prediction_masks'][_seg_idx]
+        seg_frames = []
+        for frame_idx in range(len(vid_frames)):
+            pred_mask = pred_masks[frame_idx]
+            temp_dir = tempfile.mkdtemp()
+            os.makedirs(temp_dir, exist_ok=True)
+            seg_frame = visualize(pred_mask, image_paths[frame_idx], temp_dir)
+            seg_frames.append(seg_frame)
+        return result['prediction'], seg_frames
+    else:
+        return result['prediction'], None