import os

import gradio as gr

from pq3d.inference import inference

# Directory of pre-exported .glb scene meshes; the dropdown lists them by name.
# Expected asset layout (inferred from the paths used in this script):
#   assets/mesh/<scan_id>.glb                     -- full scene meshes
#   assets/mask/<scan_id>/<scan_id>_obj_<id>.glb  -- per-object mask meshes
MESH_DIR = 'assets/mesh'
MESH_NAMES = sorted(os.path.splitext(fname)[0] for fname in os.listdir(MESH_DIR))

# Track the currently selected scene so inference_wrapper() below can use it.
# A module-level variable is shared across sessions, which is acceptable for a
# single-user demo.
current_scan_id = 'scene0050_00'


def change_scene(dropdown_scene: str) -> str:
    """Reset the 3D viewer to the newly selected scene."""
    global current_scan_id
    current_scan_id = dropdown_scene
    return os.path.join(MESH_DIR, f'{dropdown_scene}.glb')


with gr.Blocks(title='PQ3D Demo') as demo:
    gr.HTML(value="<h2 align='center'>Unifying 3D Vision Language Understanding via Promptable Queries</h2>")
    # Header blocks commented out in an earlier revision (HTML markup elided):
    # gr.HTML(value="Teaser")  # teaser image
    # gr.HTML(value="arXiv | Project Page | Code")  # paper / project / code links
    # gr.HTML(value="LEO: an embodied generalist agent capable of perceiving, grounding, reasoning, planning, and acting in 3D world.")
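    # Scene viewer: a dropdown of available scans plus a Model3D widget;
    # selecting a scan swaps the displayed mesh via change_scene() above.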
    with gr.Row():
        with gr.Column(scale=5):
            dropdown_scene = gr.Dropdown(
                choices=MESH_NAMES,
                value='scene0050_00',
                interactive=True,
                label='Select a 3D scene',
            )
            model_3d = gr.Model3D(
                value=os.path.join(MESH_DIR, 'scene0050_00.glb'),
                clear_color=[0.0, 0.0, 0.0, 0.0],
                label='3D Scene',
                camera_position=(80, 100, 6),
                height=659,
            )
            gr.HTML(
                """
                👆 SCROLL and DRAG on the 3D Scene to zoom in/out and rotate. Press CTRL and DRAG to pan.
                """
            )

    dropdown_scene.change(
        fn=change_scene,
        inputs=[dropdown_scene],
        outputs=[model_3d],
        queue=False,
    )

    def inference_wrapper(text: str) -> str:
        """Ground a referring expression in the current scene and return the
        path of the pre-exported mask mesh for the predicted instance."""
        # The original read model_3d.value['orig_name'], but a component's
        # .value holds its initial constructor value (a path string), not the
        # live UI state; the selected scene is tracked in current_scan_id instead.
        scan_id = current_scan_id
        inst_id = inference(scan_id, text)
        return f"assets/mask/{scan_id}/{scan_id}_obj_{inst_id}.glb"

    gr.Interface(
        fn=inference_wrapper,
        inputs=["text"],
        outputs=gr.Model3D(
            clear_color=[0.0, 0.0, 0.0, 0.0],
            camera_position=(80, 100, 6),
            label="3D Model",
        ),
        examples=[["armchair"], ["Sofa"], ["left computer on the desk"]],
        title="Input text, output 3D mask; red denotes the predicted object",
    )

demo.queue().launch(share=True, allowed_paths=['assets'])