Spaces: Running on Zero
alex committed · Commit 166aa76 · Parent(s): c0bc561

split gif preview added
Browse files:
- app.py +61 -99
- packages.txt +12 -0
- src/utils/render_utils.py +65 -5
app.py CHANGED
@@ -76,7 +76,7 @@ import importlib, site; site.addsitedir(site.getsitepackages()[0]); importlib.in
 
 
 from src.utils.data_utils import get_colored_mesh_composition, scene_to_parts, load_surfaces
-from src.utils.render_utils import render_views_around_mesh, render_normal_views_around_mesh, make_grid_for_images_or_videos, export_renderings
+from src.utils.render_utils import render_views_around_mesh, render_normal_views_around_mesh, make_grid_for_images_or_videos, export_renderings, explode_mesh
 from src.pipelines.pipeline_partcrafter import PartCrafterPipeline
 from src.utils.image_utils import prepare_image
 from src.models.briarmbg import BriaRMBG
@@ -101,82 +101,6 @@ def first_file_from_dir(directory, ext):
     return sorted(files)[0] if files else None
 
 
-def explode_mesh(mesh, explosion_scale=0.4):
-
-    if isinstance(mesh, trimesh.Scene):
-        scene = mesh
-    elif isinstance(mesh, trimesh.Trimesh):
-        print("Warning: Single mesh provided, can't create exploded view")
-        scene = trimesh.Scene(mesh)
-        return scene
-    else:
-        print(f"Warning: Unexpected mesh type: {type(mesh)}")
-        scene = mesh
-
-    if len(scene.geometry) <= 1:
-        print("Only one geometry found - nothing to explode")
-        return scene
-
-    print(f"[EXPLODE_MESH] Starting mesh explosion with scale {explosion_scale}")
-    print(f"[EXPLODE_MESH] Processing {len(scene.geometry)} parts")
-
-    exploded_scene = trimesh.Scene()
-
-    part_centers = []
-    geometry_names = []
-
-    for geometry_name, geometry in scene.geometry.items():
-        if hasattr(geometry, 'vertices'):
-            transform = scene.graph[geometry_name][0]
-            vertices_global = trimesh.transformations.transform_points(
-                geometry.vertices, transform)
-            center = np.mean(vertices_global, axis=0)
-            part_centers.append(center)
-            geometry_names.append(geometry_name)
-            print(f"[EXPLODE_MESH] Part {geometry_name}: center = {center}")
-
-    if not part_centers:
-        print("No valid geometries with vertices found")
-        return scene
-
-    part_centers = np.array(part_centers)
-    global_center = np.mean(part_centers, axis=0)
-
-    print(f"[EXPLODE_MESH] Global center: {global_center}")
-
-    for i, (geometry_name, geometry) in enumerate(scene.geometry.items()):
-        if hasattr(geometry, 'vertices'):
-            if i < len(part_centers):
-                part_center = part_centers[i]
-                direction = part_center - global_center
-
-                direction_norm = np.linalg.norm(direction)
-                if direction_norm > 1e-6:
-                    direction = direction / direction_norm
-                else:
-                    direction = np.random.randn(3)
-                    direction = direction / np.linalg.norm(direction)
-
-                offset = direction * explosion_scale
-            else:
-                offset = np.zeros(3)
-
-            original_transform = scene.graph[geometry_name][0].copy()
-
-            new_transform = original_transform.copy()
-            new_transform[:3, 3] = new_transform[:3, 3] + offset
-
-            exploded_scene.add_geometry(
-                geometry,
-                transform=new_transform,
-                geom_name=geometry_name
-            )
-
-            print(f"[EXPLODE_MESH] Part {geometry_name}: moved by {np.linalg.norm(offset):.4f}")
-
-    print("[EXPLODE_MESH] Mesh explosion complete")
-    return exploded_scene
-
 
 def get_duration(
     image_path,
@@ -200,10 +124,50 @@ def get_duration(
 
     return int(duration_seconds)
 
-
+
+@spaces.GPU(duration=135)
+def gen_model_n_video(image_path: str,
+        num_parts: int,
+        progress=gr.Progress(track_tqdm=True),):
+
+    model_path = run_partcrafter(image_path, num_parts=num_parts, progress=progress)
+    video_path = gen_video(model_path)
+
+    return model_path, video_path
+
+@spaces.GPU()
+def gen_video(model_path):
+
+    if model_path is None:
+        gr.Info("You must craft the 3d parts first")
+
+        return None
+
+    export_dir = os.path.dirname(model_path)
+
+    merged = trimesh.load(model_path)
+
+    preview_path = os.path.join(export_dir, "rendering.gif")
+
+    num_views = 36
+    radius = 4
+    fps = 7
+    rendered_images = render_views_around_mesh(
+        merged,
+        num_views=num_views,
+        radius=radius,
+    )
+
+    export_renderings(
+        rendered_images,
+        preview_path,
+        fps=fps,
+    )
+    return preview_path
+
 @spaces.GPU(duration=get_duration)
 @torch.no_grad()
-def run_triposg(image_path: str,
+def run_partcrafter(image_path: str,
         num_parts: int = 1,
         seed: int = 0,
         num_tokens: int = 1024,
@@ -220,7 +184,7 @@ def run_triposg(image_path: str,
     This function takes a single 2D image as input and produces a set of part-based 3D meshes,
     using compositional latent diffusion with attention to structure and part separation.
     Optionally removes the background using a pretrained background removal model (RMBG),
-    and outputs a merged object mesh
+    and outputs a merged object mesh.
 
     Args:
         image_path (str): Path to the input image file on disk.
@@ -237,13 +201,10 @@ def run_triposg(image_path: str,
     Returns:
         Tuple[str, str, str, str]:
            - `merged_path` (str): File path to the merged full object mesh (`object.glb`).
-           - `split_preview_path` (str): File path to the exploded-view mesh (`split.glb`).
-           - `export_dir` (str): Directory where all generated meshes were saved.
 
     Notes:
        - This function utilizes HuggingFace pretrained weights for both part generation and background removal.
-       - The final output includes
-       - Parts are exported in `.glb` format, and zipped for bulk download.
+       - The final output includes merged model parts to visualize object structure.
        - Generation time depends on the number of parts and inference parameters.
     """
 
@@ -299,11 +260,8 @@ def run_triposg(image_path: str,
 
     merged_path = os.path.join(export_dir, "object.glb")
     merged.export(merged_path)
-
-
-    split_mesh.export(split_preview_path)
-
-    return merged_path, split_preview_path, export_dir
+
+    return merged_path
 
 def cleanup(request: gr.Request):
 
@@ -350,7 +308,8 @@ def build_demo():
         )
         input_image = gr.Image(type="filepath", label="Input Image", height=256)
         num_parts = gr.Slider(1, MAX_NUM_PARTS, value=4, step=1, label="Number of Parts")
-        run_button = gr.Button("🧩 …
+        run_button = gr.Button("🧩 Craft 3D Parts", variant="primary")
+        video_button = gr.Button("🎥 Generate Gif")
 
         with gr.Accordion("Advanced Settings", open=False):
             seed = gr.Number(value=0, label="Random Seed", precision=0)
@@ -369,9 +328,8 @@ def build_demo():
            """
        )
        with gr.Row():
-           output_model = gr.Model3D(label="Merged 3D Object", height=512)
-
-           output_dir = gr.Textbox(label="Export Directory", visible=False)
+           output_model = gr.Model3D(label="Merged 3D Object", height=512, interactive=False)
+           video_output = gr.Image(label="Split Preview", height=512)
        with gr.Row():
            with gr.Column():
                examples = gr.Examples(
@@ -396,19 +354,23 @@ def build_demo():
 
                    ],
                    inputs=[input_image, num_parts],
-                   outputs=[output_model, …
-                   fn=…
-                   cache_examples=True
+                   outputs=[output_model, video_output],
+                   fn=gen_model_n_video,
+                   cache_examples=True
                )
 
-        run_button.click(fn=…
+        run_button.click(fn=run_partcrafter,
                          inputs=[input_image, num_parts, seed, num_tokens, num_steps,
                                  guidance, flash_decoder, remove_bg, session_state],
-                         outputs=[output_model…
+                         outputs=[output_model])
+        video_button.click(fn=gen_video,
+                           inputs=[output_model],
+                           outputs=[video_output])
+
    return demo
 
 if __name__ == "__main__":
    demo = build_demo()
    demo.unload(cleanup)
    demo.queue()
-   demo.launch()
+   demo.launch(mcp_server=True)
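The commit thus splits the old single handler into two steps: `run_partcrafter` generates the part meshes and returns the path of the merged `object.glb`, while `gen_video` loads that file and renders a 36-view turntable GIF beside it. A minimal sketch of how the two handlers compose outside the UI, assuming the Space's defaults; the input file name is a placeholder:

    # hypothetical driver for the new two-step flow ("input.png" is a placeholder)
    model_path = run_partcrafter("input.png", num_parts=4)  # writes .../object.glb
    gif_path = gen_video(model_path)                        # writes .../rendering.gif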
packages.txt ADDED
@@ -0,0 +1,12 @@
+libglfw3-dev
+libgles2-mesa-dev
+libgl1
+freeglut3-dev
+unzip
+ffmpeg
+libsm6
+libxext6
+libgl1-mesa-dri
+libegl1-mesa
+libgbm1
+build-essential
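These packages supply the headless EGL/OpenGL stack (GLFW, GLES, GBM, Mesa DRI) that pyrender needs on a GPU Space, plus ffmpeg for encoding rendered frames; render_utils.py selects the matching backend via PYOPENGL_PLATFORM. A quick smoke test, assuming the packages above are installed:

    # verify headless EGL rendering is available, mirroring render_utils.py's backend choice
    import os
    os.environ["PYOPENGL_PLATFORM"] = "egl"
    import pyrender
    r = pyrender.OffscreenRenderer(viewport_width=64, viewport_height=64)  # raises if EGL/GBM is missing
    r.delete()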
src/utils/render_utils.py CHANGED
@@ -10,9 +10,60 @@ from diffusers.utils import export_to_video
 from diffusers.utils.loading_utils import load_video
 import torch
 from torchvision.utils import make_grid
+import math
 
 os.environ['PYOPENGL_PLATFORM'] = 'egl'
 
+def explode_mesh(mesh, explosion_scale=0.4):
+    # ensure we have a Scene
+    if isinstance(mesh, trimesh.Trimesh):
+        scene = trimesh.Scene(mesh)
+    elif isinstance(mesh, trimesh.Scene):
+        scene = mesh
+    else:
+        raise ValueError(f"Expected Trimesh or Scene, got {type(mesh)}")
+
+    if len(scene.geometry) <= 1:
+        print("Nothing to explode")
+        return scene
+
+    # 1) collect (name, geom, world_center)
+    parts = []
+    for name, geom in scene.geometry.items():
+        # get(name) returns (4×4 world-space matrix, parent_frame)
+        world_tf, _ = scene.graph.get(name)
+        pts = trimesh.transformations.transform_points(geom.vertices, world_tf)
+        center = pts.mean(axis=0)
+        parts.append((name, geom, center))
+
+    # compute global center
+    all_centers = np.stack([c for _, _, c in parts], axis=0)
+    global_center = all_centers.mean(axis=0)
+
+    exploded = trimesh.Scene()
+    for name, geom, center in parts:
+        dir_vec = center - global_center
+        norm = np.linalg.norm(dir_vec)
+        if norm < 1e-6:
+            dir_vec = np.random.randn(3)
+            dir_vec /= np.linalg.norm(dir_vec)
+        else:
+            dir_vec /= norm
+
+        offset = dir_vec * explosion_scale
+
+        # fetch the same 4×4, then bump just the translation
+        world_tf, _ = scene.graph.get(name)
+        world_tf = world_tf.copy()
+        world_tf[:3, 3] += offset
+
+        exploded.add_geometry(geom, transform=world_tf, geom_name=name)
+        print(f"[explode] {name} moved by {np.linalg.norm(offset):.4f}")
+
+    return exploded
+
+
+
 def render(
     scene: pyrender.Scene,
     renderer: pyrender.Renderer,
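The relocated `explode_mesh` is stricter than the version deleted from app.py: it raises on unexpected input types and reads each part's world transform via `scene.graph.get(name)`. A minimal usage sketch; the file names are placeholders:

    # hypothetical usage: explode a multi-part scene and export it
    import trimesh
    scene = trimesh.load("object.glb")  # a Scene with one geometry per part
    exploded = explode_mesh(scene, explosion_scale=0.4)
    exploded.export("split.glb")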
@@ -123,13 +174,22 @@ def render_views_around_mesh(
     Tuple[List[Image.Image], List[Image.Image]],
     Tuple[List[np.ndarray], List[np.ndarray]]
 ]:
+
+    meshes = []
+    scenes = []
 
     if not isinstance(mesh, (trimesh.Trimesh, trimesh.Scene)):
         raise ValueError("mesh must be a trimesh.Trimesh or trimesh.Scene object")
     if isinstance(mesh, trimesh.Trimesh):
-        …
-        …
-        …
+        for i in range(num_views):
+            scenes.append(pyrender.Scene.from_trimesh_scene(trimesh.Scene(mesh)))
+    else:
+        for i in range(num_views):
+            value = math.sin(math.pi * (i - 1) / num_views)
+            scenes.append(pyrender.Scene.from_trimesh_scene(explode_mesh(mesh, 0.2 * value),
+                                                            ambient_light=[0.02, 0.02, 0.02],
+                                                            bg_color=[0.0, 0.0, 0.0, 1.0]))
+
     light = pyrender.DirectionalLight(
         color=np.ones(3),
         intensity=light_intensity
@@ -149,9 +209,9 @@ def render_views_around_mesh(
     )
 
     images, depths = [], []
-    for pose in camera_poses:
+    for i, pose in enumerate(camera_poses):
         image, depth = render(
-            …
+            scenes[i], renderer, camera, pose, light,
             normalize_depth=normalize_depth,
             flags=flags,
             return_type=return_type
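The per-view factor `0.2 * math.sin(math.pi * (i - 1) / num_views)` sweeps half a sine period across the turntable, so the parts drift apart and pull back together over the GIF, which is the "split preview" effect. A small sketch of that schedule; note the slightly negative value at `i = 0`, which nudges parts inward for one frame:

    # tabulate the explosion schedule used above for the default 36 views
    import math
    num_views = 36
    scales = [0.2 * math.sin(math.pi * (i - 1) / num_views) for i in range(num_views)]
    # scales[1] == 0.0, peaks at 0.2 when i == 19, and scales[0] is just below zero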