Spaces:

amoghrrao
/

Vision_Transformer

Sleeping

App Files Community

amoghrrao committed on Mar 29

Commit

15ad333

verified ·

1 Parent(s): 3f5877b

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -47

app.py CHANGED Viewed

@@ -7,64 +7,42 @@ from torchvision import transforms
 from transformers import AutoProcessor, AutoModelForImageSegmentation, AutoModelForDepthEstimation
 def load_segmentation_model():
-    try:
-        print("Loading segmentation model...")
-        model_name = "ZhengPeng7/BiRefNet"
-        model = AutoModelForImageSegmentation.from_pretrained(model_name, trust_remote_code=True)
-        model.to(device)
-        print("Segmentation model loaded successfully.")
-        return model
-    except Exception as e:
-        print(f"Error loading segmentation model: {e}")
-        return None
 def load_depth_model():
-    try:
-        print("Loading depth estimation model...")
-        model_name = "depth-anything/Depth-Anything-V2-Metric-Indoor-Base-hf"
-        processor = AutoProcessor.from_pretrained(model_name)
-        model = AutoModelForDepthEstimation.from_pretrained(model_name)
-        model.to(device)
-        print("Depth estimation model loaded successfully.")
-        return processor, model
-    except Exception as e:
-        print(f"Error loading depth estimation model: {e}")
-        return None, None
 def process_segmentation_image(image):
     transform = transforms.Compose([
         transforms.Resize((512, 512)),
         transforms.ToTensor(),
     ])
-    input_tensor = transform(image).unsqueeze(0).to(device)
     return image, input_tensor
 def process_depth_image(image, processor):
     image = image.resize((512, 512))
-    inputs = processor(images=image, return_tensors="pt").to(device)
     return image, inputs
 def segment_image(image, input_tensor, model):
-    try:
-        with torch.no_grad():
-            outputs = model(input_tensor)
-            output_tensor = outputs[0] if isinstance(outputs, list) else outputs.logits
-            mask = torch.sigmoid(output_tensor).squeeze().cpu().numpy()
-            mask = (mask > 0.5).astype(np.uint8) * 255
-        return mask
-    except Exception as e:
-        print(f"Error during segmentation: {e}")
-        return np.zeros((512, 512), dtype=np.uint8)
 def estimate_depth(inputs, model):
-    try:
-        with torch.no_grad():
-            outputs = model(**inputs)
-        depth_map = outputs.predicted_depth.squeeze().cpu().numpy()
-        return depth_map
-    except Exception as e:
-        print(f"Error during depth estimation: {e}")
-        return np.zeros((512, 512), dtype=np.float32)
 def normalize_depth_map(depth_map):
     min_val = np.min(depth_map)
@@ -95,9 +73,6 @@ def process_image_pipeline(image):
     segmentation_model = load_segmentation_model()
     depth_processor, depth_model = load_depth_model()
-    if segmentation_model is None or depth_model is None:
-        return Image.fromarray(np.zeros((512, 512), dtype=np.uint8)), image, image
     _, input_tensor = process_segmentation_image(image)
     _, inputs = process_depth_image(image, depth_processor)
@@ -108,9 +83,6 @@ def process_image_pipeline(image):
     return Image.fromarray(segmentation_mask), blurred_image, gaussian_blur_image
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"Using device: {device}")
 iface = gr.Interface(
     fn=process_image_pipeline,
     inputs=gr.Image(type="pil"),

 from transformers import AutoProcessor, AutoModelForImageSegmentation, AutoModelForDepthEstimation
 def load_segmentation_model():
+    model_name = "ZhengPeng7/BiRefNet"
+    model = AutoModelForImageSegmentation.from_pretrained(model_name, trust_remote_code=True)
+    return model
 def load_depth_model():
+    model_name = "depth-anything/Depth-Anything-V2-Metric-Indoor-Base-hf"
+    processor = AutoProcessor.from_pretrained(model_name)
+    model = AutoModelForDepthEstimation.from_pretrained(model_name)
+    return processor, model
 def process_segmentation_image(image):
     transform = transforms.Compose([
         transforms.Resize((512, 512)),
         transforms.ToTensor(),
     ])
+    input_tensor = transform(image).unsqueeze(0)
     return image, input_tensor
 def process_depth_image(image, processor):
     image = image.resize((512, 512))
+    inputs = processor(images=image, return_tensors="pt")
     return image, inputs
 def segment_image(image, input_tensor, model):
+    with torch.no_grad():
+        outputs = model(input_tensor)
+        output_tensor = outputs[0] if isinstance(outputs, list) else outputs.logits
+        mask = torch.sigmoid(output_tensor).squeeze().cpu().numpy()
+        mask = (mask > 0.5).astype(np.uint8) * 255
+    return mask
 def estimate_depth(inputs, model):
+    with torch.no_grad():
+        outputs = model(**inputs)
+    depth_map = outputs.predicted_depth.squeeze().cpu().numpy()
+    return depth_map
 def normalize_depth_map(depth_map):
     min_val = np.min(depth_map)
     segmentation_model = load_segmentation_model()
     depth_processor, depth_model = load_depth_model()
     _, input_tensor = process_segmentation_image(image)
     _, inputs = process_depth_image(image, depth_processor)
     return Image.fromarray(segmentation_mask), blurred_image, gaussian_blur_image
 iface = gr.Interface(
     fn=process_image_pipeline,
     inputs=gr.Image(type="pil"),