Spaces:

huntrezz
/

RealtimeMonocularDepth

Runtime error

File size: 2,140 Bytes

f8b3886
 
 
0143794
21d17cc
f8b3886
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21d17cc
 
 
f8b3886
21d17cc
 
f8b3886
21d17cc
 
 
f8b3886
21d17cc
f8b3886
21d17cc
 
f8b3886
21d17cc
 
 
 
 
f8b3886
21d17cc
f8b3886
21d17cc
 
f8b3886
21d17cc
 
f8b3886
21d17cc
 
 
 
 
 
f8b3886
21d17cc

import cv2
import torch
import numpy as np
from transformers import DPTForDepthEstimation, DPTImageProcessor
import gradio as gr

# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The preprocessor and the depth model are loaded from the same checkpoint.
processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")

# NOTE(review): half precision is requested even when `device` is the CPU --
# confirm the installed torch build supports float16 inference there.
model = DPTForDepthEstimation.from_pretrained(
    "Intel/dpt-swinv2-tiny-256",
    torch_dtype=torch.float16,
)
model = model.to(device)

def resize_image(image, target_size=(256, 256)):
    """Return ``image`` resized to ``target_size`` with OpenCV's default interpolation.

    Args:
        image: Image array accepted by ``cv2.resize``.
        target_size: Output size, forwarded to ``cv2.resize`` as ``dsize``
            (OpenCV reads it as ``(width, height)``).

    Returns:
        The resized image array.
    """
    resized = cv2.resize(image, dsize=target_size)
    return resized

def manual_normalize(depth_map):
    """Min-max scale ``depth_map`` into 0-255 and return it as ``uint8``.

    Args:
        depth_map: Array-like of numeric values. NaN and +/-inf entries are
            treated as 0, mirroring the clean-up done in ``process_frame``.

    Returns:
        A ``uint8`` array with the same shape as the input. The smallest value
        maps to 0 and the largest to 255; values in between are truncated, not
        rounded. Empty or constant inputs yield an all-zero array.
    """
    values = np.asarray(depth_map)

    if np.issubdtype(values.dtype, np.floating):
        # Non-finite entries would poison min/max and make the uint8 cast
        # undefined, so replace them before measuring the range.
        values = np.nan_to_num(values, nan=0.0, posinf=0.0, neginf=0.0)
    else:
        # Narrow integer types can overflow in `max - min`; do the arithmetic
        # in float64, which is what integer true division produces anyway.
        values = values.astype(np.float64)

    # min()/max() raise on zero-size arrays, so answer that case up front.
    if values.size == 0:
        return np.zeros(values.shape, dtype=np.uint8)

    min_val = values.min()
    max_val = values.max()
    if min_val == max_val:
        # A flat map has no range to stretch; avoid dividing by zero.
        return np.zeros(values.shape, dtype=np.uint8)

    normalized = (values - min_val) / (max_val - min_val)
    return (normalized * 255).astype(np.uint8)

# Build the colour lookup table once at import time: every possible uint8
# intensity (0..255) is run through OpenCV's built-in INFERNO map, and the
# result is later handed to cv2.applyColorMap as a user-supplied table.
_intensity_ramp = np.arange(256, dtype=np.uint8)
color_map = cv2.applyColorMap(_intensity_ramp, cv2.COLORMAP_INFERNO)

def process_frame(image):
    """Estimate depth for one webcam frame and render it as an RGB heat map.

    Args:
        image: RGB frame as delivered by the Gradio image input, or ``None``
            when no frame is available yet.

    Returns:
        An RGB ``uint8`` image in which the predicted depth is min-max scaled
        to 0-255 and coloured through ``color_map``; ``None`` when ``image``
        is ``None``.
    """
    # A streaming input can invoke the callback before the camera has produced
    # a frame; there is nothing to process in that case.
    if image is None:
        return None

    # Gradio hands over RGB arrays, which is what the image processor expects,
    # so no BGR<->RGB swap is applied on the way in.
    resized_frame = resize_image(image)

    inputs = processor(images=resized_frame, return_tensors="pt").to(device)
    # Follow the model's own parameter dtype instead of hard-coding one, so
    # the inputs always match however the model was loaded.
    inputs = {name: tensor.to(model.dtype) for name, tensor in inputs.items()}

    with torch.no_grad():
        predicted_depth = model(**inputs).predicted_depth

    depth_map = predicted_depth.squeeze().cpu().numpy()
    # Non-finite values would break min-max scaling; zero them, then move to
    # float32 because cv2.normalize does not take half-precision arrays.
    depth_map = np.nan_to_num(depth_map, nan=0.0, posinf=0.0, neginf=0.0)
    depth_map = depth_map.astype(np.float32)

    if depth_map.size == 0:
        depth_u8 = np.zeros((256, 256), dtype=np.uint8)
    elif depth_map.min() == depth_map.max():
        # A constant map has no range to stretch; show it as uniform zero.
        depth_u8 = np.zeros(depth_map.shape, dtype=np.uint8)
    else:
        depth_u8 = cv2.normalize(
            depth_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U
        )

    # applyColorMap yields BGR; convert so Gradio displays the right colours.
    depth_map_colored = cv2.applyColorMap(depth_u8, color_map)
    return cv2.cvtColor(depth_map_colored, cv2.COLOR_BGR2RGB)

# Webcam frames are streamed into process_frame and the returned image is
# shown live.
# NOTE(review): the `source=` keyword of gr.Image depends on the Gradio
# version -- confirm it matches the version this app is deployed with.
webcam_input = gr.Image(source="webcam", streaming=True)

interface = gr.Interface(
    fn=process_frame,
    inputs=webcam_input,
    outputs="image",
    live=True,
)

interface.launch()