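# Real-time webcam depth estimation: a DPT (Swin V2 tiny) model is pruned and
# dynamically quantized, then served through a live Gradio webcam interface.
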
import cv2
import torch
import numpy as np
from transformers import DPTForDepthEstimation, DPTImageProcessor
import gradio as gr
import torch.quantization
import torch.nn.utils.prune as prune

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pretrained DPT depth model in full precision and put it in eval mode
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256", torch_dtype=torch.float32)
model.eval()

# Apply global unstructured L1 pruning to every Conv2d and Linear weight
parameters_to_prune = [
    (module, "weight")
    for module in model.modules()
    if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear))
]
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.3,  # Prune 30% of weights
)

# Make the pruning permanent: fold each mask into its weight tensor
for module, _ in parameters_to_prune:
    prune.remove(module, "weight")
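# Note: unstructured pruning only zeroes entries; the tensors stay dense, so
# the size/speed win here comes mainly from the quantization step below.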

# Dynamic quantization is CPU-only and covers nn.Linear (nn.Conv2d has no
# dynamic kernel), so quantize on CPU and just move the model to GPU otherwise.
if device.type == "cpu":
    model = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8
    )
else:
    model = model.to(device)

processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")
# Reuse the processor's normalization stats in the manual fast path below
image_mean = np.array(processor.image_mean, dtype=np.float32).reshape(3, 1, 1)
image_std = np.array(processor.image_std, dtype=np.float32).reshape(3, 1, 1)

# 256-entry inferno lookup table (BGR order), kept on the device for fast indexing
color_map = torch.from_numpy(
    cv2.applyColorMap(np.arange(256, dtype=np.uint8).reshape(-1, 1), cv2.COLORMAP_INFERNO).reshape(256, 3)
).to(device)

def preprocess_image(image):
    # Resize to 128x72 (cv2 takes width, height), HWC -> CHW, scale, normalize
    chw = cv2.resize(image, (128, 72), interpolation=cv2.INTER_AREA).transpose(2, 0, 1).astype(np.float32) / 255.0
    return (chw - image_mean) / image_std

@torch.inference_mode()
def process_frame(image):
    if image is None:
        return None
    preprocessed = preprocess_image(image)
    input_tensor = torch.from_numpy(preprocessed).unsqueeze(0).to(device)
    
    predicted_depth = model(input_tensor).predicted_depth
    depth_map = predicted_depth.squeeze()

    # Normalize the depth map to [0, 255]; the epsilon guards a constant frame
    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-6)
    depth_map = (depth_map * 255).long()  # integer indices into the color LUT

    depth_map_colored = color_map[depth_map]

    # The LUT is BGR (OpenCV convention); convert to RGB for Gradio
    return cv2.cvtColor(depth_map_colored.cpu().numpy(), cv2.COLOR_BGR2RGB)

# Live Gradio app: stream webcam frames through the depth model
interface = gr.Interface(
    fn=process_frame,
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs="image",
    live=True,
)

interface.launch()
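
# Running this script starts a local Gradio server (default http://127.0.0.1:7860);
# open the printed URL and allow webcam access to see the live depth map.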