import cv2
import torch
import numpy as np
from transformers import DPTForDepthEstimation, DPTImageProcessor
import gradio as gr
import torch.quantization
import torch.nn.utils.prune as prune
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
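# Load the DPT (SwinV2-tiny, 256px) monocular depth estimation model in float32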
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256", torch_dtype=torch.float32)
model.eval()
# Apply global unstructured L1 pruning to every Conv2d and Linear weight
parameters_to_prune = [
    (module, "weight")
    for module in model.modules()
    if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear))
]
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.3,  # prune 30% of the weights globally
)
# Make the pruning permanent and drop the reparametrization buffers
for module, _ in parameters_to_prune:
    prune.remove(module, "weight")
# Apply dynamic quantization after pruning.
# Dynamic quantization swaps nn.Linear modules for int8 versions and runs on CPU only,
# so it is skipped when CUDA is used (nn.Conv2d is not covered by the dynamic mapping).
if device.type == "cpu":
    model = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8
    )
model = model.to(device)
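# Image processor matching the checkpoint (note: its resize/normalization is not applied in preprocess_image below)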
processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")
# Precompute a 256-entry inferno colormap lookup table, shape (256, 3), in OpenCV's BGR order
color_map = torch.from_numpy(
    cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_INFERNO).reshape(256, 3)
).to(device)
def preprocess_image(image):
    # Resize to 128x72, reorder HWC -> CHW, and scale pixel values to [0, 1]
    return cv2.resize(image, (128, 72), interpolation=cv2.INTER_AREA).transpose(2, 0, 1).astype(np.float32) / 255.0
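# Per-frame inference: preprocess, predict depth, normalize, and colorize the result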
@torch.inference_mode()
def process_frame(image):
    if image is None:
        return None
    preprocessed = preprocess_image(image)
    input_tensor = torch.from_numpy(preprocessed).unsqueeze(0).to(device)
    predicted_depth = model(input_tensor).predicted_depth
    depth_map = predicted_depth.squeeze()
    # Normalize the depth map to [0, 255] and convert to integer indices for the colormap
    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
    depth_map = (depth_map * 255).long()
    # Per-pixel colormap lookup, then BGR (OpenCV) -> RGB for display
    depth_map_colored = color_map[depth_map]
    return cv2.cvtColor(depth_map_colored.cpu().numpy(), cv2.COLOR_BGR2RGB)
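# Gradio app: stream webcam frames through process_frame and show the colored depth map live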
interface = gr.Interface(
    fn=process_frame,
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs="image",
    live=True,
)
interface.launch()