|
import glob
import os
from pathlib import Path

import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageOps
from transformers import DPTFeatureExtractor, DPTForDepthEstimation

from autostereogram.sirds_converter import SirdsConverter
from skimage import color
|
|
|
# Load the DPT depth-estimation model and its feature extractor from the Hugging Face Hub
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")

# Converter that renders a depth map as a single-image random-dot stereogram (SIRDS)
stereo_converter = SirdsConverter()
|
|
|
|
|
def process_image(image_path):
    image_raw = Image.open(Path(image_path))

    # Resize to a width of 1280 px while preserving the aspect ratio
    image = image_raw.resize(
        (1280, int(1280 * image_raw.size[1] / image_raw.size[0])),
        Image.Resampling.LANCZOS,
    )

    # Prepare the pixel values for the DPT model
    encoding = feature_extractor(image, return_tensors="pt")

    # Predict the depth map without tracking gradients
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth

    # Upsample the raw prediction back to the resized image dimensions
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
    output = prediction.cpu().numpy()

    # Normalize the depth map to 8-bit grayscale and pad it to 1280x720
    depth_image = (output * 255 / np.max(output)).astype("uint8")
    depth_image_padded = np.array(
        ImageOps.pad(Image.fromarray(depth_image), (1280, 720))
    )

    # Render the padded depth map as a single-image random-dot stereogram
    stereo_image = stereo_converter.convert_depth_to_stereogram_with_sird(
        depth_image_padded, False, 0.5
    ).astype(np.uint8)

    return [depth_image_padded, stereo_image]
|
|
|
|
|
title = "Demo: zero-shot depth estimation with DPT + 3D Voxels reconstruction" |
|
description = "This demo is a variation from the original <a href='https://huggingface.co/spaces/nielsr/dpt-depth-estimation' target='_blank'>DPT Demo</a>. It uses the DPT model to predict the depth of an image and then reconstruct the 3D model as voxels." |
|
|
|
# Sample images bundled with the app, shown as clickable examples in the UI
examples = sorted(glob.glob('examples/*.jpg'))
|
|
|
iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.inputs.Image(type="filepath", label="Input Image"),
    ],
    outputs=[
        gr.outputs.Image(label="Predicted Depth", type="pil"),
        gr.outputs.Image(label="Stereogram", type="pil"),
    ],
    title=title,
    description=description,
    examples=examples,
    allow_flagging="never",
)
|
|
|
if __name__ == "__main__":
    # Launch the Gradio interface with the request queue disabled
    iface.launch(debug=True, enable_queue=False)
|
|