# Copyright 2024 Anton Obukhov, ETH Zurich. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------
# If you find this code useful, we kindly ask you to cite our paper in your work.
# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
# More information about the method can be found at https://marigoldmonodepth.github.io
# --------------------------------------------------------------------------
from __future__ import annotations
import functools
import os
import tempfile
import gradio as gr
import numpy as np
import spaces
from PIL import Image
from gradio_imageslider import ImageSlider
from pathlib import Path
import gradio
from gradio.utils import get_cache_folder
from DAI.pipeline_all import DAIPipeline
from diffusers import (
AutoencoderKL,
UNet2DConditionModel,
)
from transformers import CLIPTextModel, AutoTokenizer
from DAI.controlnetvae import ControlNetVAEModel
from DAI.decoder import CustomAutoencoderKL
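# gr.Examples subclass that defers cache-folder creation so a custom
# directory_name can be applied before the examples are cached.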
class Examples(gradio.helpers.Examples):
def __init__(self, *args, directory_name=None, **kwargs):
super().__init__(*args, **kwargs, _initiated_directly=False)
if directory_name is not None:
self.cached_folder = get_cache_folder() / directory_name
self.cached_file = Path(self.cached_folder) / "log.csv"
self.create()
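# Module-level demo defaults (not all of them are wired into the pipeline call below).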
default_seed = 2024
default_batch_size = 1
default_image_processing_resolution = 2048
def process_image_check(path_input):
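    """Raise a Gradio error if no input image has been provided."""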
if path_input is None:
raise gr.Error(
"Missing image in the first pane: upload a file or use one from the gallery below."
)
def resize_image(input_image, resolution):
    """Resize so the shorter side is `resolution`, snapping both sides to multiples of 64."""
    # Ensure input_image is a PIL Image object
    if not isinstance(input_image, Image.Image):
        raise ValueError("input_image should be a PIL Image object")
    # Read the image dimensions (works for grayscale and color inputs alike)
    H, W = np.asarray(input_image).shape[:2]
    # Scaling factor that brings the shorter side to the target resolution
    k = float(resolution) / min(H, W)
    # Scale and round both sides to the nearest multiple of 64, which keeps the
    # dimensions compatible with the diffusion backbone's downsampling stages
    H = int(np.round(H * k / 64.0)) * 64
    W = int(np.round(W * k / 64.0)) * 64
    # Resize with PIL's high-quality Lanczos filter
    return input_image.resize((W, H), Image.Resampling.LANCZOS)
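# A hypothetical use of the helper above (the demo itself does not call it);
# the file name is illustrative only:
#   thumb = resize_image(Image.open("files/image/some_example.png"), 768)
#   # both sides of `thumb` are now multiples of 64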
def process_image(
pipe,
vae_2,
path_input,
):
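    """Run the dereflection pipeline on one image; yield (input, output_path) for the slider."""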
name_base, name_ext = os.path.splitext(os.path.basename(path_input))
print(f"Processing image {name_base}{name_ext}")
path_output_dir = tempfile.mkdtemp()
path_out_png = os.path.join(path_output_dir, f"{name_base}_delight.png")
input_image = Image.open(path_input)
    # None lets the pipeline process at its default/native resolution
    resolution = None
pipe_out = pipe(
image=input_image,
prompt="remove glass reflection",
vae_2=vae_2,
processing_resolution=resolution,
)
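    # Map the prediction from [-1, 1] to [0, 255] uint8 and save it as a PNG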
processed_frame = (pipe_out.prediction.clip(-1, 1) + 1) / 2
processed_frame = (processed_frame[0] * 255).astype(np.uint8)
processed_frame = Image.fromarray(processed_frame)
processed_frame.save(path_out_png)
yield [input_image, path_out_png]
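# Note: process_image is a generator so Gradio can stream the result pair; a
# hypothetical direct call (outside Gradio) would consume it with next(), e.g.
#   original, out_path = next(process_image(pipe, vae_2, "files/image/some_example.png"))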
def run_demo_server(pipe, vae_2):
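    # Bind the loaded models into the handler and request a GPU slot per call
    # (the `spaces.GPU` decorator targets Hugging Face ZeroGPU Spaces)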
process_pipe_image = spaces.GPU(functools.partial(process_image, pipe, vae_2))
gradio_theme = gr.themes.Default()
with gr.Blocks(
theme=gradio_theme,
title="Dereflection Any Image",
css="""
#download {
height: 118px;
}
.slider .inner {
width: 5px;
background: #FFF;
}
.viewport {
aspect-ratio: 4/3;
}
.tabs button.selected {
font-size: 20px !important;
color: crimson !important;
}
h1 {
text-align: center;
display: block;
}
h2 {
text-align: center;
display: block;
}
h3 {
text-align: center;
display: block;
}
.md_feedback li {
margin-bottom: 0px !important;
}
""",
head="""
<script async src="https://www.googletagmanager.com/gtag/js?id=G-1FWSVCGZTG"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag() {dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-1FWSVCGZTG');
</script>
""",
) as demo:
gr.Markdown(
"""
# Dereflection Any Image
<p align="center">
"""
)
with gr.Tabs(elem_classes=["tabs"]):
with gr.Tab("Image"):
with gr.Row():
with gr.Column():
image_input = gr.Image(
label="Input Image",
type="filepath",
)
with gr.Row():
image_submit_btn = gr.Button(
                                value="Remove Reflection", variant="primary"
)
image_reset_btn = gr.Button(value="Reset")
with gr.Column():
image_output_slider = ImageSlider(
                            label="Output",
type="filepath",
show_download_button=True,
show_share_button=True,
interactive=False,
elem_classes="slider",
# position=0.25,
)
Examples(
fn=process_pipe_image,
examples=sorted([
os.path.join("files", "image", name)
for name in os.listdir(os.path.join("files", "image"))
]),
inputs=[image_input],
outputs=[image_output_slider],
cache_examples=False,
directory_name="examples_image",
)
### Image tab
image_submit_btn.click(
fn=process_image_check,
inputs=image_input,
outputs=None,
preprocess=False,
queue=False,
).success(
fn=process_pipe_image,
inputs=[
image_input,
],
outputs=[image_output_slider],
concurrency_limit=1,
)
image_reset_btn.click(
fn=lambda: (
None,
None,
),
inputs=[],
outputs=[
image_input,
image_output_slider,
],
queue=False,
)
### Server launch
demo.queue(
api_open=False,
).launch(
server_name="0.0.0.0",
server_port=7860,
)
def main():
    os.system("pip freeze")  # log the installed packages to the Space console for debugging
pretrained_model_name_or_path = "JichenHu/dereflection-any-image-v0"
revision = None
variant = None
    # Load the dereflection-specific components (ControlNet, UNet, and second VAE)
    controlnet = ControlNetVAEModel.from_pretrained(pretrained_model_name_or_path, subfolder="controlnet")
    unet = UNet2DConditionModel.from_pretrained(pretrained_model_name_or_path, subfolder="unet")
    vae_2 = CustomAutoencoderKL.from_pretrained(pretrained_model_name_or_path, subfolder="vae_2")
# Load other components of the pipeline
vae = AutoencoderKL.from_pretrained(
pretrained_model_name_or_path, subfolder="vae", revision=revision, variant=variant
)
text_encoder = CLIPTextModel.from_pretrained(
pretrained_model_name_or_path, subfolder="text_encoder", revision=revision, variant=variant
)
tokenizer = AutoTokenizer.from_pretrained(
pretrained_model_name_or_path,
subfolder="tokenizer",
revision=revision,
use_fast=False,
)
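    # Assemble the DAI pipeline; the safety checker, scheduler, and feature
    # extractor are not needed for this demo, so they are passed as None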
pipe = DAIPipeline(
vae=vae,
text_encoder=text_encoder,
tokenizer=tokenizer,
unet=unet,
controlnet=controlnet,
safety_checker=None,
scheduler=None,
feature_extractor=None,
t_start=0,
)
    try:
        import xformers  # noqa: F401  # imported only to check availability
        pipe.enable_xformers_memory_efficient_attention()
    except ImportError:
        pass  # xformers is optional; run with the default attention implementation
run_demo_server(pipe, vae_2)
if __name__ == "__main__":
main()