Spaces:

JichenHu
/

Dereflection-Any-Image

Runtime error

File size: 9,378 Bytes

# Copyright 2024 Anton Obukhov, ETH Zurich. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------
# If you find this code useful, we kindly ask you to cite our paper in your work.
# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
# More information about the method can be found at https://marigoldmonodepth.github.io
# --------------------------------------------------------------------------
from __future__ import annotations

import functools
import os
import tempfile

import gradio as gr
import numpy as np
import spaces

from PIL import Image
from gradio_imageslider import ImageSlider
from tqdm import tqdm

from pathlib import Path
import gradio
from gradio.utils import get_cache_folder
from DAI.pipeline_all import DAIPipeline

from diffusers import (
    AutoencoderKL,
    UNet2DConditionModel,
)

from transformers import CLIPTextModel, AutoTokenizer

from DAI.controlnetvae import ControlNetVAEModel

from DAI.decoder import CustomAutoencoderKL


class Examples(gradio.helpers.Examples):
    def __init__(self, *args, directory_name=None, **kwargs):
        super().__init__(*args, **kwargs, _initiated_directly=False)
        if directory_name is not None:
            self.cached_folder = get_cache_folder() / directory_name
            self.cached_file = Path(self.cached_folder) / "log.csv"
        self.create()


default_seed = 2024
default_batch_size = 1

default_image_processing_resolution = 2048

def process_image_check(path_input):
    if path_input is None:
        raise gr.Error(
            "Missing image in the first pane: upload a file or use one from the gallery below."
        )

def resize_image(input_image, resolution):
    # Ensure input_image is a PIL Image object
    if not isinstance(input_image, Image.Image):
        raise ValueError("input_image should be a PIL Image object")

    # Convert image to numpy array
    input_image_np = np.asarray(input_image)

    # Get image dimensions
    H, W, C = input_image_np.shape
    H = float(H)
    W = float(W)
    
    # Calculate the scaling factor
    k = float(resolution) / min(H, W)
    
    # Determine new dimensions
    H *= k
    W *= k
    H = int(np.round(H / 64.0)) * 64
    W = int(np.round(W / 64.0)) * 64
    
    # Resize the image using PIL's resize method
    img = input_image.resize((W, H), Image.Resampling.LANCZOS)
    
    return img

def process_image(
    pipe,
    vae_2,
    path_input,
):
    name_base, name_ext = os.path.splitext(os.path.basename(path_input))
    print(f"Processing image {name_base}{name_ext}")

    path_output_dir = tempfile.mkdtemp()
    path_out_png = os.path.join(path_output_dir, f"{name_base}_delight.png")
    input_image = Image.open(path_input)
    # resolution = 0
    # if max(input_image.size) < 768:
    #     resolution = None
    resolution = None

    pipe_out = pipe(
        image=input_image,
        prompt="remove glass reflection",
        vae_2=vae_2,
        processing_resolution=resolution,
    )

    processed_frame = (pipe_out.prediction.clip(-1, 1) + 1) / 2
    processed_frame = (processed_frame[0] * 255).astype(np.uint8)
    processed_frame = Image.fromarray(processed_frame)
    processed_frame.save(path_out_png)
    yield [input_image, path_out_png]


def run_demo_server(pipe, vae_2):
    process_pipe_image = spaces.GPU(functools.partial(process_image, pipe, vae_2))

    gradio_theme = gr.themes.Default()

    with gr.Blocks(
        theme=gradio_theme,
        title="Dereflection Any Image",
        css="""
            #download {
                height: 118px;
            }
            .slider .inner {
                width: 5px;
                background: #FFF;
            }
            .viewport {
                aspect-ratio: 4/3;
            }
            .tabs button.selected {
                font-size: 20px !important;
                color: crimson !important;
            }
            h1 {
                text-align: center;
                display: block;
            }
            h2 {
                text-align: center;
                display: block;
            }
            h3 {
                text-align: center;
                display: block;
            }
            .md_feedback li {
                margin-bottom: 0px !important;
            }
        """,
        head="""
            <script async src="https://www.googletagmanager.com/gtag/js?id=G-1FWSVCGZTG"></script>
            <script>
                window.dataLayer = window.dataLayer || [];
                function gtag() {dataLayer.push(arguments);}
                gtag('js', new Date());
                gtag('config', 'G-1FWSVCGZTG');
            </script>
        """,
    ) as demo:
        gr.Markdown(
            """
            # Dereflection Any Image
            <p align="center">
        """
        )

        with gr.Tabs(elem_classes=["tabs"]):
            with gr.Tab("Image"):
                with gr.Row():
                    with gr.Column():
                        image_input = gr.Image(
                            label="Input Image",
                            type="filepath",
                        )
                        with gr.Row():
                            image_submit_btn = gr.Button(
                                value="remove reflection", variant="primary"
                            )
                            image_reset_btn = gr.Button(value="Reset")
                    with gr.Column():
                        image_output_slider = ImageSlider(
                            label="outputs",
                            type="filepath",
                            show_download_button=True,
                            show_share_button=True,
                            interactive=False,
                            elem_classes="slider",
                            # position=0.25,
                        )

                Examples(
                    fn=process_pipe_image,
                    examples=sorted([
                        os.path.join("files", "image", name)
                        for name in os.listdir(os.path.join("files", "image"))
                    ]),
                    inputs=[image_input],
                    outputs=[image_output_slider],
                    cache_examples=False,
                    directory_name="examples_image",
                )

        ### Image tab
        image_submit_btn.click(
            fn=process_image_check,
            inputs=image_input,
            outputs=None,
            preprocess=False,
            queue=False,
        ).success(
            fn=process_pipe_image,
            inputs=[
                image_input,
            ],
            outputs=[image_output_slider],
            concurrency_limit=1,
        )

        image_reset_btn.click(
            fn=lambda: (
                None,
                None,
                None,
            ),
            inputs=[],
            outputs=[
                image_input,
                image_output_slider,
            ],
            queue=False,
        )

        ### Server launch

        demo.queue(
            api_open=False,
        ).launch(
            server_name="0.0.0.0",
            server_port=7860,
        )


def main():
    os.system("pip freeze")

    pretrained_model_name_or_path = "JichenHu/dereflection-any-image-v0"
    revision = None
    variant = None
    # Load the model
    # normal
    controlnet = ControlNetVAEModel.from_pretrained(pretrained_model_name_or_path + "/controlnet")
    unet = UNet2DConditionModel.from_pretrained(pretrained_model_name_or_path + "/unet")
    vae_2 = CustomAutoencoderKL.from_pretrained(pretrained_model_name_or_path + "/vae_2")

    # Load other components of the pipeline
    vae = AutoencoderKL.from_pretrained(
            pretrained_model_name_or_path, subfolder="vae", revision=revision, variant=variant
        )

    text_encoder = CLIPTextModel.from_pretrained(
            pretrained_model_name_or_path, subfolder="text_encoder", revision=revision, variant=variant
        )
    tokenizer = AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path,
                subfolder="tokenizer",
                revision=revision,
                use_fast=False,
            )
    pipe = DAIPipeline(
            vae=vae,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            unet=unet,
            controlnet=controlnet,
            safety_checker=None,
            scheduler=None,
            feature_extractor=None,
            t_start=0,
        )

    try:
        import xformers
        pipe.enable_xformers_memory_efficient_attention()
    except:
        pass  # run without xformers

    run_demo_server(pipe, vae_2)


if __name__ == "__main__":
    main()