File size: 3,086 Bytes
dd87550
52822fe
 
 
 
 
 
 
dcad222
52822fe
 
 
 
 
6710d2a
52822fe
452e1d0
52822fe
6710d2a
52822fe
 
 
 
 
 
6710d2a
52822fe
78b16f5
52822fe
78b16f5
 
 
52822fe
78b16f5
79f0f31
52822fe
eb11375
120f77a
79f0f31
52822fe
 
 
79f0f31
 
 
 
 
 
6710d2a
 
79f0f31
6710d2a
79f0f31
6710d2a
 
766934f
 
 
 
 
 
6710d2a
 
79f0f31
6710d2a
 
 
79f0f31
 
 
6710d2a
 
 
 
79f0f31
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import transformers
import torch
import gradio as gr
import requests

from transformers import BlipForImageTextRetrieval
from transformers import AutoProcessor
from transformers.utils import logging
from PIL import Image
logging.set_verbosity_error()

# Load the BLIP image-text-matching (ITM) checkpoint and its paired processor
# once at module import (weights are downloaded on first run and cached).
model = BlipForImageTextRetrieval.from_pretrained("Salesforce/blip-itm-base-coco")
processor = AutoProcessor.from_pretrained("Salesforce/blip-itm-base-coco")

def process_image(input_type, image_url, image_upload, text):
    if input_type == "URL":
        raw_image =  Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
    else:
        raw_image = image_upload

    inputs = processor(images=raw_image, text=text, return_tensors="pt")
    itm_scores = model(**inputs)[0]
    itm_score = torch.nn.functional.softmax(itm_scores,dim=1)
    itm_score = itm_score[0][1]
    print(itm_score)

    if itm_score <=.35:
        cmnt = "which is not that great. Try again."
    elif itm_score <= .75:
        cmnt = "which is good. But you can improve it. Try again."
    elif itm_score == 1.0:
        cmnt = "and that is an unbelievable perfect score. You have achieved the near impossible. Congratulations"
    else:
        cmnt = "which is excellent. Can you improve on it?"

    formatted_text = (
        f"""<div><h1 style='text-align: center; font-size: 80px; color: blue;'>
        Your decription score is <span style='font-size: 100px; color: orange;'>{itm_score*100:.2f}/100</span> {cmnt}
        </h1></div>"""
    )
    return formatted_text

def display_image_from_url(image_url):
    """Fetch *image_url* and return it as an RGB PIL image.

    Returns None when the URL textbox is empty so the preview stays blank.
    """
    if image_url:
        # timeout keeps the UI responsive if the host never answers
        image = Image.open(
            requests.get(image_url, stream=True, timeout=30).raw
        ).convert('RGB')
        return image
    return None

def toggle_inputs(input_type):
    """Show/hide the input widgets to match the selected input type.

    Returns visibility updates for (image_url, url_image, image_upload,
    description) in that order; the description box is always shown once
    an input type has been chosen.
    """
    is_url = input_type == "URL"
    return (
        gr.update(visible=is_url),
        gr.update(visible=is_url),
        gr.update(visible=not is_url),
        gr.update(visible=True),
    )

# Gradio UI: radio button picks URL vs. upload; the matching widgets are
# revealed by toggle_inputs, and Submit runs the BLIP scoring pipeline.
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Challenge yourself by describing the image - test & demo app by Srinivas.V..
    Paste either URL of an image or upload the image, describe the image best and submit to know your score.
    """)
    
    # All inputs start hidden; toggle_inputs makes the relevant ones visible.
    input_type = gr.Radio(choices=["URL", "Upload"], label="Input Type")
    image_url = gr.Textbox(label="Image URL", visible=False)
    url_image = gr.Image(type="pil", label="URL Image", visible=False)
    image_upload = gr.Image(type="pil", label="Upload Image", visible=False)
    description = gr.Textbox(label="Describe the image", visible=False, lines=3)
    
    # Wire events: radio selection swaps widget visibility; typing a URL
    # live-previews the fetched image in url_image.
    input_type.change(fn=toggle_inputs, inputs=input_type, outputs=[image_url, url_image, image_upload, description])
    image_url.change(fn=display_image_from_url, inputs=image_url, outputs=url_image)

    submit_btn = gr.Button("Submit")
    processed_image = gr.HTML(label="Your challenge result")
    submit_btn.click(fn=process_image, inputs=[input_type, image_url, image_upload, description], outputs=processed_image)

# share=True opens a public tunnel; debug=True blocks and streams errors.
demo.launch(debug=True, share=True)