import gradio as gr
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, BlipForImageTextRetrieval
from transformers.utils import logging
logging.set_verbosity_error()
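
# Load the BLIP image-text matching (ITM) model and its processor from the Hugging Face Hub.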
model = BlipForImageTextRetrieval.from_pretrained("Salesforce/blip-itm-base-coco")
processor = AutoProcessor.from_pretrained("Salesforce/blip-itm-base-coco")
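
# Score how well the submitted text describes the image; returns an HTML snippet with the result.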
def process_image(input_type, image_url, image_upload, text):
    if input_type == "URL":
        raw_image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
    else:
        raw_image = image_upload
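    # Run BLIP and convert its ITM logits into a match probability with softmax;
    # index [0][1] is the probability that the text matches the image.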
    inputs = processor(images=raw_image, text=text, return_tensors="pt")
    itm_scores = model(**inputs)[0]
    itm_score = torch.nn.functional.softmax(itm_scores, dim=1)
    itm_score = itm_score[0][1].item()
    print(itm_score)
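    # Map the probability to qualitative feedback.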
    if itm_score <= 0.35:
        cmnt = "which is not that great. Try again."
    elif itm_score <= 0.75:
        cmnt = "which is good. But you can improve it. Try again."
    elif itm_score == 1.0:
        cmnt = "and that is an unbelievable perfect score. You have achieved the near impossible. Congratulations!"
    else:
        cmnt = "which is excellent. Can you improve on it?"
    formatted_text = (
        f"""<div><h1 style='text-align: center; font-size: 80px; color: blue;'>
        Your description score is <span style='font-size: 100px; color: orange;'>{itm_score*100:.2f}/100</span> {cmnt}
        </h1></div>"""
    )
    return formatted_text
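
# Fetch and return a preview image whenever the URL textbox changes.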
def display_image_from_url(image_url):
    if image_url:
        image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
        return image
    return None
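
# Show or hide inputs depending on whether the user chose "URL" or "Upload".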
def toggle_inputs(input_type):
if input_type == "URL":
return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
else:
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
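
# Build the Gradio interface.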
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Challenge yourself by describing the image - test & demo app by Srinivas.V.
        Paste an image URL or upload an image, describe the image as accurately as you can, and submit to see your score.
        """)
    input_type = gr.Radio(choices=["URL", "Upload"], label="Input Type")
    image_url = gr.Textbox(label="Image URL", visible=False)
    url_image = gr.Image(type="pil", label="URL Image", visible=False)
    image_upload = gr.Image(type="pil", label="Upload Image", visible=False)
    description = gr.Textbox(label="Describe the image", visible=False, lines=3)
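    # Wire up events: toggle input visibility, preview URL images, and score on submit.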
    input_type.change(fn=toggle_inputs, inputs=input_type, outputs=[image_url, url_image, image_upload, description])
    image_url.change(fn=display_image_from_url, inputs=image_url, outputs=url_image)
    submit_btn = gr.Button("Submit")
    processed_image = gr.HTML(label="Your challenge result")
    submit_btn.click(fn=process_image, inputs=[input_type, image_url, image_upload, description], outputs=processed_image)

demo.launch(debug=True, share=True)