Spaces:

gizemsarsinlar
/

Phi-3.5-Artwork-Analysis

Runtime error

App Files Files Community

Phi-3.5-Artwork-Analysis / app.py

gizemsarsinlar

Update app.py

3d9d048 verified about 17 hours ago

raw

history blame contribute delete

2.79 kB

	import gradio as gr
	import spaces
	from transformers import AutoModelForCausalLM, AutoProcessor
	import torch
	from PIL import Image
	import subprocess
	subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

	user_prompt = '<\|user\|>\n'
	assistant_prompt = '<\|assistant\|>\n'
	prompt_suffix = "<\|end\|>\n"

	model_name = "microsoft/Phi-3.5-vision-instruct"

	# Lazy-load the model and processor at runtime
	def get_model_and_processor(model_id):
	model = AutoModelForCausalLM.from_pretrained(
	model_id,
	trust_remote_code=True,
	torch_dtype=torch.bfloat16 # safer than 'auto'
	).cuda().eval()
	processor = AutoProcessor.from_pretrained(
	model_id,
	trust_remote_code=True
	)
	return model, processor

	@spaces.GPU
	def run_example(image, text_input=None, model_id=model_name):
	model, processor = get_model_and_processor(model_id)
	prompt = f"{user_prompt}<\|image_1\|>\n{text_input}{prompt_suffix}{assistant_prompt}"
	image = Image.fromarray(image).convert("RGB")

	inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
	generate_ids = model.generate(
	**inputs,
	max_new_tokens=1000,
	eos_token_id=processor.tokenizer.eos_token_id
	)
	generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
	response = processor.batch_decode(
	generate_ids,
	skip_special_tokens=True,
	clean_up_tokenization_spaces=False
	)[0]
	return response

	css = """
	#output {
	height: 500px;
	overflow: auto;
	border: 1px solid #ccc;
	}
	"""

	with gr.Blocks(css=css) as demo:
	gr.Markdown("## Phi-3.5 Vision Instruct Demo with Example Inputs")

	with gr.Tab(label="Phi-3.5 Input"):
	with gr.Row():
	with gr.Column():
	input_img = gr.Image(label="Input Picture")
	model_selector = gr.Dropdown(
	choices=[model_name],
	label="Model",
	value=model_name
	)
	text_input = gr.Textbox(label="Question")
	submit_btn = gr.Button(value="Submit")
	with gr.Column():
	output_text = gr.Textbox(label="Output Text")

	examples = [
	["image1.jpeg", "What does this painting tell us explain in detail?"],
	["image2.jpg", "What does this painting tell us explain in detail?"],
	["image3.jpg", "Describe the scene in this picture."]
	]

	gr.Examples(
	examples=examples,
	inputs=[input_img, text_input],
	examples_per_page=3
	)

	submit_btn.click(run_example, [input_img, text_input, model_selector], [output_text])

	demo.queue()
	demo.launch()