Image-To-Flux-Prompt

Running

App Files Files

Image-To-Flux-Prompt / app.py

aifeifei798

Update app.py

6b323fa verified 8 months ago

raw

history blame

2.3 kB

	import os
	import subprocess
	import sys

	import gradio as gr
	import spaces
	import torch
	from PIL import Image
	from transformers import (
	    AutoModelForCausalLM,
	    AutoProcessor,
	)

	# Best-effort install of flash-attn at startup, before the model is loaded.
	# pip is invoked through the running interpreter so the package lands in the
	# same environment this script executes in, and the command is passed as an
	# argument list so no shell is involved.
	subprocess.run(
	    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
	    # Extend the inherited environment instead of replacing it: passing a bare
	    # dict as `env` would drop PATH, HOME, proxy settings, etc. for the child.
	    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
	    # A failed install is not fatal here; the return code is deliberately ignored.
	    check=False,
	)

	# Use the first CUDA device in half precision when one is available,
	# otherwise fall back to the CPU in full precision.
	if torch.cuda.is_available():
	    device, torch_dtype = "cuda:0", torch.float16
	else:
	    device, torch_dtype = "cpu", torch.float32

	# Load the Florence-2 captioning model once at import time and move it to the
	# selected device. trust_remote_code=True allows transformers to execute the
	# code shipped in the model repository.
	Florence_models = AutoModelForCausalLM.from_pretrained(
	    "microsoft/Florence-2-large",
	    torch_dtype=torch_dtype,
	    trust_remote_code=True,
	)
	Florence_models = Florence_models.to(device)

	# Matching processor, used for both input preparation and output decoding.
	Florence_processors = AutoProcessor.from_pretrained(
	    "microsoft/Florence-2-large",
	    trust_remote_code=True,
	)



	@spaces.GPU
	def feifeiflorence(
	    image,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Caption an image with Florence-2 and report its dimensions.

	    Args:
	        image: Array-like image data accepted by ``PIL.Image.fromarray``
	            (what the ``gr.Image`` input component passes to the handler).
	        progress: Gradio progress tracker; declared as a parameter so Gradio
	            can mirror tqdm progress in the UI. Not used directly.

	    Returns:
	        A ``(caption, size_text)`` tuple where ``caption`` is the
	        ``<MORE_DETAILED_CAPTION>`` result and ``size_text`` has the form
	        ``"width=<W> height=<H>"``.

	    Raises:
	        gr.Error: If no image was supplied.
	    """
	    # Pressing Submit with an empty image component delivers None; report that
	    # to the user instead of failing inside Image.fromarray.
	    if image is None:
	        raise gr.Error("Please upload an image first.")

	    image = Image.fromarray(image)
	    task_prompt = "<MORE_DETAILED_CAPTION>"

	    # Normalise every input (grayscale, RGBA, ...) to three-channel RGB.
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    width, height = image.size

	    # Move the prepared tensors to the model's device and dtype.
	    inputs = Florence_processors(
	        text=task_prompt,
	        images=image,
	        return_tensors="pt",
	    ).to(device, torch_dtype)

	    # Deterministic beam search (no sampling).
	    generated_ids = Florence_models.generate(
	        input_ids=inputs["input_ids"],
	        pixel_values=inputs["pixel_values"],
	        max_new_tokens=1024,
	        num_beams=3,
	        do_sample=False,
	    )

	    # Special tokens are kept in the decoded text and handed to the
	    # processor's own post-processing step.
	    generated_text = Florence_processors.batch_decode(
	        generated_ids, skip_special_tokens=False
	    )[0]
	    parsed_answer = Florence_processors.post_process_generation(
	        generated_text,
	        task=task_prompt,
	        image_size=(width, height),
	    )

	    # The parsed result is keyed by the task prompt.
	    out_text = parsed_answer[task_prompt]
	    return out_text, f"width={width} height={height}"



	# Markdown shown at the top of the page. The layout below references
	# DESCRIPTION, which was not defined anywhere in the visible file.
	DESCRIPTION = (
	    "# Image-To-Flux-Prompt\n"
	    "Upload a picture to get a detailed Florence-2 caption "
	    "that can be used as a Flux prompt."
	)

	# Two-column layout: image input and submit button on the left, the generated
	# caption and the image dimensions on the right.
	with gr.Blocks() as demo:
	    gr.Markdown(DESCRIPTION)
	    with gr.Tab(label="Florence-2 Image Flux Prompt"):
	        with gr.Row():
	            with gr.Column():
	                input_img = gr.Image(label="Input Picture")
	                submit_btn = gr.Button(value="Submit")
	            with gr.Column():
	                output_text = gr.Textbox(label="Output Text")
	                output_img_text = gr.Textbox(label="Output width and height")

	        # Wire the button to the captioning function defined in this file.
	        # (The handler was previously given as `process_image`, a name that
	        # does not exist.) Its two return values fill the two text boxes in order.
	        submit_btn.click(
	            feifeiflorence,
	            [input_img],
	            [output_text, output_img_text],
	        )

	demo.launch()