# flux-kiwi / app.py
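# Previous implementation: FLUX.1-merged with torchao autoquant and torch.compile (commented out, kept for reference).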
# import gradio as gr
# import torch
# import spaces
# from diffusers import FluxPipeline, DiffusionPipeline
# from torchao.quantization import autoquant
# # # # normal FluxPipeline
# pipeline_normal = FluxPipeline.from_pretrained(
#     "sayakpaul/FLUX.1-merged",
#     torch_dtype=torch.bfloat16
# ).to("cuda")
# pipeline_normal.transformer.to(memory_format=torch.channels_last)
# pipeline_normal.transformer = torch.compile(pipeline_normal.transformer, mode="max-autotune", fullgraph=True)
# # # optimized FluxPipeline
# # pipeline_optimized = FluxPipeline.from_pretrained(
# #     "camenduru/FLUX.1-dev-diffusers",
# #     torch_dtype=torch.bfloat16
# # ).to("cuda")
# # pipeline_optimized.transformer.to(memory_format=torch.channels_last)
# # pipeline_optimized.transformer = torch.compile(
# #     pipeline_optimized.transformer,
# #     mode="max-autotune",
# #     fullgraph=True
# # )
# # # wrap the autoquant call in a try-except block to handle unsupported layers
# # for name, layer in pipeline_optimized.transformer.named_children():
# #     try:
# #         # apply autoquant to each layer
# #         pipeline_optimized.transformer._modules[name] = autoquant(layer, error_on_unseen=False)
# #         print(f"Successfully quantized {name}")
# #     except AttributeError as e:
# #         print(f"Skipping layer {name} due to error: {e}")
# #     except Exception as e:
# #         print(f"Unexpected error while quantizing {name}: {e}")
# # pipeline_optimized.transformer = autoquant(
# #     pipeline_optimized.transformer,
# #     error_on_unseen=False
# # )
# pipeline_optimized = pipeline_normal
# @spaces.GPU(duration=120)
# def generate_images(prompt, guidance_scale, num_inference_steps):
#     # # generate image with normal pipeline
#     # image_normal = pipeline_normal(
#     #     prompt=prompt,
#     #     guidance_scale=guidance_scale,
#     #     num_inference_steps=int(num_inference_steps)
#     # ).images[0]
#     # generate image with optimized pipeline
#     image_optimized = pipeline_optimized(
#         prompt=prompt,
#         guidance_scale=guidance_scale,
#         num_inference_steps=int(num_inference_steps)
#     ).images[0]
#     return image_optimized
# # set up Gradio interface
# demo = gr.Interface(
#     fn=generate_images,
#     inputs=[
#         gr.Textbox(lines=2, placeholder="Enter your prompt here...", label="Prompt"),
#         gr.Slider(1.0, 10.0, step=0.5, value=3.5, label="Guidance Scale"),
#         gr.Slider(10, 100, step=1, value=50, label="Number of Inference Steps")
#     ],
#     outputs=[
#         gr.Image(type="pil", label="Optimized FluxPipeline")
#     ],
#     title="FluxPipeline Comparison",
#     description="Compare images generated by the normal FluxPipeline and the optimized one using torchao and torch.compile()."
# )
# demo.launch()
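# Active implementation: FLUX.1-schnell with 4-bit quantized weights downloaded from PrunaAI/FLUX.1-schnell-4bit.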
import gradio as gr
import torch
from optimum.quanto import quantize
from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
from transformers import CLIPTextModel, CLIPTokenizer, T5TokenizerFast
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
import subprocess
import spaces
import os
# Set the data type for inference
dtype = torch.bfloat16
# Hugging Face repository and revision settings
repo_name = "FLUX.1-schnell-4bit"
bfl_repo = "black-forest-labs/FLUX.1-schnell"
revision = "refs/pr/1"
# Ensure the local download directory exists and fetch the quantized model files
os.makedirs(repo_name, exist_ok=True)
subprocess.run([
    "huggingface-cli", "download", "PrunaAI/" + repo_name,
    "--local-dir", repo_name,
    "--local-dir-use-symlinks", "False"
], check=True)
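# Alternative (a sketch, assuming the huggingface_hub Python API is acceptable here):
# snapshot_download fetches the same files in-process instead of shelling out to the CLI.
# from huggingface_hub import snapshot_download
# snapshot_download(repo_id="PrunaAI/" + repo_name, local_dir=repo_name)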
# Load the scheduler and VAE from the base repo, and the CLIP text encoder/tokenizer from OpenAI's repo
scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(bfl_repo, subfolder="scheduler", revision=revision)
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
vae = AutoencoderKL.from_pretrained(bfl_repo, subfolder="vae", torch_dtype=dtype, revision=revision)
# Load the quantized T5 text encoder (text_encoder_2) from the local download; its tokenizer comes from the base repo
text_encoder_2 = torch.load(repo_name + '/text_encoder_2.pt')
tokenizer_2 = T5TokenizerFast.from_pretrained(bfl_repo, subfolder="tokenizer_2", revision=revision)
# Load the quantized FLUX transformer, saved as a full serialized module
transformer = torch.load(repo_name + '/transformer.pt')
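# For reference, a minimal sketch of how a checkpoint like transformer.pt could be produced
# with optimum.quanto (an assumption about the upstream workflow; the PrunaAI repo already
# ships quantized weights, so this is not run here):
# from optimum.quanto import freeze, qint4
# quantize(transformer, weights=qint4)
# freeze(transformer)
# torch.save(transformer, repo_name + '/transformer.pt')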
# Assemble the FluxPipeline from the individual components
pipe = FluxPipeline(
    scheduler=scheduler,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    text_encoder_2=text_encoder_2,
    tokenizer_2=tokenizer_2,
    vae=vae,
    transformer=transformer,
)
# Offload model components to the CPU when idle to reduce GPU memory usage
pipe.enable_model_cpu_offload()
# Define the image generation function (runs on the Space's GPU for up to 120 s per call)
@spaces.GPU(duration=120)
def generate_image(prompt, guidance_scale, num_inference_steps):
    # Fixed seed so repeated runs with the same prompt are reproducible
    generator = torch.Generator().manual_seed(12345)
    image = pipe(
        prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
        max_sequence_length=256,
        generator=generator,
    ).images[0]
    return image
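# Note: FLUX.1-schnell is a timestep-distilled checkpoint that is typically run with very few
# inference steps (around 4), and it does not use guidance embeddings, so guidance_scale
# generally has little or no effect with this model.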
# Set up the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# FLUX.1-schnell 4-bit Quantized Model")
    # Input for the text prompt
    prompt_input = gr.Textbox(lines=2, label="Prompt", placeholder="Enter your prompt here...")
    # Slider for guidance scale
    guidance_scale_input = gr.Slider(0.0, 10.0, step=0.1, value=7.5, label="Guidance Scale")
    # Slider for number of inference steps
    inference_steps_input = gr.Slider(4, 50, step=1, value=25, label="Number of Inference Steps")
    # Button to trigger generation
    generate_button = gr.Button("Generate Image")
    # Output image
    output_image = gr.Image(label="Generated Image", type="pil")
    # Connect the button to the image generation function
    generate_button.click(
        fn=generate_image,
        inputs=[prompt_input, guidance_scale_input, inference_steps_input],
        outputs=[output_image],
    )

# Launch the Gradio app
demo.launch()