Spaces:

jadechoghari
/

flux-kiwi

Runtime error

App Files Files Community

jadechoghari committed on Sep 14, 2024

Commit

0c1fd6d

verified ·

1 Parent(s): faf4bac

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -54

app.py CHANGED Viewed

@@ -1,65 +1,18 @@
 import gradio as gr
 import torch
 import spaces
-from diffusers import FluxPipeline, DiffusionPipeline
-# # # normal FluxPipeline
-# pipeline_normal = FluxPipeline.from_pretrained(
-#     "sayakpaul/FLUX.1-merged",
-#     torch_dtype=torch.bfloat16
-# ).to("cuda")
-# pipeline_normal.transformer.to(memory_format=torch.channels_last)
-# pipeline_normal.transformer = torch.compile(pipeline_normal.transformer, mode="max-autotune", fullgraph=True)
-torch.backends.cuda.matmul.allow_tf32 = True  # Enable TensorFloat32 for faster matrix operations
-torch.backends.cudnn.benchmark = True         # Optimizes for GPU by enabling auto-tuning
-# Compile the model with maximum optimizations
-torch.compile(backend="inductor", mode="max-autotune")
 pipe = FluxPipeline.from_pretrained(
     "sayakpaul/FLUX.1-merged",
     torch_dtype=torch.bfloat16
 )
-# Offload to CPU if necessary
-pipe.enable_model_cpu_offload()
-# Use xformers for memory-efficient attention
-pipe.enable_xformers_memory_efficient_attention()
-# Apply dynamic quantization for even faster inference
-pipe = torch.quantization.quantize_dynamic(pipe, {torch.nn.Linear}, dtype=torch.qint8)
-# # optimized FluxPipeline
-# pipeline_optimized = FluxPipeline.from_pretrained(
-#     "camenduru/FLUX.1-dev-diffusers",
-#     torch_dtype=torch.bfloat16
-# ).to("cuda")
-# pipeline_optimized.transformer.to(memory_format=torch.channels_last)
-# pipeline_optimized.transformer = torch.compile(
-#     pipeline_optimized.transformer,
-#     mode="max-autotune",
-#     fullgraph=True
-# )
-# # wrap the autoquant call in a try-except block to handle unsupported layers
-# for name, layer in pipeline_optimized.transformer.named_children():
-#     try:
-#         # apply autoquant to each layer
-#         pipeline_optimized.transformer._modules[name] = autoquant(layer, error_on_unseen=False)
-#         print(f"Successfully quantized {name}")
-#     except AttributeError as e:
-#         print(f"Skipping layer {name} due to error: {e}")
-#     except Exception as e:
-#         print(f"Unexpected error while quantizing {name}: {e}")
-# pipeline_optimized.transformer = autoquant(
-#     pipeline_optimized.transformer,
-#     error_on_unseen=False
-# )
-pipeline_optimized = pipe
 @spaces.GPU(duration=120)
 def generate_images(prompt, guidance_scale, num_inference_steps):

 import gradio as gr
 import torch
 import spaces
+from diffusers import FluxPipeline
 pipe = FluxPipeline.from_pretrained(
     "sayakpaul/FLUX.1-merged",
     torch_dtype=torch.bfloat16
 )
+pipe.transformer.to(memory_format=torch.channels_last)
+pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
+pipe.transformer = autoquant(
+    pipe.transformer,
+    error_on_unseen=False
+)
 @spaces.GPU(duration=120)
 def generate_images(prompt, guidance_scale, num_inference_steps):