Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,65 +1,18 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
import spaces
|
4 |
-
from diffusers import FluxPipeline
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
# # # normal FluxPipeline
|
10 |
-
# pipeline_normal = FluxPipeline.from_pretrained(
|
11 |
-
# "sayakpaul/FLUX.1-merged",
|
12 |
-
# torch_dtype=torch.bfloat16
|
13 |
-
# ).to("cuda")
|
14 |
-
# pipeline_normal.transformer.to(memory_format=torch.channels_last)
|
15 |
-
# pipeline_normal.transformer = torch.compile(pipeline_normal.transformer, mode="max-autotune", fullgraph=True)
|
16 |
-
|
17 |
-
torch.backends.cuda.matmul.allow_tf32 = True # Enable TensorFloat32 for faster matrix operations
|
18 |
-
torch.backends.cudnn.benchmark = True # Optimizes for GPU by enabling auto-tuning
|
19 |
-
|
20 |
-
# Compile the model with maximum optimizations
|
21 |
-
torch.compile(backend="inductor", mode="max-autotune")
|
22 |
|
23 |
pipe = FluxPipeline.from_pretrained(
|
24 |
"sayakpaul/FLUX.1-merged",
|
25 |
torch_dtype=torch.bfloat16
|
26 |
)
|
27 |
-
|
28 |
-
|
29 |
-
pipe.
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
# Apply dynamic quantization for even faster inference
|
35 |
-
pipe = torch.quantization.quantize_dynamic(pipe, {torch.nn.Linear}, dtype=torch.qint8)
|
36 |
-
# # optimized FluxPipeline
|
37 |
-
# pipeline_optimized = FluxPipeline.from_pretrained(
|
38 |
-
# "camenduru/FLUX.1-dev-diffusers",
|
39 |
-
# torch_dtype=torch.bfloat16
|
40 |
-
# ).to("cuda")
|
41 |
-
# pipeline_optimized.transformer.to(memory_format=torch.channels_last)
|
42 |
-
# pipeline_optimized.transformer = torch.compile(
|
43 |
-
# pipeline_optimized.transformer,
|
44 |
-
# mode="max-autotune",
|
45 |
-
# fullgraph=True
|
46 |
-
# )
|
47 |
-
# # wrap the autoquant call in a try-except block to handle unsupported layers
|
48 |
-
# for name, layer in pipeline_optimized.transformer.named_children():
|
49 |
-
# try:
|
50 |
-
# # apply autoquant to each layer
|
51 |
-
# pipeline_optimized.transformer._modules[name] = autoquant(layer, error_on_unseen=False)
|
52 |
-
# print(f"Successfully quantized {name}")
|
53 |
-
# except AttributeError as e:
|
54 |
-
# print(f"Skipping layer {name} due to error: {e}")
|
55 |
-
# except Exception as e:
|
56 |
-
# print(f"Unexpected error while quantizing {name}: {e}")
|
57 |
-
|
58 |
-
# pipeline_optimized.transformer = autoquant(
|
59 |
-
# pipeline_optimized.transformer,
|
60 |
-
# error_on_unseen=False
|
61 |
-
# )
|
62 |
-
pipeline_optimized = pipe
|
63 |
|
64 |
@spaces.GPU(duration=120)
|
65 |
def generate_images(prompt, guidance_scale, num_inference_steps):
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
import spaces
|
4 |
+
from diffusers import FluxPipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
pipe = FluxPipeline.from_pretrained(
|
7 |
"sayakpaul/FLUX.1-merged",
|
8 |
torch_dtype=torch.bfloat16
|
9 |
)
|
10 |
+
pipe.transformer.to(memory_format=torch.channels_last)
|
11 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
12 |
+
pipe.transformer = autoquant(
|
13 |
+
pipe.transformer,
|
14 |
+
error_on_unseen=False
|
15 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
@spaces.GPU(duration=120)
|
18 |
def generate_images(prompt, guidance_scale, num_inference_steps):
|