jadechoghari committed
Commit dd7bda3 · verified · 1 Parent(s): 43d08aa

Update app.py

Files changed (1)
  1. app.py +64 -164
app.py CHANGED
@@ -1,12 +1,12 @@
-# import gradio as gr
-# import torch
-# import spaces
-# from diffusers import FluxPipeline, DiffusionPipeline
-# from torchao.quantization import autoquant
+import gradio as gr
+import torch
+import spaces
+from diffusers import FluxPipeline, DiffusionPipeline
+from torchao.quantization import autoquant
 
 
 
-# # # # normal FluxPipeline
+# # # normal FluxPipeline
 # pipeline_normal = FluxPipeline.from_pretrained(
 #     "sayakpaul/FLUX.1-merged",
 #     torch_dtype=torch.bfloat16
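Note: this hunk turns the import block live rather than commented out, so torchao's autoquant is importable at startup even though every call site below remains commented. For reference, a minimal sketch (not part of this commit) of the optimization path those comments describe, i.e. channels-last memory layout, torch.compile, then autoquant with the same error_on_unseen=False flag:

import torch
from diffusers import FluxPipeline
from torchao.quantization import autoquant

# load the merged FLUX checkpoint in bf16 and move it to the GPU
pipe = FluxPipeline.from_pretrained("sayakpaul/FLUX.1-merged", torch_dtype=torch.bfloat16).to("cuda")
# channels-last layout plus max-autotune compilation, as in the commented lines
pipe.transformer.to(memory_format=torch.channels_last)
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
# error_on_unseen=False lets autoquant tolerate layers/shapes it has not profiled
pipe.transformer = autoquant(pipe.transformer, error_on_unseen=False)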
@@ -14,168 +14,68 @@
 # pipeline_normal.transformer.to(memory_format=torch.channels_last)
 # pipeline_normal.transformer = torch.compile(pipeline_normal.transformer, mode="max-autotune", fullgraph=True)
 
-
-# # # optimized FluxPipeline
-# # pipeline_optimized = FluxPipeline.from_pretrained(
-# #     "camenduru/FLUX.1-dev-diffusers",
-# #     torch_dtype=torch.bfloat16
-# # ).to("cuda")
-# # pipeline_optimized.transformer.to(memory_format=torch.channels_last)
-# # pipeline_optimized.transformer = torch.compile(
-# #     pipeline_optimized.transformer,
-# #     mode="max-autotune",
-# #     fullgraph=True
-# # )
-# # # wrap the autoquant call in a try-except block to handle unsupported layers
-# # for name, layer in pipeline_optimized.transformer.named_children():
-# #     try:
-# #         # apply autoquant to each layer
-# #         pipeline_optimized.transformer._modules[name] = autoquant(layer, error_on_unseen=False)
-# #         print(f"Successfully quantized {name}")
-# #     except AttributeError as e:
-# #         print(f"Skipping layer {name} due to error: {e}")
-# #     except Exception as e:
-# #         print(f"Unexpected error while quantizing {name}: {e}")
-
-# # pipeline_optimized.transformer = autoquant(
-# #     pipeline_optimized.transformer,
-# #     error_on_unseen=False
-# # )
-# pipeline_optimized = pipeline_normal
-
-# @spaces.GPU(duration=120)
-# def generate_images(prompt, guidance_scale, num_inference_steps):
-#     # # generate image with normal pipeline
-#     # image_normal = pipeline_normal(
-#     #     prompt=prompt,
-#     #     guidance_scale=guidance_scale,
-#     #     num_inference_steps=int(num_inference_steps)
-#     # ).images[0]
-
-#     # generate image with optimized pipeline
-#     image_optimized = pipeline_optimized(
-#         prompt=prompt,
-#         guidance_scale=guidance_scale,
-#         num_inference_steps=int(num_inference_steps)
-#     ).images[0]
-
-#     return image_optimized
-
-# # set up Gradio interface
-# demo = gr.Interface(
-#     fn=generate_images,
-#     inputs=[
-#         gr.Textbox(lines=2, placeholder="Enter your prompt here...", label="Prompt"),
-#         gr.Slider(1.0, 10.0, step=0.5, value=3.5, label="Guidance Scale"),
-#         gr.Slider(10, 100, step=1, value=50, label="Number of Inference Steps")
-#     ],
-#     outputs=[
-#         gr.Image(type="pil", label="Optimized FluxPipeline")
-#     ],
-#     title="FluxPipeline Comparison",
-#     description="Compare images generated by the normal FluxPipeline and the optimized one using torchao and torch.compile()."
+pipeline_normal = DiffusionPipeline.from_pretrained("sayakpaul/FLUX.1-merged")
+pipeline_normal.load_lora_weights("DarkMoonDragon/TurboRender-flux-dev")
+# # optimized FluxPipeline
+# pipeline_optimized = FluxPipeline.from_pretrained(
+#     "camenduru/FLUX.1-dev-diffusers",
+#     torch_dtype=torch.bfloat16
+# ).to("cuda")
+# pipeline_optimized.transformer.to(memory_format=torch.channels_last)
+# pipeline_optimized.transformer = torch.compile(
+#     pipeline_optimized.transformer,
+#     mode="max-autotune",
+#     fullgraph=True
 # )
+# # wrap the autoquant call in a try-except block to handle unsupported layers
+# for name, layer in pipeline_optimized.transformer.named_children():
+#     try:
+#         # apply autoquant to each layer
+#         pipeline_optimized.transformer._modules[name] = autoquant(layer, error_on_unseen=False)
+#         print(f"Successfully quantized {name}")
+#     except AttributeError as e:
+#         print(f"Skipping layer {name} due to error: {e}")
+#     except Exception as e:
+#         print(f"Unexpected error while quantizing {name}: {e}")
+
+# pipeline_optimized.transformer = autoquant(
+#     pipeline_optimized.transformer,
+#     error_on_unseen=False
+# )
+pipeline_optimized = pipeline_normal
 
-# demo.launch()
-import gradio as gr
-import torch
-import spaces
-from optimum.quanto import quantize
-from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
-from transformers import CLIPTextModel, CLIPTokenizer, T5TokenizerFast
-from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
-import subprocess
-
-import os
-import torch.multiprocessing as mp
-
-# Ensure the correct start method for multiprocessing with CUDA
-mp.set_start_method('spawn', force=True)
-# Set the data type for inference
-dtype = torch.bfloat16
-
-# Hugging Face repository and revision settings
-repo_name = "FLUX.1-schnell-4bit"
-bfl_repo = "black-forest-labs/FLUX.1-schnell"
-revision = "refs/pr/1"
-
-# Ensure local directory exists and download model files
-subprocess.run(["mkdir", "-p", repo_name])
-subprocess.run([
-    "huggingface-cli", "download", "PrunaAI/" + repo_name,
-    "--local-dir", repo_name,
-    "--local-dir-use-symlinks", "False"
-])
-
-# Load scheduler, tokenizer, and VAE from the pre-trained repo
-scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(bfl_repo, subfolder="scheduler", revision=revision)
-text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
-tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
-vae = AutoencoderKL.from_pretrained(bfl_repo, subfolder="vae", torch_dtype=dtype, revision=revision)
-
-# Load text_encoder_2 and tokenizer_2 locally
-text_encoder_2 = torch.load(repo_name + '/text_encoder_2.pt')
-tokenizer_2 = T5TokenizerFast.from_pretrained(bfl_repo, subfolder="tokenizer_2", torch_dtype=dtype, revision=revision)
-
-# Load transformer locally (quantized model)
-transformer = torch.load(repo_name + '/transformer.pt')
-
-# Create the pipeline using the pre-trained models
-pipe = FluxPipeline(
-    scheduler=scheduler,
-    text_encoder=text_encoder,
-    tokenizer=tokenizer,
-    text_encoder_2=text_encoder_2,
-    tokenizer_2=tokenizer_2,
-    vae=vae,
-    transformer=transformer,
-)
-
-# Enable model CPU offload to save memory
-pipe.enable_model_cpu_offload()
-
-# Define the image generation function
 @spaces.GPU(duration=120)
-def generate_image(prompt, guidance_scale, num_inference_steps):
-    generator = torch.Generator().manual_seed(12345)
-    image = pipe(
-        prompt,
+def generate_images(prompt, guidance_scale, num_inference_steps):
+    # # generate image with normal pipeline
+    # image_normal = pipeline_normal(
+    #     prompt=prompt,
+    #     guidance_scale=guidance_scale,
+    #     num_inference_steps=int(num_inference_steps)
+    # ).images[0]
+
+    # generate image with optimized pipeline
+    image_optimized = pipeline_optimized(
+        prompt=prompt,
         guidance_scale=guidance_scale,
-        num_inference_steps=int(num_inference_steps),
-        max_sequence_length=256,
-        generator=generator
+        num_inference_steps=int(num_inference_steps)
     ).images[0]
-    return image
-
-# Set up Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# FLUX.1-schnell 4-bit Quantized Model")
-
-    # Input for text prompt
-    prompt_input = gr.Textbox(lines=2, label="Prompt", placeholder="Enter your prompt here...")
-
-    # Slider for guidance scale
-    guidance_scale_input = gr.Slider(0.0, 10.0, step=0.1, value=7.5, label="Guidance Scale")
-
-    # Slider for number of inference steps
-    inference_steps_input = gr.Slider(4, 50, step=1, value=25, label="Number of Inference Steps")
-
-    # Button to trigger generation
-    generate_button = gr.Button("Generate Image")
-
-    # Output image
-    output_image = gr.Image(label="Generated Image", type="pil")
-
-    # Connect button to the image generation function
-    generate_button.click(fn=generate_image,
-                          inputs=[prompt_input, guidance_scale_input, inference_steps_input],
-                          outputs=[output_image])
-
-# Launch the Gradio app
-if __name__ == '__main__':
-    demo.launch()
-
-
-
+
+    return image_optimized
+
+# set up Gradio interface
+demo = gr.Interface(
+    fn=generate_images,
+    inputs=[
+        gr.Textbox(lines=2, placeholder="Enter your prompt here...", label="Prompt"),
+        gr.Slider(1.0, 10.0, step=0.5, value=3.5, label="Guidance Scale"),
+        gr.Slider(10, 100, step=1, value=50, label="Number of Inference Steps")
+    ],
+    outputs=[
+        gr.Image(type="pil", label="Optimized FluxPipeline")
+    ],
+    title="FluxPipeline Comparison",
+    description="Compare images generated by the normal FluxPipeline and the optimized one using torchao and torch.compile()."
+)
 
+demo.launch()
 
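Note: the net effect of this commit is to drop the PrunaAI FLUX.1-schnell-4bit path, which rebuilt a FluxPipeline around torch.load-ed pre-quantized transformer and text_encoder_2 weights, and instead serve sayakpaul/FLUX.1-merged with the DarkMoonDragon/TurboRender-flux-dev LoRA. Since pipeline_optimized = pipeline_normal is a plain alias, the "comparison" UI currently generates from a single pipeline, and because no torch_dtype is passed, the pipeline loads in diffusers' default fp32. Stripped of commented-out code, the new app.py reduces to roughly:

import gradio as gr
import spaces
from diffusers import DiffusionPipeline

# one pipeline; "optimized" is just an alias for it
pipeline_normal = DiffusionPipeline.from_pretrained("sayakpaul/FLUX.1-merged")
pipeline_normal.load_lora_weights("DarkMoonDragon/TurboRender-flux-dev")
pipeline_optimized = pipeline_normal

@spaces.GPU(duration=120)
def generate_images(prompt, guidance_scale, num_inference_steps):
    return pipeline_optimized(
        prompt=prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
    ).images[0]

demo = gr.Interface(
    fn=generate_images,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your prompt here...", label="Prompt"),
        gr.Slider(1.0, 10.0, step=0.5, value=3.5, label="Guidance Scale"),
        gr.Slider(10, 100, step=1, value=50, label="Number of Inference Steps"),
    ],
    outputs=[gr.Image(type="pil", label="Optimized FluxPipeline")],
    title="FluxPipeline Comparison",
    description="Compare images generated by the normal FluxPipeline and the optimized one using torchao and torch.compile().",
)

demo.launch()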
 
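Note: @spaces.GPU(duration=120) is the Hugging Face ZeroGPU decorator; it attaches a GPU to the process for the duration of each call, up to 120 seconds here. The spaces package only exists on Spaces hardware, so a common guard for running the same file locally (an assumption, not something this commit does) is:

try:
    import spaces  # present on Hugging Face Spaces
    gpu = spaces.GPU(duration=120)  # returns a decorator, as used in the diff
except ImportError:
    def gpu(fn):  # local fallback: no-op decorator
        return fn

@gpu
def generate_images(prompt, guidance_scale, num_inference_steps):
    ...  # body as in the diff above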
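Note: the removed revision seeded generation with torch.Generator().manual_seed(12345); the new generate_images passes no generator, so successive calls with identical inputs produce different images. A sketch of restoring deterministic output (a hypothetical follow-up, not in this commit; it assumes the pipeline_optimized defined above, and the seed parameter is illustrative):

import torch

def generate_images(prompt, guidance_scale, num_inference_steps, seed=12345):
    # a fixed seed makes repeated calls with the same inputs reproducible
    generator = torch.Generator("cpu").manual_seed(seed)
    return pipeline_optimized(
        prompt=prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
        generator=generator,
    ).images[0]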
81