LPX55 committed
Commit 2b517e0 · verified · 1 Parent(s): 337713d

Update app_v3.py

Files changed (1)
  1. app_v3.py +16 -1
app_v3.py CHANGED
@@ -47,6 +47,21 @@ pipe = FluxControlNetPipeline.from_pretrained(
 )
 pipe.to("cuda")
 
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# For FLUX models, compiling VAE decode can also be beneficial if needed, though UNet is primary.
+# pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True) # Uncomment if VAE compile helps
+
+# 2. Memory Efficient Attention (xFormers): Reduces memory usage and improves speed
+# Requires xformers library installation. Beneficial even with high VRAM.
+try:
+    pipe.enable_xformers_memory_efficient_attention()
+except Exception as e:
+    print(f"XFormers not available, skipping memory efficient attention: {e}")
+
+# 3. Attention Slicing: Recommended for FLUX models and high-resolution images,
+# even with ample VRAM, as it can sometimes help with very large tensors.
+pipe.enable_attention_slicing()
+
 @spaces.GPU(duration=10)
 @torch.no_grad()
 def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
@@ -228,4 +243,4 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as demo:
     outputs=[prompt]
 )
 
-demo.launch(mcp_server=True, show_error=True)
+demo.launch(show_error=True)
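A note on the first hunk: FLUX is a DiT-style model, and in diffusers the FluxControlNetPipeline exposes its denoiser as `pipe.transformer`; Flux pipelines have no `pipe.unet` component, so the compile line above is likely to raise an AttributeError at startup. Below is a minimal sketch of the same torch.compile optimization applied to the transformer instead. The checkpoint IDs are placeholders for illustration; the Space's real model IDs are not visible in this hunk.

```python
import torch
from diffusers import FluxControlNetModel, FluxControlNetPipeline

# Placeholder checkpoints for illustration only; not necessarily what app_v3.py loads.
controlnet = FluxControlNetModel.from_pretrained(
    "jasperai/Flux.1-dev-Controlnet-Upscaler", torch_dtype=torch.bfloat16
)
pipe = FluxControlNetPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", controlnet=controlnet, torch_dtype=torch.bfloat16
)
pipe.to("cuda")

# FLUX pipelines carry their denoiser at `pipe.transformer` (a DiT, not a UNet),
# so that is the module to hand to torch.compile.
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=True)
```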
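On the xFormers block: the broad try/except keeps the Space booting when xformers is missing, but diffusers also ships an explicit availability check that avoids swallowing unrelated errors. A sketch, continuing from the `pipe` above; on PyTorch 2.x, diffusers already defaults to scaled-dot-product attention, so skipping xFormers is a soft degradation rather than a failure.

```python
from diffusers.utils import is_xformers_available

if is_xformers_available():
    pipe.enable_xformers_memory_efficient_attention()
else:
    # PyTorch 2.x pipelines default to torch SDPA attention,
    # so nothing extra is required on this branch.
    print("xformers not installed; using PyTorch SDPA attention.")
```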
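enable_attention_slicing() also accepts a slice-size argument, which is the knob to reach for if peak memory is still tight at upscale resolutions. A short sketch of the documented options, again against the same `pipe`:

```python
# "auto" (the default) computes attention in halves; "max" runs one slice
# at a time, giving the lowest peak memory at the cost of speed.
pipe.enable_attention_slicing("auto")  # or pipe.enable_attention_slicing("max")

# Slicing costs throughput, so it can be switched off again once memory
# headroom at the target resolution is confirmed.
pipe.disable_attention_slicing()
```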
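The second hunk only drops mcp_server=True from the launch call. Below is a self-contained sketch of the resulting launch, with a placeholder UI standing in for the real interface. One caveat, stated as an assumption about recent Gradio versions: when the flag is omitted, MCP serving falls back to Gradio's default (which the GRADIO_MCP_SERVER environment variable can still switch on), so it is not necessarily hard-disabled.

```python
import gradio as gr

# Placeholder UI standing in for the real FLUX Turbo Upscaler interface.
with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as demo:
    prompt = gr.Textbox(label="Prompt")

# show_error=True surfaces Python tracebacks in the browser UI; mcp_server
# is simply left at its default rather than forced on.
demo.launch(show_error=True)
```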