Hugging Face Spaces (running on ZeroGPU) — commit "Unclutter a bit": diff of optimization.py (+28 lines, −30 lines).
@@ -10,43 +10,41 @@ from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
|
|
10 |
from zerogpu import aoti_compile
|
11 |
|
12 |
|
|
|
|
|
|
|
|
|
13 |
def optimize_pipeline_(pipeline: FluxPipeline):
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
@spaces.GPU(duration=1500)
def compile_transformer():
    """Quantize the Flux transformer, export it, and AOT-compile it with Inductor.

    Runs inside a ZeroGPU allocation (duration=1500 s); the example tensors
    below are created on 'cuda', so this must execute under @spaces.GPU.

    Returns:
        The artifact produced by ``aoti_compile`` for the exported transformer.
    """
    # Fuse the separate q/k/v projections into single matmuls before export.
    pipeline.transformer.fuse_qkv_projections()
    # torchao: dynamic float8 activations + float8 weights.
    quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())

    def _example_tensor(*shape):
        # Example inputs matching the pipeline's dtype, used only for tracing.
        return torch.randn(*shape, device='cuda', dtype=torch.bfloat16)

    # No guidance embedding in the config => timestep-distilled checkpoint.
    is_timestep_distilled = not pipeline.transformer.config.guidance_embeds
    # NOTE(review): presumably the distilled variant uses a shorter text
    # sequence (256 vs 512 tokens) — confirm against the model card.
    seq_length = 256 if is_timestep_distilled else 512

    # Example kwargs that fix the shapes torch.export will specialize on.
    transformer_kwargs = {
        'hidden_states': _example_tensor(1, 4096, 64),
        'timestep': torch.tensor([1.], device='cuda', dtype=torch.bfloat16),
        'guidance': None if is_timestep_distilled else torch.tensor([1.], device='cuda', dtype=torch.bfloat16),
        'pooled_projections': _example_tensor(1, 768),
        'encoder_hidden_states': _example_tensor(1, seq_length, 4096),
        'txt_ids': _example_tensor(seq_length, 3),
        'img_ids': _example_tensor(4096, 3),
        'joint_attention_kwargs': {},
        'return_dict': False,
    }

    # Inductor tuning knobs for the ahead-of-time compile.
    inductor_configs = {
        'conv_1x1_as_mm': True,
        'epilogue_fusion': False,
        'coordinate_descent_tuning': True,
        'coordinate_descent_check_all_directions': True,
        'max_autotune': True,
        'triton.cudagraphs': True,
    }

    exported = torch.export.export(pipeline.transformer, args=(), kwargs=transformer_kwargs)
    return aoti_compile(exported, inductor_configs)
|
51 |
|
52 |
transformer_config = pipeline.transformer.config
|
|
|
10 |
from zerogpu import aoti_compile
|
11 |
|
12 |
|
13 |
+
def _example_tensor(*shape):
|
14 |
+
return torch.randn(*shape, device='cuda', dtype=torch.bfloat16)
|
15 |
+
|
16 |
+
|
17 |
def optimize_pipeline_(pipeline: FluxPipeline):
|
18 |
|
19 |
+
is_timestep_distilled = not pipeline.transformer.config.guidance_embeds
|
20 |
+
seq_length = 256 if is_timestep_distilled else 512
|
21 |
+
|
22 |
+
transformer_kwargs = {
|
23 |
+
'hidden_states': _example_tensor(1, 4096, 64),
|
24 |
+
'timestep': torch.tensor([1.], device='cuda', dtype=torch.bfloat16),
|
25 |
+
'guidance': None if is_timestep_distilled else torch.tensor([1.], device='cuda', dtype=torch.bfloat16),
|
26 |
+
'pooled_projections': _example_tensor(1, 768),
|
27 |
+
'encoder_hidden_states': _example_tensor(1, seq_length, 4096),
|
28 |
+
'txt_ids': _example_tensor(seq_length, 3),
|
29 |
+
'img_ids': _example_tensor(4096, 3),
|
30 |
+
'joint_attention_kwargs': {},
|
31 |
+
'return_dict': False,
|
32 |
+
}
|
33 |
+
|
34 |
+
inductor_configs = {
|
35 |
+
'conv_1x1_as_mm': True,
|
36 |
+
'epilogue_fusion': False,
|
37 |
+
'coordinate_descent_tuning': True,
|
38 |
+
'coordinate_descent_check_all_directions': True,
|
39 |
+
'max_autotune': True,
|
40 |
+
'triton.cudagraphs': True,
|
41 |
+
}
|
42 |
+
|
43 |
@spaces.GPU(duration=1500)
def compile_transformer():
    """Quantize the Flux transformer, export it, and AOT-compile it with Inductor.

    Closure: reads ``pipeline``, ``transformer_kwargs``, and
    ``inductor_configs`` from the enclosing ``optimize_pipeline_`` scope.
    Runs inside a ZeroGPU allocation (duration=1500 s) so CUDA is available
    for the export.

    Returns:
        The artifact produced by ``aoti_compile`` for the exported transformer.
    """
    # Fuse the separate q/k/v projections into single matmuls before export.
    pipeline.transformer.fuse_qkv_projections()
    # torchao: dynamic float8 activations + float8 weights.
    quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
    # Specialize on the example shapes from transformer_kwargs, then compile.
    exported = torch.export.export(pipeline.transformer, args=(), kwargs=transformer_kwargs)
    return aoti_compile(exported, inductor_configs)
|
49 |
|
50 |
transformer_config = pipeline.transformer.config
|