@@ -37,10 +37,16 @@ def setUp(self):
   def test_splash_attention(self):
     """Test numerics of splash attention are equivalent to dot_product"""

-    pyconfig.initialize([None, os.path.join(THIS_DIR, "..", "configs", "base21.yml"),
-        'flash_block_sizes={"block_q" : 512, "block_kv_compute": 512, "block_kv": 512,'
-        '"block_q_dkv": 512, "block_kv_dkv": 512, "block_kv_dkv_compute": 512,'
-        '"block_q_dq": 512, "block_kv_dq": 512}',], unittest=True)
+    pyconfig.initialize(
+        [
+            None,
+            os.path.join(THIS_DIR, "..", "configs", "base21.yml"),
+            'flash_block_sizes={"block_q" : 512, "block_kv_compute": 512, "block_kv": 512,'
+            '"block_q_dkv": 512, "block_kv_dkv": 512, "block_kv_dkv_compute": 512,'
+            '"block_q_dq": 512, "block_kv_dq": 512}',
+        ],
+        unittest=True,
+    )
     config = pyconfig.config

     batch = 8
@@ -57,7 +63,7 @@ def test_splash_attention(self):
         split_head_dim=True,
         attention_kernel="dot_product",
         mesh=None,
-        dtype=jnp.bfloat16
+        dtype=jnp.bfloat16,
     )

     params = dot_product_attention.init(key2, x)["params"]
@@ -75,7 +81,7 @@ def test_splash_attention(self):
7581 attention_kernel = "flash" ,
7682 mesh = mesh ,
7783 dtype = jnp .bfloat16 ,
78- flash_block_sizes = flash_block_sizes
84+ flash_block_sizes = flash_block_sizes ,
7985 )
8086
8187 params = splash_attention .init (key2 , x )["params" ]