@@ -111,14 +111,25 @@ def __call__(
         negative_prompt_embeds: jax.Array = None,
         vae_only: bool = False,
         use_cfg_cache: bool = False,
+        use_sen_cache: bool = False,
     ):
+        if use_cfg_cache and use_sen_cache:
+            raise ValueError("use_cfg_cache and use_sen_cache are mutually exclusive. Enable only one.")
+
         if use_cfg_cache and (guidance_scale_low <= 1.0 or guidance_scale_high <= 1.0):
             raise ValueError(
                 f"use_cfg_cache=True requires both guidance_scale_low > 1.0 and guidance_scale_high > 1.0 "
                 f"(got {guidance_scale_low}, {guidance_scale_high}). "
                 "CFG cache accelerates classifier-free guidance, which must be enabled for both transformer phases."
             )
 
+        if use_sen_cache and (guidance_scale_low <= 1.0 or guidance_scale_high <= 1.0):
+            raise ValueError(
+                f"use_sen_cache=True requires both guidance_scale_low > 1.0 and guidance_scale_high > 1.0 "
+                f"(got {guidance_scale_low}, {guidance_scale_high}). "
+                "SenCache requires classifier-free guidance to be enabled for both transformer phases."
+            )
+
         latents, prompt_embeds, negative_prompt_embeds, scheduler_state, num_frames = self._prepare_model_inputs(
             prompt,
             negative_prompt,
@@ -148,6 +159,7 @@ def __call__(
             scheduler=self.scheduler,
             scheduler_state=scheduler_state,
             use_cfg_cache=use_cfg_cache,
+            use_sen_cache=use_sen_cache,
             height=height,
         )
 
@@ -184,22 +196,104 @@ def run_inference_2_2(
     scheduler: FlaxUniPCMultistepScheduler,
     scheduler_state,
     use_cfg_cache: bool = False,
+    use_sen_cache: bool = False,
     height: int = 480,
 ):
189- """Denoising loop for WAN 2.2 T2V with optional FasterCache CFG-Cache.
190-
191- Dual-transformer CFG-Cache strategy (enabled via use_cfg_cache=True):
192- - High-noise phase (t >= boundary): always full CFG — short phase, critical
193- for establishing video structure.
194- - Low-noise phase (t < boundary): FasterCache alternation — full CFG every N
195- steps, FFT frequency-domain compensation on cache steps (batch×1).
196- - Boundary transition: mandatory full CFG step to populate cache for the
197- low-noise transformer.
198- - FFT compensation identical to WAN 2.1 (Lv et al., ICLR 2025).
202+ """Denoising loop for WAN 2.2 T2V with optional caching acceleration.
203+
204+ Supports two caching strategies:
205+
206+ 1. CFG-Cache (use_cfg_cache=True) — FasterCache-style:
207+ Caches the unconditional branch and uses FFT frequency-domain compensation.
208+
209+ 2. SenCache (use_sen_cache=True) — Sensitivity-aware caching:
210+ Measures output sensitivity after each full forward pass. When sensitivity
211+ is low (model output is stable), skips the entire transformer and reuses
212+ the cached noise prediction. Naturally handles MoE expert boundaries by
213+ detecting high sensitivity at transition points.
199214 """
     do_classifier_free_guidance = guidance_scale_low > 1.0 or guidance_scale_high > 1.0
     bsz = latents.shape[0]
 
+    # ── SenCache path ──
+    if use_sen_cache and do_classifier_free_guidance:
+        timesteps_np = np.array(scheduler_state.timesteps, dtype=np.int32)
+        step_uses_high = [bool(timesteps_np[s] >= boundary) for s in range(num_inference_steps)]
+
+        # Resolution-dependent SenCache config
+        if height >= 720:
+            sen_threshold = 0.06  # tighter for higher resolution
+            warmup_ratio = 0.10
+            max_consecutive_cache = 2
+        else:
+            sen_threshold = 0.08
+            warmup_ratio = 0.08
+            max_consecutive_cache = 3
+
+        warmup_steps = max(2, int(num_inference_steps * warmup_ratio))
+
+        prompt_embeds_combined = jnp.concatenate([prompt_embeds, negative_prompt_embeds], axis=0)
+
+        # SenCache state
+        prev_noise_pred = None      # last full-computation noise prediction
+        sensitivity = float('inf')  # measured relative output change
+        consecutive_cached = 0      # consecutive steps using cache
+        cache_count = 0
+
+        for step in range(num_inference_steps):
+            t = jnp.array(scheduler_state.timesteps, dtype=jnp.int32)[step]
+
+            # Select transformer and guidance scale
+            if step_uses_high[step]:
+                graphdef, state, rest = high_noise_graphdef, high_noise_state, high_noise_rest
+                guidance_scale = guidance_scale_high
+            else:
+                graphdef, state, rest = low_noise_graphdef, low_noise_state, low_noise_rest
+                guidance_scale = guidance_scale_low
+
+            # Caching decision
+            is_warmup = step < warmup_steps
+            is_boundary = step > 0 and step_uses_high[step] != step_uses_high[step - 1]
+            should_cache = (
+                not is_warmup
+                and not is_boundary
+                and prev_noise_pred is not None
+                and sensitivity < sen_threshold
+                and consecutive_cached < max_consecutive_cache
+            )
+
+            if should_cache:
+                # ── Cache step: reuse previous noise prediction ──
+                noise_pred = prev_noise_pred
+                consecutive_cached += 1
+                cache_count += 1
+            else:
+                # ── Full CFG step ──
+                latents_doubled = jnp.concatenate([latents] * 2)
+                timestep = jnp.broadcast_to(t, bsz * 2)
+                noise_pred, _, _ = transformer_forward_pass_full_cfg(
+                    graphdef, state, rest,
+                    latents_doubled, timestep, prompt_embeds_combined,
+                    guidance_scale=guidance_scale,
+                )
+
+                # Measure sensitivity: relative output change since last full step
+                if prev_noise_pred is not None:
+                    output_diff = jnp.mean(jnp.abs(noise_pred - prev_noise_pred))
+                    output_magnitude = jnp.mean(jnp.abs(noise_pred)) + 1e-8
+                    sensitivity = float(output_diff / output_magnitude)
+                else:
+                    sensitivity = float('inf')
+
+                prev_noise_pred = noise_pred
+                consecutive_cached = 0
+
+            latents, scheduler_state = scheduler.step(scheduler_state, noise_pred, t, latents).to_tuple()
+
+        print(f"[SenCache] Cached {cache_count}/{num_inference_steps} steps "
+              f"({100 * cache_count / num_inference_steps:.1f}% cache ratio)")
+        return latents
+
     # ── CFG cache path ──
     if use_cfg_cache and do_classifier_free_guidance:
         # Get timesteps as numpy for Python-level scheduling decisions
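
For readers who want the SenCache decision rule in isolation, here is a self-contained sketch that restates the gating logic and sensitivity metric from the diff above. It is not part of the patch; `relative_change` and `should_use_cache` are illustrative names for logic the diff inlines in the loop.

import jax.numpy as jnp

def relative_change(curr, prev, eps=1e-8):
    # Relative L1 change between consecutive full-step noise predictions;
    # the same sensitivity metric the diff computes inline.
    return float(jnp.mean(jnp.abs(curr - prev)) / (jnp.mean(jnp.abs(curr)) + eps))

def should_use_cache(step, warmup_steps, is_boundary, prev_noise_pred,
                     sensitivity, sen_threshold, consecutive_cached,
                     max_consecutive_cache):
    # Reuse the cached prediction only when all of the following hold:
    #  - warmup is over (early steps set global structure),
    #  - we are not crossing the high-/low-noise expert boundary,
    #  - a cached prediction exists,
    #  - the last measured sensitivity is below the threshold,
    #  - we have not already cached too many steps in a row.
    return (
        step >= warmup_steps
        and not is_boundary
        and prev_noise_pred is not None
        and sensitivity < sen_threshold
        and consecutive_cached < max_consecutive_cache
    )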
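
And a minimal call-site sketch, assuming a constructed pipeline object exposing the `__call__` signature patched above; the prompt text and guidance values are placeholders, and what the call returns depends on the rest of the pipeline:

# Hypothetical invocation; pipeline construction and weight loading elided.
out = pipeline(
    prompt="a red panda napping on a mossy branch",
    negative_prompt="blurry, low quality",
    guidance_scale_low=3.0,   # must be > 1.0 when use_sen_cache=True
    guidance_scale_high=4.0,  # must be > 1.0 when use_sen_cache=True
    use_sen_cache=True,       # mutually exclusive with use_cfg_cache
)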