@@ -206,66 +206,87 @@ def merge_lora(model: nnx.Module, state_dict: dict, scale: float):
   into the kernel of nnx.Linear and nnx.Conv layers.
   """
   lora_params = {}
+  # Parse weights and alphas
   for k, v in state_dict.items():
-    # Try matching diffusers rename format: "some.thing_lora.down.weight"
-    m = re.match(r"^(.*?)_lora\.(down|up)\.weight$", k)
-    if m:
-      module_path_str, weight_type = m.group(1), m.group(2)
-    else:
-      # Try matching diffusers format: "some.thing.lora.down.weight"
-      m = re.match(r"^(.*?)\.lora\.(down|up)\.weight$", k)
-      if m:
-        module_path_str, weight_type = m.group(1), m.group(2)
-      else:
-        # Try matching kohya/lightning format: "some.thing.lora_down.weight"
-        m = re.match(r"^(.*?)\.(lora_down|lora_up)\.weight$", k)
-        if m:
-          module_path_str, weight_type = m.group(1), m.group(2).replace("lora_", "")
-        else:
-          max_logging.log(f"Could not parse LoRA key: {k}")
-          continue
-
-    if module_path_str not in lora_params:
-      lora_params[module_path_str] = {}
-    lora_params[module_path_str][weight_type] = jnp.array(v)
+    if k.endswith(".alpha"):
+      module_path_str = k[: -len(".alpha")]
+      if module_path_str not in lora_params:
+        lora_params[module_path_str] = {}
+      lora_params[module_path_str]["alpha"] = jnp.array(v)
+      continue
+
+    # Try matching diffusers rename format: "some.thing_lora.down.weight"
+    m = re.match(r"^(.*?)_lora\.(down|up)\.weight$", k)
+    if m:
+      module_path_str, weight_type = m.group(1), m.group(2)
+    else:
+      # Try matching diffusers format: "some.thing.lora.down.weight"
+      m = re.match(r"^(.*?)\.lora\.(down|up)\.weight$", k)
+      if m:
+        module_path_str, weight_type = m.group(1), m.group(2)
+      else:
+        # Try matching kohya/lightning format: "some.thing.lora_down.weight"
+        m = re.match(r"^(.*?)\.(lora_down|lora_up)\.weight$", k)
+        if m:
+          module_path_str, weight_type = m.group(1), m.group(2).replace("lora_", "")
+        else:
+          max_logging.log(f"Could not parse LoRA key: {k}")
+          continue
+    if module_path_str not in lora_params:
+      lora_params[module_path_str] = {}
+    lora_params[module_path_str][weight_type] = jnp.array(v)
+  max_logging.log(f"Parsed {len(lora_params)} unique LoRA module keys: {list(lora_params.keys())}")

   assigned_count = 0
   for path, module in nnx.iter_graph(model):
+    if not isinstance(module, (nnx.Linear, nnx.Conv)):
+      max_logging.log(f"Skipping non-Linear/Conv layer: {module}")
+      continue
+
     nnx_path_str = ".".join(map(str, path))
+    max_logging.log(f"Checking NNX layer: {nnx_path_str}")

     matched_key = None
     if nnx_path_str in lora_params:
       matched_key = nnx_path_str
     else:
-      # Fallback: check if any param key matches end of nnx path
+      # Fallback: check if any param key is a suffix of nnx path
       for k in lora_params:
         if nnx_path_str.endswith(k):
           matched_key = k
+          max_logging.log(f"NNX path '{nnx_path_str}' matched LoRA key '{k}' via suffix.")
           break
+    max_logging.log(f"Layer: {nnx_path_str}, Matched LoRA key: {matched_key}")

     if matched_key and matched_key in lora_params:
-      weights = lora_params[matched_key]
-      if "down" in weights and "up" in weights:
-        if isinstance(module, nnx.Linear):
-          down_w = weights["down"]  # (rank, in_features)
-          up_w = weights["up"]  # (out_features_flat, rank)
-          # delta = A @ B = down.T @ up.T
-          delta = (down_w.T @ up_w.T).reshape(module.kernel.shape)
-          module.kernel.value += delta * scale
-          assigned_count += 1
-        elif isinstance(module, nnx.Conv):
-          if module.kernel_size == (1, 1):
-            down_w = weights["down"]  # (1, 1, in_c, rank)
-            up_w = weights["up"]  # (1, 1, rank, out_c)
-            # delta = down @ up for channel dimension
-            delta = (jnp.squeeze(down_w) @ jnp.squeeze(up_w)).reshape(module.kernel.shape)
-            module.kernel.value += delta * scale
-            assigned_count += 1
-          else:
-            raise NotImplementedError(
-                f"Merging LoRA weights for Conv layer {matched_key} "
-                f"with kernel_size {module.kernel_size} > 1 is not supported."
-            )
-      else:
-        max_logging.log(f"LoRA weights for {matched_key} incomplete.")
+      weights = lora_params[matched_key]
+      if "down" in weights and "up" in weights:
+        if isinstance(module, nnx.Linear):
+          down_w = weights["down"]  # (rank, in_features)
+          up_w = weights["up"]  # (out_features_flat, rank)
+          rank = down_w.shape[0]
+          alpha = weights.get("alpha", rank)
+          current_scale = scale * alpha / rank
+          # delta = A @ B = down.T @ up.T
+          delta = (down_w.T @ up_w.T).reshape(module.kernel.shape)
+          module.kernel.value += delta * current_scale
+          assigned_count += 1
+        elif isinstance(module, nnx.Conv):
+          if module.kernel_size == (1, 1):
+            down_w = weights["down"]  # (1, 1, in_c, rank)
+            up_w = weights["up"]  # (1, 1, rank, out_c)
+            rank = down_w.shape[-1]
+            alpha = weights.get("alpha", rank)
+            current_scale = scale * alpha / rank
+            # delta = down @ up for channel dimension
+            delta = (jnp.squeeze(down_w) @ jnp.squeeze(up_w)).reshape(module.kernel.shape)
+            module.kernel.value += delta * current_scale
+            assigned_count += 1
+          else:
+            raise NotImplementedError(
+                f"Merging LoRA weights for Conv layer {matched_key} "
+                f"with kernel_size {module.kernel_size} > 1 is not supported."
+            )
+      else:
+        max_logging.log(f"LoRA weights for {matched_key} incomplete.")
   max_logging.log(f"Merged weights into {assigned_count} layers in {type(model).__name__}.")
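
The new `current_scale = scale * alpha / rank` term follows the usual LoRA convention: a checkpoint's `alpha` rescales the low-rank update and defaults to the rank when no `.alpha` entry exists. A minimal standalone sketch of the Linear-layer arithmetic, with toy shapes and values that are purely illustrative and not part of this patch:

import jax.numpy as jnp

# Toy dimensions: in_features=8, out_features=4, rank=2 (illustrative only).
rank, in_features, out_features = 2, 8, 4
down_w = 0.1 * jnp.ones((rank, in_features))     # LoRA "down" (A): (rank, in)
up_w = 0.2 * jnp.ones((out_features, rank))      # LoRA "up" (B): (out, rank)
kernel = jnp.zeros((in_features, out_features))  # NNX Linear kernel: (in, out)

scale, alpha = 1.0, 2.0                          # alpha falls back to rank when absent
current_scale = scale * alpha / rank

# Same delta as in merge_lora: (in, rank) @ (rank, out) -> (in, out).
delta = (down_w.T @ up_w.T).reshape(kernel.shape)
merged = kernel + delta * current_scale

# Sanity check: down.T @ up.T is the transpose of the usual up @ down product,
# which matches the (in, out) layout of the NNX kernel.
assert jnp.allclose(merged - kernel, (up_w @ down_w).T * current_scale)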
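
For context, a hypothetical call site; the checkpoint filename, the `transformer` module, and the scale value below are illustrative assumptions, not part of this change. `safetensors.numpy.load_file` returns a plain dict of arrays, which is the `state_dict` format `merge_lora` expects:

from safetensors.numpy import load_file

lora_state_dict = load_file("pytorch_lora_weights.safetensors")  # hypothetical local file
merge_lora(transformer, lora_state_dict, scale=1.0)              # mutates kernels in place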