Commit 7f018e4

Refactored based on review

1 parent 95fe0ba commit 7f018e4

5 files changed

Lines changed: 276 additions & 335 deletions

src/maxdiffusion/configs/base_wan_i2v_14b.yml

Lines changed: 7 additions & 7 deletions
@@ -276,8 +276,8 @@ profiler_steps: 10
 enable_jax_named_scopes: False
 
 # Generation parameters
-prompt: "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
-prompt_2: "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
+prompt: "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. Appearing behind him is a giant, translucent, pink spiritual manifestation (faxiang) that is synchronized with the man's action and pose." #"An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
+prompt_2: "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. Appearing behind him is a giant, translucent, pink spiritual manifestation (faxiang) that is synchronized with the man's action and pose." #"An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
 negative_prompt: "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
 do_classifier_free_guidance: True
 height: 480
@@ -303,11 +303,11 @@ lightning_ckpt: ""
 enable_lora: False
 # Values are lists to support multiple LoRA loading during inference in the future.
 lora_config: {
-  rank: [64],
-  lora_model_name_or_path: ["lightx2v/Wan2.1-Distill-Loras"],
-  weight_name: ["wan2.1_i2v_lora_rank64_lightx2v_4step.safetensors"],
-  adapter_name: ["wan21-distill-lora-i2v"],
-  scale: [1.0],
+  rank: [64, 32],
+  lora_model_name_or_path: ["lightx2v/Wan2.1-Distill-Loras", "starsfriday/Wan2.1-Divine-Power-LoRA"],
+  weight_name: ["wan2.1_i2v_lora_rank64_lightx2v_4step.safetensors", "divine-power.safetensors"],
+  adapter_name: ["wan21-distill-lora-i2v", "divine-power-lora"],
+  scale: [1.0, 1.0],
 from_pt: []
 }
 # Ex with values:
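
The lora_config lists are positional: entry i of rank, lora_model_name_or_path, weight_name, adapter_name, and scale together describe the i-th LoRA. A minimal sketch of iterating such parallel lists (the length check and zip are illustrative, not code from this commit; from_pt is omitted here because it is empty):

```python
lora_config = {
    "rank": [64, 32],
    "lora_model_name_or_path": ["lightx2v/Wan2.1-Distill-Loras", "starsfriday/Wan2.1-Divine-Power-LoRA"],
    "weight_name": ["wan2.1_i2v_lora_rank64_lightx2v_4step.safetensors", "divine-power.safetensors"],
    "adapter_name": ["wan21-distill-lora-i2v", "divine-power-lora"],
    "scale": [1.0, 1.0],
}

# Every list must have the same length, or the positional pairing silently breaks.
lengths = {key: len(values) for key, values in lora_config.items()}
assert len(set(lengths.values())) == 1, f"lora_config lists disagree in length: {lengths}"

# Entry i of each list describes LoRA i.
for rank, path, weight, name, scale in zip(*lora_config.values()):
    print(f"{name}: {path}/{weight} (rank={rank}, scale={scale})")
```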

src/maxdiffusion/generate_wan.py

Lines changed: 30 additions & 29 deletions
@@ -28,7 +28,7 @@
 from google.cloud import storage
 import flax
 from maxdiffusion.common_types import WAN2_1, WAN2_2
-from maxdiffusion.loaders.wan_lora_nnx_loader import Wan2_1NnxLoraLoader, Wan2_2NnxLoraLoader
+from maxdiffusion.loaders.wan_lora_nnx_loader import Wan2_1NNXLoraLoader, Wan2_2NNXLoraLoader
 
 
 def upload_video_to_gcs(output_dir: str, video_path: str):
@@ -191,39 +191,40 @@ def run(config, pipeline=None, filename_prefix="", commit_hash=None):
   pipeline, _, _ = checkpoint_loader.load_checkpoint()
 
   # If LoRA is specified, inject layers and load weights.
-  if config.enable_lora and hasattr(config, "lora_config") and config.lora_config and config.lora_config["lora_model_name_or_path"]:
+  if (
+      config.enable_lora
+      and hasattr(config, "lora_config")
+      and config.lora_config
+      and config.lora_config["lora_model_name_or_path"]
+  ):
     if model_key == WAN2_1:
-      lora_loader = Wan2_1NnxLoraLoader()
+      lora_loader = Wan2_1NNXLoraLoader()
       lora_config = config.lora_config
-
-      if len(lora_config["lora_model_name_or_path"]) > 1:
-        max_logging.log("Found multiple LoRAs in config, but only loading the first one.")
-
-      pipeline = lora_loader.load_lora_weights(
-          pipeline,
-          lora_config["lora_model_name_or_path"][0],
-          transformer_weight_name=lora_config["weight_name"][0],
-          rank=lora_config["rank"][0],
-          scale=lora_config["scale"][0],
-          scan_layers=config.scan_layers,
-      )
+      for i in range(len(lora_config["lora_model_name_or_path"])):
+        pipeline = lora_loader.load_lora_weights(
+            pipeline,
+            lora_config["lora_model_name_or_path"][i],
+            transformer_weight_name=lora_config["weight_name"][i],
+            rank=lora_config["rank"][i],
+            scale=lora_config["scale"][i],
+            scan_layers=config.scan_layers,
+            dtype=config.weights_dtype,
+        )
 
     if model_key == WAN2_2:
-      lora_loader = Wan2_2NnxLoraLoader()
+      lora_loader = Wan2_2NNXLoraLoader()
       lora_config = config.lora_config
-
-      if len(lora_config["lora_model_name_or_path"]) > 1:
-        max_logging.log("Found multiple LoRAs in config, but only loading the first one.")
-
-      pipeline = lora_loader.load_lora_weights(
-          pipeline,
-          lora_config["lora_model_name_or_path"][0],
-          high_noise_weight_name=lora_config["high_noise_weight_name"][0],
-          low_noise_weight_name=lora_config["low_noise_weight_name"][0],
-          rank=lora_config["rank"][0],
-          scale=lora_config["scale"][0],
-          scan_layers=config.scan_layers,
-      )
+      for i in range(len(lora_config["lora_model_name_or_path"])):
+        pipeline = lora_loader.load_lora_weights(
+            pipeline,
+            lora_config["lora_model_name_or_path"][i],
+            high_noise_weight_name=lora_config["high_noise_weight_name"][i],
+            low_noise_weight_name=lora_config["low_noise_weight_name"][i],
+            rank=lora_config["rank"][i],
+            scale=lora_config["scale"][i],
+            scan_layers=config.scan_layers,
+            dtype=config.weights_dtype,
+        )
 
   s0 = time.perf_counter()
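
Because each load_lora_weights call returns the pipeline with that adapter already merged, the reassignment makes the adapters accumulate: the second LoRA is merged into weights that already contain the first. A zip over the parallel config lists expresses the same WAN 2.1 loop without manual indexing (an illustrative sketch, not code from this commit):

```python
for path, weight, rank, scale in zip(
    lora_config["lora_model_name_or_path"],
    lora_config["weight_name"],
    lora_config["rank"],
    lora_config["scale"],
):
    # Each iteration merges one adapter; the updated pipeline feeds the next.
    pipeline = lora_loader.load_lora_weights(
        pipeline,
        path,
        transformer_weight_name=weight,
        rank=rank,
        scale=scale,
        scan_layers=config.scan_layers,
        dtype=config.weights_dtype,
    )
```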

src/maxdiffusion/loaders/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -14,4 +14,4 @@
 
 from .lora_pipeline import StableDiffusionLoraLoaderMixin
 from .flux_lora_pipeline import FluxLoraLoaderMixin
-from .wan_lora_nnx_loader import Wan2_1NnxLoraLoader, Wan2_2NnxLoraLoader
+from .wan_lora_nnx_loader import Wan2_1NNXLoraLoader, Wan2_2NNXLoraLoader
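
The rename capitalizes the NNX acronym in both class names. Downstream call sites that import from the package root pick up the new names (a minimal sketch):

```python
from maxdiffusion.loaders import Wan2_1NNXLoraLoader, Wan2_2NNXLoraLoader

# Renamed from Wan2_1NnxLoraLoader / Wan2_2NnxLoraLoader in this commit.
loader = Wan2_1NNXLoraLoader()
```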

src/maxdiffusion/loaders/wan_lora_nnx_loader.py

Lines changed: 7 additions & 5 deletions
@@ -22,7 +22,7 @@
 from . import lora_conversion_utils
 
 
-class Wan2_1NnxLoraLoader(LoRABaseMixin):
+class Wan2_1NNXLoraLoader(LoRABaseMixin):
   """
   Handles loading LoRA weights into NNX-based WAN 2.1 model.
   Assumes WAN pipeline contains 'transformer'
@@ -37,6 +37,7 @@ def load_lora_weights(
       rank: int,
       scale: float = 1.0,
       scan_layers: bool = False,
+      dtype: str = "float32",
       **kwargs,
   ):
     """
@@ -53,14 +54,14 @@ def translate_fn(nnx_path_str):
     if hasattr(pipeline, "transformer") and transformer_weight_name:
       max_logging.log(f"Merging LoRA into transformer with rank={rank}")
       h_state_dict, _ = lora_loader.lora_state_dict(lora_model_path, weight_name=transformer_weight_name, **kwargs)
-      merge_fn(pipeline.transformer, h_state_dict, rank, scale, translate_fn)
+      merge_fn(pipeline.transformer, h_state_dict, rank, scale, translate_fn, dtype=dtype)
     else:
       max_logging.log("transformer not found or no weight name provided for LoRA.")
 
     return pipeline
 
 
-class Wan2_2NnxLoraLoader(LoRABaseMixin):
+class Wan2_2NNXLoraLoader(LoRABaseMixin):
   """
   Handles loading LoRA weights into NNX-based WAN 2.2 model.
   Assumes WAN pipeline contains 'high_noise_transformer' and 'low_noise_transformer'
@@ -76,6 +77,7 @@ def load_lora_weights(
       rank: int,
       scale: float = 1.0,
       scan_layers: bool = False,
+      dtype: str = "float32",
       **kwargs,
   ):
     """
@@ -92,15 +94,15 @@ def translate_fn(nnx_path_str: str):
     if hasattr(pipeline, "high_noise_transformer") and high_noise_weight_name:
       max_logging.log(f"Merging LoRA into high_noise_transformer with rank={rank}")
       h_state_dict, _ = lora_loader.lora_state_dict(lora_model_path, weight_name=high_noise_weight_name, **kwargs)
-      merge_fn(pipeline.high_noise_transformer, h_state_dict, rank, scale, translate_fn)
+      merge_fn(pipeline.high_noise_transformer, h_state_dict, rank, scale, translate_fn, dtype=dtype)
     else:
       max_logging.log("high_noise_transformer not found or no weight name provided for LoRA.")
 
     # Handle low noise model
     if hasattr(pipeline, "low_noise_transformer") and low_noise_weight_name:
       max_logging.log(f"Merging LoRA into low_noise_transformer with rank={rank}")
       l_state_dict, _ = lora_loader.lora_state_dict(lora_model_path, weight_name=low_noise_weight_name, **kwargs)
-      merge_fn(pipeline.low_noise_transformer, l_state_dict, rank, scale, translate_fn)
+      merge_fn(pipeline.low_noise_transformer, l_state_dict, rank, scale, translate_fn, dtype=dtype)
     else:
       max_logging.log("low_noise_transformer not found or no weight name provided for LoRA.")
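
The new dtype parameter threads config.weights_dtype from generate_wan.py down to merge_fn, so merged parameters keep the transformer's storage precision instead of defaulting to float32. A hedged sketch of the arithmetic a LoRA merge typically performs (this repo's actual merge_fn may differ in scaling details; the function name here is illustrative):

```python
import jax.numpy as jnp

def merge_lora_delta(weight, lora_a, lora_b, scale, dtype="bfloat16"):
    # lora_b: (out, r), lora_a: (r, in), so lora_b @ lora_a matches weight's shape.
    # Accumulate in float32 for accuracy, then cast to the requested dtype so the
    # merged parameter keeps the model's storage precision.
    delta = scale * (lora_b.astype(jnp.float32) @ lora_a.astype(jnp.float32))
    return (weight.astype(jnp.float32) + delta).astype(dtype)
```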
