@@ -716,41 +716,34 @@ def translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=False):
716716 "attn1.to_k" : "attn1.to_k" ,
717717 "attn1.to_v" : "attn1.to_v" ,
718718 "attn1.to_out" : "attn1.to_out.0" ,
719-
720719 # Audio Self Attention (audio_attn1)
721720 "audio_attn1.to_q" : "audio_attn1.to_q" ,
722721 "audio_attn1.to_k" : "audio_attn1.to_k" ,
723722 "audio_attn1.to_v" : "audio_attn1.to_v" ,
724723 "audio_attn1.to_out" : "audio_attn1.to_out.0" ,
725-
726724 # Audio Cross Attention (audio_attn2)
727725 "audio_attn2.to_q" : "audio_attn2.to_q" ,
728726 "audio_attn2.to_k" : "audio_attn2.to_k" ,
729727 "audio_attn2.to_v" : "audio_attn2.to_v" ,
730728 "audio_attn2.to_out" : "audio_attn2.to_out.0" ,
731-
732729 # Cross Attention (attn2)
733730 "attn2.to_q" : "attn2.to_q" ,
734731 "attn2.to_k" : "attn2.to_k" ,
735732 "attn2.to_v" : "attn2.to_v" ,
736733 "attn2.to_out" : "attn2.to_out.0" ,
737-
738734 # Audio to Video Cross Attention
739735 "audio_to_video_attn.to_q" : "audio_to_video_attn.to_q" ,
740736 "audio_to_video_attn.to_k" : "audio_to_video_attn.to_k" ,
741737 "audio_to_video_attn.to_v" : "audio_to_video_attn.to_v" ,
742738 "audio_to_video_attn.to_out" : "audio_to_video_attn.to_out.0" ,
743-
744739 # Video to Audio Cross Attention
745740 "video_to_audio_attn.to_q" : "video_to_audio_attn.to_q" ,
746741 "video_to_audio_attn.to_k" : "video_to_audio_attn.to_k" ,
747742 "video_to_audio_attn.to_v" : "video_to_audio_attn.to_v" ,
748743 "video_to_audio_attn.to_out" : "video_to_audio_attn.to_out.0" ,
749-
750744 # Feed Forward
751745 "ff.net_0" : "ff.net.0.proj" ,
752746 "ff.net_2" : "ff.net.2" ,
753-
754747 # Audio Feed Forward
755748 "audio_ff.net_0" : "audio_ff.net.0.proj" ,
756749 "audio_ff.net_2" : "audio_ff.net.2" ,
@@ -768,7 +761,6 @@ def translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=False):
768761 "av_cross_attn_audio_v2a_gate.linear" : "diffusion_model.av_ca_v2a_gate_adaln_single.linear" ,
769762 "av_cross_attn_audio_scale_shift.linear" : "diffusion_model.av_ca_audio_scale_shift_adaln_single.linear" ,
770763 "av_cross_attn_video_scale_shift.linear" : "diffusion_model.av_ca_video_scale_shift_adaln_single.linear" ,
771-
772764 # Nested conditioning layers
773765 "time_embed.emb.timestep_embedder.linear_1" : "diffusion_model.adaln_single.emb.timestep_embedder.linear_1" ,
774766 "time_embed.emb.timestep_embedder.linear_2" : "diffusion_model.adaln_single.emb.timestep_embedder.linear_2" ,
@@ -786,11 +778,10 @@ def translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=False):
786778 "caption_projection.linear_2" : "diffusion_model.caption_projection.linear_2" ,
787779 "audio_caption_projection.linear_1" : "diffusion_model.audio_caption_projection.linear_1" ,
788780 "audio_caption_projection.linear_2" : "diffusion_model.audio_caption_projection.linear_2" ,
789-
790781 # Connectors
791782 "feature_extractor.linear" : "text_embedding_projection.aggregate_embed" ,
792783 }
793-
784+
794785 if nnx_path_str in global_map :
795786 return global_map [nnx_path_str ]
796787
@@ -807,5 +798,3 @@ def translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=False):
807798 return f"diffusion_model.transformer_blocks.{ idx } .{ suffix_map [inner_suffix ]} "
808799
809800 return None
810-
811-
0 commit comments