conversion done

Serenagu525 · Serenagu525 · commit 8cd7dbe07bf1 · 2025-06-13T19:47:55.000Z
diff --git a/src/maxdiffusion/generate_ltx_video.py b/src/maxdiffusion/generate_ltx_video.py
@@ -28,50 +28,6 @@ def validate_transformer_inputs(prompt_embeds, fractional_coords, latents, noise
   print("segment_ids.shape: ", segment_ids.shape, segment_ids.dtype)
   print("encoder_attention_segment_ids.shape: ", encoder_attention_segment_ids.shape, encoder_attention_segment_ids.dtype)
 
-
-def loop_body(
-    step,
-    args,
-    transformer,
-    fractional_cords,
-    prompt_embeds,
-    segment_ids,
-    encoder_attention_segment_ids
-):
-  latents, state, noise_cond = args
-  noise_pred = transformer.apply(
-      {"params": state.params},
-      hidden_states=latents,
-      indices_grid=fractional_cords,
-      encoder_hidden_states=prompt_embeds,
-      timestep=noise_cond,
-      segment_ids=segment_ids,
-      encoder_attention_segment_ids=encoder_attention_segment_ids
-  )
-  import pdb; pdb.set_trace()
-  return noise_pred, state, noise_cond   #need to make changes here? latents need to be changed based on noise_pred, but needs scheduler, return noise_pred for now
-
-
-
-def run_inference(
-    states, transformer, config, mesh, latents, fractional_cords, prompt_embeds, timestep, segment_ids, encoder_attention_segment_ids
-):
-  transformer_state = states["transformer"]
-  loop_body_p = functools.partial(
-      loop_body,
-      transformer=transformer,
-      fractional_cords=fractional_cords,
-      prompt_embeds=prompt_embeds,
-      segment_ids=segment_ids,
-      encoder_attention_segment_ids=encoder_attention_segment_ids
-  )
-  ## TODO: add vae decode step
-  ## TODO: add loop
-  with mesh, nn_partitioning.axis_rules(config.logical_axis_rules):
-    latents, transformer_state, _ = jax.lax.fori_loop(0, 1, loop_body_p, (latents, transformer_state, timestep))   
-  return latents
-  
-
 def run(config):
   key = jax.random.PRNGKey(0)
 
@@ -119,92 +75,6 @@ def run(config):
   )
 
   
-  
-  
-  transformer_state = jax.device_put(transformer_state, transformer_state_shardings)
-  get_memory_allocations()
-
-  states = {}
-  state_shardings = {}
-
-  state_shardings["transformer"] = transformer_state_shardings
-  states["transformer"] = transformer_state
-
-  #create dummy inputs:
-  example_inputs = {}
-  batch_size, num_tokens = 4, 256
-  input_shapes = {
-    "latents": (batch_size, num_tokens, in_channels),
-    "fractional_coords": (batch_size, 3, num_tokens),
-    "prompt_embeds": (batch_size, 128, model_config["caption_channels"]),
-    "timestep": (batch_size, 256),   #TODO: add in the segment id stuff
-    "segment_ids": (batch_size, 256),
-    "encoder_attention_segment_ids": (batch_size, 128),
-  }
-  for name, shape in input_shapes.items():
-    example_inputs[name] = jnp.ones(
-      shape, dtype=jnp.float32 if name not in ["attention_mask", "encoder_attention_mask"] else jnp.bool
-    )
-
-  data_sharding = jax.sharding.NamedSharding(mesh, P(*config.data_sharding))
-  latents = jax.device_put(example_inputs["latents"], data_sharding)
-  prompt_embeds = jax.device_put(example_inputs["prompt_embeds"], data_sharding)
-  fractional_coords = jax.device_put(example_inputs["fractional_coords"], data_sharding)
-  noise_cond = jax.device_put(example_inputs["timestep"], data_sharding)
-  segment_ids = jax.device_put(example_inputs["segment_ids"], data_sharding)
-  encoder_attention_segment_ids = jax.device_put(example_inputs["encoder_attention_segment_ids"], data_sharding)
-
-  validate_transformer_inputs(prompt_embeds, fractional_coords, latents, noise_cond, segment_ids, encoder_attention_segment_ids)
-  p_run_inference = jax.jit(
-      functools.partial(
-          run_inference,
-          transformer=transformer,
-          config=config,
-          mesh=mesh,
-          latents=latents,
-          fractional_cords=fractional_coords,
-          prompt_embeds=prompt_embeds,
-          timestep = noise_cond,
-          segment_ids=segment_ids,
-          encoder_attention_segment_ids=encoder_attention_segment_ids
-      ),
-      in_shardings=(state_shardings,),
-      out_shardings=None,
-  )
-  noise_pred = p_run_inference(states).block_until_ready()
-  print(noise_pred)  #(4, 256, 128)
-
-
-
-
-
-
-  
-
-
-
-
-
-
-
-
-  
-
-
-
-
-  
-
-  
-
-  
-
-
-
-
-
-
-
 
 
 
@@ -219,12 +89,4 @@ def main(argv: Sequence[str]) -> None:
 
 
 
-###setup_initial_state, can optionally load from checkpoint
-
-
-
-
-
-
 
-#end to end steps from ltx repo: pipeline_ltx_video.py
diff --git a/src/maxdiffusion/models/attention_flax.py b/src/maxdiffusion/models/attention_flax.py
@@ -1188,4 +1188,4 @@ def setup(self):
   def __call__(self, hidden_states, deterministic=True):
     hidden_states = self.proj(hidden_states)
     hidden_linear, hidden_gelu = jnp.split(hidden_states, 2, axis=2)
-    return self.dropout_layer(hidden_linear * nn.gelu(hidden_gelu), deterministic=deterministic)
+    return self.dropout_layer(hidden_linear * nn.gelu(hidden_gelu), deterministic=deterministic)
diff --git a/src/maxdiffusion/tests/ltx_video_transformer_test.py b/src/maxdiffusion/tests/ltx_video_transformer_test.py