Skip to content

Commit 3bc185f

Browse files
SurbhiJainUSC authored and Google-ML-Automation committed
Move dpo_utils.py, load_and_quantize_checkpoint.py, vllm_decode.py and scratch_code/ to src/maxtext
PiperOrigin-RevId: 863355252
1 parent fb96299 commit 3bc185f

25 files changed

Lines changed: 27 additions & 28 deletions

File tree

codecov.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,8 @@ ignore:
3636
- "src/MaxText/configs"
3737
- "src/MaxText/examples"
3838
- "src/MaxText/experimental"
39-
- "src/MaxText/inference"
40-
- "src/MaxText/inference_mlperf"
41-
- "src/MaxText/scratch_code"
39+
- "src/maxtext/inference"
40+
- "src/maxtext/scratch_code"
4241
- "src/MaxText/distillation" # code moved to src/maxtext/trainers/post_train/distillation
4342
- "src/MaxText/sft" # code moved to src/maxtext/trainers/post_train/sft
4443

docs/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,8 @@ def run_apidoc(_):
175175
# Paths to exclude
176176
os.path.join(MAXTEXT_REPO_ROOT, "tests"),
177177
os.path.join(MAXTEXT_REPO_ROOT, "src", "MaxText", "experimental"),
178-
os.path.join(MAXTEXT_REPO_ROOT, "src", "MaxText", "inference_mlperf"),
179-
os.path.join(MAXTEXT_REPO_ROOT, "src", "MaxText", "scratch_code"),
178+
os.path.join(MAXTEXT_REPO_ROOT, "src", "maxtext", "inference"),
179+
os.path.join(MAXTEXT_REPO_ROOT, "src", "maxtext", "scratch_code"),
180180
os.path.join(MAXTEXT_REPO_ROOT, "src", "MaxText", "utils", "ckpt_conversion"),
181181
os.path.join(MAXTEXT_REPO_ROOT, "src", "MaxText", "rl"),
182182
os.path.join(MAXTEXT_REPO_ROOT, "src", "MaxText", "multimodal_utils.py"),

docs/guides/optimization/pallas_kernels_performance.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ To maximize performance, MaxText uses custom Pallas kernels for memory-bandwidth
6262

6363
- **Serving Attention (Paged & Ragged):** For high-throughput inference, this kernel efficiently fetches non-contiguous "pages" of the KV cache from memory. It is a key optimization for our serving stack and is used for models running on MaxText's inference engine.
6464

65-
- [`src/MaxText/inference/paged_attention.py`](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/inference/paged_attention.py)
66-
- [`src/MaxText/inference/paged_attention_kernel_v2.py`](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/inference/paged_attention_kernel_v2.py)
65+
- [`src/maxtext/inference/paged_attention.py`](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/inference/paged_attention.py)
66+
- [`src/maxtext/inference/paged_attention_kernel_v2.py`](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/inference/paged_attention_kernel_v2.py)
6767

6868
- **MoE Grouped Matmul (Megablox GMM):** Sparse/irregular grouped GEMMs driven by host-built metadata.
6969

src/MaxText/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
from MaxText import pyconfig
3333
from MaxText.layers import models
34-
from MaxText import dpo_utils
34+
from maxtext.trainers.post_train.dpo import dpo_utils
3535
from maxtext.utils import maxtext_utils
3636
from maxtext.utils import model_creation_utils
3737
from maxtext.utils.model_creation_utils import from_config

src/MaxText/experimental/agent/integrative_rag_agent/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@
103103
# for converting PyTorch code to JAX
104104
block_for_rag = [
105105
"src/MaxText/layers", # Neural network layers and building blocks
106-
"src/MaxText/inference", # Inference and prediction code
106+
"src/maxtext/inference", # Inference and prediction code
107107
"src/MaxText/common_types.py", # Common data types and structures
108-
"src/MaxText/maxtext_utils.py", # Utility functions and helpers
108+
"src/maxtext/utils/maxtext_utils.py", # Utility functions and helpers
109109
]

src/MaxText/experimental/agent/orchestration_agent/split_python_file.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def visit_Attribute(self, node):
9393
if base_name in self.git_aliases:
9494
# It's an external dependency. We need to format it with the attribute path.
9595
# Example: base_name='page_manager', attr_chain='PageState'
96-
# self.git_dependencies['page_manager'] might be 'src/MaxText/inference/page_manager.py#page_manager'
96+
# self.git_dependencies['page_manager'] might be 'src/maxtext/inference/page_manager.py#page_manager'
9797
path, obj = self.git_dependencies[base_name].split("#", 1)
9898

9999
# As per the user request, we append the attribute access to the object name.
@@ -198,8 +198,8 @@ def convert_package_to_path(self, path):
198198
199199
Example:
200200
"from maxtext.inference import page_manager, utils" ->
201-
{"page_manager": "src/MaxText/inference.py#page_manager",
202-
"utils": "src/MaxText/inference.py#utils"}
201+
{"page_manager": "src/maxtext/inference.py#page_manager",
202+
"utils": "src/maxtext/inference.py#utils"}
203203
204204
Args:
205205
path (str): A normalized absolute import string.
@@ -216,7 +216,7 @@ def convert_package_to_path(self, path):
216216
# The logic in get_absolute_imports should ideally resolve this ambiguity.
217217
# A heuristic could be used here (e.g., checking casing) but we stick to the current logic.
218218
# The user's example `from maxtext.inference import page_manager` creates a path
219-
# `src/MaxText/inference.py#page_manager`, which is what the new visitor expects to correct.
219+
# `src/maxtext/inference.py#page_manager`, which is what the new visitor expects to correct.
220220
import_dict[pkg.strip()] = path_form + ".py#" + pkg.strip()
221221
return import_dict
222222

src/MaxText/train.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@
5151

5252
from MaxText.gradient_accumulation import gradient_accumulation_loss_and_grad
5353
from MaxText.vocabulary_tiling import vocab_tiling_linen_loss
54-
from MaxText.dpo_utils import _merge_dpo_state, _split_dpo_state, dpo_loss_fn
5554
# pylint: disable=too-many-positional-arguments
5655

5756
from maxtext.common import checkpointing, profiler
@@ -63,6 +62,7 @@
6362
)
6463
from maxtext.common.metric_logger import MetricLogger, record_activation_metrics
6564
from maxtext.common.vertex_tensorboard import VertexTensorboardManager
65+
from maxtext.trainers.post_train.dpo.dpo_utils import _merge_dpo_state, _split_dpo_state, dpo_loss_fn
6666
from maxtext.utils import exceptions
6767
from maxtext.utils import gcs_utils
6868
from maxtext.utils import max_logging

src/MaxText/load_and_quantize_checkpoint.py renamed to src/maxtext/checkpoint_conversion/load_and_quantize_checkpoint.py

File renamed without changes.

src/maxtext/inference/mlperf/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ cd ~
6464
git clone https://github.com/AI-Hypercomputer/maxtext.git
6565
cd maxtext
6666
bash setup.sh
67-
python3 -m pip install -r src/MaxText/inference_mlperf/requirements.txt
67+
python3 -m pip install -r src/maxtext/inference/mlperf/requirements.txt
6868
```
6969

7070
### Generate quantized checkpoint
@@ -125,7 +125,7 @@ export MODEL_SIZE=llama3.1-405b
125125
export QUANTIZE_TYPE=int8
126126

127127
cd maxtext && \
128-
python3 -m MaxText.load_and_quantize_checkpoint src/MaxText/configs/base.yml tokenizer_path=${TOKENIZER} load_parameters_path=${LOAD_PARAMS_PATH} max_prefill_predict_length=1024 max_target_length=2048 model_name=${MODEL_SIZE} ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=-1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=1 attention=dot_product quantization=${QUANTIZE_TYPE} save_quantized_params_path=${SAVE_QUANT_PARAMS_PATH} async_checkpointing=false
128+
python3 -m maxtext.checkpoint_conversion.load_and_quantize_checkpoint src/MaxText/configs/base.yml tokenizer_path=${TOKENIZER} load_parameters_path=${LOAD_PARAMS_PATH} max_prefill_predict_length=1024 max_target_length=2048 model_name=${MODEL_SIZE} ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=-1 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=1 attention=dot_product quantization=${QUANTIZE_TYPE} save_quantized_params_path=${SAVE_QUANT_PARAMS_PATH} async_checkpointing=false
129129
```
130130

131131
The quantized checkpoint is saved at `${SAVE_QUANT_PARAMS_PATH}`
@@ -141,7 +141,7 @@ huggingface-cli login --token $HUGGING_FACE_TOKEN
141141
#### For trillium
142142
#### LLama2-70b:
143143
```
144-
cd ~/maxtext/src/MaxText/inference_mlperf/trillium
144+
cd ~/maxtext/src/maxtext/inference/mlperf/trillium
145145
```
146146

147147
##### Test Run

src/maxtext/inference/mlperf/llama_offline_run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ else
117117
export DATASET_TYPE=full
118118
export DATASET_PATH=${DATA_DISK_DIR}/processed-data.pkl
119119
export TOTAL_SAMPLE_COUNT=24576
120-
export USER_CONFIG=user.conf # NOTE: you may need to change this path(e.g. `src/MaxText/inference_mlperf/user.conf`)
120+
export USER_CONFIG=user.conf # NOTE: you may need to change this path(e.g. `src/maxtext/inference/mlperf/user.conf`)
121121
fi
122122

123123
# LIBTPU_INIT_ARGS="--xla_tpu_enable_data_parallel_all_reduce_opt=true --xla_tpu_data_parallel_opt_different_sized_ops=true --xla_tpu_enable_async_collective_fusion=true --xla_tpu_enable_async_collective_fusion_fuse_all_gather=true --xla_tpu_enable_async_collective_fusion_multiple_steps=true --xla_tpu_overlap_compute_collective_tc=true --xla_enable_async_all_gather=true"

0 commit comments

Comments (0)