Skip to content

Commit 5d9e57f

Browse files
Merge pull request #3448 from AI-Hypercomputer:docker_build
PiperOrigin-RevId: 885861574
2 parents 28d8fce + a70db31 commit 5d9e57f

12 files changed

+58
-105
lines changed

.github/workflows/build_and_push_docker_image.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ jobs:
122122
DEVICE=${{ inputs.device }}
123123
MODE=${{ inputs.build_mode }}
124124
WORKFLOW=${{ inputs.workflow }}
125+
PACKAGE_DIR=./src
125126
JAX_VERSION=NONE
126127
LIBTPU_VERSION=NONE
127128
INCLUDE_TEST_ASSETS=true

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,5 @@ packages = ["src/MaxText", "src/maxtext", "src/install_maxtext_extra_deps", "src
5050
install_maxtext_tpu_github_deps = "install_maxtext_extra_deps.install_github_deps:main"
5151
install_maxtext_cuda12_github_deps = "install_maxtext_extra_deps.install_github_deps:main"
5252
install_maxtext_tpu_post_train_extra_deps = "install_maxtext_extra_deps.install_post_train_extra_deps:main"
53-
docker_build_dependency_image = "dependencies.scripts.docker_build_dependency_image:main"
54-
docker_upload_runner = "dependencies.scripts.docker_upload_runner:main"
53+
build_maxtext_docker_image = "dependencies.scripts.build_maxtext_docker_image:main"
54+
upload_maxtext_docker_image = "dependencies.scripts.upload_maxtext_docker_image:main"

src/MaxText/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
"""
2020

2121
__author__ = "Google LLC"
22-
__version__ = "0.2.0"
22+
__version__ = "0.2.1"
2323
__description__ = (
2424
"MaxText is a high performance, highly scalable, open-source LLM written in pure Python/Jax and "
2525
"targeting Google Cloud TPUs and GPUs for training and **inference."

src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ ENV ENV_JAX_VERSION=$JAX_VERSION
3838
ARG DEVICE
3939
ENV ENV_DEVICE=$DEVICE
4040

41+
ARG PACKAGE_DIR
42+
ENV PACKAGE_DIR=$PACKAGE_DIR
43+
4144
ENV MAXTEXT_ASSETS_ROOT=/deps/src/maxtext/assets
4245
ENV MAXTEXT_TEST_ASSETS_ROOT=/deps/tests/assets
4346
ENV MAXTEXT_PKG_DIR=/deps/src/MaxText
@@ -47,16 +50,19 @@ ENV MAXTEXT_REPO_ROOT=/deps
4750
WORKDIR /deps
4851

4952
# Copy setup files and dependency files separately for better caching
50-
COPY tools/setup tools/setup/
51-
COPY src/dependencies/requirements/ src/dependencies/requirements/
52-
COPY src/install_maxtext_extra_deps/extra_deps_from_github.txt src/install_maxtext_extra_deps/
53+
COPY ${PACKAGE_DIR}/dependencies/requirements/ src/dependencies/requirements/
54+
COPY ${PACKAGE_DIR}/dependencies/scripts/ src/dependencies/scripts/
55+
COPY ${PACKAGE_DIR}/install_maxtext_extra_deps/ src/install_maxtext_extra_deps/
56+
COPY ${PACKAGE_DIR}/maxtext/integration/vllm/ src/MaxText/integration/vllm/
5357

5458
# Install dependencies - these steps are cached unless the copied files change
5559
RUN echo "Running command: bash setup.sh MODE=$ENV_MODE JAX_VERSION=$ENV_JAX_VERSION DEVICE=${ENV_DEVICE}"
56-
RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=/root/.cache/uv bash /deps/tools/setup/setup.sh MODE=${ENV_MODE} JAX_VERSION=${ENV_JAX_VERSION} DEVICE=${ENV_DEVICE}
60+
RUN --mount=type=cache,target=/root/.cache/uv \
61+
export UV_LINK_MODE=copy && \
62+
bash /deps/src/dependencies/scripts/setup.sh MODE=${ENV_MODE} JAX_VERSION=${ENV_JAX_VERSION} DEVICE=${ENV_DEVICE}
5763

5864
# Now copy the remaining code (source files that may change frequently)
59-
COPY . .
65+
COPY ${PACKAGE_DIR}/maxtext/ src/MaxText/
6066

6167
# Download test assets from GCS if building image with test assets
6268
ARG INCLUDE_TEST_ASSETS=false
@@ -67,5 +73,4 @@ RUN if [ "$INCLUDE_TEST_ASSETS" = "true" ]; then \
6773
fi; \
6874
fi
6975

70-
# Install (editable) MaxText
71-
RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=/root/.cache/uv test -f '/tmp/venv_created' && "$(tail -n1 /tmp/venv_created)"/bin/activate ; pip install --no-dependencies -e .
76+
ENV PYTHONPATH="/deps/src:${PYTHONPATH}"

src/dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile

Lines changed: 0 additions & 50 deletions
This file was deleted.

src/dependencies/dockerfiles/maxtext_runner.Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
ARG BASEIMAGE=maxtext_base_image
44
FROM $BASEIMAGE
55

6-
#FROM maxtext_base_image
6+
ARG PACKAGE_DIR
7+
ENV PACKAGE_DIR=$PACKAGE_DIR
78

89
ENV MAXTEXT_ASSETS_ROOT=/deps/src/maxtext/assets
910
ENV MAXTEXT_TEST_ASSETS_ROOT=/deps/tests/assets
@@ -14,8 +15,7 @@ ENV MAXTEXT_REPO_ROOT=/deps
1415
WORKDIR /deps
1516

1617
# Copy assets separately
17-
COPY src/maxtext/assets/ "${MAXTEXT_ASSETS_ROOT}"
18-
COPY tests/assets/ "${MAXTEXT_TEST_ASSETS_ROOT}"
18+
COPY ${PACKAGE_DIR}/maxtext/assets/ "${MAXTEXT_ASSETS_ROOT}"
1919

2020
# Copy the MaxText package sources (excluding the assets already copied above) from the local workspace into the docker container
21-
COPY --exclude="${MAXTEXT_ASSETS_ROOT}" --exclude="${MAXTEXT_TEST_ASSETS_ROOT}" . .
21+
COPY --exclude=${PACKAGE_DIR}/maxtext/assets/ ${PACKAGE_DIR}/maxtext/ src/MaxText/

src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ ENV ENV_LIBTPU_VERSION=$LIBTPU_VERSION
3535
ARG DEVICE
3636
ENV ENV_DEVICE=$DEVICE
3737

38+
ARG PACKAGE_DIR
39+
ENV PACKAGE_DIR=$PACKAGE_DIR
40+
3841
ENV MAXTEXT_ASSETS_ROOT=/deps/src/maxtext/assets
3942
ENV MAXTEXT_TEST_ASSETS_ROOT=/deps/tests/assets
4043
ENV MAXTEXT_PKG_DIR=/deps/src/maxtext
@@ -44,20 +47,22 @@ ENV MAXTEXT_REPO_ROOT=/deps
4447
WORKDIR /deps
4548

4649
# Copy setup files and dependency files separately for better caching
47-
COPY tools/setup tools/setup/
48-
COPY src/dependencies/requirements/ src/dependencies/requirements/
49-
COPY src/install_maxtext_extra_deps/ src/install_maxtext_extra_deps/
50-
COPY src/maxtext/integration/vllm/ src/maxtext/integration/vllm/
50+
COPY ${PACKAGE_DIR}/dependencies/requirements/ src/dependencies/requirements/
51+
COPY ${PACKAGE_DIR}/dependencies/scripts/ src/dependencies/scripts/
52+
COPY ${PACKAGE_DIR}/install_maxtext_extra_deps/ src/install_maxtext_extra_deps/
53+
COPY ${PACKAGE_DIR}/maxtext/integration/vllm/ src/maxtext/integration/vllm/
5154

52-
# Copy the custom libtpu.so file if it exists inside maxtext repository
55+
# Copy the custom libtpu.so file if one exists at the root of the Docker build context
5356
COPY libtpu.so* /root/custom_libtpu/
5457

5558
# Install dependencies - these steps are cached unless the copied files change
5659
RUN echo "Running command: bash setup.sh MODE=$ENV_MODE WORKFLOW=$ENV_WORKFLOW JAX_VERSION=$ENV_JAX_VERSION LIBTPU_VERSION=$ENV_LIBTPU_VERSION DEVICE=${ENV_DEVICE}"
57-
RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=/root/.cache/uv bash /deps/tools/setup/setup.sh MODE=${ENV_MODE} WORKFLOW=${ENV_WORKFLOW} JAX_VERSION=${ENV_JAX_VERSION} LIBTPU_VERSION=${ENV_LIBTPU_VERSION} DEVICE=${ENV_DEVICE}
60+
RUN --mount=type=cache,target=/root/.cache/uv \
61+
export UV_LINK_MODE=copy && \
62+
bash /deps/src/dependencies/scripts/setup.sh MODE=${ENV_MODE} WORKFLOW=${ENV_WORKFLOW} JAX_VERSION=${ENV_JAX_VERSION} LIBTPU_VERSION=${ENV_LIBTPU_VERSION} DEVICE=${ENV_DEVICE}
5863

5964
# Now copy the remaining code (source files that may change frequently)
60-
COPY . .
65+
COPY ${PACKAGE_DIR}/maxtext/ src/maxtext/
6166

6267
# Download test assets from GCS if building image with test assets
6368
ARG INCLUDE_TEST_ASSETS=false
@@ -68,5 +73,4 @@ RUN if [ "$INCLUDE_TEST_ASSETS" = "true" ]; then \
6873
fi; \
6974
fi
7075

71-
# Install (editable) MaxText
72-
RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=/root/.cache/uv test -f '/tmp/venv_created' && "$(tail -n1 /tmp/venv_created)"/bin/activate ; pip install --no-dependencies -e .
76+
ENV PYTHONPATH="/deps/src:${PYTHONPATH}"

src/dependencies/scripts/docker_build_dependency_image.py renamed to src/dependencies/scripts/build_maxtext_docker_image.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@
1919

2020

2121
def main():
22-
script_path = os.path.join(os.path.dirname(__file__), "docker_build_dependency_image.sh")
22+
current_dir = os.path.dirname(os.path.abspath(__file__))
23+
repo_root = os.path.abspath(os.path.join(current_dir, "..", ".."))
24+
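# Pass PACKAGE_DIR (the directory two levels above this script) relative to the current working directory, which is assumed to be the Docker build context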
25+
os.environ["PACKAGE_DIR"] = os.path.relpath(repo_root, os.getcwd())
26+
27+
script_path = os.path.join(current_dir, "docker_build_dependency_image.sh")
2328
if not os.path.exists(script_path):
2429
raise FileNotFoundError(f"Script not found at {script_path}")
2530

src/dependencies/scripts/docker_build_dependency_image.sh

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,8 @@
4949
# Build docker image with post-training dependencies
5050
## bash src/dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training
5151

52-
if [ "${BASH_SOURCE-}" ]; then
53-
this_file="${BASH_SOURCE[0]}"
54-
elif [ "${ZSH_VERSION-}" ]; then
55-
# shellcheck disable=SC2296
56-
this_file="${(%):-%x}"
57-
else
58-
this_file="${0}"
59-
fi
60-
61-
MAXTEXT_REPO_ROOT="${MAXTEXT_REPO_ROOT:-$(CDPATH='' cd -- "$(dirname -- "${this_file}")"'/../../..' && pwd)}"
52+
PACKAGE_DIR="${PACKAGE_DIR:-src}"
53+
echo "PACKAGE_DIR: $PACKAGE_DIR"
6254

6355
# Enable "exit immediately if any command fails" option
6456
set -e
@@ -107,6 +99,7 @@ docker_build_args=(
10799
"WORKFLOW=${WORKFLOW}"
108100
"MODE=${MODE}"
109101
"JAX_VERSION=${JAX_VERSION}"
102+
"PACKAGE_DIR=${PACKAGE_DIR}"
110103
)
111104

112105
run_docker_build() {
@@ -123,7 +116,7 @@ build_gpu_image() {
123116
fi
124117

125118
echo "Building docker image with arguments: ${docker_build_args[*]}"
126-
run_docker_build "$MAXTEXT_REPO_ROOT/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile" "${docker_build_args[@]}"
119+
run_docker_build "$PACKAGE_DIR/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile" "${docker_build_args[@]}"
127120
}
128121

129122
# Function to build image for TPUs
@@ -140,7 +133,7 @@ build_tpu_image() {
140133
fi
141134

142135
echo "Building docker image with arguments: ${docker_build_args[*]}"
143-
run_docker_build "$MAXTEXT_REPO_ROOT/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile" "${docker_build_args[@]}"
136+
run_docker_build "$PACKAGE_DIR/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile" "${docker_build_args[@]}"
144137
}
145138

146139
if [[ ${DEVICE} == "gpu" ]]; then

src/dependencies/scripts/docker_upload_runner.sh

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,10 @@
2121
# (minutes). However, if you are simply changing local code and not updating dependencies, uploading just takes a few seconds.
2222

2323
# Example command:
24-
# bash docker_upload_runner.sh CLOUD_IMAGE_NAME=${USER}_runner
25-
26-
if [ "${BASH_SOURCE-}" ]; then
27-
this_file="${BASH_SOURCE[0]}"
28-
elif [ "${ZSH_VERSION-}" ]; then
29-
# shellcheck disable=SC2296
30-
this_file="${(%):-%x}"
31-
else
32-
this_file="${0}"
33-
fi
24+
# bash src/dependencies/scripts/docker_upload_runner.sh CLOUD_IMAGE_NAME=${USER}_runner
3425

35-
MAXTEXT_REPO_ROOT="${MAXTEXT_REPO_ROOT:-$(CDPATH='' cd -- "$(dirname -- "${this_file}")"'/../../..' && pwd)}"
26+
PACKAGE_DIR="${PACKAGE_DIR:-src}"
27+
echo "PACKAGE_DIR: $PACKAGE_DIR"
3628

3729
set -e
3830

@@ -85,6 +77,7 @@ if [ -n "$DANGLING_LINKS" ]; then
8577
echo "$DANGLING_LINKS"
8678
echo "These can cause 'failed to compute cache key' errors during 'docker build'."
8779
echo "Please remove or fix them before building the Docker image."
80+
echo "Alternatively, run the command again from a clean, empty directory to bypass your local file state entirely."
8881
exit 1
8982
fi
9083

@@ -95,23 +88,20 @@ if [ -n "$ABSOLUTE_LINKS" ]; then
9588
echo "$ABSOLUTE_LINKS"
9689
echo "Docker cannot follow absolute paths outside of the build context, which can cause 'failed to compute cache key' errors."
9790
echo "Please remove these links or convert them to relative paths before building the Docker image."
91+
echo "Alternatively, run the command again from a clean, empty directory to bypass your local file state entirely."
9892
exit 1
9993
fi
10094

101-
# Download other test assets from GCS into ${MAXTEXT_TEST_ASSETS_ROOT:-${MAXTEXT_REPO_ROOT:-$PWD}}/tests/assets/golden_logits
102-
# if ! gcloud storage cp gs://maxtext-test-assets/* "${MAXTEXT_TEST_ASSETS_ROOT:-${MAXTEXT_REPO_ROOT:-$PWD}/tests/assets/golden_logits}"; then
103-
# echo "WARNING: Failed to download test assets from GCS. These files are only used for end-to-end tests; you may not have access to the bucket."
104-
# fi
105-
10695
# Check if the base image exists locally
10796
if ! docker image inspect "${LOCAL_IMAGE_NAME}" &> /dev/null; then
10897
echo "ERROR: Base image '${LOCAL_IMAGE_NAME}' not found locally."
109-
echo "Please build it first by running 'bash docker_build_dependency_image.sh'."
98+
echo "Please build it first by running 'build_maxtext_docker_image'."
11099
exit 1
111100
fi
112101

113102
docker build --no-cache --build-arg BASEIMAGE=${LOCAL_IMAGE_NAME} \
114-
-f "$MAXTEXT_REPO_ROOT"'/src/dependencies/dockerfiles/maxtext_runner.Dockerfile' \
103+
--build-arg PACKAGE_DIR=${PACKAGE_DIR} \
104+
-f "$PACKAGE_DIR"'/dependencies/dockerfiles/maxtext_runner.Dockerfile' \
115105
-t ${LOCAL_IMAGE_NAME_RUNNER} .
116106

117107
docker tag ${LOCAL_IMAGE_NAME_RUNNER} gcr.io/$PROJECT/${CLOUD_IMAGE_NAME}:latest

0 commit comments

Comments
 (0)