
Commit 6338698

Commit message: "merged with main"
2 parents: 23077de + ad56886

151 files changed: 2735 additions & 2417 deletions


.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+* @entrpn

.github/workflows/AddPullReady.yml

Lines changed: 116 additions & 0 deletions

@@ -0,0 +1,116 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Add Pull Ready Label
+
+on:
+  workflow_run:
+    workflows: [Unit Test, Linter]
+    types:
+      - completed
+  pull_request_review:
+  pull_request_review_comment:
+  workflow_dispatch:
+
+jobs:
+  AddPullReady:
+    permissions:
+      checks: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/github-script@v7
+        with:
+          script: |
+            const owner = context.repo.owner
+            const repo = context.repo.repo
+            let pull_number = -1
+            if (context.payload.pull_request !== undefined) {
+              pull_number = context.payload.pull_request.number
+            } else if (context.payload.workflow_run !== undefined) {
+              if (context.payload.workflow_run.pull_requests.length === 0) {
+                console.log("This workflow is NOT running within a PR's context")
+                process.exit()
+              }
+              console.log(context.payload.workflow_run.pull_requests)
+              pull_number = context.payload.workflow_run.pull_requests[0].number
+            } else {
+              console.log("This workflow is running within an invalid context")
+              process.exit(1)
+            }
+            const reviews = await github.rest.pulls.listReviews({
+              owner,
+              repo,
+              pull_number,
+            })
+            const decision_query = `
+              query($owner: String!, $repo: String!, $pull_number: Int!) {
+                repository(owner: $owner, name: $repo) {
+                  pullRequest(number: $pull_number) {
+                    reviewDecision # Fetches the overall review status
+                  }
+                }
+              }
+            `;
+            const decision_result = await github.graphql(decision_query, { owner, repo, pull_number });
+
+            if (reviews.data.length === 0) {
+              console.log("Not adding pull ready because the PR is not approved yet.")
+              process.exit()
+            }
+            let is_approved = false
+            if (decision_result.repository.pullRequest.reviewDecision === "APPROVED") {
+              is_approved = true
+            }
+            if (!is_approved) {
+              console.log("Not adding pull ready because the PR is not approved yet by sufficient code owners.")
+              process.exit()
+            }
+
+            const commits = await github.rest.pulls.listCommits({
+              owner,
+              repo,
+              pull_number,
+              per_page: 100,
+            })
+            // Check that the number of commits in the PR is 1.
+            if (commits.data.length !== 1) {
+              console.log("Not adding pull ready because the PR has more than one commit. Please squash your commits.")
+              process.exit(1)
+            }
+            const ref = commits.data.slice(-1)[0].sha
+            const checkRuns = await github.rest.checks.listForRef({
+              owner,
+              repo,
+              ref,
+            })
+            if (checkRuns.data.check_runs.length === 0) {
+              console.log("Not adding pull ready because no check runs are associated with the last commit: " + ref)
+              process.exit()
+            }
+            for (const checkRun of checkRuns.data.check_runs) {
+              if (checkRun.name.endsWith(context.job)) continue
+              if (checkRun.conclusion !== "success") {
+                console.log("Not adding pull ready because " + checkRun.name + " has not passed yet: " + checkRun.html_url)
+                process.exit()
+              }
+            }
+            console.log("Adding pull ready label because the PR is approved AND all the check runs have passed")
+            await github.rest.issues.addLabels({
+              issue_number: pull_number,
+              labels: ["pull ready"],
+              owner,
+              repo,
+            })
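
The label-gating logic above boils down to three conditions: the review decision is APPROVED, the PR has exactly one (squashed) commit, and every check run on the head commit concluded successfully. As a minimal sketch of those rules (in Python rather than the workflow's JavaScript; the function name is hypothetical, and the sketch omits the workflow's exclusion of its own job's check run):

```python
# Hypothetical sketch of the gating rules above; not code from the repository.
def should_add_pull_ready(review_decision, commit_count, check_conclusions):
    """Return True only when the 'pull ready' label should be added."""
    if review_decision != "APPROVED":  # sufficient code-owner approval required
        return False
    if commit_count != 1:              # commits must be squashed into one
        return False
    # every check run on the head commit must have succeeded
    return all(c == "success" for c in check_conclusions)
```

Either failing condition short-circuits, mirroring the workflow's early `process.exit()` calls.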

.github/workflows/CPUTests.yml

Lines changed: 3 additions & 3 deletions

@@ -11,8 +11,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04]
-        python-version: ['3.10']
+        os: [ubuntu-latest]
+        python-version: ['3.12']
     steps:
     - uses: actions/checkout@v3
     - name: Set up Python ${{ matrix.python-version }}
@@ -22,7 +22,7 @@ jobs:
     - name: Install Dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install pylint pyink pytype==2024.2.27
+        pip install pylint pyink==23.10.0 pytype==2024.2.27
     # - name: Typecheck the code with pytype
     #   run: |
     #     pytype --jobs auto --disable import-error src/maxdiffusion/

README.md

Lines changed: 3 additions & 0 deletions

@@ -255,6 +255,9 @@ After installation completes, run the training script.
 - In Wan2.1, the ici_fsdp_parallelism axis is used for sequence parallelism, while the ici_tensor_parallelism axis is used for head parallelism.
 - You can enable both, keeping in mind that Wan2.1 has 40 heads and 40 must be evenly divisible by ici_tensor_parallelism.
 - For sequence parallelism, the code pads the sequence length so it divides evenly. Try out different ici_fsdp_parallelism numbers, but we find 2 and 4 to be the best right now.
+- On GPU, it is recommended to enable the cudnn_te_flash attention kernel for optimal performance.
+- Best performance is achieved with batch parallelism, which can be enabled via the ici_fsdp_batch_parallelism axis. Note that this parallelism strategy does not support fractional batch sizes.
+- ici_fsdp_batch_parallelism and ici_fsdp_parallelism can be combined to allow fractional batch sizes. However, padding is not currently supported for the cudnn_te_flash attention kernel, so the sequence length must be divisible by the number of devices in the ici_fsdp_parallelism axis.

 You should eventually see a training run as:

code_style.sh

File mode changed: 100644 → 100755

end_to_end/tpu/eval_assert.py

Lines changed: 11 additions & 11 deletions

This hunk is a whitespace-only change: the license docstring is re-indented, with no change to its text.

@@ -1,18 +1,18 @@
 """
 Copyright 2024 Google LLC

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

 https://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """

 """
 Example to run

0 commit comments