diff --git a/.github/workflows/peerpods-chart_image.yaml b/.github/workflows/peerpods-chart_image.yaml
index 449104e325..b71ccb2a72 100644
--- a/.github/workflows/peerpods-chart_image.yaml
+++ b/.github/workflows/peerpods-chart_image.yaml
@@ -60,10 +60,15 @@ jobs:
           fetch-depth: 0
 
       - name: Install yq
+        # Pinned yq release, checksum-verified before install. Keep in sync with the pin in publish-cohere-release.yaml.
+        env:
+          YQ_VERSION: v4.44.3
+          YQ_SHA256: a2c097180dd884a8d50c956ee16a9cec070f30a7947cf4ebf87d5f36213e9ed7
         run: |
-          echo "Installing yq..."
-          sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
-          sudo chmod +x /usr/local/bin/yq
+          curl -fsSLo /tmp/yq "https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_amd64"
+          echo "${YQ_SHA256} /tmp/yq" | sha256sum --check --strict
+          sudo install -m 0755 /tmp/yq /usr/local/bin/yq
+          rm /tmp/yq
           yq --version
 
       - name: Read versions from Chart.yaml and versions.yaml
@@ -155,25 +160,6 @@ jobs:
             --password-stdin
           echo "Helm authenticated with ghcr.io"
 
-      - name: Authenticate to GCP
-        if: ${{ contains(steps.registry.outputs.registry, 'docker.pkg.dev') }}
-        uses: google-github-actions/auth@c200f3691d83b41bf9bbd8638997a462592937ed # v2.1.13
-        with:
-          workload_identity_provider: ${{ vars.GCP_WORKLOAD_IDENTITY_PROVIDER }}
-          service_account: ${{ vars.GCP_SERVICE_ACCOUNT }}
-
-      - name: Authenticate Helm with Artifact Registry
-        if: ${{ contains(steps.registry.outputs.registry, 'docker.pkg.dev') }}
-        env:
-          REGISTRY: ${{ steps.registry.outputs.registry }}
-        run: |
-          AR_HOST=$(echo "${REGISTRY}" | cut -d'/' -f1)
-          echo "Authenticating Helm with ${AR_HOST}..."
-          gcloud auth print-access-token | helm registry login "${AR_HOST}" \
-            --username oauth2accesstoken \
-            --password-stdin
-          echo "Helm authenticated with ${AR_HOST}"
-
       - name: Update Helm dependencies
         run: |
           echo "Updating Helm dependencies..."
diff --git a/.github/workflows/publish-cohere-release.yaml b/.github/workflows/publish-cohere-release.yaml
new file mode 100644
index 0000000000..c1db609c24
--- /dev/null
+++ b/.github/workflows/publish-cohere-release.yaml
@@ -0,0 +1,283 @@
+---
+# Publish semver-tagged Cohere-fork release artifacts to GHCR.
+#
+# Triggered by GitHub Releases targeting the `cohere` branch. The release tag
+# becomes the image/chart tag verbatim, with one normalisation: a leading "v"
+# is stripped for the chart (Helm/OCI requires SemVer with no prefix).
+#
+# Release process:
+#   1. Bump src/cloud-api-adaptor/install/charts/peerpods/Chart.yaml `version`
+#      to the new SemVer (e.g. 0.1.4-cohere.2). Merge to cohere.
+#   2. Create a GitHub Release on the cohere branch with tag `v0.1.4-cohere.2`
+#      (or `0.1.4-cohere.2` — both work). Publishing the release fires this.
+#
+# Tags produced (release `v0.1.4-cohere.2`):
+#   ghcr.io/cohere-ai/cloud-api-adaptor/cloud-api-adaptor:v0.1.4-cohere.2
+#   ghcr.io/cohere-ai/cloud-api-adaptor/peerpod-ctrl:v0.1.4-cohere.2
+#   ghcr.io/cohere-ai/cloud-api-adaptor/charts/peerpods:0.1.4-cohere.2
+#
+# The chart is self-contained: values.yaml is patched at package time so
+# image.tag and resourceCtrl.image.tag default to the release tag. A bare
+# `helm install` without overrides gets matching images.
+#
+# `latest-cohere` is NOT touched — that floats with the cohere branch tip via
+# publish-cohere.yaml. Consumers pin to the semver tag for stable releases.
+name: Publish (cohere release)
+
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: 'Release tag to (re)publish (e.g. v0.1.4-cohere.2). Must already exist as a git tag on cohere.'
+        required: true
+        type: string
+
+concurrency:
+  group: publish-cohere-release-${{ github.event.release.tag_name || inputs.tag }}
+  cancel-in-progress: false
+
+permissions: {}
+
+env:
+  REGISTRY: ghcr.io/cohere-ai/cloud-api-adaptor
+
+jobs:
+  tags:
+    name: Compute tags
+    runs-on: ubuntu-24.04
+    # Only fire for releases cut from the cohere branch. Manual dispatch always runs.
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      github.event.release.target_commitish == 'cohere'
+    outputs:
+      git_ref: ${{ steps.t.outputs.git_ref }}
+      image_tag: ${{ steps.t.outputs.image_tag }}
+      chart_version: ${{ steps.t.outputs.chart_version }}
+    steps:
+      - name: Derive tags from release
+        id: t
+        env:
+          RAW_TAG: ${{ github.event.release.tag_name || inputs.tag }}
+        run: |
+          # Image tags keep the v prefix verbatim; chart strips it (OCI SemVer).
+          chart_version="${RAW_TAG#v}"
+          {
+            echo "git_ref=${RAW_TAG}"
+            echo "image_tag=${RAW_TAG}"
+            echo "chart_version=${chart_version}"
+          } >> "$GITHUB_OUTPUT"
+
+  caa:
+    name: Build CAA image (release, amd64)
+    needs: tags
+    runs-on: ubuntu-24.04
+    permissions:
+      contents: read   # checkout the release tag
+      packages: write  # push image manifests to GHCR
+    defaults:
+      run:
+        working-directory: src/cloud-api-adaptor
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+          ref: ${{ needs.tags.outputs.git_ref }}
+
+      - name: Read Go version from versions.yaml
+        run: |
+          command -v yq || sudo snap install yq
+          go_version="$(yq -e '.tools.golang' versions.yaml)"  # -e: fail on a missing/null key instead of yielding "null"
+          [ -n "$go_version" ]
+          echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
+
+      - name: Setup Go ${{ env.GO_VERSION }}
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: "**/go.sum"
+          cache: false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
+
+      - name: Login to GHCR
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push release image
+        uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0
+        env:
+          REGISTRY: ${{ env.REGISTRY }}
+          RELEASE_TAGS: ${{ needs.tags.outputs.image_tag }}
+        with:
+          timeout_minutes: 60
+          retry_wait_seconds: 120
+          max_attempts: 3
+          command: |
+            cd src/cloud-api-adaptor && \
+            ARCHES=linux/amd64 \
+            RELEASE_BUILD=true \
+            RELEASE_TAGS="${RELEASE_TAGS}" \
+            make image registry="${REGISTRY}"
+
+  peerpod-ctrl:
+    name: Build peerpod-ctrl image (amd64)
+    needs: tags
+    runs-on: ubuntu-24.04
+    permissions:
+      contents: read   # checkout the release tag
+      packages: write  # push image manifests to GHCR
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+          ref: ${{ needs.tags.outputs.git_ref }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
+
+      - name: Login to GHCR
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push
+        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
+        with:
+          tags: ${{ env.REGISTRY }}/peerpod-ctrl:${{ needs.tags.outputs.image_tag }}
+          push: true
+          context: src
+          file: src/peerpod-ctrl/Dockerfile
+          platforms: linux/amd64
+          build-args: |
+            GOFLAGS=-tags=gcp
+
+  chart:
+    name: Publish peerpods Helm chart
+    needs: tags
+    runs-on: ubuntu-24.04
+    permissions:
+      contents: read            # checkout the release tag
+      packages: write           # push chart artifact to GHCR
+      id-token: write           # OIDC token for actions/attest sigstore signing
+      attestations: write       # write build provenance attestations
+      artifact-metadata: write  # actions/attest writes attestation metadata
+    defaults:
+      run:
+        working-directory: src/cloud-api-adaptor/install/charts/peerpods
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          ref: ${{ needs.tags.outputs.git_ref }}
+          persist-credentials: false
+          fetch-depth: 0
+
+      - name: Install yq
+        # Keep in sync with the pin in peerpods-chart_image.yaml.
+        env:
+          YQ_VERSION: v4.44.3
+          YQ_SHA256: a2c097180dd884a8d50c956ee16a9cec070f30a7947cf4ebf87d5f36213e9ed7
+        run: |
+          curl -fsSLo /tmp/yq "https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_amd64"
+          echo "${YQ_SHA256} /tmp/yq" | sha256sum --check --strict
+          sudo install -m 0755 /tmp/yq /usr/local/bin/yq
+          rm /tmp/yq
+          yq --version
+
+      - name: Patch values.yaml with release image tags
+        env:
+          IMAGE_TAG: ${{ needs.tags.outputs.image_tag }}
+        run: |
+          # strenv() reads the tag from the environment as a plain string, so
+          # quotes or yq operators in a tag name cannot alter the expression.
+          yq -i '.image.tag = strenv(IMAGE_TAG)' values.yaml
+          yq -i '.resourceCtrl.image.tag = strenv(IMAGE_TAG)' values.yaml
+          echo "Patched values.yaml default image tags to ${IMAGE_TAG}"
+          echo " image.tag: $(yq '.image.tag' values.yaml)"
+          echo " resourceCtrl.image.tag: $(yq '.resourceCtrl.image.tag' values.yaml)"
+
+      - name: Read versions
+        id: read_version
+        working-directory: .
+        run: |
+          HELM_VERSION="$(yq -e '.tools.helm.version' src/cloud-api-adaptor/versions.yaml)"
+          HELM_CHECKSUM="$(yq -e '.tools.helm.sha256' src/cloud-api-adaptor/versions.yaml)"
+          {
+            echo "helm_version=${HELM_VERSION}"
+            echo "helm_checksum=${HELM_CHECKSUM}"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Install Helm
+        env:
+          HELM_VERSION: ${{ steps.read_version.outputs.helm_version }}
+          HELM_CHECKSUM: ${{ steps.read_version.outputs.helm_checksum }}
+        run: |
+          curl -fsSL -o helm.tar.gz "https://get.helm.sh/helm-${HELM_VERSION}-linux-amd64.tar.gz"
+          echo "${HELM_CHECKSUM} helm.tar.gz" | sha256sum --check --strict
+          tar -xzf helm.tar.gz
+          sudo mv linux-amd64/helm /usr/local/bin/helm
+          rm -rf helm.tar.gz linux-amd64
+
+      - name: Login to GHCR
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Authenticate Helm with GHCR
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_ACTOR: ${{ github.actor }}
+        run: |
+          echo "${GITHUB_TOKEN}" | helm registry login ghcr.io \
+            --username "${GITHUB_ACTOR}" \
+            --password-stdin
+
+      - name: Update Helm dependencies
+        run: helm dependency update
+
+      - name: Package Helm chart
+        env:
+          VERSION: ${{ needs.tags.outputs.chart_version }}
+        run: |
+          mkdir -p .cr-release-packages
+          helm package . --version "${VERSION}" --destination .cr-release-packages
+          ls -lh .cr-release-packages/
+
+      - name: Push Helm chart to OCI registry
+        id: push_chart
+        env:
+          VERSION: ${{ needs.tags.outputs.chart_version }}
+          REGISTRY: ${{ env.REGISTRY }}/charts
+        run: |
+          CHART_PACKAGE=".cr-release-packages/peerpods-${VERSION}.tgz"
+          helm push "${CHART_PACKAGE}" "oci://${REGISTRY}"
+          DIGEST=$(helm show chart "oci://${REGISTRY}/peerpods" --version "${VERSION}" 2>&1 | awk '/Digest:/ {print $2}')
+
+          if [ -z "${DIGEST}" ]; then
+            echo "ERROR: Failed to extract digest"
+            exit 1
+          fi
+
+          echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"
+          echo "Pushed: oci://${REGISTRY}/peerpods:${VERSION} (digest: ${DIGEST})"
+
+      - name: Generate attestation
+        uses: actions/attest@59d89421af93a897026c735860bf21b6eb4f7b26 # v4.1.0
+        with:
+          subject-name: ${{ env.REGISTRY }}/charts/peerpods
+          subject-digest: ${{ steps.push_chart.outputs.digest }}
+          push-to-registry: true
diff --git a/.github/workflows/publish-cohere.yaml b/.github/workflows/publish-cohere.yaml
new file mode 100644
index 0000000000..7795fa4c41
--- /dev/null
+++ b/.github/workflows/publish-cohere.yaml
@@ -0,0 +1,174 @@
+---
+# Publish Cohere-fork artifacts to GHCR on every push to `cohere`.
+#
+# Builds the *release* CAA image only (no libvirt); upstream's reusable
+# caa_build_and_push.yaml has a hardcoded matrix that builds dev+release in
+# parallel, so we inline the release path here to avoid wasting an hour of CI
+# on a libvirt build we don't ship. peerpod-ctrl is also inlined (amd64-only,
+# GCP build tags) because upstream's reusable hardcodes all four arches.
+#
+# Tags produced (push to cohere):
+#   ghcr.io/cohere-ai/cloud-api-adaptor/cloud-api-adaptor:latest-cohere
+#   ghcr.io/cohere-ai/cloud-api-adaptor/cloud-api-adaptor:<12-char-sha>
+#   ghcr.io/cohere-ai/cloud-api-adaptor/peerpod-ctrl:latest-cohere
+#   ghcr.io/cohere-ai/cloud-api-adaptor/peerpod-ctrl:<12-char-sha>
+#   ghcr.io/cohere-ai/cloud-api-adaptor/charts/peerpods:0.0.0-dev.cohere
+#
+# The chart always publishes to the floating `0.0.0-dev.cohere` tag here —
+# mirroring upstream's `0.0.0-dev` convention for main-branch pushes — so we
+# never silently overwrite a real Chart.yaml version. Real SemVer chart tags
+# (e.g. 0.1.4-cohere.2) are produced by publish-cohere-release.yaml on release
+# events and match the GH Release tag, not Chart.yaml on disk at push time.
+name: Publish (cohere)
+
+on:
+  push:
+    branches: [cohere]
+  workflow_dispatch:
+
+concurrency:
+  group: publish-cohere-${{ github.ref }}
+  cancel-in-progress: false
+
+permissions: {}
+
+env:
+  REGISTRY: ghcr.io/cohere-ai/cloud-api-adaptor
+
+jobs:
+  tags:
+    name: Compute tags
+    runs-on: ubuntu-24.04
+    outputs:
+      release_tags: ${{ steps.t.outputs.release_tags }}
+      image_tags: ${{ steps.t.outputs.image_tags }}
+    steps:
+      - name: Derive tag list from commit SHA
+        id: t
+        env:
+          SHA: ${{ github.sha }}
+        run: |
+          short="${SHA:0:12}"
+          echo "release_tags=latest-cohere,${short}" >> "$GITHUB_OUTPUT"
+          echo "image_tags=latest-cohere,${short}" >> "$GITHUB_OUTPUT"
+
+  caa:
+    name: Build CAA image (release, amd64)
+    needs: tags
+    runs-on: ubuntu-24.04
+    permissions:
+      contents: read   # checkout the cohere ref
+      packages: write  # push image manifests to GHCR
+    defaults:
+      run:
+        working-directory: src/cloud-api-adaptor
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+          ref: ${{ github.sha }}
+
+      - name: Read Go version from versions.yaml
+        run: |
+          command -v yq || sudo snap install yq
+          go_version="$(yq -e '.tools.golang' versions.yaml)"  # -e: fail on a missing/null key instead of yielding "null"
+          [ -n "$go_version" ]
+          echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
+
+      - name: Setup Go ${{ env.GO_VERSION }}
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: "**/go.sum"
+          cache: false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
+
+      - name: Login to GHCR
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push release image
+        uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0
+        env:
+          REGISTRY: ${{ env.REGISTRY }}
+          RELEASE_TAGS: ${{ needs.tags.outputs.release_tags }}
+        with:
+          timeout_minutes: 60
+          retry_wait_seconds: 120
+          max_attempts: 3
+          command: |
+            cd src/cloud-api-adaptor && \
+            ARCHES=linux/amd64 \
+            RELEASE_BUILD=true \
+            RELEASE_TAGS="${RELEASE_TAGS}" \
+            make image registry="${REGISTRY}"
+
+  peerpod-ctrl:
+    name: Build peerpod-ctrl image (amd64)
+    needs: tags
+    runs-on: ubuntu-24.04
+    permissions:
+      contents: read   # checkout the cohere ref
+      packages: write  # push image manifests to GHCR
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+          ref: ${{ github.sha }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
+
+      - name: Login to GHCR
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Determine image tags
+        id: tags
+        env:
+          IMAGE_TAGS: ${{ needs.tags.outputs.image_tags }}
+          REGISTRY: ${{ env.REGISTRY }}
+        run: |
+          tags_csv=""
+          for t in ${IMAGE_TAGS//,/ }; do
+            tags_csv+="${REGISTRY}/peerpod-ctrl:${t},"
+          done
+          echo "tags=${tags_csv%,}" >> "$GITHUB_OUTPUT"
+
+      - name: Build and push
+        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
+        with:
+          tags: ${{ steps.tags.outputs.tags }}
+          push: true
+          context: src
+          file: src/peerpod-ctrl/Dockerfile
+          platforms: linux/amd64
+          build-args: |
+            GOFLAGS=-tags=gcp
+
+  chart:
+    name: Publish peerpods Helm chart
+    uses: ./.github/workflows/peerpods-chart_image.yaml
+    with:
+      git_ref: ${{ github.sha }}
+      # Floating dev tag — mirrors upstream's `0.0.0-dev` convention for
+      # main-branch pushes. Real SemVer is reserved for release events.
+      chart_version: "0.0.0-dev.cohere"
+    permissions:
+      contents: read            # checkout the cohere ref
+      packages: write           # push chart artifact to GHCR
+      id-token: write           # OIDC token for actions/attest sigstore signing
+      attestations: write       # write build provenance attestations
+      artifact-metadata: write  # actions/attest writes attestation metadata
diff --git a/src/cloud-api-adaptor/install/charts/peerpods/templates/daemonset.yaml b/src/cloud-api-adaptor/install/charts/peerpods/templates/daemonset.yaml
index 81d5ad2d13..08f374214b 100644
--- a/src/cloud-api-adaptor/install/charts/peerpods/templates/daemonset.yaml
+++ b/src/cloud-api-adaptor/install/charts/peerpods/templates/daemonset.yaml
@@ -105,6 +105,10 @@ spec:
       dnsPolicy: ClusterFirstWithHostNet
       nodeSelector:
         {{- toYaml .Values.nodeSelector | nindent 8 }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
       volumes:
         {{- if and (or (eq .Values.provider "libvirt") (eq .Values.provider "byom")) (include "peerpods.sshKeySecretName" .) }}
         - name: ssh
diff --git a/src/cloud-api-adaptor/install/charts/peerpods/templates/fix-gke-node-config.yaml b/src/cloud-api-adaptor/install/charts/peerpods/templates/fix-gke-node-config.yaml
index 0dd5269a3a..20ee72147c 100644
--- a/src/cloud-api-adaptor/install/charts/peerpods/templates/fix-gke-node-config.yaml
+++ b/src/cloud-api-adaptor/install/charts/peerpods/templates/fix-gke-node-config.yaml
@@ -15,11 +15,13 @@ spec:
       labels:
         app: fix-gke-node-config
     spec:
-      tolerations:
-        - operator: Exists
       hostPID: true
       nodeSelector:
         {{- toYaml .Values.nodeSelector | nindent 8 }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
       initContainers:
         - name: fix-containerd
           image: alpine:3.21
diff --git a/src/cloud-api-adaptor/install/charts/peerpods/values.yaml b/src/cloud-api-adaptor/install/charts/peerpods/values.yaml
index 6c6c46c6b3..2d60446728 100644
--- a/src/cloud-api-adaptor/install/charts/peerpods/values.yaml
+++ b/src/cloud-api-adaptor/install/charts/peerpods/values.yaml
@@ -79,8 +79,12 @@ secrets:
 #
 # Note: libvirt/docker provider files override this with the dev image.
 image:
-  name: us-central1-docker.pkg.dev/cohere-confidential-computing/cloud-api-adaptor/cloud-api-adaptor
-  tag: "44d97551e74d3d835e6ab17d37475f1033b2011a"
+  # Cohere fork default. Published by .github/workflows/publish-cohere.yaml on
+  # every push to the cohere branch. Override with the 12-character commit SHA
+  # tag that workflow also pushes for immutable pins in production —
+  # `latest-cohere` is a floating tag and follows the tip of the cohere branch.
+  name: ghcr.io/cohere-ai/cloud-api-adaptor/cloud-api-adaptor
+  tag: latest-cohere
 
 # Cloud provider: libvirt, aws, azure, gcp, ibmcloud, vsphere
 provider: gcp
@@ -89,9 +93,11 @@ provider: gcp
 # using Workload Identity (no GCP_CREDENTIALS file needed).
 command: ["/bin/sh", "-c", "exec cloud-api-adaptor gcp"]
 
-# DaemonSet nodeSelector. GKE nodes are labeled by the node pool config.
-nodeSelector:
-  fortress.cohere.com/caa-worker: "true"
+# nodeSelector / tolerations applied to the cloud-api-adaptor and
+# fix-gke-node-config DaemonSets. Empty by default; consumers set these
+# to fence the DaemonSets onto their CAA worker pool.
+nodeSelector: {}
+tolerations: []
 
 # ServiceAccount annotations (e.g. Workload Identity binding)
 serviceAccount:
@@ -110,16 +116,12 @@ limit: "10"
 
 # DaemonSet configuration
 daemonset:
-  # Update strategy controls how the CAA DaemonSet performs rolling updates
+  # Update strategy controls how the CAA DaemonSet performs rolling updates.
+  # CAA uses hostNetwork, so two pods on the same node cause a port conflict.
+  # maxSurge must be 0; roll one node at a time by killing old before starting new.
   updateStrategy:
-    # maxUnavailable: Maximum number of pods that can be unavailable during update
-    # maxSurge: Maximum number of extra pods that can be created during update
-    #
-    # Default (maxUnavailable: 0, maxSurge: 1) provides zero downtime updates
-    # and works on both small (1 node) and large clusters.
-    # Override with custom values for specific deployment needs.
-    maxUnavailable: 0
-    maxSurge: 1
+    maxUnavailable: 1
+    maxSurge: 0
   # A free-form map which can be used to inject provider-specific or platform-specific labels.
   #
   # Example (Azure Workload Identity):
@@ -136,8 +138,11 @@ daemonset:
 resourceCtrl:
   enabled: true
   image:
-    repository: us-central1-docker.pkg.dev/cohere-confidential-computing/cloud-api-adaptor/peerpod-ctrl
-    tag: "bfd5b9847b2fb44c843f6a3b6209092d0a217d83"
+    # Cohere fork default. Published by .github/workflows/publish-cohere.yaml on
+    # every push to the cohere branch. Override with the 12-character commit SHA
+    # tag that workflow also pushes for immutable pins in production.
+    repository: ghcr.io/cohere-ai/cloud-api-adaptor/peerpod-ctrl
+    tag: latest-cohere
 
 # peerpods-webhook subchart configuration
 # Mutating webhook that modifies pod specs to use peer pods runtime and resources
@@ -152,8 +157,15 @@ kata-deploy:
   # Set to false if kata is already installed (e.g., via another chart or operator).
   # If disabled, ensure kata-remote shim is enabled otherwise CAA will not work.
   enabled: true
+  # Fence the kata-deploy DaemonSet to the CAA worker pool. Empty by default;
+  # consumers must supply matching nodeSelector / tolerations via --set.
+  nodeSelector: {}
+  tolerations: []
+  # No external snapshotter needed. The kata-remote shim uses
+  # experimental_force_guest_pull (set in configuration-remote.toml)
+  # to pull images inside the guest VM without nydus-snapshotter.
   snapshotter:
-    setup: ["nydus"]
+    setup: []
   defaultShim:
     amd64: remote
     arm64: remote
@@ -194,3 +206,18 @@ kata-deploy:
       enabled: false
     remote:
       enabled: true
+      containerd:
+        # Override kata-deploy's default of "nydus". nydus is not installed
+        # (snapshotter.setup: [] above), so routing kata-remote to it would
+        # break pod sandbox creation with
+        # "dial unix:///run/nydus-snapshotter/...: connection refused".
+        # Empty string causes kata-deploy to omit the snapshotter line from
+        # the containerd drop-in, letting containerd fall back to overlayfs.
+        snapshotter: ""
+        # Required for peer pods: sets runtime.experimental_force_guest_pull
+        # = true in configuration-remote.toml so the kata-agent pulls images
+        # inside the guest VM. Without this, the kata-agent tries to bind-
+        # mount /run/kata-containers/shared/containers//rootfs (populated
+        # by virtiofs for local kata) which doesn't exist on peer-pod VMs,
+        # failing CreateContainer with ENOENT.
+        forceGuestPull: true