From cec362d32015ab3ab5dc61d2cf719fa0e2279313 Mon Sep 17 00:00:00 2001 From: aaronabbott Date: Thu, 30 Apr 2026 03:27:29 +0000 Subject: [PATCH 1/8] Add implementation plan and task list for durable cleanup --- cloudbuild-cleanup.yaml | 10 +++++++++ e2etesting/e2e_testing.go | 5 +++-- e2etestrunner/main_test.go | 7 ++++++- tf/modules/repo-ci-triggers/main.tf | 32 +++++++++++++++++++++++++++++ tf/persistent/repo-ci-triggers.tf | 4 ++++ 5 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 cloudbuild-cleanup.yaml diff --git a/cloudbuild-cleanup.yaml b/cloudbuild-cleanup.yaml new file mode 100644 index 0000000..6143966 --- /dev/null +++ b/cloudbuild-cleanup.yaml @@ -0,0 +1,10 @@ +steps: + - name: 'hashicorp/terraform:1.7.0' + entrypoint: 'sh' + args: + - '-c' + - | + cd tf/$_ENVIRONMENT + terraform init -backend-config=bucket=$PROJECT_ID-e2e-tfstate + terraform workspace select $_TEST_RUN_ID + terraform destroy -auto-approve -var=project_id=$PROJECT_ID -var=image=dummy -var=runtime=dummy diff --git a/e2etesting/e2e_testing.go b/e2etesting/e2e_testing.go index 44b72d9..299ec82 100644 --- a/e2etesting/e2e_testing.go +++ b/e2etesting/e2e_testing.go @@ -36,7 +36,7 @@ type CmdWithProjectId struct { } type CmdWithImage struct { - Image string `arg:"required" help:"docker container image to deploy and test"` + Image string `help:"docker container image to deploy and test"` } type LocalCmd struct { @@ -80,7 +80,7 @@ type GkeCmd struct { type GaeCmd struct { CmdWithImage - Runtime string `arg:"required" help:"The language runtime for the instrumented test server, used in naming the service"` + Runtime string `help:"The language runtime for the instrumented test server, used in naming the service"` } type GaeStandardCmd struct { @@ -135,6 +135,7 @@ type Args struct { // resources created for debugging. If not provided, we generate a hex // string. TestRunID string `arg:"--test-run-id,env:TEST_RUN_ID" help:"Optional test run id to use to partition terraform resources"` + SkipCleanup bool `arg:"--skip-cleanup" help:"Skip cleanup after tests"` } type Cleanup func() diff --git a/e2etestrunner/main_test.go b/e2etestrunner/main_test.go index 08d4d30..f09688f 100644 --- a/e2etestrunner/main_test.go +++ b/e2etestrunner/main_test.go @@ -54,7 +54,12 @@ func TestMain(m *testing.M) { } client, cleanup, err := setupFunc(ctx, &args, logger) - defer cleanup() + if !args.SkipCleanup { + defer cleanup() + } else { + logger.Println("Skipping cleanup as requested") + } + if err != nil { logger.Panic(err) } diff --git a/tf/modules/repo-ci-triggers/main.tf b/tf/modules/repo-ci-triggers/main.tf index dda542b..dcef8f9 100644 --- a/tf/modules/repo-ci-triggers/main.tf +++ b/tf/modules/repo-ci-triggers/main.tf @@ -56,6 +56,38 @@ resource "google_cloudbuild_trigger" "ci" { include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS" } +// Cleanup after tests +resource "google_cloudbuild_trigger" "cleanup" { + for_each = var.run_on + + description = "Cleanup after E2E tests on ${each.key} for ${var.repository}" + name = "${local.repo_short_name}-e2e-${each.key}-cleanup" + + pubsub_config { + topic = "projects/${var.project_id}/topics/cloud-builds" + } + + filter = "body.message.data.buildTriggerId == \"${google_cloudbuild_trigger.ci[each.key].id}\" && (body.message.data.status == \"SUCCESS\" || body.message.data.status == \"FAILURE\")" + + git_file_source { + path = "cloudbuild-cleanup.yaml" + uri = "https://github.com/GoogleCloudPlatform/${var.repository}" + revision = "refs/heads/${var.main_branch}" + repo_type = "GITHUB" + } + + substitutions = { + _TEST_RUN_ID = "$(body.message.data.substitutions._TEST_RUN_ID)" + _ENVIRONMENT = each.key + _TEST_RUNNER_IMAGE = "$(body.message.data.substitutions._TEST_RUNNER_IMAGE)" + } +} + +variable "project_id" { + type = string + description = "The GCP project ID" +} + variable "repository" { type = string description = "The repository to create the triggers for e.g. opentelemetry-operations-python" diff --git a/tf/persistent/repo-ci-triggers.tf b/tf/persistent/repo-ci-triggers.tf index 72e493c..79aa434 100644 --- a/tf/persistent/repo-ci-triggers.tf +++ b/tf/persistent/repo-ci-triggers.tf @@ -14,24 +14,28 @@ module "python" { source = "../modules/repo-ci-triggers" + project_id = var.project_id repository = "opentelemetry-operations-python" run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] } module "java" { source = "../modules/repo-ci-triggers" + project_id = var.project_id repository = "opentelemetry-operations-java" run_on = ["local", "gce", "gke", "gae", "cloud-run", "cloud-functions-gen2"] } module "js" { source = "../modules/repo-ci-triggers" + project_id = var.project_id repository = "opentelemetry-operations-js" run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] } module "go" { source = "../modules/repo-ci-triggers" + project_id = var.project_id repository = "opentelemetry-operations-go" run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] } From 47be576470da5c530f0a4fbc442ece0357b8725a Mon Sep 17 00:00:00 2001 From: aaronabbott Date: Thu, 30 Apr 2026 03:54:52 +0000 Subject: [PATCH 2/8] Implement durable cleanup using Pub/Sub trigger --- cloudbuild-cleanup.yaml | 7 ++++++- cloudbuild-e2e-cloud-functions-gen2.yaml | 1 + cloudbuild-e2e-cloud-run.yaml | 1 + cloudbuild-e2e-gae-standard.yaml | 1 + cloudbuild-e2e-gae.yaml | 1 + cloudbuild-e2e-gce.yaml | 1 + cloudbuild-e2e-gke.yaml | 1 + tf/modules/repo-ci-triggers/main.tf | 26 ++---------------------- tf/persistent/repo-ci-triggers.tf | 23 +++++++++++++++++++++ 9 files changed, 37 insertions(+), 25 deletions(-) diff --git a/cloudbuild-cleanup.yaml b/cloudbuild-cleanup.yaml index 6143966..13538af 100644 --- a/cloudbuild-cleanup.yaml +++ b/cloudbuild-cleanup.yaml @@ -4,7 +4,12 @@ steps: args: - '-c' - | - cd tf/$_ENVIRONMENT + if [ -z "$_E2E_ENVIRONMENT" ]; then + echo "Error: _E2E_ENVIRONMENT is not set." + exit 1 + fi + echo "Determined environment: $_E2E_ENVIRONMENT" + cd tf/$_E2E_ENVIRONMENT terraform init -backend-config=bucket=$PROJECT_ID-e2e-tfstate terraform workspace select $_TEST_RUN_ID terraform destroy -auto-approve -var=project_id=$PROJECT_ID -var=image=dummy -var=runtime=dummy diff --git a/cloudbuild-e2e-cloud-functions-gen2.yaml b/cloudbuild-e2e-cloud-functions-gen2.yaml index 467239f..d0e7518 100644 --- a/cloudbuild-e2e-cloud-functions-gen2.yaml +++ b/cloudbuild-e2e-cloud-functions-gen2.yaml @@ -67,6 +67,7 @@ steps: - --runtime=go125 - --functionsource=/workspace/opentelemetry-operations-go/e2e-test-server/cloud_functions/function-source.zip - --entrypoint=HandleCloudFunction + - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/cloudbuild-e2e-cloud-run.yaml b/cloudbuild-e2e-cloud-run.yaml index f8c667a..f281625 100644 --- a/cloudbuild-e2e-cloud-run.yaml +++ b/cloudbuild-e2e-cloud-run.yaml @@ -43,6 +43,7 @@ steps: args: - cloud-run - --image=$_TEST_SERVER_IMAGE + - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/cloudbuild-e2e-gae-standard.yaml b/cloudbuild-e2e-gae-standard.yaml index 1b8b7b7..05d27de 100644 --- a/cloudbuild-e2e-gae-standard.yaml +++ b/cloudbuild-e2e-gae-standard.yaml @@ -66,6 +66,7 @@ steps: - gae-standard - --runtime=go125 - --appsource=/workspace/opentelemetry-operations-go/e2e-test-server/appsource.zip + - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/cloudbuild-e2e-gae.yaml b/cloudbuild-e2e-gae.yaml index fa9509e..877d57c 100644 --- a/cloudbuild-e2e-gae.yaml +++ b/cloudbuild-e2e-gae.yaml @@ -44,6 +44,7 @@ steps: - gae - --image=$_TEST_SERVER_IMAGE - --runtime=go125 + - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/cloudbuild-e2e-gce.yaml b/cloudbuild-e2e-gce.yaml index 850c40f..64c0528 100644 --- a/cloudbuild-e2e-gce.yaml +++ b/cloudbuild-e2e-gce.yaml @@ -43,6 +43,7 @@ steps: args: - gce - --image=$_TEST_SERVER_IMAGE + - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/cloudbuild-e2e-gke.yaml b/cloudbuild-e2e-gke.yaml index 49fad55..296ccbf 100644 --- a/cloudbuild-e2e-gke.yaml +++ b/cloudbuild-e2e-gke.yaml @@ -43,6 +43,7 @@ steps: args: - gke - --image=$_TEST_SERVER_IMAGE + - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/tf/modules/repo-ci-triggers/main.tf b/tf/modules/repo-ci-triggers/main.tf index dcef8f9..8bbe0b9 100644 --- a/tf/modules/repo-ci-triggers/main.tf +++ b/tf/modules/repo-ci-triggers/main.tf @@ -52,34 +52,12 @@ resource "google_cloudbuild_trigger" "ci" { tags = [ local.repo_short_name, each.key, + "terraform-resources" ] include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS" -} - -// Cleanup after tests -resource "google_cloudbuild_trigger" "cleanup" { - for_each = var.run_on - - description = "Cleanup after E2E tests on ${each.key} for ${var.repository}" - name = "${local.repo_short_name}-e2e-${each.key}-cleanup" - - pubsub_config { - topic = "projects/${var.project_id}/topics/cloud-builds" - } - - filter = "body.message.data.buildTriggerId == \"${google_cloudbuild_trigger.ci[each.key].id}\" && (body.message.data.status == \"SUCCESS\" || body.message.data.status == \"FAILURE\")" - - git_file_source { - path = "cloudbuild-cleanup.yaml" - uri = "https://github.com/GoogleCloudPlatform/${var.repository}" - revision = "refs/heads/${var.main_branch}" - repo_type = "GITHUB" - } substitutions = { - _TEST_RUN_ID = "$(body.message.data.substitutions._TEST_RUN_ID)" - _ENVIRONMENT = each.key - _TEST_RUNNER_IMAGE = "$(body.message.data.substitutions._TEST_RUNNER_IMAGE)" + _E2E_ENVIRONMENT = each.key } } diff --git a/tf/persistent/repo-ci-triggers.tf b/tf/persistent/repo-ci-triggers.tf index 79aa434..99a7b20 100644 --- a/tf/persistent/repo-ci-triggers.tf +++ b/tf/persistent/repo-ci-triggers.tf @@ -39,3 +39,26 @@ module "go" { repository = "opentelemetry-operations-go" run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] } + +resource "google_cloudbuild_trigger" "global_cleanup" { + name = "global-e2e-cleanup" + description = "Global cleanup for E2E tests triggered by Pub/Sub" + + pubsub_config { + topic = "projects/${var.project_id}/topics/cloud-builds" + } + + filter = "\"terraform-resources\" in body.message.data.tags && (body.message.data.status == \"SUCCESS\" || body.message.data.status == \"FAILURE\")" + + git_file_source { + path = "cloudbuild-cleanup.yaml" + uri = "https://github.com/GoogleCloudPlatform/opentelemetry-operations-e2e-testing" + revision = "refs/heads/main" + repo_type = "GITHUB" + } + + substitutions = { + _TEST_RUN_ID = "$(body.message.data.id)" + _E2E_ENVIRONMENT = "$(body.message.data.substitutions._E2E_ENVIRONMENT)" + } +} From c72929da9103f2ed48605913a0246f8eac86c0d4 Mon Sep 17 00:00:00 2001 From: aaronabbott Date: Thu, 30 Apr 2026 04:48:22 +0000 Subject: [PATCH 3/8] Use test runner for cleanup in Cloud Build --- cloudbuild-cleanup.yaml | 19 +++++-------- e2etesting/e2e_testing.go | 6 +++++ e2etesting/setuptf/setuptf.go | 44 +++++++++++++++++++++++++++++++ e2etestrunner/main_test.go | 28 ++++++++++++++++++++ tf/persistent/repo-ci-triggers.tf | 5 ++-- 5 files changed, 87 insertions(+), 15 deletions(-) diff --git a/cloudbuild-cleanup.yaml b/cloudbuild-cleanup.yaml index 13538af..277147f 100644 --- a/cloudbuild-cleanup.yaml +++ b/cloudbuild-cleanup.yaml @@ -1,15 +1,8 @@ steps: - - name: 'hashicorp/terraform:1.7.0' - entrypoint: 'sh' + - name: $_TEST_RUNNER_IMAGE + dir: / args: - - '-c' - - | - if [ -z "$_E2E_ENVIRONMENT" ]; then - echo "Error: _E2E_ENVIRONMENT is not set." - exit 1 - fi - echo "Determined environment: $_E2E_ENVIRONMENT" - cd tf/$_E2E_ENVIRONMENT - terraform init -backend-config=bucket=$PROJECT_ID-e2e-tfstate - terraform workspace select $_TEST_RUN_ID - terraform destroy -auto-approve -var=project_id=$PROJECT_ID -var=image=dummy -var=runtime=dummy + - cleanup + - --environment=$_E2E_ENVIRONMENT + - --project-id=$PROJECT_ID + - --test-run-id=$_TEST_RUN_ID diff --git a/e2etesting/e2e_testing.go b/e2etesting/e2e_testing.go index 299ec82..2fed0c3 100644 --- a/e2etesting/e2e_testing.go +++ b/e2etesting/e2e_testing.go @@ -106,12 +106,18 @@ type CloudFunctionsGen2Cmd struct { FunctionSource string `arg:"required" help:"The full path of the zip file that contains the code source that needs to run within the CloudFunction"` } +type CleanupCmd struct { + Environment string `arg:"required" help:"The environment to clean up (e.g. cloud-run)"` +} + type Args struct { // This subcommand is a special case, it doesn't run any tests. It just // applies the persistent resources which are used across tests. See // tf/persistent/README.md for details on what is in there. ApplyPersistent *ApplyPersistent `arg:"subcommand:apply-persistent" help:"Terraform apply the resources in tf/persistent and exit (does not run tests)."` + Cleanup *CleanupCmd `arg:"subcommand:cleanup" help:"Clean up resources for a test run"` + Local *LocalCmd `arg:"subcommand:local" help:"Deploy the test server locally with docker and execute tests"` Gke *GkeCmd `arg:"subcommand:gke" help:"Deploy the test server on GKE and execute tests"` Gce *GceCmd `arg:"subcommand:gce" help:"Deploy the test server on GCE and execute tests"` diff --git a/e2etesting/setuptf/setuptf.go b/e2etesting/setuptf/setuptf.go index c7bffa1..6badc45 100644 --- a/e2etesting/setuptf/setuptf.go +++ b/e2etesting/setuptf/setuptf.go @@ -255,3 +255,47 @@ func tfVarMapToArgs( } return tfVarArgs } + +// CleanupTf runs terraform destroy and deletes the workspace. +func CleanupTf( + ctx context.Context, + projectID string, + testRunID string, + tfDir string, + tfVars map[string]string, + logger *log.Logger, +) error { + tfVarArgs := tfVarMapToArgs(projectID, tfVars) + cmd := initCommand(ctx, projectID) + cmd.Args = append(cmd.Args, tfVarArgs...) + cmd.Dir = tfDir + if err := runWithOutput(cmd, logger); err != nil { + return err + } + + // Switch to target workspace + cmd = exec.CommandContext(ctx, "terraform", "workspace", "select", testRunID) + cmd.Dir = tfDir + if err := runWithOutput(cmd, logger); err != nil { + return err + } + + // Run terraform destroy + cmd = exec.CommandContext( + ctx, + "terraform", + "destroy", + "-input=false", + "-auto-approve", + ) + cmd.Args = append(cmd.Args, tfVarArgs...) + cmd.Dir = tfDir + if err := runWithOutput(cmd, logger); err != nil { + return err + } + + // Delete workspace + deleteWorkspace(ctx, testRunID, tfDir, logger) + + return nil +} diff --git a/e2etestrunner/main_test.go b/e2etestrunner/main_test.go index f09688f..fa79ad0 100644 --- a/e2etestrunner/main_test.go +++ b/e2etestrunner/main_test.go @@ -19,6 +19,7 @@ package e2etestrunner import ( "context" + "os" "testing" "github.com/GoogleCloudPlatform/opentelemetry-operations-e2e-testing/e2etesting" @@ -35,6 +36,33 @@ var ( func TestMain(m *testing.M) { logger, ctx := e2etesting.InitTestMain(&args, setuptf.ApplyPersistent) + if args.Cleanup != nil { + var tfDir string + switch args.Cleanup.Environment { + case "cloud-run": + tfDir = "tf/cloud-run" + case "gke": + tfDir = "tf/gke" + case "gce": + tfDir = "tf/gce" + case "gae": + tfDir = "tf/gae" + case "gae-standard": + tfDir = "tf/gae-standard" + case "cloud-functions-gen2": + tfDir = "tf/cloud-functions-gen2" + default: + logger.Panicf("Unknown environment for cleanup: %s", args.Cleanup.Environment) + } + // We might need dummy vars if TF requires them for destroy + tfVars := map[string]string{"image": "dummy", "runtime": "dummy"} + err := setuptf.CleanupTf(ctx, args.ProjectID, args.TestRunID, tfDir, tfVars, logger) + if err != nil { + logger.Panic(err) + } + os.Exit(0) + } + var setupFunc e2etesting.SetupFunc switch { case args.Local != nil: diff --git a/tf/persistent/repo-ci-triggers.tf b/tf/persistent/repo-ci-triggers.tf index 99a7b20..f1f5f79 100644 --- a/tf/persistent/repo-ci-triggers.tf +++ b/tf/persistent/repo-ci-triggers.tf @@ -58,7 +58,8 @@ resource "google_cloudbuild_trigger" "global_cleanup" { } substitutions = { - _TEST_RUN_ID = "$(body.message.data.id)" - _E2E_ENVIRONMENT = "$(body.message.data.substitutions._E2E_ENVIRONMENT)" + _TEST_RUN_ID = "$(body.message.data.id)" + _E2E_ENVIRONMENT = "$(body.message.data.substitutions._E2E_ENVIRONMENT)" + _TEST_RUNNER_IMAGE = "$(body.message.data.substitutions._TEST_RUNNER_IMAGE)" } } From 78e4f8b3cd1aa974c23147d5e4a3bf87ab19d7ae Mon Sep 17 00:00:00 2001 From: aaronabbott Date: Thu, 30 Apr 2026 04:50:46 +0000 Subject: [PATCH 4/8] Use test runner for cleanup and invert cleanup flag --- cloudbuild-e2e-cloud-functions-gen2.yaml | 1 - cloudbuild-e2e-cloud-run.yaml | 1 - cloudbuild-e2e-gae-standard.yaml | 1 - cloudbuild-e2e-gae.yaml | 1 - cloudbuild-e2e-gce.yaml | 1 - cloudbuild-e2e-gke.yaml | 1 - e2etesting/e2e_testing.go | 2 +- e2etestrunner/main_test.go | 4 ++-- 8 files changed, 3 insertions(+), 9 deletions(-) diff --git a/cloudbuild-e2e-cloud-functions-gen2.yaml b/cloudbuild-e2e-cloud-functions-gen2.yaml index d0e7518..467239f 100644 --- a/cloudbuild-e2e-cloud-functions-gen2.yaml +++ b/cloudbuild-e2e-cloud-functions-gen2.yaml @@ -67,7 +67,6 @@ steps: - --runtime=go125 - --functionsource=/workspace/opentelemetry-operations-go/e2e-test-server/cloud_functions/function-source.zip - --entrypoint=HandleCloudFunction - - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/cloudbuild-e2e-cloud-run.yaml b/cloudbuild-e2e-cloud-run.yaml index f281625..f8c667a 100644 --- a/cloudbuild-e2e-cloud-run.yaml +++ b/cloudbuild-e2e-cloud-run.yaml @@ -43,7 +43,6 @@ steps: args: - cloud-run - --image=$_TEST_SERVER_IMAGE - - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/cloudbuild-e2e-gae-standard.yaml b/cloudbuild-e2e-gae-standard.yaml index 05d27de..1b8b7b7 100644 --- a/cloudbuild-e2e-gae-standard.yaml +++ b/cloudbuild-e2e-gae-standard.yaml @@ -66,7 +66,6 @@ steps: - gae-standard - --runtime=go125 - --appsource=/workspace/opentelemetry-operations-go/e2e-test-server/appsource.zip - - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/cloudbuild-e2e-gae.yaml b/cloudbuild-e2e-gae.yaml index 877d57c..fa9509e 100644 --- a/cloudbuild-e2e-gae.yaml +++ b/cloudbuild-e2e-gae.yaml @@ -44,7 +44,6 @@ steps: - gae - --image=$_TEST_SERVER_IMAGE - --runtime=go125 - - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/cloudbuild-e2e-gce.yaml b/cloudbuild-e2e-gce.yaml index 64c0528..850c40f 100644 --- a/cloudbuild-e2e-gce.yaml +++ b/cloudbuild-e2e-gce.yaml @@ -43,7 +43,6 @@ steps: args: - gce - --image=$_TEST_SERVER_IMAGE - - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/cloudbuild-e2e-gke.yaml b/cloudbuild-e2e-gke.yaml index 296ccbf..49fad55 100644 --- a/cloudbuild-e2e-gke.yaml +++ b/cloudbuild-e2e-gke.yaml @@ -43,7 +43,6 @@ steps: args: - gke - --image=$_TEST_SERVER_IMAGE - - --skip-cleanup logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: diff --git a/e2etesting/e2e_testing.go b/e2etesting/e2e_testing.go index 2fed0c3..b812364 100644 --- a/e2etesting/e2e_testing.go +++ b/e2etesting/e2e_testing.go @@ -141,7 +141,7 @@ type Args struct { // resources created for debugging. If not provided, we generate a hex // string. TestRunID string `arg:"--test-run-id,env:TEST_RUN_ID" help:"Optional test run id to use to partition terraform resources"` - SkipCleanup bool `arg:"--skip-cleanup" help:"Skip cleanup after tests"` + AutoCleanup bool `arg:"--auto-cleanup" help:"Enable automatic cleanup after tests (default: false). For deferred cleanup, use the 'cleanup' subcommand."` } type Cleanup func() diff --git a/e2etestrunner/main_test.go b/e2etestrunner/main_test.go index fa79ad0..fdb8e4f 100644 --- a/e2etestrunner/main_test.go +++ b/e2etestrunner/main_test.go @@ -82,10 +82,10 @@ func TestMain(m *testing.M) { } client, cleanup, err := setupFunc(ctx, &args, logger) - if !args.SkipCleanup { + if args.AutoCleanup { defer cleanup() } else { - logger.Println("Skipping cleanup as requested") + logger.Println("Skipping auto-cleanup (default behavior)") } if err != nil { From 76050469b0ab1fa57a790adcfb83ea11c1df1221 Mon Sep 17 00:00:00 2001 From: aaronabbott Date: Thu, 30 Apr 2026 04:59:48 +0000 Subject: [PATCH 5/8] Review comments --- cloudbuild-cleanup.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cloudbuild-cleanup.yaml b/cloudbuild-cleanup.yaml index 277147f..68769c7 100644 --- a/cloudbuild-cleanup.yaml +++ b/cloudbuild-cleanup.yaml @@ -1,8 +1,13 @@ steps: - name: $_TEST_RUNNER_IMAGE + id: cleanup dir: / + timeout: 600s args: - cleanup - --environment=$_E2E_ENVIRONMENT - --project-id=$PROJECT_ID - --test-run-id=$_TEST_RUN_ID + +substitutions: + _TEST_RUN_ID: $BUILD_ID From 42ffa25baeadba7fd4987868525442d6a6cd6902 Mon Sep 17 00:00:00 2001 From: aaronabbott Date: Thu, 30 Apr 2026 05:06:39 +0000 Subject: [PATCH 6/8] Testing From ea31c7e1838dc11e36b3f2eb5e404f93ac66d502 Mon Sep 17 00:00:00 2001 From: aaronabbott Date: Thu, 30 Apr 2026 15:02:46 +0000 Subject: [PATCH 7/8] Implement Alternative 2 for durable cleanup (Custom Pub/Sub topic) --- cloudbuild-e2e-cloud-functions-gen2.yaml | 3 +++ cloudbuild-e2e-cloud-run.yaml | 3 +++ cloudbuild-e2e-gae-standard.yaml | 3 +++ cloudbuild-e2e-gae.yaml | 3 +++ cloudbuild-e2e-gce.yaml | 3 +++ cloudbuild-e2e-gke.yaml | 3 +++ tf/persistent/repo-ci-triggers.tf | 13 ++++++++++--- 7 files changed, 28 insertions(+), 3 deletions(-) diff --git a/cloudbuild-e2e-cloud-functions-gen2.yaml b/cloudbuild-e2e-cloud-functions-gen2.yaml index 467239f..2ae2798 100644 --- a/cloudbuild-e2e-cloud-functions-gen2.yaml +++ b/cloudbuild-e2e-cloud-functions-gen2.yaml @@ -72,3 +72,6 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup diff --git a/cloudbuild-e2e-cloud-run.yaml b/cloudbuild-e2e-cloud-run.yaml index f8c667a..772b373 100644 --- a/cloudbuild-e2e-cloud-run.yaml +++ b/cloudbuild-e2e-cloud-run.yaml @@ -48,3 +48,6 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup diff --git a/cloudbuild-e2e-gae-standard.yaml b/cloudbuild-e2e-gae-standard.yaml index 1b8b7b7..95fe26e 100644 --- a/cloudbuild-e2e-gae-standard.yaml +++ b/cloudbuild-e2e-gae-standard.yaml @@ -71,3 +71,6 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup diff --git a/cloudbuild-e2e-gae.yaml b/cloudbuild-e2e-gae.yaml index fa9509e..64837c3 100644 --- a/cloudbuild-e2e-gae.yaml +++ b/cloudbuild-e2e-gae.yaml @@ -49,3 +49,6 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup diff --git a/cloudbuild-e2e-gce.yaml b/cloudbuild-e2e-gce.yaml index 850c40f..7538c09 100644 --- a/cloudbuild-e2e-gce.yaml +++ b/cloudbuild-e2e-gce.yaml @@ -48,3 +48,6 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup diff --git a/cloudbuild-e2e-gke.yaml b/cloudbuild-e2e-gke.yaml index 49fad55..f8ca1b6 100644 --- a/cloudbuild-e2e-gke.yaml +++ b/cloudbuild-e2e-gke.yaml @@ -48,3 +48,6 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup diff --git a/tf/persistent/repo-ci-triggers.tf b/tf/persistent/repo-ci-triggers.tf index f1f5f79..f879a45 100644 --- a/tf/persistent/repo-ci-triggers.tf +++ b/tf/persistent/repo-ci-triggers.tf @@ -40,20 +40,25 @@ module "go" { run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] } +resource "google_pubsub_topic" "e2e_cleanup" { + name = "e2e-cleanup" + project = var.project_id +} + resource "google_cloudbuild_trigger" "global_cleanup" { name = "global-e2e-cleanup" description = "Global cleanup for E2E tests triggered by Pub/Sub" pubsub_config { - topic = "projects/${var.project_id}/topics/cloud-builds" + topic = google_pubsub_topic.e2e_cleanup.id } - filter = "\"terraform-resources\" in body.message.data.tags && (body.message.data.status == \"SUCCESS\" || body.message.data.status == \"FAILURE\")" + filter = "(_BUILD_TAGS.contains(\"terraform-resources\") || _BUILD_TAGS.contains(\"ops-e2e-testing\")) && (_BUILD_STATUS == \"SUCCESS\" || _BUILD_STATUS == \"FAILURE\")" git_file_source { path = "cloudbuild-cleanup.yaml" uri = "https://github.com/GoogleCloudPlatform/opentelemetry-operations-e2e-testing" - revision = "refs/heads/main" + revision = "refs/heads/durable-async-cleanup" repo_type = "GITHUB" } @@ -61,5 +66,7 @@ resource "google_cloudbuild_trigger" "global_cleanup" { _TEST_RUN_ID = "$(body.message.data.id)" _E2E_ENVIRONMENT = "$(body.message.data.substitutions._E2E_ENVIRONMENT)" _TEST_RUNNER_IMAGE = "$(body.message.data.substitutions._TEST_RUNNER_IMAGE)" + _BUILD_TAGS = "$(body.message.data.tags)" + _BUILD_STATUS = "$(body.message.data.status)" } } From 328fcc8cbe20e82f931899ba7ae6bbd9f3ecb3bf Mon Sep 17 00:00:00 2001 From: aaronabbott Date: Thu, 30 Apr 2026 16:42:24 +0000 Subject: [PATCH 8/8] Add IAM permissions for Cloud Build service agent and use custom service account in E2E tests --- cloudbuild-e2e-cloud-functions-gen2.yaml | 1 + cloudbuild-e2e-cloud-run.yaml | 1 + cloudbuild-e2e-gae-standard.yaml | 1 + cloudbuild-e2e-gae.yaml | 1 + cloudbuild-e2e-gce.yaml | 1 + cloudbuild-e2e-gke.yaml | 1 + tf/modules/repo-ci-triggers/main.tf | 8 +++++ tf/persistent/repo-ci-triggers.tf | 43 ++++++++++++++---------- 8 files changed, 40 insertions(+), 17 deletions(-) diff --git a/cloudbuild-e2e-cloud-functions-gen2.yaml b/cloudbuild-e2e-cloud-functions-gen2.yaml index 2ae2798..b4454f2 100644 --- a/cloudbuild-e2e-cloud-functions-gen2.yaml +++ b/cloudbuild-e2e-cloud-functions-gen2.yaml @@ -75,3 +75,4 @@ substitutions: options: pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/cloudbuild-e2e-cloud-run.yaml b/cloudbuild-e2e-cloud-run.yaml index 772b373..6442539 100644 --- a/cloudbuild-e2e-cloud-run.yaml +++ b/cloudbuild-e2e-cloud-run.yaml @@ -51,3 +51,4 @@ substitutions: options: pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/cloudbuild-e2e-gae-standard.yaml b/cloudbuild-e2e-gae-standard.yaml index 95fe26e..35a7c6c 100644 --- a/cloudbuild-e2e-gae-standard.yaml +++ b/cloudbuild-e2e-gae-standard.yaml @@ -74,3 +74,4 @@ substitutions: options: pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/cloudbuild-e2e-gae.yaml b/cloudbuild-e2e-gae.yaml index 64837c3..d43b841 100644 --- a/cloudbuild-e2e-gae.yaml +++ b/cloudbuild-e2e-gae.yaml @@ -52,3 +52,4 @@ substitutions: options: pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/cloudbuild-e2e-gce.yaml b/cloudbuild-e2e-gce.yaml index 7538c09..d955d96 100644 --- a/cloudbuild-e2e-gce.yaml +++ b/cloudbuild-e2e-gce.yaml @@ -51,3 +51,4 @@ substitutions: options: pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/cloudbuild-e2e-gke.yaml b/cloudbuild-e2e-gke.yaml index f8ca1b6..68eee26 100644 --- a/cloudbuild-e2e-gke.yaml +++ b/cloudbuild-e2e-gke.yaml @@ -51,3 +51,4 @@ substitutions: options: pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/tf/modules/repo-ci-triggers/main.tf b/tf/modules/repo-ci-triggers/main.tf index 8bbe0b9..b818aa3 100644 --- a/tf/modules/repo-ci-triggers/main.tf +++ b/tf/modules/repo-ci-triggers/main.tf @@ -32,6 +32,7 @@ resource "google_cloudbuild_trigger" "build_image" { "build" ] include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS" + service_account = var.service_account } // Run tests @@ -55,12 +56,19 @@ resource "google_cloudbuild_trigger" "ci" { "terraform-resources" ] include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS" + service_account = var.service_account substitutions = { _E2E_ENVIRONMENT = each.key } } +variable "service_account" { + type = string + description = "The service account to use for the triggers" + default = null +} + variable "project_id" { type = string description = "The GCP project ID" diff --git a/tf/persistent/repo-ci-triggers.tf b/tf/persistent/repo-ci-triggers.tf index f879a45..9d7ae82 100644 --- a/tf/persistent/repo-ci-triggers.tf +++ b/tf/persistent/repo-ci-triggers.tf @@ -13,31 +13,35 @@ # limitations under the License. module "python" { - source = "../modules/repo-ci-triggers" - project_id = var.project_id - repository = "opentelemetry-operations-python" - run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + source = "../modules/repo-ci-triggers" + project_id = var.project_id + repository = "opentelemetry-operations-python" + run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + service_account = "projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com" } module "java" { - source = "../modules/repo-ci-triggers" - project_id = var.project_id - repository = "opentelemetry-operations-java" - run_on = ["local", "gce", "gke", "gae", "cloud-run", "cloud-functions-gen2"] + source = "../modules/repo-ci-triggers" + project_id = var.project_id + repository = "opentelemetry-operations-java" + run_on = ["local", "gce", "gke", "gae", "cloud-run", "cloud-functions-gen2"] + service_account = "projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com" } module "js" { - source = "../modules/repo-ci-triggers" - project_id = var.project_id - repository = "opentelemetry-operations-js" - run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + source = "../modules/repo-ci-triggers" + project_id = var.project_id + repository = "opentelemetry-operations-js" + run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + service_account = "projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com" } module "go" { - source = "../modules/repo-ci-triggers" - project_id = var.project_id - repository = "opentelemetry-operations-go" - run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + source = "../modules/repo-ci-triggers" + project_id = var.project_id + repository = "opentelemetry-operations-go" + run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + service_account = "projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com" } resource "google_pubsub_topic" "e2e_cleanup" { @@ -53,7 +57,8 @@ resource "google_cloudbuild_trigger" "global_cleanup" { topic = google_pubsub_topic.e2e_cleanup.id } - filter = "(_BUILD_TAGS.contains(\"terraform-resources\") || _BUILD_TAGS.contains(\"ops-e2e-testing\")) && (_BUILD_STATUS == \"SUCCESS\" || _BUILD_STATUS == \"FAILURE\")" + # TODO: Add tag filter back once triggers are updated with tags in latchkey + filter = "_BUILD_STATUS == \"SUCCESS\" || _BUILD_STATUS == \"FAILURE\"" git_file_source { path = "cloudbuild-cleanup.yaml" @@ -62,6 +67,8 @@ resource "google_cloudbuild_trigger" "global_cleanup" { repo_type = "GITHUB" } + service_account = "projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com" + substitutions = { _TEST_RUN_ID = "$(body.message.data.id)" _E2E_ENVIRONMENT = "$(body.message.data.substitutions._E2E_ENVIRONMENT)" @@ -70,3 +77,5 @@ resource "google_cloudbuild_trigger" "global_cleanup" { _BUILD_STATUS = "$(body.message.data.status)" } } + +# TODO: add to internal permission tooling (latchkey)