diff --git a/cloudbuild-cleanup.yaml b/cloudbuild-cleanup.yaml new file mode 100644 index 0000000..68769c7 --- /dev/null +++ b/cloudbuild-cleanup.yaml @@ -0,0 +1,13 @@ +steps: + - name: $_TEST_RUNNER_IMAGE + id: cleanup + dir: / + timeout: 600s + args: + - cleanup + - --environment=$_E2E_ENVIRONMENT + - --project-id=$PROJECT_ID + - --test-run-id=$_TEST_RUN_ID + +substitutions: + _TEST_RUN_ID: $BUILD_ID diff --git a/cloudbuild-e2e-cloud-functions-gen2.yaml b/cloudbuild-e2e-cloud-functions-gen2.yaml index 467239f..b4454f2 100644 --- a/cloudbuild-e2e-cloud-functions-gen2.yaml +++ b/cloudbuild-e2e-cloud-functions-gen2.yaml @@ -72,3 +72,7 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/cloudbuild-e2e-cloud-run.yaml b/cloudbuild-e2e-cloud-run.yaml index f8c667a..6442539 100644 --- a/cloudbuild-e2e-cloud-run.yaml +++ b/cloudbuild-e2e-cloud-run.yaml @@ -48,3 +48,7 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/cloudbuild-e2e-gae-standard.yaml b/cloudbuild-e2e-gae-standard.yaml index 1b8b7b7..35a7c6c 100644 --- a/cloudbuild-e2e-gae-standard.yaml +++ b/cloudbuild-e2e-gae-standard.yaml @@ -71,3 +71,7 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/cloudbuild-e2e-gae.yaml b/cloudbuild-e2e-gae.yaml index fa9509e..d43b841 100644 --- a/cloudbuild-e2e-gae.yaml +++ b/cloudbuild-e2e-gae.yaml @@ -49,3 +49,7 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/cloudbuild-e2e-gce.yaml b/cloudbuild-e2e-gce.yaml index 850c40f..d955d96 100644 --- a/cloudbuild-e2e-gce.yaml +++ b/cloudbuild-e2e-gce.yaml @@ -48,3 +48,7 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/cloudbuild-e2e-gke.yaml b/cloudbuild-e2e-gke.yaml index 49fad55..68eee26 100644 --- a/cloudbuild-e2e-gke.yaml +++ b/cloudbuild-e2e-gke.yaml @@ -48,3 +48,7 @@ logsBucket: gs://opentelemetry-ops-e2e-cloud-build-logs substitutions: _TEST_RUNNER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-e2e-testing:${SHORT_SHA} _TEST_SERVER_IMAGE: us-central1-docker.pkg.dev/${PROJECT_ID}/e2e-testing/opentelemetry-operations-go-e2e-test-server:${SHORT_SHA} + +options: + pubsubTopic: projects/opentelemetry-ops-e2e/topics/e2e-cleanup +serviceAccount: projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com diff --git a/e2etesting/e2e_testing.go b/e2etesting/e2e_testing.go index 44b72d9..b812364 100644 --- a/e2etesting/e2e_testing.go +++ b/e2etesting/e2e_testing.go @@ -36,7 +36,7 @@ type CmdWithProjectId struct { } type CmdWithImage struct { - Image string `arg:"required" help:"docker container image to deploy and test"` + Image string `help:"docker container image to deploy and test"` } type LocalCmd struct { @@ -80,7 +80,7 @@ type GkeCmd struct { type GaeCmd struct { CmdWithImage - Runtime string `arg:"required" help:"The language runtime for the instrumented test server, used in naming the service"` + Runtime string `help:"The language runtime for the instrumented test server, used in naming the service"` } type GaeStandardCmd struct { @@ -106,12 +106,18 @@ type CloudFunctionsGen2Cmd struct { FunctionSource string `arg:"required" help:"The full path of the zip file that contains the code source that needs to run within the CloudFunction"` } +type CleanupCmd struct { + Environment string `arg:"required" help:"The environment to clean up (e.g. cloud-run)"` +} + type Args struct { // This subcommand is a special case, it doesn't run any tests. It just // applies the persistent resources which are used across tests. See // tf/persistent/README.md for details on what is in there. ApplyPersistent *ApplyPersistent `arg:"subcommand:apply-persistent" help:"Terraform apply the resources in tf/persistent and exit (does not run tests)."` + Cleanup *CleanupCmd `arg:"subcommand:cleanup" help:"Clean up resources for a test run"` + Local *LocalCmd `arg:"subcommand:local" help:"Deploy the test server locally with docker and execute tests"` Gke *GkeCmd `arg:"subcommand:gke" help:"Deploy the test server on GKE and execute tests"` Gce *GceCmd `arg:"subcommand:gce" help:"Deploy the test server on GCE and execute tests"` @@ -135,6 +141,7 @@ type Args struct { // resources created for debugging. If not provided, we generate a hex // string. TestRunID string `arg:"--test-run-id,env:TEST_RUN_ID" help:"Optional test run id to use to partition terraform resources"` + AutoCleanup bool `arg:"--auto-cleanup" help:"Enable automatic cleanup after tests (default: false). For deferred cleanup, use the 'cleanup' subcommand."` } type Cleanup func() diff --git a/e2etesting/setuptf/setuptf.go b/e2etesting/setuptf/setuptf.go index c7bffa1..6badc45 100644 --- a/e2etesting/setuptf/setuptf.go +++ b/e2etesting/setuptf/setuptf.go @@ -255,3 +255,47 @@ func tfVarMapToArgs( } return tfVarArgs } + +// CleanupTf runs terraform destroy and deletes the workspace. +func CleanupTf( + ctx context.Context, + projectID string, + testRunID string, + tfDir string, + tfVars map[string]string, + logger *log.Logger, +) error { + tfVarArgs := tfVarMapToArgs(projectID, tfVars) + cmd := initCommand(ctx, projectID) + cmd.Args = append(cmd.Args, tfVarArgs...) + cmd.Dir = tfDir + if err := runWithOutput(cmd, logger); err != nil { + return err + } + + // Switch to target workspace + cmd = exec.CommandContext(ctx, "terraform", "workspace", "select", testRunID) + cmd.Dir = tfDir + if err := runWithOutput(cmd, logger); err != nil { + return err + } + + // Run terraform destroy + cmd = exec.CommandContext( + ctx, + "terraform", + "destroy", + "-input=false", + "-auto-approve", + ) + cmd.Args = append(cmd.Args, tfVarArgs...) + cmd.Dir = tfDir + if err := runWithOutput(cmd, logger); err != nil { + return err + } + + // Delete workspace + deleteWorkspace(ctx, testRunID, tfDir, logger) + + return nil +} diff --git a/e2etestrunner/main_test.go b/e2etestrunner/main_test.go index 08d4d30..fdb8e4f 100644 --- a/e2etestrunner/main_test.go +++ b/e2etestrunner/main_test.go @@ -19,6 +19,7 @@ package e2etestrunner import ( "context" + "os" "testing" "github.com/GoogleCloudPlatform/opentelemetry-operations-e2e-testing/e2etesting" @@ -35,6 +36,33 @@ var ( func TestMain(m *testing.M) { logger, ctx := e2etesting.InitTestMain(&args, setuptf.ApplyPersistent) + if args.Cleanup != nil { + var tfDir string + switch args.Cleanup.Environment { + case "cloud-run": + tfDir = "tf/cloud-run" + case "gke": + tfDir = "tf/gke" + case "gce": + tfDir = "tf/gce" + case "gae": + tfDir = "tf/gae" + case "gae-standard": + tfDir = "tf/gae-standard" + case "cloud-functions-gen2": + tfDir = "tf/cloud-functions-gen2" + default: + logger.Panicf("Unknown environment for cleanup: %s", args.Cleanup.Environment) + } + // We might need dummy vars if TF requires them for destroy + tfVars := map[string]string{"image": "dummy", "runtime": "dummy"} + err := setuptf.CleanupTf(ctx, args.ProjectID, args.TestRunID, tfDir, tfVars, logger) + if err != nil { + logger.Panic(err) + } + os.Exit(0) + } + var setupFunc e2etesting.SetupFunc switch { case args.Local != nil: @@ -54,7 +82,12 @@ func TestMain(m *testing.M) { } client, cleanup, err := setupFunc(ctx, &args, logger) - defer cleanup() + if args.AutoCleanup { + defer cleanup() + } else { + logger.Println("Skipping auto-cleanup (default behavior)") + } + if err != nil { logger.Panic(err) } diff --git a/tf/modules/repo-ci-triggers/main.tf b/tf/modules/repo-ci-triggers/main.tf index dda542b..b818aa3 100644 --- a/tf/modules/repo-ci-triggers/main.tf +++ b/tf/modules/repo-ci-triggers/main.tf @@ -32,6 +32,7 @@ resource "google_cloudbuild_trigger" "build_image" { "build" ] include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS" + service_account = var.service_account } // Run tests @@ -52,8 +53,25 @@ resource "google_cloudbuild_trigger" "ci" { tags = [ local.repo_short_name, each.key, + "terraform-resources" ] include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS" + service_account = var.service_account + + substitutions = { + _E2E_ENVIRONMENT = each.key + } +} + +variable "service_account" { + type = string + description = "The service account to use for the triggers" + default = null +} + +variable "project_id" { + type = string + description = "The GCP project ID" } variable "repository" { diff --git a/tf/persistent/repo-ci-triggers.tf b/tf/persistent/repo-ci-triggers.tf index 72e493c..9d7ae82 100644 --- a/tf/persistent/repo-ci-triggers.tf +++ b/tf/persistent/repo-ci-triggers.tf @@ -13,25 +13,69 @@ # limitations under the License. module "python" { - source = "../modules/repo-ci-triggers" - repository = "opentelemetry-operations-python" - run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + source = "../modules/repo-ci-triggers" + project_id = var.project_id + repository = "opentelemetry-operations-python" + run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + service_account = "projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com" } module "java" { - source = "../modules/repo-ci-triggers" - repository = "opentelemetry-operations-java" - run_on = ["local", "gce", "gke", "gae", "cloud-run", "cloud-functions-gen2"] + source = "../modules/repo-ci-triggers" + project_id = var.project_id + repository = "opentelemetry-operations-java" + run_on = ["local", "gce", "gke", "gae", "cloud-run", "cloud-functions-gen2"] + service_account = "projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com" } module "js" { - source = "../modules/repo-ci-triggers" - repository = "opentelemetry-operations-js" - run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + source = "../modules/repo-ci-triggers" + project_id = var.project_id + repository = "opentelemetry-operations-js" + run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + service_account = "projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com" } module "go" { - source = "../modules/repo-ci-triggers" - repository = "opentelemetry-operations-go" - run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + source = "../modules/repo-ci-triggers" + project_id = var.project_id + repository = "opentelemetry-operations-go" + run_on = ["local", "gce", "gke", "gae", "gae-standard", "cloud-run", "cloud-functions-gen2"] + service_account = "projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com" } + +resource "google_pubsub_topic" "e2e_cleanup" { + name = "e2e-cleanup" + project = var.project_id +} + +resource "google_cloudbuild_trigger" "global_cleanup" { + name = "global-e2e-cleanup" + description = "Global cleanup for E2E tests triggered by Pub/Sub" + + pubsub_config { + topic = google_pubsub_topic.e2e_cleanup.id + } + + # TODO: Add tag filter back once triggers are updated with tags in latchkey + filter = "_BUILD_STATUS == \"SUCCESS\" || _BUILD_STATUS == \"FAILURE\"" + + git_file_source { + path = "cloudbuild-cleanup.yaml" + uri = "https://github.com/GoogleCloudPlatform/opentelemetry-operations-e2e-testing" + revision = "refs/heads/durable-async-cleanup" + repo_type = "GITHUB" + } + + service_account = "projects/opentelemetry-ops-e2e/serviceAccounts/e2e-cloudbuild-runner@opentelemetry-ops-e2e.iam.gserviceaccount.com" + + substitutions = { + _TEST_RUN_ID = "$(body.message.data.id)" + _E2E_ENVIRONMENT = "$(body.message.data.substitutions._E2E_ENVIRONMENT)" + _TEST_RUNNER_IMAGE = "$(body.message.data.substitutions._TEST_RUNNER_IMAGE)" + _BUILD_TAGS = "$(body.message.data.tags)" + _BUILD_STATUS = "$(body.message.data.status)" + } +} + +# TODO: add to internal permission tooling (latchkey)