From a50c3309753e89608164354b93044c7ef4ab9e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Le=C5=9Bniewski?= Date: Tue, 29 Apr 2025 17:35:32 +0200 Subject: [PATCH 1/5] add SensitiveColumns to tables schema --- internal/memdb/memdb.go | 1 + schema/arrow.go | 1 + schema/table.go | 8 ++++++++ schema/table_test.go | 1 + serve/package.go | 1 + serve/testdata/memdbtables.json | 9 ++++++--- 6 files changed, 18 insertions(+), 3 deletions(-) diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index f2323e0816..6ff389bf8d 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -59,6 +59,7 @@ func GetNewClient(options ...Option) plugin.NewClientFunc { }, }, PermissionsNeeded: []string{"permission1"}, + SensitiveColumns: []string{"col1"}, Relations: schema.Tables{ { Name: "table2", diff --git a/schema/arrow.go b/schema/arrow.go index 680e53087d..0560426695 100644 --- a/schema/arrow.go +++ b/schema/arrow.go @@ -27,6 +27,7 @@ const ( MetadataTableDependsOn = "cq:table_depends_on" MetadataTableIsPaid = "cq:table_paid" MetadataTablePermissionsNeeded = "cq:table_permissions_needed" + MetadataTableSensitiveColumns = "cq:table_sensitive_columns" ) type Schemas []*arrow.Schema diff --git a/schema/table.go b/schema/table.go index cb21e5b3e5..e4a2adde08 100644 --- a/schema/table.go +++ b/schema/table.go @@ -66,6 +66,8 @@ type Table struct { Description string `json:"description"` // List of permissions needed to access this table, if any. For example ["Microsoft.Network/dnsZones/read"] or ["storage.buckets.list"] PermissionsNeeded []string `json:"permissions_needed"` + // List of columns that may contain sensitive or secret data + SensitiveColumns []string `json:"sensitive_columns"` // Columns are the set of fields that are part of this table Columns ColumnList `json:"columns"` // Relations are a set of related tables defines @@ -188,6 +190,7 @@ func NewTableFromArrowSchema(sc *arrow.Schema) (*Table, error) { title, _ := tableMD.GetValue(MetadataTableTitle) dependsOn, _ := tableMD.GetValue(MetadataTableDependsOn) permissionsNeeded, _ := tableMD.GetValue(MetadataTablePermissionsNeeded) + sensitiveColumns, _ := tableMD.GetValue(MetadataTableSensitiveColumns) var parent *Table if dependsOn != "" { parent = &Table{Name: dependsOn} @@ -200,6 +203,8 @@ func NewTableFromArrowSchema(sc *arrow.Schema) (*Table, error) { var permissionsNeededArr []string _ = json.Unmarshal([]byte(permissionsNeeded), &permissionsNeededArr) + var sensitiveColumnsArr []string + _ = json.Unmarshal([]byte(sensitiveColumns), &sensitiveColumnsArr) table := &Table{ Name: name, Description: description, @@ -208,6 +213,7 @@ func NewTableFromArrowSchema(sc *arrow.Schema) (*Table, error) { Title: title, Parent: parent, PermissionsNeeded: permissionsNeededArr, + SensitiveColumns: sensitiveColumnsArr, } if isIncremental, found := tableMD.GetValue(MetadataIncremental); found { table.IsIncremental = isIncremental == MetadataTrue @@ -493,6 +499,8 @@ func (t *Table) ToArrowSchema() *arrow.Schema { } asJSON, _ := json.Marshal(t.PermissionsNeeded) md[MetadataTablePermissionsNeeded] = string(asJSON) + asJSON, _ = json.Marshal(t.SensitiveColumns) + md[MetadataTableSensitiveColumns] = string(asJSON) schemaMd := arrow.MetadataFrom(md) for i, c := range t.Columns { diff --git a/schema/table_test.go b/schema/table_test.go index f8f67b74fb..5dd523c0a4 100644 --- a/schema/table_test.go +++ b/schema/table_test.go @@ -731,6 +731,7 @@ func TestTablesToAndFromArrow(t *testing.T) { {Name: "multiple_attributes", Type: arrow.BinaryTypes.String, PrimaryKey: true, IncrementalKey: true, NotNull: true, Unique: true}, }, PermissionsNeeded: []string{"storage.buckets.list", "compute.acceleratorTypes.list", "test,test"}, + SensitiveColumns: []string{"string", "json"}, }, } diff --git a/serve/package.go b/serve/package.go index 581b09c407..cd2b809017 100644 --- a/serve/package.go +++ b/serve/package.go @@ -103,6 +103,7 @@ func (s *PluginServe) writeTablesJSON(ctx context.Context, dir string) error { Title: &table.Title, Columns: &columns, PermissionsNeeded: &table.PermissionsNeeded, + SensitiveColumns: &table.SensitiveColumns, }) } buffer := &bytes.Buffer{} diff --git a/serve/testdata/memdbtables.json b/serve/testdata/memdbtables.json index 9b1225d09d..ba3631ef36 100644 --- a/serve/testdata/memdbtables.json +++ b/serve/testdata/memdbtables.json @@ -17,7 +17,8 @@ "relations": ["table2"], "title": "", "is_paid": false, - "permissions_needed": ["permission1"] + "permissions_needed": ["permission1"], + "sensitive_columns": ["col1"] }, { "columns": [ @@ -38,7 +39,8 @@ "name": "table2", "title": "", "is_paid": false, - "permissions_needed": null + "permissions_needed": null, + "sensitive_columns": null }, { "columns": [ @@ -59,6 +61,7 @@ "name": "table3", "title": "", "is_paid": true, - "permissions_needed": null + "permissions_needed": null, + "sensitive_columns": null } ] From 127b6efd9d61fa4676e553c6784f2c5868eb8589 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Le=C5=9Bniewski?= Date: Tue, 6 May 2025 17:57:53 +0200 Subject: [PATCH 2/5] add sensitive column validation --- plugin/testing_validation.go | 7 +++++++ schema/validators.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/plugin/testing_validation.go b/plugin/testing_validation.go index 07c71518e2..6e7e16680e 100644 --- a/plugin/testing_validation.go +++ b/plugin/testing_validation.go @@ -14,5 +14,12 @@ func ValidateNoEmptyColumns(t *testing.T, tables schema.Tables, messages message if len(emptyColumns) > 0 { t.Fatalf("found empty column(s): %v in %s", emptyColumns, table.Name) } + nonMatchingColumns, nonMatchingJsonColumns := schema.FindNotMatchingSensitiveColumns(table, records) + if len(nonMatchingColumns) > 0 { + t.Fatalf("found non-matching sensitive column(s): %v in %s", nonMatchingColumns, table.Name) + } + if len(nonMatchingJsonColumns) > 0 { + t.Fatalf("found non-matching sensitive JSON column(s): %v in %s", nonMatchingJsonColumns, table.Name) + } } } diff --git a/schema/validators.go b/schema/validators.go index e42dabe8e3..217309a18b 100644 --- a/schema/validators.go +++ b/schema/validators.go @@ -2,6 +2,8 @@ package schema import ( "encoding/json" + "slices" + "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/cloudquery/plugin-sdk/v4/types" @@ -40,6 +42,36 @@ func FindEmptyColumns(table *Table, records []arrow.Record) []string { return emptyColumns } +func FindNotMatchingSensitiveColumns(table *Table, records []arrow.Record) ([]string, []string) { + if len(table.SensitiveColumns) == 0 { + return []string{}, []string{} + } + + nonMatchingColumns := make([]string, 0) + nonMatchingJsonColumns := make([]string, 0) + tableColumns := table.Columns.Names() + for _, c := range table.SensitiveColumns { + isJsonPath := false + if strings.Contains(c, ".") { + c = strings.Split(c, ".")[0] + isJsonPath = true + } + if !slices.Contains(tableColumns, c) { + nonMatchingColumns = append(nonMatchingColumns, c) + continue + } + if !isJsonPath { + continue + } + col := table.Columns.Get(c) + if !arrow.TypeEqual(col.Type, types.ExtensionTypes.JSON) { + nonMatchingJsonColumns = append(nonMatchingJsonColumns, c) + continue + } + } + return nonMatchingColumns, nonMatchingJsonColumns +} + func isEmptyJSON(msg json.RawMessage) bool { if len(msg) == 0 { return true From 65453c040eba0784d07ea48a191af7540c191ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Le=C5=9Bniewski?= Date: Wed, 7 May 2025 17:34:49 +0200 Subject: [PATCH 3/5] update cloudquery-api-go to 1.13.9 --- examples/simple_plugin/go.mod | 2 +- examples/simple_plugin/go.sum | 4 ++-- go.mod | 2 +- go.sum | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/simple_plugin/go.mod b/examples/simple_plugin/go.mod index ac634b5cd4..d2dea78643 100644 --- a/examples/simple_plugin/go.mod +++ b/examples/simple_plugin/go.mod @@ -32,7 +32,7 @@ require ( github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/buger/jsonparser v1.1.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect - github.com/cloudquery/cloudquery-api-go v1.13.8 // indirect + github.com/cloudquery/cloudquery-api-go v1.13.9 // indirect github.com/cloudquery/codegen v0.3.26 // indirect github.com/cloudquery/plugin-pb-go v1.26.10 // indirect github.com/cloudquery/plugin-sdk/v2 v2.7.0 // indirect diff --git a/examples/simple_plugin/go.sum b/examples/simple_plugin/go.sum index 8ec16f7f29..536d38f188 100644 --- a/examples/simple_plugin/go.sum +++ b/examples/simple_plugin/go.sum @@ -50,8 +50,8 @@ github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMU github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= -github.com/cloudquery/cloudquery-api-go v1.13.8 h1:8n5D0G2wynbUdexr1GS8ND8i0uOwm0gXKNipJsijPe0= -github.com/cloudquery/cloudquery-api-go v1.13.8/go.mod h1:ZhEjPkDGDL2KZKlQLUnsgQ0mPz3qC7qftr37q3q+IcA= +github.com/cloudquery/cloudquery-api-go v1.13.9 h1:XudJusEJ0+kPa2/GXoZPuphR8gTRvHZ49dq53sTrWME= +github.com/cloudquery/cloudquery-api-go v1.13.9/go.mod h1:ZhEjPkDGDL2KZKlQLUnsgQ0mPz3qC7qftr37q3q+IcA= github.com/cloudquery/codegen v0.3.26 h1:cWORVpObYW5/0LnjC0KO/Ocg1+vbZivJfFd+sMpb5ZY= github.com/cloudquery/codegen v0.3.26/go.mod h1:bg/M1JxFvNVABMLMFb/uAQmTGAyI2L/E4zL4kho9RFs= github.com/cloudquery/plugin-pb-go v1.26.10 h1:VNRk3JMLR7+pCXGCk4729I8r3vTrn64qonCs+4KY7+M= diff --git a/go.mod b/go.mod index 254993412e..b338097820 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/licensemanager v1.30.3 github.com/aws/aws-sdk-go-v2/service/marketplacemetering v1.29.0 github.com/bradleyjkemp/cupaloy/v2 v2.8.0 - github.com/cloudquery/cloudquery-api-go v1.13.8 + github.com/cloudquery/cloudquery-api-go v1.13.9 github.com/cloudquery/codegen v0.3.26 github.com/cloudquery/plugin-pb-go v1.26.10 github.com/cloudquery/plugin-sdk/v2 v2.7.0 diff --git a/go.sum b/go.sum index 265557a509..822428bd30 100644 --- a/go.sum +++ b/go.sum @@ -50,8 +50,8 @@ github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMU github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= -github.com/cloudquery/cloudquery-api-go v1.13.8 h1:8n5D0G2wynbUdexr1GS8ND8i0uOwm0gXKNipJsijPe0= -github.com/cloudquery/cloudquery-api-go v1.13.8/go.mod h1:ZhEjPkDGDL2KZKlQLUnsgQ0mPz3qC7qftr37q3q+IcA= +github.com/cloudquery/cloudquery-api-go v1.13.9 h1:XudJusEJ0+kPa2/GXoZPuphR8gTRvHZ49dq53sTrWME= +github.com/cloudquery/cloudquery-api-go v1.13.9/go.mod h1:ZhEjPkDGDL2KZKlQLUnsgQ0mPz3qC7qftr37q3q+IcA= github.com/cloudquery/codegen v0.3.26 h1:cWORVpObYW5/0LnjC0KO/Ocg1+vbZivJfFd+sMpb5ZY= github.com/cloudquery/codegen v0.3.26/go.mod h1:bg/M1JxFvNVABMLMFb/uAQmTGAyI2L/E4zL4kho9RFs= github.com/cloudquery/jsonschema v0.0.0-20240220124159-92878faa2a66 h1:OZLPSIBYEfvkAUeOeM8CwTgVQy5zhayI99ishCrsFV0= From 5a282db15254a64acc8a17a1dbea22c9f086f05e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Le=C5=9Bniewski?= Date: Wed, 7 May 2025 17:42:05 +0200 Subject: [PATCH 4/5] lint --- plugin/testing_validation.go | 6 +++--- schema/validators.go | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/plugin/testing_validation.go b/plugin/testing_validation.go index 6e7e16680e..ef7745d352 100644 --- a/plugin/testing_validation.go +++ b/plugin/testing_validation.go @@ -14,12 +14,12 @@ func ValidateNoEmptyColumns(t *testing.T, tables schema.Tables, messages message if len(emptyColumns) > 0 { t.Fatalf("found empty column(s): %v in %s", emptyColumns, table.Name) } - nonMatchingColumns, nonMatchingJsonColumns := schema.FindNotMatchingSensitiveColumns(table, records) + nonMatchingColumns, nonMatchingJSONColumns := schema.FindNotMatchingSensitiveColumns(table) if len(nonMatchingColumns) > 0 { t.Fatalf("found non-matching sensitive column(s): %v in %s", nonMatchingColumns, table.Name) } - if len(nonMatchingJsonColumns) > 0 { - t.Fatalf("found non-matching sensitive JSON column(s): %v in %s", nonMatchingJsonColumns, table.Name) + if len(nonMatchingJSONColumns) > 0 { + t.Fatalf("found non-matching sensitive JSON column(s): %v in %s", nonMatchingJSONColumns, table.Name) } } } diff --git a/schema/validators.go b/schema/validators.go index 217309a18b..604096fa25 100644 --- a/schema/validators.go +++ b/schema/validators.go @@ -42,34 +42,34 @@ func FindEmptyColumns(table *Table, records []arrow.Record) []string { return emptyColumns } -func FindNotMatchingSensitiveColumns(table *Table, records []arrow.Record) ([]string, []string) { +func FindNotMatchingSensitiveColumns(table *Table) (nonMatchingColumns []string, nonMatchingJSONColumns []string) { if len(table.SensitiveColumns) == 0 { return []string{}, []string{} } - nonMatchingColumns := make([]string, 0) - nonMatchingJsonColumns := make([]string, 0) + nonMatchingColumns = make([]string, 0) + nonMatchingJSONColumns = make([]string, 0) tableColumns := table.Columns.Names() for _, c := range table.SensitiveColumns { - isJsonPath := false + isJSONPath := false if strings.Contains(c, ".") { c = strings.Split(c, ".")[0] - isJsonPath = true + isJSONPath = true } if !slices.Contains(tableColumns, c) { nonMatchingColumns = append(nonMatchingColumns, c) continue } - if !isJsonPath { + if !isJSONPath { continue } col := table.Columns.Get(c) if !arrow.TypeEqual(col.Type, types.ExtensionTypes.JSON) { - nonMatchingJsonColumns = append(nonMatchingJsonColumns, c) + nonMatchingJSONColumns = append(nonMatchingJSONColumns, c) continue } } - return nonMatchingColumns, nonMatchingJsonColumns + return nonMatchingColumns, nonMatchingJSONColumns } func isEmptyJSON(msg json.RawMessage) bool { From 353473d958bf0dd30cb583a74c028595fa8d5e5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Le=C5=9Bniewski?= Date: Thu, 8 May 2025 12:28:54 +0200 Subject: [PATCH 5/5] separate sensitive column validation into own function --- plugin/testing_validation.go | 5 +++++ schema/validators.go | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/plugin/testing_validation.go b/plugin/testing_validation.go index ef7745d352..3a0d4c177e 100644 --- a/plugin/testing_validation.go +++ b/plugin/testing_validation.go @@ -14,6 +14,11 @@ func ValidateNoEmptyColumns(t *testing.T, tables schema.Tables, messages message if len(emptyColumns) > 0 { t.Fatalf("found empty column(s): %v in %s", emptyColumns, table.Name) } + } +} + +func ValidateSensitivColumns(t *testing.T, tables schema.Tables) { + for _, table := range tables.FlattenTables() { nonMatchingColumns, nonMatchingJSONColumns := schema.FindNotMatchingSensitiveColumns(table) if len(nonMatchingColumns) > 0 { t.Fatalf("found non-matching sensitive column(s): %v in %s", nonMatchingColumns, table.Name) diff --git a/schema/validators.go b/schema/validators.go index 604096fa25..bf02b79aec 100644 --- a/schema/validators.go +++ b/schema/validators.go @@ -46,7 +46,6 @@ func FindNotMatchingSensitiveColumns(table *Table) (nonMatchingColumns []string, if len(table.SensitiveColumns) == 0 { return []string{}, []string{} } - nonMatchingColumns = make([]string, 0) nonMatchingJSONColumns = make([]string, 0) tableColumns := table.Columns.Names()