diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index f2323e0816..6ff389bf8d 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -59,6 +59,7 @@ func GetNewClient(options ...Option) plugin.NewClientFunc { }, }, PermissionsNeeded: []string{"permission1"}, + SensitiveColumns: []string{"col1"}, Relations: schema.Tables{ { Name: "table2", diff --git a/plugin/testing_validation.go b/plugin/testing_validation.go index 07c71518e2..3a0d4c177e 100644 --- a/plugin/testing_validation.go +++ b/plugin/testing_validation.go @@ -16,3 +16,15 @@ func ValidateNoEmptyColumns(t *testing.T, tables schema.Tables, messages message } } } + +func ValidateSensitivColumns(t *testing.T, tables schema.Tables) { + for _, table := range tables.FlattenTables() { + nonMatchingColumns, nonMatchingJSONColumns := schema.FindNotMatchingSensitiveColumns(table) + if len(nonMatchingColumns) > 0 { + t.Fatalf("found non-matching sensitive column(s): %v in %s", nonMatchingColumns, table.Name) + } + if len(nonMatchingJSONColumns) > 0 { + t.Fatalf("found non-matching sensitive JSON column(s): %v in %s", nonMatchingJSONColumns, table.Name) + } + } +} diff --git a/schema/arrow.go b/schema/arrow.go index 680e53087d..0560426695 100644 --- a/schema/arrow.go +++ b/schema/arrow.go @@ -27,6 +27,7 @@ const ( MetadataTableDependsOn = "cq:table_depends_on" MetadataTableIsPaid = "cq:table_paid" MetadataTablePermissionsNeeded = "cq:table_permissions_needed" + MetadataTableSensitiveColumns = "cq:table_sensitive_columns" ) type Schemas []*arrow.Schema diff --git a/schema/table.go b/schema/table.go index cb21e5b3e5..e4a2adde08 100644 --- a/schema/table.go +++ b/schema/table.go @@ -66,6 +66,8 @@ type Table struct { Description string `json:"description"` // List of permissions needed to access this table, if any. For example ["Microsoft.Network/dnsZones/read"] or ["storage.buckets.list"] PermissionsNeeded []string `json:"permissions_needed"` + // List of columns that may contain sensitive or secret data + SensitiveColumns []string `json:"sensitive_columns"` // Columns are the set of fields that are part of this table Columns ColumnList `json:"columns"` // Relations are a set of related tables defines @@ -188,6 +190,7 @@ func NewTableFromArrowSchema(sc *arrow.Schema) (*Table, error) { title, _ := tableMD.GetValue(MetadataTableTitle) dependsOn, _ := tableMD.GetValue(MetadataTableDependsOn) permissionsNeeded, _ := tableMD.GetValue(MetadataTablePermissionsNeeded) + sensitiveColumns, _ := tableMD.GetValue(MetadataTableSensitiveColumns) var parent *Table if dependsOn != "" { parent = &Table{Name: dependsOn} @@ -200,6 +203,8 @@ func NewTableFromArrowSchema(sc *arrow.Schema) (*Table, error) { var permissionsNeededArr []string _ = json.Unmarshal([]byte(permissionsNeeded), &permissionsNeededArr) + var sensitiveColumnsArr []string + _ = json.Unmarshal([]byte(sensitiveColumns), &sensitiveColumnsArr) table := &Table{ Name: name, Description: description, @@ -208,6 +213,7 @@ func NewTableFromArrowSchema(sc *arrow.Schema) (*Table, error) { Title: title, Parent: parent, PermissionsNeeded: permissionsNeededArr, + SensitiveColumns: sensitiveColumnsArr, } if isIncremental, found := tableMD.GetValue(MetadataIncremental); found { table.IsIncremental = isIncremental == MetadataTrue @@ -493,6 +499,8 @@ func (t *Table) ToArrowSchema() *arrow.Schema { } asJSON, _ := json.Marshal(t.PermissionsNeeded) md[MetadataTablePermissionsNeeded] = string(asJSON) + asJSON, _ = json.Marshal(t.SensitiveColumns) + md[MetadataTableSensitiveColumns] = string(asJSON) schemaMd := arrow.MetadataFrom(md) for i, c := range t.Columns { diff --git a/schema/table_test.go b/schema/table_test.go index f8f67b74fb..5dd523c0a4 100644 --- a/schema/table_test.go +++ b/schema/table_test.go @@ -731,6 +731,7 @@ func TestTablesToAndFromArrow(t *testing.T) { {Name: "multiple_attributes", Type: arrow.BinaryTypes.String, PrimaryKey: true, IncrementalKey: true, NotNull: true, Unique: true}, }, PermissionsNeeded: []string{"storage.buckets.list", "compute.acceleratorTypes.list", "test,test"}, + SensitiveColumns: []string{"string", "json"}, }, } diff --git a/schema/validators.go b/schema/validators.go index e42dabe8e3..bf02b79aec 100644 --- a/schema/validators.go +++ b/schema/validators.go @@ -2,6 +2,8 @@ package schema import ( "encoding/json" + "slices" + "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/cloudquery/plugin-sdk/v4/types" @@ -40,6 +42,35 @@ func FindEmptyColumns(table *Table, records []arrow.Record) []string { return emptyColumns } +func FindNotMatchingSensitiveColumns(table *Table) (nonMatchingColumns []string, nonMatchingJSONColumns []string) { + if len(table.SensitiveColumns) == 0 { + return []string{}, []string{} + } + nonMatchingColumns = make([]string, 0) + nonMatchingJSONColumns = make([]string, 0) + tableColumns := table.Columns.Names() + for _, c := range table.SensitiveColumns { + isJSONPath := false + if strings.Contains(c, ".") { + c = strings.Split(c, ".")[0] + isJSONPath = true + } + if !slices.Contains(tableColumns, c) { + nonMatchingColumns = append(nonMatchingColumns, c) + continue + } + if !isJSONPath { + continue + } + col := table.Columns.Get(c) + if !arrow.TypeEqual(col.Type, types.ExtensionTypes.JSON) { + nonMatchingJSONColumns = append(nonMatchingJSONColumns, c) + continue + } + } + return nonMatchingColumns, nonMatchingJSONColumns +} + func isEmptyJSON(msg json.RawMessage) bool { if len(msg) == 0 { return true diff --git a/serve/package.go b/serve/package.go index 581b09c407..cd2b809017 100644 --- a/serve/package.go +++ b/serve/package.go @@ -103,6 +103,7 @@ func (s *PluginServe) writeTablesJSON(ctx context.Context, dir string) error { Title: &table.Title, Columns: &columns, PermissionsNeeded: &table.PermissionsNeeded, + SensitiveColumns: &table.SensitiveColumns, }) } buffer := &bytes.Buffer{} diff --git a/serve/testdata/memdbtables.json b/serve/testdata/memdbtables.json index 9b1225d09d..ba3631ef36 100644 --- a/serve/testdata/memdbtables.json +++ b/serve/testdata/memdbtables.json @@ -17,7 +17,8 @@ "relations": ["table2"], "title": "", "is_paid": false, - "permissions_needed": ["permission1"] + "permissions_needed": ["permission1"], + "sensitive_columns": ["col1"] }, { "columns": [ @@ -38,7 +39,8 @@ "name": "table2", "title": "", "is_paid": false, - "permissions_needed": null + "permissions_needed": null, + "sensitive_columns": null }, { "columns": [ @@ -59,6 +61,7 @@ "name": "table3", "title": "", "is_paid": true, - "permissions_needed": null + "permissions_needed": null, + "sensitive_columns": null } ]