Skip to content

Commit 39587c2

Browse files
feat(bigquery): Support collation feature (googleapis#30919)
--------- Co-authored-by: Yoshi Automation Bot <yoshi-automation@google.com>
1 parent 2e6e025 commit 39587c2

10 files changed

Lines changed: 201 additions & 9 deletions

File tree

google-cloud-bigquery/acceptance/bigquery/dataset_test.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,20 @@
173173
dataset.default_expiration = nil
174174
end
175175

176+
it "sets and gets the default_collation" do
177+
new_default_collation = "und:ci"
178+
179+
dataset.default_collation = new_default_collation
180+
181+
fresh = bigquery.dataset dataset.dataset_id
182+
_(fresh).wont_be :nil?
183+
_(fresh).must_be_kind_of Google::Cloud::Bigquery::Dataset
184+
_(fresh.dataset_id).must_equal dataset.dataset_id
185+
_(fresh.default_collation).must_equal new_default_collation
186+
ensure
187+
dataset.default_collation = nil
188+
end
189+
176190
it "should fail to set metadata with stale etag" do
177191
fresh = bigquery.dataset dataset.dataset_id
178192
_(fresh.etag).wont_be :nil?

google-cloud-bigquery/acceptance/bigquery/table_test.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,18 @@
190190
_(table.labels).must_equal new_labels
191191
end
192192

193+
it "sets and gets the default_collation" do
194+
new_default_collation = "und:ci"
195+
196+
table.default_collation = new_default_collation
197+
198+
fresh = dataset.table table.table_id
199+
_(fresh).wont_be :nil?
200+
_(fresh).must_be_kind_of Google::Cloud::Bigquery::Table
201+
_(fresh.table_id).must_equal table.table_id
202+
_(fresh.default_collation).must_equal new_default_collation
203+
end
204+
193205
it "loads and reloads table with partial projection of table metadata" do
194206
_(table.table_id).must_equal table_id # ensure table is created
195207
%w[unspecified basic storage full].each do |view|
@@ -1186,6 +1198,19 @@
11861198
_(table.schema.fields.map(&:default_value_expression)).must_be :==, schema_fields_default.map(&:default_value_expression)
11871199
end
11881200

1201+
it "creates a table with a field with collation" do
1202+
t = nil
1203+
begin
1204+
t = dataset.create_table "#{prefix}_table_collation_test" do |schema|
1205+
schema.string "name", mode: :required, collation: "und:ci"
1206+
end
1207+
t.reload!
1208+
_(t.schema.field("name").collation).must_equal "und:ci"
1209+
ensure
1210+
t.delete if t
1211+
end
1212+
end
1213+
11891214
it "restores snapshot into a table" do
11901215
begin
11911216
result = table.clone target_clone_table

google-cloud-bigquery/lib/google/cloud/bigquery/dataset.rb

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,37 @@ def description= new_description
197197
patch_gapi! :description
198198
end
199199

200+
##
201+
# The default collation of the dataset.
202+
#
203+
# @return [String, nil] The default collation, or `nil` if not present or the object is a
204+
# reference (see {#reference?}).
205+
#
206+
# @!group Attributes
207+
#
208+
def default_collation
209+
return nil if reference?
210+
ensure_full_data!
211+
@gapi.default_collation
212+
end
213+
214+
##
215+
# Updates the default collation of the dataset.
216+
#
217+
# If the dataset is not a full resource representation (see
218+
# {#resource_full?}), the full representation will be retrieved before
219+
# the update to comply with ETag-based optimistic concurrency control.
220+
#
221+
# @param [String] new_default_collation The new default collation for the dataset.
222+
#
223+
# @!group Attributes
224+
#
225+
def default_collation= new_default_collation
226+
reload! unless resource_full?
227+
@gapi.update! default_collation: new_default_collation
228+
patch_gapi! :default_collation
229+
end
230+
200231
##
201232
# The default lifetime of all tables in the dataset, in milliseconds.
202233
#

google-cloud-bigquery/lib/google/cloud/bigquery/schema.rb

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,13 +318,14 @@ def dump destination
318318
# "[CURRENT_DATE(), DATE '2020-01-01']"
319319
#
320320
def string name, description: nil, mode: :nullable, policy_tags: nil,
321-
max_length: nil, default_value_expression: nil
321+
max_length: nil, default_value_expression: nil, collation: nil
322322
add_field name, :string,
323323
description: description,
324324
mode: mode,
325325
policy_tags: policy_tags,
326326
max_length: max_length,
327-
default_value_expression: default_value_expression
327+
default_value_expression: default_value_expression,
328+
collation: collation
328329
end
329330

330331
##
@@ -981,7 +982,8 @@ def add_field name,
981982
max_length: nil,
982983
precision: nil,
983984
scale: nil,
984-
default_value_expression: nil
985+
default_value_expression: nil,
986+
collation: nil
985987
frozen_check!
986988

987989
new_gapi = Google::Apis::BigqueryV2::TableFieldSchema.new(
@@ -999,6 +1001,7 @@ def add_field name,
9991001
new_gapi.precision = precision if precision
10001002
new_gapi.scale = scale if scale
10011003
new_gapi.default_value_expression = default_value_expression if default_value_expression
1004+
new_gapi.collation = collation if collation
10021005
# Remove any existing field of this name
10031006
@gapi.fields ||= []
10041007
@gapi.fields.reject! { |f| f.name == new_gapi.name }

google-cloud-bigquery/lib/google/cloud/bigquery/schema/field.rb

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,31 @@ def scale
351351
@gapi.scale
352352
end
353353

354+
##
355+
# The collation of the field.
356+
#
357+
# Collation can be set only when the type of field is `STRING`.
358+
# The following values are supported:
359+
#
360+
# * `und:ci`: undetermined locale, case insensitive.
361+
# * (empty string): Default to case-sensitive behavior.
362+
#
363+
# @return [String, nil] The collation for the field, or `nil`.
364+
#
365+
def collation
366+
@gapi.collation
367+
end
368+
369+
##
370+
# Updates the collation of the field.
371+
#
372+
# @param [String] new_collation The new collation. See {#collation}
373+
# for supported values.
374+
#
375+
def collation= new_collation
376+
@gapi.update! collation: new_collation
377+
end
378+
354379
##
355380
# Checks if the type of the field is `STRING`.
356381
#
@@ -568,15 +593,16 @@ def field name
568593
# @param [Integer] max_length The maximum UTF-8 length of strings
569594
# allowed in the field.
570595
#
571-
def string name, description: nil, mode: :nullable, policy_tags: nil, max_length: nil
596+
def string name, description: nil, mode: :nullable, policy_tags: nil, max_length: nil, collation: nil
572597
record_check!
573598

574599
add_field name,
575600
:string,
576601
description: description,
577602
mode: mode,
578603
policy_tags: policy_tags,
579-
max_length: max_length
604+
max_length: max_length,
605+
collation: collation
580606
end
581607

582608
##
@@ -1029,7 +1055,8 @@ def add_field name,
10291055
policy_tags: nil,
10301056
max_length: nil,
10311057
precision: nil,
1032-
scale: nil
1058+
scale: nil,
1059+
collation: nil
10331060
frozen_check!
10341061

10351062
new_gapi = Google::Apis::BigqueryV2::TableFieldSchema.new(
@@ -1046,6 +1073,7 @@ def add_field name,
10461073
new_gapi.max_length = max_length if max_length
10471074
new_gapi.precision = precision if precision
10481075
new_gapi.scale = scale if scale
1076+
new_gapi.collation = collation if collation
10491077
# Remove any existing field of this name
10501078
@gapi.fields ||= []
10511079
@gapi.fields.reject! { |f| f.name == new_gapi.name }

google-cloud-bigquery/lib/google/cloud/bigquery/table.rb

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,37 @@ def description= new_description
752752
patch_gapi! :description
753753
end
754754

755+
##
756+
# The default collation of the table.
757+
#
758+
# @return [String, nil] The default collation, or `nil` if not present or the object is a
759+
# reference (see {#reference?}).
760+
#
761+
# @!group Attributes
762+
#
763+
def default_collation
764+
return nil if reference?
765+
ensure_full_data!
766+
@gapi.default_collation
767+
end
768+
769+
##
770+
# Updates the default collation of the table.
771+
#
772+
# If the table is not a full resource representation (see
773+
# {#resource_full?}), the full representation will be retrieved before
774+
# the update to comply with ETag-based optimistic concurrency control.
775+
#
776+
# @param [String] new_default_collation The new default collation for the table.
777+
#
778+
# @!group Attributes
779+
#
780+
def default_collation= new_default_collation
781+
reload! unless resource_full?
782+
@gapi.update! default_collation: new_default_collation
783+
patch_gapi! :default_collation
784+
end
785+
755786
##
756787
# The number of bytes in the table.
757788
#
@@ -3706,7 +3737,7 @@ def schema replace: false
37063737
# At most 1 policy tag is currently allowed.
37073738
# @param [Integer] max_length The maximum UTF-8 length of strings
37083739
# allowed in the field.
3709-
# @param default_value_expression [String] The default value of a field
3740+
# @param [String] default_value_expression The default value of a field
37103741
# using a SQL expression. It can only be set for top level fields (columns).
37113742
# Use a struct or array expression to specify default value for the entire struct or
37123743
# array. The valid SQL expressions are:
@@ -3722,6 +3753,11 @@ def schema replace: false
37223753
# `ST_GEOPOINT`
37233754
# - Struct or array composed with the above allowed functions, for example:
37243755
# "[CURRENT_DATE(), DATE '2020-01-01']"
3756+
# @param [String] collation The collation of the field.
3757+
# Collation can be set only when the type of field is `STRING`.
3758+
# The following values are supported:
3759+
# - `und:ci`: undetermined locale, case insensitive.
3760+
# - (empty string): Default to case-sensitive behavior.
37253761
#
37263762
# @example
37273763
# require "google/cloud/bigquery"
@@ -3743,9 +3779,9 @@ def schema replace: false
37433779
#
37443780
# @!group Schema
37453781
def string name, description: nil, mode: :nullable, policy_tags: nil, max_length: nil,
3746-
default_value_expression: nil
3782+
default_value_expression: nil, collation: nil
37473783
schema.string name, description: description, mode: mode, policy_tags: policy_tags, max_length: max_length,
3748-
default_value_expression: default_value_expression
3784+
default_value_expression: default_value_expression, collation: collation
37493785
end
37503786

37513787
##

google-cloud-bigquery/test/google/cloud/bigquery/dataset_attributes_test.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,4 +107,16 @@ def self.attr_test attr, val
107107
attr_test :default_expiration, 999
108108
attr_test :etag, "etag123456789"
109109
attr_test :api_url, "http://googleapi/bigquery/v2/projects/test-project/datasets/my_dataset"
110+
111+
it "gets full data for default_collation" do
112+
mock = Minitest::Mock.new
113+
bigquery.service.mocked_service = mock
114+
mock.expect :get_dataset, dataset_full_gapi, [project, dataset_id], access_policy_version: nil
115+
116+
_(dataset.default_collation).must_equal "und:ci"
117+
118+
# A second call to the attribute does not make a second HTTP API call
119+
_(dataset.default_collation).must_equal "und:ci"
120+
mock.verify
121+
end
110122
end

google-cloud-bigquery/test/google/cloud/bigquery/table_attributes_test.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,4 +134,17 @@ def self.attr_test attr, val
134134
attr_test :buffer_bytes, 2000
135135
attr_test :buffer_rows, 200
136136

137+
it "gets full data for default_collation" do
138+
mock = Minitest::Mock.new
139+
mock.expect :get_table, table_full_gapi,
140+
[table.project_id, table.dataset_id, table.table_id], **patch_table_args
141+
table.service.mocked_service = mock
142+
143+
_(table.default_collation).must_equal "und:ci"
144+
145+
mock.verify
146+
147+
# A second call to the attribute does not make a second HTTP API call
148+
table.default_collation
149+
end
137150
end

google-cloud-bigquery/test/google/cloud/bigquery/table_schema_test.rb

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,4 +421,32 @@
421421

422422
_(table.schema.fields.map(&:default_value_expression)).must_be :==, [field_string_required_gapi_default.default_value_expression]
423423
end
424+
425+
it "sets the collation on a string field" do
426+
mock = Minitest::Mock.new
427+
field_string_collation_gapi = Google::Apis::BigqueryV2::TableFieldSchema.new(
428+
name: "first_name",
429+
type: "STRING",
430+
mode: "REQUIRED",
431+
collation: "und:ci",
432+
description: nil,
433+
fields: []
434+
)
435+
new_schema_gapi = Google::Apis::BigqueryV2::TableSchema.new(
436+
fields: [field_string_collation_gapi])
437+
returned_table_gapi = table_gapi.dup
438+
returned_table_gapi.schema = new_schema_gapi
439+
patch_table_gapi = Google::Apis::BigqueryV2::Table.new schema: new_schema_gapi, etag: etag
440+
mock.expect :patch_table, returned_table_gapi,
441+
[table.project_id, table.dataset_id, table.table_id, patch_table_gapi], options: {header: {"If-Match" => etag}}
442+
table.service.mocked_service = mock
443+
444+
table.schema replace: true do |schema|
445+
schema.string "first_name", mode: :required, collation: "und:ci"
446+
end
447+
448+
mock.verify
449+
450+
_(table.schema.field("first_name").collation).must_equal "und:ci"
451+
end
424452
end

google-cloud-bigquery/test/helper.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ def random_dataset_hash id = nil, name = nil, description = nil, default_expirat
8484
"friendlyName" => name,
8585
"description" => description,
8686
"defaultTableExpirationMs" => default_expiration,
87+
"defaultCollation" => "und:ci",
8788
"access" => [],
8889
"creationTime" => time_millis,
8990
"lastModifiedTime" => time_millis,
@@ -275,6 +276,7 @@ def random_table_hash dataset, id = nil, name = nil, description = nil, project_
275276
"lastModifiedTime" => time_millis,
276277
"location" => "US",
277278
"labels" => { "foo" => "bar" },
279+
"defaultCollation" => "und:ci",
278280
"streamingBuffer" => {
279281
"estimatedBytes" => "2000", # String per google/google-api-ruby-client
280282
"estimatedRows" => "200", # String per google/google-api-ruby-client

0 commit comments

Comments
 (0)