Skip to content

Commit 524ddb7

Browse files
author
remi Taylor
committed
BigQuery sample: import data from Cloud Storage file
1 parent 48f6b27 commit 524ddb7

4 files changed

Lines changed: 94 additions & 4 deletions

File tree

bigquery_sample/Gemfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,5 @@ group :test do
2020
gem "rake"
2121
gem "rubocop"
2222
gem "rspec"
23+
gem "google-cloud-storage"
2324
end

bigquery_sample/Gemfile.lock

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ GEM
44
addressable (2.4.0)
55
ast (2.3.0)
66
diff-lcs (1.2.5)
7+
digest-crc (0.4.1)
78
faraday (0.9.2)
89
multipart-post (>= 1.2, < 3)
910
google-api-client (0.9.13)
@@ -19,6 +20,10 @@ GEM
1920
google-api-client (~> 0.9.11)
2021
google-cloud-core (~> 0.20.0)
2122
google-cloud-core (0.20.1)
23+
google-cloud-storage (0.20.0)
24+
digest-crc (~> 0.4)
25+
google-api-client (~> 0.9.11)
26+
google-cloud-core (~> 0.20.0)
2227
googleauth (0.5.1)
2328
faraday (~> 0.9)
2429
jwt (~> 1.4)
@@ -82,6 +87,7 @@ PLATFORMS
8287

8388
DEPENDENCIES
8489
google-cloud-bigquery
90+
google-cloud-storage
8591
rake
8692
rspec
8793
rubocop

bigquery_sample/bigquery_samples.rb

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,10 @@ def list_table_data project_id:, dataset_id:, table_id:
138138
def import_table_data_from_file project_id:, dataset_id:, table_id:,
139139
local_file_path:
140140
# [START import_table_data_from_file]
141-
# project_id = "Your Google Cloud project ID"
142-
# dataset_id = "ID of the dataset delete table from"
143-
# table_id = "ID of the table to import file data into"
141+
# project_id = "Your Google Cloud project ID"
142+
# dataset_id = "ID of the dataset containing the table"
143+
# table_id = "ID of the table to import file data into"
144+
# local_file_path = "Path to local file to import into BigQuery table"
144145

145146
require "google/cloud"
146147

@@ -159,6 +160,31 @@ def import_table_data_from_file project_id:, dataset_id:, table_id:,
159160
# [END import_table_data_from_file]
160161
end
161162

163+
# Imports data into an existing BigQuery table from a file already stored in
# Google Cloud Storage, and blocks until the load job completes.
#
# project_id   - Google Cloud project ID
# dataset_id   - ID of the dataset containing the table
# table_id     - ID of the table to import file data into
# storage_path - Cloud Storage path of the file to import, eg. gs://bucket/file.csv
def import_table_data_from_cloud_storage project_id:, dataset_id:, table_id:,
                                         storage_path:
  # Region tag must name THIS sample; the original tag was copy-pasted from
  # the local-file sample, which breaks START/END region extraction.
  # [START import_table_data_from_cloud_storage]
  # project_id = "Your Google Cloud project ID"
  # dataset_id = "ID of the dataset containing the table"
  # table_id = "ID of the table to import file data into"
  # storage_path = "Storage path to file to import, eg. gs://bucket/file.csv"

  require "google/cloud"

  gcloud   = Google::Cloud.new project_id
  bigquery = gcloud.bigquery
  dataset  = bigquery.dataset dataset_id
  table    = dataset.table table_id

  puts "Importing data from Cloud Storage file: #{storage_path}"
  load_job = table.load storage_path

  # Load jobs are asynchronous; block until BigQuery reports completion.
  puts "Waiting for load job to complete: #{load_job.job_id}"
  load_job.wait_until_done!

  puts "Data imported"
  # [END import_table_data_from_cloud_storage]
end
187+
162188
# TODO: separate sample into separate executable files
163189
#
164190
if __FILE__ == $PROGRAM_NAME

bigquery_sample/spec/bigquery_sample_spec.rb

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,16 @@
1717
require "google/cloud"
1818
require "csv"
1919

20+
# TODO: refactor CSV creation to simplify & slightly DRY specs
21+
2022
RSpec.describe "Google Cloud BigQuery samples" do
2123

2224
before do
2325
@project_id = ENV["GOOGLE_PROJECT_ID"]
2426
@gcloud = Google::Cloud.new @project_id
2527
@bigquery = @gcloud.bigquery
28+
@storage = @gcloud.storage
29+
@bucket = @storage.bucket ENV["STORAGE_BUCKET"]
2630

2731
# Examples assume that test_dataset does not exist
2832
test_dataset = @bigquery.dataset "test_dataset"
@@ -214,11 +218,64 @@ def capture &block
214218
end
215219
end
216220

217-
example "import data from Cloud Storage"
221+
# Exercises import_table_data_from_cloud_storage end-to-end: builds a CSV,
# uploads it to the test bucket, runs the sample, and verifies both its
# console output and the rows loaded into the table.
example "import data from Cloud Storage" do
  begin
    dataset = @bigquery.create_dataset "test_dataset"

    table = dataset.create_table "test_table" do |schema|
      schema.string "name"
      schema.integer "value"
    end

    csv_file = Tempfile.new %w[ bigquery-test csv ]

    CSV.open csv_file.path, "w" do |csv|
      csv << [ "Alice", 5 ]
      csv << [ "Bob", 10 ]
    end

    @bucket.create_file csv_file.path, "bigquery-test.csv"

    expect(table.data).to be_empty

    capture do
      import_table_data_from_cloud_storage(
        project_id: @project_id,
        dataset_id: "test_dataset",
        table_id: "test_table",
        storage_path: "gs://#{@bucket.name}/bigquery-test.csv"
      )
    end

    expect(captured_output).to include(
      "Importing data from Cloud Storage file: " +
      "gs://#{@bucket.name}/bigquery-test.csv"
    )
    expect(captured_output).to match(
      /Waiting for load job to complete: job_\w+/
    )
    expect(captured_output).to include "Data imported"

    loaded_data = table.data

    expect(loaded_data).not_to be_empty
    expect(loaded_data.count).to eq 2
    expect(loaded_data.first["name"]).to eq "Alice"
    expect(loaded_data.first["value"]).to eq 5
    expect(loaded_data.last["name"]).to eq "Bob"
    expect(loaded_data.last["value"]).to eq 10
  ensure
    # csv_file is nil when an earlier step (create_dataset/create_table)
    # raised before the Tempfile existed; an unguarded close would then
    # raise NoMethodError here and mask the original failure.
    # close flushes, so a separate flush call is unnecessary.
    if csv_file
      csv_file.close
      csv_file.unlink
    end
  end
end
272+
218273
example "stream data import"
219274
end
220275

221276
describe "Exporting data" do
277+
# Needs a CSV file to import into the table before running
278+
# the export command, so refactor CSV code before writing this
222279
example "export data to Cloud Storage"
223280
end
224281

0 commit comments

Comments
 (0)