Skip to content

Commit 5b4b12c

Browse files
author
remi Taylor
committed
BigQuery samples: export data to GCS, run queries
1 parent 6e4a6ad commit 5b4b12c

2 files changed

Lines changed: 149 additions & 27 deletions

File tree

bigquery_sample/bigquery_samples.rb

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def delete_table project_id:, dataset_id:, table_id:
117117
def list_table_data project_id:, dataset_id:, table_id:
118118
# [START list_table_data]
119119
# project_id = "Your Google Cloud project ID"
120-
# dataset_id = "ID of the dataset delete table from"
120+
# dataset_id = "ID of the dataset containing table"
121121
# table_id = "ID of the table to display data for"
122122

123123
require "google/cloud"
@@ -139,7 +139,7 @@ def import_table_data_from_file project_id:, dataset_id:, table_id:,
139139
local_file_path:
140140
# [START import_table_data_from_file]
141141
# project_id = "Your Google Cloud project ID"
142-
# dataset_id = "ID of the dataset delete table from"
142+
# dataset_id = "ID of the dataset containing table"
143143
# table_id = "ID of the table to import file data into"
144144
# local_file_path = "Path to local file to import into BigQuery table"
145145

@@ -162,9 +162,9 @@ def import_table_data_from_file project_id:, dataset_id:, table_id:,
162162

163163
def import_table_data_from_cloud_storage project_id:, dataset_id:, table_id:,
164164
storage_path:
165-
# [START import_table_data_from_file]
165+
# [START import_table_data_from_cloud_storage]
166166
# project_id = "Your Google Cloud project ID"
167-
# dataset_id = "ID of the dataset delete table from"
167+
# dataset_id = "ID of the dataset containing table"
168168
# table_id = "ID of the table to import file data into"
169169
# storage_path = "Storage path to file to import, eg. gs://bucket/file.csv"
170170

@@ -182,7 +182,73 @@ def import_table_data_from_cloud_storage project_id:, dataset_id:, table_id:,
182182
load_job.wait_until_done!
183183

184184
puts "Data imported"
185-
# [END import_table_data_from_file]
185+
# [END import_table_data_from_cloud_storage]
186+
end
187+
188+
# Exports the contents of a BigQuery table to a file in Cloud Storage.
#
# project_id   - ID of the Google Cloud project that owns the dataset
# dataset_id   - ID of the dataset containing table
# table_id     - ID of the table to export file data from
# storage_path - Cloud Storage destination, eg. gs://bucket/file.csv
def export_table_data_to_cloud_storage project_id:, dataset_id:, table_id:,
                                       storage_path:
  # [START export_table_data_to_cloud_storage]
  # project_id = "Your Google Cloud project ID"
  # dataset_id = "ID of the dataset containing table"
  # table_id = "ID of the table to export file data from"
  # storage_path = "Storage path to export to, eg. gs://bucket/file.csv"

  require "google/cloud"

  bigquery = Google::Cloud.new(project_id).bigquery
  table    = bigquery.dataset(dataset_id).table(table_id)

  puts "Exporting data to Cloud Storage file: #{storage_path}"
  extract_job = table.extract storage_path

  # extract runs asynchronously; block until the job finishes.
  puts "Waiting for extract job to complete: #{extract_job.job_id}"
  extract_job.wait_until_done!

  puts "Data exported"
  # [END export_table_data_to_cloud_storage]
end
212+
213+
# Runs a BigQuery query synchronously and prints each result row.
#
# project_id   - ID of the Google Cloud project to run the query in
# query_string - Query to execute, in BigQuery query syntax
def run_query_sync project_id:, query_string:
  # [START run_query_sync]
  # project_id = "Your Google Cloud project ID"
  # query_string = "Query string to execute (using BigQuery query syntax)"

  require "google/cloud"

  gcloud = Google::Cloud.new project_id
  bigquery = gcloud.bigquery

  # Blocks until the query completes, then returns the result rows.
  data = bigquery.query query_string

  data.each do |row|
    puts row.inspect
  end
  # [END run_query_sync]
end
230+
231+
# Runs a BigQuery query as an asynchronous job, waits for it to
# complete, and prints each result row.
#
# project_id   - ID of the Google Cloud project to run the query in
# query_string - Query to execute, in BigQuery query syntax
def run_query_async project_id:, query_string:
  # [START run_query_async]
  # project_id = "Your Google Cloud project ID"
  # query_string = "Query string to execute (using BigQuery query syntax)"

  require "google/cloud"

  gcloud = Google::Cloud.new project_id
  bigquery = gcloud.bigquery

  puts "Running query"
  query_job = bigquery.query_job query_string

  # query_job returns immediately; poll until the job finishes.
  puts "Waiting for query to complete"
  query_job.wait_until_done!

  puts "Query results:"
  query_job.query_results.each do |row|
    puts row.inspect
  end
  # [END run_query_async]
end
187253

188254
# TODO: separate sample into separate executable files

bigquery_sample/spec/bigquery_sample_spec.rb

Lines changed: 78 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,6 @@
1717
require "google/cloud"
1818
require "csv"
1919

20-
# TODO: move some helpers to a shared directory and update other specs
21-
# require_relative "../../shared/spec_helpers"
22-
# require "spec_helper"
23-
#
24-
# ...
25-
#
26-
# require_relative "../../shared/spec_helpers"
27-
#
28-
# require "spec_helper/tempfile_helper"
29-
# require "spec_helper/csv_file_helper"
30-
# require "spec_helper/cloud_storage_helper"
31-
# require "spec_helper/capture_output"
32-
# require "spec_helper/bigquery_helper"
33-
3420
RSpec.describe "Google Cloud BigQuery samples" do
3521

3622
before do
@@ -49,6 +35,10 @@
4935
schema.string "name"
5036
schema.integer "value"
5137
end
38+
39+
if @bucket.file "bigquery-test.csv"
40+
@bucket.file("bigquery-test.csv").delete
41+
end
5242
end
5343

5444
after do
@@ -63,6 +53,13 @@ def delete_test_dataset!
6353
dataset.delete if dataset
6454
end
6555

56+
# Helper to create Tempfile that will be cleaned up after test run.
# The file is named bigquery-test.<extension> and is registered in
# @tempfiles so the suite can remove it later.
def create_tempfile extension = "txt"
  Tempfile.new([ "bigquery-test", ".#{extension}" ]).tap do |tempfile|
    @tempfiles << tempfile
  end
end
62+
6663
# Helper to create and return CSV file.
6764
# The block will be passed a CSV object.
6865
#
@@ -74,9 +71,8 @@ def delete_test_dataset!
7471
#
7572
# puts file.path
7673
def create_csv &block
  # Write the caller's rows into a tracked tempfile, then hand the
  # Tempfile back so the caller can read its path.
  create_tempfile("csv").tap do |file|
    CSV.open file.path, "w", &block
  end
end
8278

@@ -176,9 +172,7 @@ def capture &block
176172
csv << [ "Bob", 10 ]
177173
end
178174

179-
load_job = @table.load csv_file.path
180-
181-
load_job.wait_until_done!
175+
@table.load(csv_file.path).wait_until_done!
182176

183177
expect {
184178
list_table_data project_id: @project_id,
@@ -269,11 +263,73 @@ def capture &block
269263
end
270264

271265
describe "Exporting data" do
  # Integration test: loads known rows into the test table, exports the
  # table to the test bucket, then downloads the exported CSV and checks
  # its contents. Requires live BigQuery and Cloud Storage resources
  # (@table, @bucket, @project_id are set up elsewhere in this spec).
  example "export data to Cloud Storage" do
    csv_file = create_csv do |csv|
      csv << [ "Alice", 5 ]
      csv << [ "Bob", 10 ]
    end

    @table.load(csv_file.path).wait_until_done!

    # The before hook deletes any leftover export, so the target file
    # must not exist yet.
    expect(@bucket.file "bigquery-test.csv").to be nil

    capture do
      export_table_data_to_cloud_storage(
        project_id: @project_id,
        dataset_id: @dataset.dataset_id,
        table_id: @table.table_id,
        storage_path: "gs://#{@bucket.name}/bigquery-test.csv"
      )
    end

    expect(captured_output).to include(
      "Exporting data to Cloud Storage file: " +
      "gs://#{@bucket.name}/bigquery-test.csv"
    )
    expect(captured_output).to match(
      /Waiting for extract job to complete: job_\w+/
    )
    expect(captured_output).to include "Data exported"

    # The export must now exist in the bucket.
    expect(@bucket.file "bigquery-test.csv").not_to be nil

    # Download the exported file and verify header row plus data rows.
    local_file = create_tempfile "csv"
    @bucket.file("bigquery-test.csv").download local_file.path

    csv = CSV.read local_file.path

    expect(csv[0]).to eq %w[ name value ]
    expect(csv[1]).to eq %w[ Alice 5 ]
    expect(csv[2]).to eq %w[ Bob 10 ]
  end
end
274306

275307
describe "Querying" do
  # Both examples query the publicdata:samples.shakespeare public
  # dataset, so they exercise the samples against live BigQuery.
  example "run query" do
    capture do
      run_query_sync(
        project_id: @project_id,
        query_string: "SELECT TOP(word, 50) as word, COUNT(*) as count " +
                      "FROM publicdata:samples.shakespeare"
      )
    end

    # NOTE(review): assumes the word "you" appears with count 42 in the
    # top-50 results of the public sample — verify against the dataset.
    expect(captured_output).to include '{"word"=>"you", "count"=>42}'
  end

  example "run query as job" do
    capture do
      run_query_async(
        project_id: @project_id,
        query_string: "SELECT TOP(word, 50) as word, COUNT(*) as count " +
                      "FROM publicdata:samples.shakespeare"
      )
    end

    # The async sample prints progress messages as well as results.
    expect(captured_output).to include "Running query"
    expect(captured_output).to include "Waiting for query to complete"
    expect(captured_output).to include "Query results:"
    expect(captured_output).to include '{"word"=>"you", "count"=>42}'
  end
end
279335
end

0 commit comments

Comments
 (0)