Skip to content

Commit 5b4b12c

Browse files
author
remi Taylor
committed
BigQuery samples: export data to GCS, run queries
1 parent 6e4a6ad commit 5b4b12c

2 files changed

Lines changed: 149 additions & 27 deletions

File tree

bigquery_sample/bigquery_samples.rb

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def delete_table project_id:, dataset_id:, table_id:
117117
def list_table_data project_id:, dataset_id:, table_id:
118118
# [START list_table_data]
119119
# project_id = "Your Google Cloud project ID"
120-
# dataset_id = "ID of the dataset delete table from"
120+
# dataset_id = "ID of the dataset containing table"
121121
# table_id = "ID of the table to display data for"
122122

123123
require "google/cloud"
@@ -139,7 +139,7 @@ def import_table_data_from_file project_id:, dataset_id:, table_id:,
139139
local_file_path:
140140
# [START import_table_data_from_file]
141141
# project_id = "Your Google Cloud project ID"
142-
# dataset_id = "ID of the dataset delete table from"
142+
# dataset_id = "ID of the dataset containing table"
143143
# table_id = "ID of the table to import file data into"
144144
# local_file_path = "Path to local file to import into BigQuery table"
145145

@@ -162,9 +162,9 @@ def import_table_data_from_file project_id:, dataset_id:, table_id:,
162162

163163
def import_table_data_from_cloud_storage project_id:, dataset_id:, table_id:,
164164
storage_path:
165-
# [START import_table_data_from_file]
165+
# [START import_table_data_from_cloud_storage]
166166
# project_id = "Your Google Cloud project ID"
167-
# dataset_id = "ID of the dataset delete table from"
167+
# dataset_id = "ID of the dataset containing table"
168168
# table_id = "ID of the table to import file data into"
169169
# storage_path = "Storage path to file to import, eg. gs://bucket/file.csv"
170170

@@ -182,7 +182,73 @@ def import_table_data_from_cloud_storage project_id:, dataset_id:, table_id:,
182182
load_job.wait_until_done!
183183

184184
puts "Data imported"
185-
# [END import_table_data_from_file]
185+
# [END import_table_data_from_cloud_storage]
186+
end
187+
188+
# Exports the contents of a BigQuery table to a file in Cloud Storage.
#
# project_id   - ID of the Google Cloud project that owns the dataset
# dataset_id   - ID of the dataset containing table
# table_id     - ID of the table to export file data from
# storage_path - Cloud Storage destination, eg. gs://bucket/file.csv
def export_table_data_to_cloud_storage project_id:, dataset_id:, table_id:,
                                       storage_path:
  # [START export_table_data_to_cloud_storage]
  # project_id = "Your Google Cloud project ID"
  # dataset_id = "ID of the dataset containing table"
  # table_id = "ID of the table to export file data from"
  # storage_path = "Storage path to export to, eg. gs://bucket/file.csv"

  require "google/cloud"

  bigquery = Google::Cloud.new(project_id).bigquery
  table    = bigquery.dataset(dataset_id).table(table_id)

  puts "Exporting data to Cloud Storage file: #{storage_path}"
  extract_job = table.extract storage_path

  # extract runs asynchronously; block until the job finishes.
  puts "Waiting for extract job to complete: #{extract_job.job_id}"
  extract_job.wait_until_done!

  puts "Data exported"
  # [END export_table_data_to_cloud_storage]
end
212+
213+
# Runs a BigQuery query synchronously and prints each result row.
#
# project_id   - ID of the Google Cloud project to run the query in
# query_string - Query to execute, in BigQuery query syntax
def run_query_sync project_id:, query_string:
  # [START run_query_sync]
  # project_id = "Your Google Cloud project ID"
  # query_string = "Query string to execute (using BigQuery query syntax)"

  require "google/cloud"

  gcloud = Google::Cloud.new project_id
  bigquery = gcloud.bigquery

  # Blocks until the query completes, then returns the result rows.
  data = bigquery.query query_string

  data.each do |row|
    puts row.inspect
  end
  # [END run_query_sync]
end
230+
231+
# Runs a BigQuery query as an asynchronous job, waits for it to
# complete, and prints each result row.
#
# project_id   - ID of the Google Cloud project to run the query in
# query_string - Query to execute, in BigQuery query syntax
def run_query_async project_id:, query_string:
  # [START run_query_async]
  # project_id = "Your Google Cloud project ID"
  # query_string = "Query string to execute (using BigQuery query syntax)"

  require "google/cloud"

  gcloud = Google::Cloud.new project_id
  bigquery = gcloud.bigquery

  puts "Running query"
  query_job = bigquery.query_job query_string

  # query_job returns immediately; poll until the job finishes.
  puts "Waiting for query to complete"
  query_job.wait_until_done!

  puts "Query results:"
  query_job.query_results.each do |row|
    puts row.inspect
  end
  # [END run_query_async]
end
187253

188254
# TODO: separate sample into separate executable files

bigquery_sample/spec/bigquery_sample_spec.rb

Lines changed: 78 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,6 @@
1717
require "google/cloud"
1818
require "csv"
1919

20-
# TODO: move some helpers to a shared directory and update other specs
21-
# require_relative "../../shared/spec_helpers"
22-
# require "spec_helper"
23-
#
24-
# ...
25-
#
26-
# require_relative "../../shared/spec_helpers"
27-
#
28-
# require "spec_helper/tempfile_helper"
29-
# require "spec_helper/csv_file_helper"
30-
# require "spec_helper/cloud_storage_helper"
31-
# require "spec_helper/capture_output"
32-
# require "spec_helper/bigquery_helper"
33-
3420
RSpec.describe "Google Cloud BigQuery samples" do
3521

3622
before do
@@ -49,6 +35,10 @@
4935
schema.string "name"
5036
schema.integer "value"
5137
end
38+
39+
if @bucket.file "bigquery-test.csv"
40+
@bucket.file("bigquery-test.csv").delete
41+
end
5242
end
5343

5444
after do
@@ -63,6 +53,13 @@ def delete_test_dataset!
6353
dataset.delete if dataset
6454
end
6555

56+
# Helper to create Tempfile that will be cleaned up after test run.
# The file is named bigquery-test.<extension> and is registered in
# @tempfiles so the suite can remove it later.
def create_tempfile extension = "txt"
  Tempfile.new([ "bigquery-test", ".#{extension}" ]).tap do |tempfile|
    @tempfiles << tempfile
  end
end
62+
6663
# Helper to create and return CSV file.
6764
# The block will be passed a CSV object.
6865
#
@@ -74,9 +71,8 @@ def delete_test_dataset!
7471
#
7572
# puts file.path
7673
def create_csv &block
  # Write the caller's rows into a tracked tempfile, then hand the
  # Tempfile back so the caller can read its path.
  create_tempfile("csv").tap do |file|
    CSV.open file.path, "w", &block
  end
end
8278

@@ -176,9 +172,7 @@ def capture &block
176172
csv << [ "Bob", 10 ]
177173
end
178174

179-
load_job = @table.load csv_file.path
180-
181-
load_job.wait_until_done!
175+
@table.load(csv_file.path).wait_until_done!
182176

183177
expect {
184178
list_table_data project_id: @project_id,
@@ -269,11 +263,73 @@ def capture &block
269263
end
270264

271265
describe "Exporting data" do
  # Integration test: loads known rows into the test table, exports the
  # table to the test bucket, then downloads the exported CSV and checks
  # its contents. Requires live BigQuery and Cloud Storage resources
  # (@table, @bucket, @project_id are set up elsewhere in this spec).
  example "export data to Cloud Storage" do
    csv_file = create_csv do |csv|
      csv << [ "Alice", 5 ]
      csv << [ "Bob", 10 ]
    end

    @table.load(csv_file.path).wait_until_done!

    # The before hook deletes any leftover export, so the target file
    # must not exist yet.
    expect(@bucket.file "bigquery-test.csv").to be nil

    capture do
      export_table_data_to_cloud_storage(
        project_id: @project_id,
        dataset_id: @dataset.dataset_id,
        table_id: @table.table_id,
        storage_path: "gs://#{@bucket.name}/bigquery-test.csv"
      )
    end

    expect(captured_output).to include(
      "Exporting data to Cloud Storage file: " +
      "gs://#{@bucket.name}/bigquery-test.csv"
    )
    expect(captured_output).to match(
      /Waiting for extract job to complete: job_\w+/
    )
    expect(captured_output).to include "Data exported"

    # The export must now exist in the bucket.
    expect(@bucket.file "bigquery-test.csv").not_to be nil

    # Download the exported file and verify header row plus data rows.
    local_file = create_tempfile "csv"
    @bucket.file("bigquery-test.csv").download local_file.path

    csv = CSV.read local_file.path

    expect(csv[0]).to eq %w[ name value ]
    expect(csv[1]).to eq %w[ Alice 5 ]
    expect(csv[2]).to eq %w[ Bob 10 ]
  end
end
274306

275307
describe "Querying" do
  # Both examples query the publicdata:samples.shakespeare public
  # dataset, so they exercise the samples against live BigQuery.
  example "run query" do
    capture do
      run_query_sync(
        project_id: @project_id,
        query_string: "SELECT TOP(word, 50) as word, COUNT(*) as count " +
                      "FROM publicdata:samples.shakespeare"
      )
    end

    # NOTE(review): assumes the word "you" appears with count 42 in the
    # top-50 results of the public sample — verify against the dataset.
    expect(captured_output).to include '{"word"=>"you", "count"=>42}'
  end

  example "run query as job" do
    capture do
      run_query_async(
        project_id: @project_id,
        query_string: "SELECT TOP(word, 50) as word, COUNT(*) as count " +
                      "FROM publicdata:samples.shakespeare"
      )
    end

    # The async sample prints progress messages as well as results.
    expect(captured_output).to include "Running query"
    expect(captured_output).to include "Waiting for query to complete"
    expect(captured_output).to include "Query results:"
    expect(captured_output).to include '{"word"=>"you", "count"=>42}'
  end
end
279335
end

0 commit comments

Comments
 (0)