-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Expand file tree
/
Copy pathtest_api.py
More file actions
169 lines (138 loc) · 6.25 KB
/
test_api.py
File metadata and controls
169 lines (138 loc) · 6.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock
import google.cloud.bigquery
import pytest
import bigframes._config.auth
import bigframes.dataframe
import bigframes.pandas
import bigframes.pandas.io.api as bf_io_api
import bigframes.session
import bigframes.session.clients
# _read_gbq_colab requires the polars engine.
pytest.importorskip("polars")
@mock.patch(
    "bigframes.pandas.io.api._set_default_session_location_if_possible_deferred_query"
)
@mock.patch("bigframes.core.global_session.with_default_session")
def test_read_gbq_colab_dry_run_doesnt_call_set_location(
    mock_with_default_session, mock_set_location
):
    """
    Ensure that we don't bind to a location too early. If it's a dry run, the
    user might not be done typing.
    """
    # Any DataFrame-shaped mock will do; the call under test ignores it.
    fake_frame = mock.create_autospec(bigframes.dataframe.DataFrame)
    mock_with_default_session.return_value = fake_frame

    bf_io_api._read_gbq_colab(
        "SELECT {param1} AS param1",
        pyformat_args={"param1": "value1"},
        dry_run=True,
    )

    # A dry run must never trigger the deferred location-resolution helper.
    mock_set_location.assert_not_called()
@mock.patch("bigframes._config.auth.pydata_google_auth.default")
@mock.patch("bigframes.core.global_session.with_default_session")
def test_read_gbq_colab_dry_run_doesnt_authenticate_multiple_times(
    mock_with_default_session, mock_get_credentials, monkeypatch
):
    """
    Ensure that we don't authenticate too often, which is an expensive
    operation, performance-wise (2+ seconds).
    """
    # Start from a clean slate so a session (and credentials) left over from
    # an earlier test can't mask a second authentication call.
    bigframes.pandas.close_session()
    mock_get_credentials.return_value = (mock.Mock(), "unit-test-project")
    # Stub out BigQuery client creation so no real network calls happen; the
    # fake query job reports an empty schema for the dry-run result.
    mock_create_bq_client = mock.Mock()
    mock_bq_client = mock.create_autospec(google.cloud.bigquery.Client, instance=True)
    mock_create_bq_client.return_value = mock_bq_client
    mock_query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True)
    type(mock_query_job).schema = mock.PropertyMock(return_value=[])
    mock_query_job._properties = {}
    mock_bq_client.query.return_value = mock_query_job
    monkeypatch.setattr(
        bigframes.session.clients.ClientsProvider,
        "_create_bigquery_client",
        mock_create_bq_client,
    )
    mock_df = mock.create_autospec(bigframes.dataframe.DataFrame)
    mock_with_default_session.return_value = mock_df
    # Clear the module-level credentials cache so the first call below is
    # forced to authenticate.
    bigframes._config.auth._cached_credentials = None
    query_or_table = "SELECT {param1} AS param1"
    sample_pyformat_args = {"param1": "value1"}
    bf_io_api._read_gbq_colab(
        query_or_table, pyformat_args=sample_pyformat_args, dry_run=True
    )
    # First call: authenticates, but a dry run never creates a session.
    mock_get_credentials.assert_called()
    mock_with_default_session.assert_not_called()
    mock_get_credentials.reset_mock()
    # Repeat the operation; the credentials should now have been cached.
    bf_io_api._read_gbq_colab(
        query_or_table, pyformat_args=sample_pyformat_args, dry_run=True
    )
    mock_get_credentials.assert_not_called()
@mock.patch(
    "bigframes.pandas.io.api._set_default_session_location_if_possible_deferred_query"
)
@mock.patch("bigframes.core.global_session.with_default_session")
def test_read_gbq_colab_calls_set_location(
    mock_with_default_session, mock_set_location
):
    """A non-dry-run query resolves the session location before running."""
    frame = mock.create_autospec(bigframes.dataframe.DataFrame)
    mock_with_default_session.return_value = frame

    query_or_table = "SELECT {param1} AS param1"
    sample_pyformat_args = {"param1": "'value1'"}
    result = bf_io_api._read_gbq_colab(
        query_or_table, pyformat_args=sample_pyformat_args, dry_run=False
    )

    # The location helper must be handed the already-formatted SQL so the
    # deferred query has no syntax errors.
    mock_set_location.assert_called_once()
    positional, _ = mock_set_location.call_args
    assert positional[0]() == "SELECT 'value1' AS param1"

    # with_default_session is invoked once, forwarding the original
    # (unformatted) query along with the pyformat args.
    mock_with_default_session.assert_called_once()
    positional, keyword = mock_with_default_session.call_args
    assert positional[0] == bigframes.session.Session._read_gbq_colab
    assert positional[1] == query_or_table
    assert keyword["pyformat_args"] == sample_pyformat_args
    assert not keyword["dry_run"]
    assert isinstance(result, bigframes.dataframe.DataFrame)
@mock.patch("bigframes.pandas.io.api._get_storage_client")
@mock.patch("bigframes.core.global_session.with_default_session")
def test_read_csv_gcs_sets_location(mock_with_default_session, mock_get_storage_client):
    """Reading a gs:// path adopts the bucket's location when none is set."""
    import bigframes._config as config

    bucket = mock.Mock()
    bucket.location = "us-east1"
    storage_client = mock.Mock()
    storage_client.get_bucket.return_value = bucket
    mock_get_storage_client.return_value = storage_client

    # Simulate a fresh configuration: no location chosen, no session started.
    config.options.bigquery.location = None
    config.options.bigquery._session_started = False
    config.options.bigquery.use_regional_endpoints = None

    bf_io_api.read_csv("gs://test-bucket/file.csv")

    assert config.options.bigquery.location == "us-east1"
@mock.patch("bigframes.pandas.io.api._get_storage_client")
@mock.patch("bigframes.core.global_session.with_default_session")
def test_read_csv_gcs_doesnt_overwrite_set_location(mock_with_default_session, mock_get_storage_client):
    """An explicitly configured location wins over the bucket's region."""
    import bigframes._config as config

    bucket = mock.Mock()
    bucket.location = "us-east1"
    storage_client = mock.Mock()
    storage_client.get_bucket.return_value = bucket
    mock_get_storage_client.return_value = storage_client

    # The user already picked a location; reading from GCS must not replace
    # it with the bucket's region.
    config.options.bigquery.location = "eu"
    config.options.bigquery._session_started = False
    config.options.bigquery.use_regional_endpoints = None

    bf_io_api.read_csv("gs://test-bucket/file.csv")

    # NOTE(review): the options setter apparently canonicalizes "eu" to "EU";
    # this matches the original test's expectation — confirm in the setter.
    assert config.options.bigquery.location == "EU"