diff --git a/packages/bigquery-magics/bigquery_magics/bigquery.py b/packages/bigquery-magics/bigquery_magics/bigquery.py index b6737c1be3ec..d6fb565e4ea2 100644 --- a/packages/bigquery-magics/bigquery_magics/bigquery.py +++ b/packages/bigquery-magics/bigquery_magics/bigquery.py @@ -646,41 +646,26 @@ def _colab_node_expansion_callback(request: dict, params_str: str): MAX_GRAPH_VISUALIZATION_QUERY_RESULT_SIZE = 100_000 -def _get_graph_name(query_text: str): - """Returns the name of the graph queried. - - Supports GRAPH only, not GRAPH_TABLE. - - Args: - query_text: The SQL query text. - - Returns: - A (dataset_id, graph_id) tuple, or None if the graph name cannot be determined. - """ - match = re.match(r"\s*GRAPH\s+(\S+)\.(\S+)", query_text, re.IGNORECASE) - if match: - (dataset_id, graph_id) = (match.group(1)), match.group(2) - if "`" in dataset_id or "`" in graph_id: - return None # Backticks in graph name not support for schema view - return (dataset_id, graph_id) - return None - - def _get_graph_schema( bq_client: bigquery.client.Client, query_text: str, query_job: bigquery.job.QueryJob ): - graph_name_result = _get_graph_name(query_text) - if graph_name_result is None: + property_graphs = query_job.referenced_property_graphs + if len(property_graphs) != 1: return None - dataset_id, graph_id = graph_name_result + + graph_ref = property_graphs[0] info_schema_query = f""" select PROPERTY_GRAPH_METADATA_JSON - FROM `{query_job.configuration.destination.project}.{dataset_id}`.INFORMATION_SCHEMA.PROPERTY_GRAPHS + FROM `{graph_ref.project}.{graph_ref.dataset_id}`.INFORMATION_SCHEMA.PROPERTY_GRAPHS WHERE PROPERTY_GRAPH_NAME = @graph_id """ job_config = bigquery.QueryJobConfig( - query_parameters=[bigquery.ScalarQueryParameter("graph_id", "STRING", graph_id)] + query_parameters=[ + bigquery.ScalarQueryParameter( + "graph_id", "STRING", graph_ref.property_graph_id + ) + ] ) job_config.use_legacy_sql = False try: diff --git a/packages/bigquery-magics/setup.py b/packages/bigquery-magics/setup.py index aeb66b1ee6d2..763ca3afb9bc 100644 --- a/packages/bigquery-magics/setup.py +++ b/packages/bigquery-magics/setup.py @@ -29,11 +29,11 @@ release_status = "Development Status :: 4 - Beta" dependencies = [ "db-dtypes>=1.1.1,<2.0.0", - "google-cloud-bigquery >= 3.13.0, <4.0.0", + "google-cloud-bigquery >= 3.41.0, <4.0.0", "ipywidgets>=7.7.1", "ipython>=7.23.1", "ipykernel>=5.5.6", - "packaging >= 20.0.0", + "packaging >= 24.2.0", "pandas>=1.5.3", "pyarrow >= 12.0.0", "pydata-google-auth >=1.5.0", @@ -45,7 +45,7 @@ # moved back to optional due to bloat. See # https://github.com/googleapis/python-bigquery/issues/1196 for more background. "bqstorage": [ - "google-cloud-bigquery-storage >= 2.6.0, <3.0.0", + "google-cloud-bigquery-storage >= 2.25.0, <3.0.0", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. diff --git a/packages/bigquery-magics/testing/constraints-3.10.txt b/packages/bigquery-magics/testing/constraints-3.10.txt index 71f1bc9dd36b..24b47290f490 100644 --- a/packages/bigquery-magics/testing/constraints-3.10.txt +++ b/packages/bigquery-magics/testing/constraints-3.10.txt @@ -7,13 +7,13 @@ # Then this file should have foo==1.14.0 db-dtypes==1.1.1 geopandas==1.0.1 -google-cloud-bigquery==3.13.0 -google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery==3.41.0 +google-cloud-bigquery-storage==2.25.0 ipywidgets==7.7.1 ipython==7.23.1 ipykernel==5.5.6 numpy==1.26.4 -packaging==20.0.0 +packaging==24.2.0 pandas==1.5.3 pyarrow==12.0.0 pydata-google-auth==1.5.0 diff --git a/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py b/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py index efb2c2f82bf6..3be93afdb721 100644 --- a/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py +++ b/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py @@ -241,23 +241,78 @@ def test__run_query_dry_run_without_errors_is_silent(): assert len(captured.stdout) == 0 -def test__get_graph_name(): - assert magics._get_graph_name("GRAPH foo.bar") == ("foo", "bar") - assert magics._get_graph_name("GRAPH `foo.bar`") is None - assert magics._get_graph_name("GRAPH `foo`.bar") is None - assert magics._get_graph_name("SELECT 1") is None - - def test__get_graph_schema_exception(): bq_client = mock.create_autospec(bigquery.Client, instance=True) bq_client.query.side_effect = Exception("error") query_text = "GRAPH foo.bar" query_job = mock.Mock() - query_job.configuration.destination.project = "my-project" + + graph_ref = mock.Mock() + graph_ref.project = "my-project" + graph_ref.dataset_id = "dataset" + graph_ref.property_graph_id = "graph" + query_job.referenced_property_graphs = [graph_ref] assert magics._get_graph_schema(bq_client, query_text, query_job) is None +def test__get_graph_schema_zero_references(): + bq_client = mock.create_autospec(bigquery.Client, instance=True) + query_job = mock.Mock() + query_job.referenced_property_graphs = [] + + assert magics._get_graph_schema(bq_client, "SELECT 1", query_job) is None + + +def test__get_graph_schema_two_references(): + bq_client = mock.create_autospec(bigquery.Client, instance=True) + query_job = mock.Mock() + + ref1 = mock.Mock() + ref2 = mock.Mock() + query_job.referenced_property_graphs = [ref1, ref2] + + assert magics._get_graph_schema(bq_client, "SELECT 1", query_job) is None + + +def test__get_graph_schema_success(): + bq_client = mock.create_autospec(bigquery.Client, instance=True) + query_job = mock.Mock() + + graph_ref = mock.Mock() + graph_ref.project = "my-project" + graph_ref.dataset_id = "dataset" + graph_ref.property_graph_id = "graph" + query_job.referenced_property_graphs = [graph_ref] + + mock_df = mock.MagicMock() + mock_df.shape = (1, 1) + mock_df.iloc.__getitem__.return_value = "schema_json" + bq_client.query.return_value.to_dataframe.return_value = mock_df + + with mock.patch( + "bigquery_magics.bigquery.graph_server._convert_schema" + ) as convert_mock: + convert_mock.return_value = {"nodes": [], "edges": []} + + result = magics._get_graph_schema(bq_client, "SELECT 1", query_job) + + assert result == {"nodes": [], "edges": []} + convert_mock.assert_called_once_with("schema_json") + + called_query = bq_client.query.call_args[0][0] + assert ( + "FROM `my-project.dataset`.INFORMATION_SCHEMA.PROPERTY_GRAPHS" + in called_query + ) + + called_config = bq_client.query.call_args[1]["job_config"] + called_params = called_config.query_parameters + assert len(called_params) == 1 + assert called_params[0].name == "graph_id" + assert called_params[0].value == "graph" + + @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @@ -417,6 +472,12 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present", ) def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): + """If `spanner-graph-notebook` is not installed, the graph visualizer + widget cannot be displayed. + """ + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -468,6 +529,10 @@ def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_int_result(monkeypatch): + """Graph visualization of integer scalars is supported.""" + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -519,6 +584,10 @@ def test_bigquery_graph_int_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_str_result(monkeypatch): + """Graph visualization of string scalars is supported.""" + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -570,6 +639,10 @@ def test_bigquery_graph_str_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_json_json_result(monkeypatch): + """Graph visualization of JSON objects with valid JSON string fields is supported.""" + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -639,6 +712,9 @@ def test_bigquery_graph_json_json_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_json_result(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -758,6 +834,9 @@ def test_bigquery_graph_json_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_size_exceeds_max(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -813,6 +892,9 @@ def test_bigquery_graph_size_exceeds_max(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_size_exceeds_query_result_max(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -869,6 +951,9 @@ def test_bigquery_graph_size_exceeds_query_result_max(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_with_args_serialization(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -938,6 +1023,9 @@ def test_bigquery_graph_with_args_serialization(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_colab(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) # Mock the colab module so the code under test uses colab.register_callback(), rather than # GraphServer. sys.modules["google.colab"] = mock.Mock() @@ -1073,6 +1161,9 @@ def test_colab_node_expansion_callback(): reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present", ) def test_bigquery_graph_missing_spanner_deps(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -1142,11 +1233,17 @@ def test_add_graph_widget_with_schema(monkeypatch): query_result = pandas.DataFrame([{"id": 1}], columns=["result"]) query_text = "GRAPH my_dataset.my_graph" - query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True) + query_job = mock.Mock() query_job.configuration.destination.project = "p" query_job.configuration.destination.dataset_id = "d" query_job.configuration.destination.table_id = "t" + graph_ref = mock.Mock() + graph_ref.project = "p" + graph_ref.dataset_id = "my_dataset" + graph_ref.property_graph_id = "my_graph" + query_job.referenced_property_graphs = [graph_ref] + args = mock.Mock() args.bigquery_api_endpoint = "e" args.project = "p" @@ -1203,11 +1300,13 @@ def test_add_graph_widget_no_graph_name(monkeypatch): query_result = pandas.DataFrame([{"id": 1}], columns=["result"]) query_text = "SELECT * FROM my_dataset.my_table" - query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True) + query_job = mock.Mock() query_job.configuration.destination.project = "p" query_job.configuration.destination.dataset_id = "d" query_job.configuration.destination.table_id = "t" + query_job.referenced_property_graphs = [] + args = mock.Mock() args.bigquery_api_endpoint = "e" args.project = "p" @@ -1244,11 +1343,17 @@ def test_add_graph_widget_schema_not_found(monkeypatch): query_result = pandas.DataFrame([{"id": 1}], columns=["result"]) query_text = "GRAPH my_dataset.my_graph" - query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True) + query_job = mock.Mock() query_job.configuration.destination.project = "p" query_job.configuration.destination.dataset_id = "d" query_job.configuration.destination.table_id = "t" + graph_ref = mock.Mock() + graph_ref.project = "p" + graph_ref.dataset_id = "my_dataset" + graph_ref.property_graph_id = "my_graph" + query_job.referenced_property_graphs = [graph_ref] + args = mock.Mock() args.bigquery_api_endpoint = "e" args.project = "p" @@ -1293,9 +1398,8 @@ def test_bigquery_magic_default_connection_user_agent(): client_info_arg = conn.call_args[1].get("client_info") assert client_info_arg is not None - assert ( - client_info_arg.user_agent - == f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" + assert client_info_arg.user_agent.startswith( + f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" ) @@ -1611,9 +1715,8 @@ def warning_match(warning): assert kwargs.get("credentials") is mock_credentials client_info = kwargs.get("client_info") assert client_info is not None - assert ( - client_info.user_agent - == f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" + assert client_info.user_agent.startswith( + f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" ) query_job_mock.to_dataframe.assert_called_once_with(