Skip to content

Commit 0ad7482

Browse files
committed
revert change
1 parent 1ba46c0 commit 0ad7482

7 files changed

Lines changed: 507 additions & 0 deletions

File tree

packages/bigframes/bigframes/pandas/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,14 @@
9999
from bigframes.pandas.core.api import to_timedelta
100100
from bigframes.pandas.io.api import (
101101
_read_gbq_colab,
102+
from_glob_path,
102103
read_arrow,
103104
read_avro,
104105
read_csv,
105106
read_gbq,
106107
read_gbq_function,
107108
read_gbq_model,
109+
read_gbq_object_table,
108110
read_gbq_query,
109111
read_gbq_table,
110112
read_json,
@@ -453,6 +455,7 @@ def reset_session():
453455
_read_gbq_colab,
454456
read_gbq_function,
455457
read_gbq_model,
458+
read_gbq_object_table,
456459
read_gbq_query,
457460
read_gbq_table,
458461
read_json,
@@ -463,6 +466,7 @@ def reset_session():
463466
remote_function,
464467
to_datetime,
465468
to_timedelta,
469+
from_glob_path,
466470
]
467471

468472
# Use __all__ to let type checkers know what is part of the public API.
@@ -488,6 +492,7 @@ def reset_session():
488492
"_read_gbq_colab",
489493
"read_gbq_function",
490494
"read_gbq_model",
495+
"read_gbq_object_table",
491496
"read_gbq_query",
492497
"read_gbq_table",
493498
"read_json",
@@ -498,6 +503,7 @@ def reset_session():
498503
"remote_function",
499504
"to_datetime",
500505
"to_timedelta",
506+
"from_glob_path",
501507
# Other names
502508
"api",
503509
# pandas dtype attributes

packages/bigframes/bigframes/pandas/io/api.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,21 @@ def read_gbq_model(model_name: str):
394394
read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model)
395395

396396

397+
def read_gbq_object_table(
398+
object_table: str, *, name: Optional[str] = None
399+
) -> bigframes.dataframe.DataFrame:
400+
return global_session.with_default_session(
401+
bigframes.session.Session.read_gbq_object_table,
402+
object_table,
403+
name=name,
404+
)
405+
406+
407+
read_gbq_object_table.__doc__ = inspect.getdoc(
408+
bigframes.session.Session.read_gbq_object_table
409+
)
410+
411+
397412
@overload
398413
def read_gbq_query( # type: ignore[overload-overlap]
399414
query: str,
@@ -620,6 +635,19 @@ def read_gbq_function(
620635
read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function)
621636

622637

638+
def from_glob_path(
639+
path: str, *, connection: Optional[str] = None, name: Optional[str] = None
640+
) -> bigframes.dataframe.DataFrame:
641+
return global_session.with_default_session(
642+
bigframes.session.Session.from_glob_path,
643+
path=path,
644+
connection=connection,
645+
name=name,
646+
)
647+
648+
649+
from_glob_path.__doc__ = inspect.getdoc(bigframes.session.Session.from_glob_path)
650+
623651
_default_location_lock = threading.Lock()
624652

625653

packages/bigframes/bigframes/session/__init__.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2349,6 +2349,68 @@ def _create_object_table(self, path: str, connection: str) -> str:
23492349

23502350
return table
23512351

2352+
def from_glob_path(
2353+
self, path: str, *, connection: Optional[str] = None, name: Optional[str] = None
2354+
) -> dataframe.DataFrame:
2355+
r"""Create a BigFrames DataFrame that contains a BigFrames `ObjectRef column <https://docs.cloud.google.com/bigquery/docs/objectref-columns>`_ from a global wildcard path.
2356+
This operation creates a temporary BQ Object Table under the hood and requires bigquery.connections.delegate permission or BigQuery Connection Admin role.
2357+
If you have an existing BQ Object Table, use read_gbq_object_table().
2358+
2359+
.. note::
2360+
BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
2361+
Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
2362+
and might have limited support. For more information, see the launch stage descriptions
2363+
(https://cloud.google.com/products#product-launch-stages).
2364+
2365+
Args:
2366+
path (str):
2367+
The glob (wildcard) path, such as "gs://<bucket>/<folder>/\*".
2368+
connection (str or None, default None):
2369+
Connection used to access the remote service, given as a str of the format <PROJECT_NUMBER/PROJECT_ID>.<LOCATION>.<CONNECTION_ID>.
2370+
If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach
2371+
permission if the connection isn't fully set up.
2372+
name (str or None, default None):
2373+
The column name of the ObjectRef column.
2374+
Returns:
2375+
bigframes.pandas.DataFrame:
2376+
Result BigFrames DataFrame.
2377+
"""
2378+
import bigframes.bigquery as bq
2379+
connection = self._create_bq_connection(connection=connection)
2380+
2381+
table = self._create_object_table(path, connection)
2382+
2383+
s = bq.obj.make_ref(self._loader.read_gbq_table(table)["uri"], authorizer=connection)
2384+
return s.rename(name).to_frame()
2385+
2386+
def read_gbq_object_table(
2387+
self, object_table: str, *, name: Optional[str] = None
2388+
) -> dataframe.DataFrame:
2389+
"""Read an existing object table to create a BigFrames `ObjectRef <https://docs.cloud.google.com/bigquery/docs/objectref-columns>`_ DataFrame. Use the connection of the object table for the connection of the ObjectRef.
2390+
This function doesn't retrieve the object table data. If you want to read the data, use read_gbq() instead.
2391+
2392+
.. note::
2393+
BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
2394+
Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
2395+
and might have limited support. For more information, see the launch stage descriptions
2396+
(https://cloud.google.com/products#product-launch-stages).
2397+
2398+
Args:
2399+
object_table (str): name of the object table of form <PROJECT_ID>.<DATASET_ID>.<TABLE_ID>.
2400+
name (str or None): the returned ObjectRef column name.
2401+
2402+
Returns:
2403+
bigframes.pandas.DataFrame:
2404+
Result BigFrames DataFrame.
2405+
"""
2406+
import bigframes.bigquery as bq
2407+
# TODO(garrettwu): switch to pseudocolumn when b/374988109 is done.
2408+
table = self.bqclient.get_table(object_table)
2409+
connection = table._properties["externalDataConfiguration"]["connectionId"]
2410+
2411+
s = bq.obj.make_ref(self._loader.read_gbq_table(object_table)["uri"], authorizer=connection)
2412+
return s.rename(name).to_frame()
2413+
23522414
def _create_temp_view(self, sql: str) -> bigquery.TableReference:
23532415
"""Create a random id view from the sql string."""
23542416
return self._anon_dataset_manager.create_temp_view(sql)
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from unittest import mock
16+
17+
import pandas as pd
18+
import pytest
19+
20+
import bigframes
21+
import bigframes.pandas as bpd
22+
23+
pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True)
24+
25+
26+
idisplay = pytest.importorskip("IPython.display")
27+
28+
29+
def test_blob_create_from_uri_str(
30+
bq_connection: str, session: bigframes.Session, images_uris
31+
):
32+
uri_series = bpd.Series(images_uris, session=session)
33+
blob_series = uri_series.str.to_blob(connection=bq_connection)
34+
35+
pd_blob_df = blob_series.struct.explode().to_pandas()
36+
expected_pd_df = pd.DataFrame(
37+
{
38+
"uri": images_uris,
39+
"version": [None, None],
40+
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
41+
"details": [None, None],
42+
}
43+
)
44+
45+
pd.testing.assert_frame_equal(
46+
pd_blob_df, expected_pd_df, check_dtype=False, check_index_type=False
47+
)
48+
49+
50+
def test_blob_create_from_glob_path(
51+
bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
52+
):
53+
blob_df = session.from_glob_path(
54+
images_gcs_path, connection=bq_connection, name="blob_col"
55+
)
56+
pd_blob_df = (
57+
blob_df["blob_col"]
58+
.struct.explode()
59+
.to_pandas()
60+
.sort_values("uri")
61+
.reset_index(drop=True)
62+
)
63+
64+
expected_df = pd.DataFrame(
65+
{
66+
"uri": images_uris,
67+
"version": [None, None],
68+
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
69+
"details": [None, None],
70+
}
71+
)
72+
73+
pd.testing.assert_frame_equal(
74+
pd_blob_df, expected_df, check_dtype=False, check_index_type=False
75+
)
76+
77+
78+
def test_blob_create_read_gbq_object_table(
79+
bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
80+
):
81+
obj_table = session._create_object_table(images_gcs_path, bq_connection)
82+
83+
blob_df = session.read_gbq_object_table(obj_table, name="blob_col")
84+
pd_blob_df = (
85+
blob_df["blob_col"]
86+
.struct.explode()
87+
.to_pandas()
88+
.sort_values("uri")
89+
.reset_index(drop=True)
90+
)
91+
expected_df = pd.DataFrame(
92+
{
93+
"uri": images_uris,
94+
"version": [None, None],
95+
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
96+
"details": [None, None],
97+
}
98+
)
99+
100+
pd.testing.assert_frame_equal(
101+
pd_blob_df, expected_df, check_dtype=False, check_index_type=False
102+
)
103+
104+
105+
def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame):
106+
mock_display = mock.Mock()
107+
monkeypatch.setattr(idisplay, "display", mock_display)
108+
109+
images_mm_df["blob_col"].blob.display()
110+
111+
for call in mock_display.call_args_list:
112+
args, _ = call
113+
arg = args[0]
114+
assert isinstance(arg, idisplay.Image)
115+
116+
117+
def test_display_nulls(
118+
monkeypatch,
119+
bq_connection: str,
120+
session: bigframes.Session,
121+
):
122+
uri_series = bpd.Series([None, None, None], dtype="string", session=session)
123+
blob_series = uri_series.str.to_blob(connection=bq_connection)
124+
mock_display = mock.Mock()
125+
monkeypatch.setattr(idisplay, "display", mock_display)
126+
127+
blob_series.blob.display()
128+
129+
for call in mock_display.call_args_list:
130+
args, _ = call
131+
arg = args[0]
132+
assert arg == "<NA>"

0 commit comments

Comments
 (0)