Skip to content

Commit 1177e63

Browse files
committed
Use python engine for sample_query to support extension dtypes
1 parent d927b06 commit 1177e63

2 files changed

Lines changed: 10 additions & 3 deletions

File tree

malariagen_data/anoph/base.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -948,7 +948,10 @@ def _filter_sample_dataset(
948948

949949
# Determine which samples match the sample query.
950950
if sample_query != "":
951-
loc_samples = df_samples.eval(sample_query, **sample_query_options)
951+
# Use the python engine in order to support extension array dtypes, e.g. Float64, Int64, boolean.
952+
loc_samples = df_samples.eval(
953+
sample_query, **sample_query_options, engine="python"
954+
)
952955
else:
953956
loc_samples = pd.Series(True, index=df_samples.index)
954957

malariagen_data/anoph/sample_metadata.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -777,7 +777,10 @@ def sample_metadata(
777777
if prepared_sample_query is not None:
778778
# Assume a pandas query string.
779779
sample_query_options = sample_query_options or {}
780-
df_samples = df_samples.query(prepared_sample_query, **sample_query_options)
780+
# Use the python engine in order to support extension array dtypes, e.g. Float64, Int64, boolean.
781+
df_samples = df_samples.query(
782+
prepared_sample_query, **sample_query_options, engine="python"
783+
)
781784
df_samples = df_samples.reset_index(drop=True)
782785
elif sample_indices is not None:
783786
# Assume it is an indexer.
@@ -1068,8 +1071,9 @@ def _prep_sample_selection_cache_params(
10681071
# integer indices instead.
10691072
df_samples = self.sample_metadata(sample_sets=prepared_sample_sets)
10701073
sample_query_options = sample_query_options or {}
1074+
# Use the python engine in order to support extension array dtypes, e.g. Float64, Int64, boolean.
10711075
loc_samples = df_samples.eval(
1072-
prepared_sample_query, **sample_query_options
1076+
prepared_sample_query, **sample_query_options, engine="python"
10731077
).values
10741078
sample_indices = np.nonzero(loc_samples)[0].tolist()
10751079

0 commit comments

Comments
 (0)