Skip to content

Commit 57d6b5d

Browse files
authored
Merge pull request #833 from malariagen/GH796_prefix_private_functions
Prefix undocumented functions with an underscore to indicate that they're private
2 parents 4c9100b + 8d8cd5a commit 57d6b5d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

621 additions and 620 deletions (lines changed)

malariagen_data/adir1.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def __init__(
8888
tqdm_class=None,
8989
unrestricted_use_only=False,
9090
surveillance_use_only=False,
91-
**storage_options, # used by fsspec via init_filesystem()
91+
**storage_options,
9292
):
9393
super().__init__(
9494
url=url,
@@ -118,7 +118,7 @@ def __init__(
118118
gff_gene_type="protein_coding_gene",
119119
gff_gene_name_attribute="Note",
120120
gff_default_attributes=("ID", "Parent", "Note", "description"),
121-
storage_options=storage_options, # used by fsspec via init_filesystem()
121+
storage_options=storage_options,
122122
tqdm_class=tqdm_class,
123123
taxon_colors=TAXON_COLORS,
124124
virtual_contigs=None,

malariagen_data/af1.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def __init__(
9393
tqdm_class=None,
9494
unrestricted_use_only=False,
9595
surveillance_use_only=False,
96-
**storage_options, # used by fsspec via init_filesystem()
96+
**storage_options,
9797
):
9898
super().__init__(
9999
url=url,
@@ -123,7 +123,7 @@ def __init__(
123123
gff_gene_type="protein_coding_gene",
124124
gff_gene_name_attribute="Note",
125125
gff_default_attributes=("ID", "Parent", "Note", "description"),
126-
storage_options=storage_options, # used by fsspec via init_filesystem()
126+
storage_options=storage_options,
127127
tqdm_class=tqdm_class,
128128
taxon_colors=TAXON_COLORS,
129129
virtual_contigs=None,

malariagen_data/ag3.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def __init__(
164164
tqdm_class=None,
165165
unrestricted_use_only=False,
166166
surveillance_use_only=False,
167-
**storage_options, # used by fsspec via init_filesystem()
167+
**storage_options,
168168
):
169169
super().__init__(
170170
url=url,
@@ -194,7 +194,7 @@ def __init__(
194194
gff_gene_type="gene",
195195
gff_gene_name_attribute="Name",
196196
gff_default_attributes=("ID", "Parent", "Name", "description"),
197-
storage_options=storage_options, # used by fsspec via init_filesystem()
197+
storage_options=storage_options,
198198
tqdm_class=tqdm_class,
199199
taxon_colors=TAXON_COLORS,
200200
virtual_contigs=VIRTUAL_CONTIGS,

malariagen_data/amin1.py

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@
99
DIM_SAMPLE,
1010
DIM_VARIANT,
1111
Region,
12-
da_from_zarr,
13-
dask_compress_dataset,
14-
init_filesystem,
15-
init_zarr_store,
16-
locate_region,
17-
read_gff3,
18-
resolve_region,
19-
simple_xarray_concat,
20-
unpack_gff3_attributes,
12+
_da_from_zarr,
13+
_dask_compress_dataset,
14+
_init_filesystem,
15+
_init_zarr_store,
16+
_locate_region,
17+
_read_gff3,
18+
_resolve_region,
19+
_simple_xarray_concat,
20+
_unpack_gff3_attributes,
2121
)
2222

2323
GENOME_FEATURES_GFF3_PATH = (
@@ -32,7 +32,7 @@
3232
class Amin1:
3333
def __init__(self, url=DEFAULT_URL, **kwargs):
3434
# setup filesystem
35-
self._fs, self._path = init_filesystem(url, **kwargs)
35+
self._fs, self._path = _init_filesystem(url, **kwargs)
3636

3737
# setup caches
3838
self._cache_sample_metadata = None
@@ -74,7 +74,7 @@ def open_genome(self):
7474
"""
7575
if self._cache_genome is None:
7676
path = f"{self._path}/{genome_zarr_path}"
77-
store = init_zarr_store(fs=self._fs, path=path)
77+
store = _init_zarr_store(fs=self._fs, path=path)
7878
self._cache_genome = zarr.open_consolidated(store=store)
7979
return self._cache_genome
8080

@@ -100,9 +100,9 @@ def genome_sequence(self, region, inline_array=True, chunks="native"):
100100
101101
"""
102102
genome = self.open_genome()
103-
region = resolve_region(self, region)
103+
region = _resolve_region(self, region)
104104
z = genome[region.contig]
105-
d = da_from_zarr(z, inline_array=inline_array, chunks=chunks)
105+
d = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
106106

107107
if region.start:
108108
slice_start = region.start - 1
@@ -143,17 +143,17 @@ def genome_features(self, attributes=("ID", "Parent", "Name", "description")):
143143
except KeyError:
144144
path = f"{self._path}/{GENOME_FEATURES_GFF3_PATH}"
145145
with self._fs.open(path, mode="rb") as f:
146-
df = read_gff3(f, compression="gzip")
146+
df = _read_gff3(f, compression="gzip")
147147
if attributes is not None:
148-
df = unpack_gff3_attributes(df, attributes=attributes)
148+
df = _unpack_gff3_attributes(df, attributes=attributes)
149149
self._cache_genome_features[attributes] = df
150150

151151
return df
152152

153153
def open_snp_calls(self):
154154
if self._cache_snp_genotypes is None:
155155
path = f"{self._path}/v1/snp_genotypes/all"
156-
store = init_zarr_store(fs=self._fs, path=path)
156+
store = _init_zarr_store(fs=self._fs, path=path)
157157
self._cache_snp_genotypes = zarr.open_consolidated(store=store)
158158
return self._cache_snp_genotypes
159159

@@ -168,14 +168,16 @@ def _snp_calls_dataset(self, *, region, inline_array, chunks):
168168

169169
# variant_position
170170
pos_z = root[f"{contig}/variants/POS"]
171-
variant_position = da_from_zarr(pos_z, inline_array=inline_array, chunks=chunks)
171+
variant_position = _da_from_zarr(
172+
pos_z, inline_array=inline_array, chunks=chunks
173+
)
172174
coords["variant_position"] = [DIM_VARIANT], variant_position
173175

174176
# variant_allele
175177
ref_z = root[f"{contig}/variants/REF"]
176178
alt_z = root[f"{contig}/variants/ALT"]
177-
ref = da_from_zarr(ref_z, inline_array=inline_array, chunks=chunks)
178-
alt = da_from_zarr(alt_z, inline_array=inline_array, chunks=chunks)
179+
ref = _da_from_zarr(ref_z, inline_array=inline_array, chunks=chunks)
180+
alt = _da_from_zarr(alt_z, inline_array=inline_array, chunks=chunks)
179181
variant_allele = da.concatenate([ref[:, None], alt], axis=1)
180182
data_vars["variant_allele"] = [DIM_VARIANT, DIM_ALLELE], variant_allele
181183

@@ -188,18 +190,18 @@ def _snp_calls_dataset(self, *, region, inline_array, chunks):
188190

189191
# variant_filter_pass
190192
fp_z = root[f"{contig}/variants/filter_pass"]
191-
fp = da_from_zarr(fp_z, inline_array=inline_array, chunks=chunks)
193+
fp = _da_from_zarr(fp_z, inline_array=inline_array, chunks=chunks)
192194
data_vars["variant_filter_pass"] = [DIM_VARIANT], fp
193195

194196
# call arrays
195197
gt_z = root[f"{contig}/calldata/GT"]
196-
call_genotype = da_from_zarr(gt_z, inline_array=inline_array, chunks=chunks)
198+
call_genotype = _da_from_zarr(gt_z, inline_array=inline_array, chunks=chunks)
197199
gq_z = root[f"{contig}/calldata/GQ"]
198-
call_gq = da_from_zarr(gq_z, inline_array=inline_array, chunks=chunks)
200+
call_gq = _da_from_zarr(gq_z, inline_array=inline_array, chunks=chunks)
199201
ad_z = root[f"{contig}/calldata/AD"]
200-
call_ad = da_from_zarr(ad_z, inline_array=inline_array, chunks=chunks)
202+
call_ad = _da_from_zarr(ad_z, inline_array=inline_array, chunks=chunks)
201203
mq_z = root[f"{contig}/calldata/MQ"]
202-
call_mq = da_from_zarr(mq_z, inline_array=inline_array, chunks=chunks)
204+
call_mq = _da_from_zarr(mq_z, inline_array=inline_array, chunks=chunks)
203205
data_vars["call_genotype"] = (
204206
[DIM_VARIANT, DIM_SAMPLE, DIM_PLOIDY],
205207
call_genotype,
@@ -210,7 +212,7 @@ def _snp_calls_dataset(self, *, region, inline_array, chunks):
210212

211213
# sample arrays
212214
z = root["samples"]
213-
sample_id = da_from_zarr(z, inline_array=inline_array, chunks=chunks)
215+
sample_id = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
214216
coords["sample_id"] = [DIM_SAMPLE], sample_id
215217

216218
# setup attributes
@@ -221,7 +223,7 @@ def _snp_calls_dataset(self, *, region, inline_array, chunks):
221223

222224
# deal with region
223225
if region.start or region.end:
224-
loc_region = locate_region(region, pos_z)
226+
loc_region = _locate_region(region, pos_z)
225227
ds = ds.isel(variants=loc_region)
226228

227229
return ds
@@ -250,7 +252,7 @@ def snp_calls(self, region, site_mask=False, inline_array=True, chunks="native")
250252
251253
"""
252254

253-
region = resolve_region(self, region)
255+
region = _resolve_region(self, region)
254256

255257
# normalise to simplify concatenation logic
256258
if isinstance(region, str) or isinstance(region, Region):
@@ -265,14 +267,14 @@ def snp_calls(self, region, site_mask=False, inline_array=True, chunks="native")
265267
)
266268
for r in region
267269
]
268-
ds = simple_xarray_concat(
270+
ds = _simple_xarray_concat(
269271
datasets,
270272
dim=DIM_VARIANT,
271273
)
272274

273275
# apply site filters
274276
if site_mask:
275-
ds = dask_compress_dataset(
277+
ds = _dask_compress_dataset(
276278
ds, indexer="variant_filter_pass", dim=DIM_VARIANT
277279
)
278280

malariagen_data/anoph/aim_data.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from malariagen_data.anoph import plotly_params
1212

13-
from ..util import DIM_SAMPLE, check_types, init_zarr_store, simple_xarray_concat
13+
from ..util import DIM_SAMPLE, _check_types, _init_zarr_store, _simple_xarray_concat
1414
from . import aim_params, base_params
1515
from .genome_features import AnophelesGenomeFeaturesData
1616
from .genome_sequence import AnophelesGenomeSequenceData
@@ -62,7 +62,7 @@ def _prep_aims_param(self, *, aims: aim_params.aims) -> str:
6262
else:
6363
raise ValueError(f"Invalid aims parameter, must be one of {self.aim_ids}.")
6464

65-
@check_types
65+
@_check_types
6666
@doc(
6767
summary="Access ancestry informative marker variants.",
6868
returns="""
@@ -84,7 +84,7 @@ def aim_variants(self, aims: aim_params.aims) -> xr.Dataset:
8484
path = f"{self._base_path}/reference/aim_defs_{analysis}/{aims}.zarr"
8585

8686
# Initialise and open the zarr data.
87-
store = init_zarr_store(fs=self._fs, path=path)
87+
store = _init_zarr_store(fs=self._fs, path=path)
8888
ds = xr.open_zarr(store, concat_characters=False)
8989
ds = ds.set_coords(["variant_contig", "variant_position"])
9090

@@ -105,12 +105,12 @@ def _aim_calls_dataset(self, *, aims, sample_set):
105105
path = f"{self._base_path}/{release_path}/aim_calls_{analysis}/{sample_set}/{aims}.zarr"
106106

107107
# Initialise and open the zarr data.
108-
store = init_zarr_store(fs=self._fs, path=path)
108+
store = _init_zarr_store(fs=self._fs, path=path)
109109
ds = xr.open_zarr(store=store, concat_characters=False)
110110
ds = ds.set_coords(["variant_contig", "variant_position", "sample_id"])
111111
return ds
112112

113-
@check_types
113+
@_check_types
114114
@doc(
115115
summary="""
116116
Access ancestry informative marker SNP sites, alleles and genotype
@@ -161,7 +161,7 @@ def aim_calls(
161161
aim_calls_datasets.append(aim_calls_dataset)
162162

163163
# Concatenate data from multiple sample sets.
164-
ds = simple_xarray_concat(aim_calls_datasets, dim=DIM_SAMPLE)
164+
ds = _simple_xarray_concat(aim_calls_datasets, dim=DIM_SAMPLE)
165165

166166
# If there's a sample query...
167167
if prepared_sample_query is not None:

0 commit comments

Comments
 (0)