99 DIM_SAMPLE ,
1010 DIM_VARIANT ,
1111 Region ,
12- da_from_zarr ,
13- dask_compress_dataset ,
14- init_filesystem ,
15- init_zarr_store ,
16- locate_region ,
17- read_gff3 ,
18- resolve_region ,
19- simple_xarray_concat ,
20- unpack_gff3_attributes ,
12+ _da_from_zarr ,
13+ _dask_compress_dataset ,
14+ _init_filesystem ,
15+ _init_zarr_store ,
16+ _locate_region ,
17+ _read_gff3 ,
18+ _resolve_region ,
19+ _simple_xarray_concat ,
20+ _unpack_gff3_attributes ,
2121)
2222
2323GENOME_FEATURES_GFF3_PATH = (
3232class Amin1 :
3333 def __init__ (self , url = DEFAULT_URL , ** kwargs ):
3434 # setup filesystem
35- self ._fs , self ._path = init_filesystem (url , ** kwargs )
35+ self ._fs , self ._path = _init_filesystem (url , ** kwargs )
3636
3737 # setup caches
3838 self ._cache_sample_metadata = None
@@ -74,7 +74,7 @@ def open_genome(self):
7474 """
7575 if self ._cache_genome is None :
7676 path = f"{ self ._path } /{ genome_zarr_path } "
77- store = init_zarr_store (fs = self ._fs , path = path )
77+ store = _init_zarr_store (fs = self ._fs , path = path )
7878 self ._cache_genome = zarr .open_consolidated (store = store )
7979 return self ._cache_genome
8080
@@ -100,9 +100,9 @@ def genome_sequence(self, region, inline_array=True, chunks="native"):
100100
101101 """
102102 genome = self .open_genome ()
103- region = resolve_region (self , region )
103+ region = _resolve_region (self , region )
104104 z = genome [region .contig ]
105- d = da_from_zarr (z , inline_array = inline_array , chunks = chunks )
105+ d = _da_from_zarr (z , inline_array = inline_array , chunks = chunks )
106106
107107 if region .start :
108108 slice_start = region .start - 1
@@ -143,17 +143,17 @@ def genome_features(self, attributes=("ID", "Parent", "Name", "description")):
143143 except KeyError :
144144 path = f"{ self ._path } /{ GENOME_FEATURES_GFF3_PATH } "
145145 with self ._fs .open (path , mode = "rb" ) as f :
146- df = read_gff3 (f , compression = "gzip" )
146+ df = _read_gff3 (f , compression = "gzip" )
147147 if attributes is not None :
148- df = unpack_gff3_attributes (df , attributes = attributes )
148+ df = _unpack_gff3_attributes (df , attributes = attributes )
149149 self ._cache_genome_features [attributes ] = df
150150
151151 return df
152152
153153 def open_snp_calls (self ):
154154 if self ._cache_snp_genotypes is None :
155155 path = f"{ self ._path } /v1/snp_genotypes/all"
156- store = init_zarr_store (fs = self ._fs , path = path )
156+ store = _init_zarr_store (fs = self ._fs , path = path )
157157 self ._cache_snp_genotypes = zarr .open_consolidated (store = store )
158158 return self ._cache_snp_genotypes
159159
@@ -168,14 +168,16 @@ def _snp_calls_dataset(self, *, region, inline_array, chunks):
168168
169169 # variant_position
170170 pos_z = root [f"{ contig } /variants/POS" ]
171- variant_position = da_from_zarr (pos_z , inline_array = inline_array , chunks = chunks )
171+ variant_position = _da_from_zarr (
172+ pos_z , inline_array = inline_array , chunks = chunks
173+ )
172174 coords ["variant_position" ] = [DIM_VARIANT ], variant_position
173175
174176 # variant_allele
175177 ref_z = root [f"{ contig } /variants/REF" ]
176178 alt_z = root [f"{ contig } /variants/ALT" ]
177- ref = da_from_zarr (ref_z , inline_array = inline_array , chunks = chunks )
178- alt = da_from_zarr (alt_z , inline_array = inline_array , chunks = chunks )
179+ ref = _da_from_zarr (ref_z , inline_array = inline_array , chunks = chunks )
180+ alt = _da_from_zarr (alt_z , inline_array = inline_array , chunks = chunks )
179181 variant_allele = da .concatenate ([ref [:, None ], alt ], axis = 1 )
180182 data_vars ["variant_allele" ] = [DIM_VARIANT , DIM_ALLELE ], variant_allele
181183
@@ -188,18 +190,18 @@ def _snp_calls_dataset(self, *, region, inline_array, chunks):
188190
189191 # variant_filter_pass
190192 fp_z = root [f"{ contig } /variants/filter_pass" ]
191- fp = da_from_zarr (fp_z , inline_array = inline_array , chunks = chunks )
193+ fp = _da_from_zarr (fp_z , inline_array = inline_array , chunks = chunks )
192194 data_vars ["variant_filter_pass" ] = [DIM_VARIANT ], fp
193195
194196 # call arrays
195197 gt_z = root [f"{ contig } /calldata/GT" ]
196- call_genotype = da_from_zarr (gt_z , inline_array = inline_array , chunks = chunks )
198+ call_genotype = _da_from_zarr (gt_z , inline_array = inline_array , chunks = chunks )
197199 gq_z = root [f"{ contig } /calldata/GQ" ]
198- call_gq = da_from_zarr (gq_z , inline_array = inline_array , chunks = chunks )
200+ call_gq = _da_from_zarr (gq_z , inline_array = inline_array , chunks = chunks )
199201 ad_z = root [f"{ contig } /calldata/AD" ]
200- call_ad = da_from_zarr (ad_z , inline_array = inline_array , chunks = chunks )
202+ call_ad = _da_from_zarr (ad_z , inline_array = inline_array , chunks = chunks )
201203 mq_z = root [f"{ contig } /calldata/MQ" ]
202- call_mq = da_from_zarr (mq_z , inline_array = inline_array , chunks = chunks )
204+ call_mq = _da_from_zarr (mq_z , inline_array = inline_array , chunks = chunks )
203205 data_vars ["call_genotype" ] = (
204206 [DIM_VARIANT , DIM_SAMPLE , DIM_PLOIDY ],
205207 call_genotype ,
@@ -210,7 +212,7 @@ def _snp_calls_dataset(self, *, region, inline_array, chunks):
210212
211213 # sample arrays
212214 z = root ["samples" ]
213- sample_id = da_from_zarr (z , inline_array = inline_array , chunks = chunks )
215+ sample_id = _da_from_zarr (z , inline_array = inline_array , chunks = chunks )
214216 coords ["sample_id" ] = [DIM_SAMPLE ], sample_id
215217
216218 # setup attributes
@@ -221,7 +223,7 @@ def _snp_calls_dataset(self, *, region, inline_array, chunks):
221223
222224 # deal with region
223225 if region .start or region .end :
224- loc_region = locate_region (region , pos_z )
226+ loc_region = _locate_region (region , pos_z )
225227 ds = ds .isel (variants = loc_region )
226228
227229 return ds
@@ -250,7 +252,7 @@ def snp_calls(self, region, site_mask=False, inline_array=True, chunks="native")
250252
251253 """
252254
253- region = resolve_region (self , region )
255+ region = _resolve_region (self , region )
254256
255257 # normalise to simplify concatenation logic
256258 if isinstance (region , str ) or isinstance (region , Region ):
@@ -265,14 +267,14 @@ def snp_calls(self, region, site_mask=False, inline_array=True, chunks="native")
265267 )
266268 for r in region
267269 ]
268- ds = simple_xarray_concat (
270+ ds = _simple_xarray_concat (
269271 datasets ,
270272 dim = DIM_VARIANT ,
271273 )
272274
273275 # apply site filters
274276 if site_mask :
275- ds = dask_compress_dataset (
277+ ds = _dask_compress_dataset (
276278 ds , indexer = "variant_filter_pass" , dim = DIM_VARIANT
277279 )
278280
0 commit comments