@@ -256,32 +256,13 @@ def cnv_hmm(
256256 # If there are no sample query options, then default to an empty dict.
257257 sample_query_options = sample_query_options or {}
258258
259- # Determine which samples match the sample query.
260- loc_samples = df_samples .eval (
261- prepared_sample_query , ** sample_query_options
259+ ds = self ._filter_sample_dataset (
260+ ds = ds ,
261+ df_samples = df_samples ,
262+ sample_query = prepared_sample_query ,
263+ sample_query_options = sample_query_options ,
262264 )
263265
264- # Raise an error if no samples match the sample query.
265- if not loc_samples .any ():
266- raise ValueError (
267- f"No samples found for query { prepared_sample_query !r} "
268- )
269-
270- # Get the relevant sample ids from the sample metadata DataFrame, using the boolean mask.
271- relevant_sample_ids = df_samples .loc [loc_samples , "sample_id" ].values
272-
273- # Get all the sample ids from the unfiltered CNV HMM Dataset.
274- ds_sample_ids = ds .coords ["sample_id" ].values
275-
276- # Get the indices of samples in the CNV HMM Dataset that match the relevant sample ids.
277- # Note: we use `[0]` to get the first element of the tuple returned by `np.where`.
278- relevant_sample_indices = np .where (
279- np .isin (ds_sample_ids , relevant_sample_ids )
280- )[0 ]
281-
282- # Select only the relevant samples from the CNV HMM Dataset.
283- ds = ds .isel (samples = relevant_sample_indices )
284-
285266 debug ("handle coverage variance filter" )
286267 if max_coverage_variance is not None :
287268 cov_var = ds ["sample_coverage_variance" ].values
@@ -476,30 +457,15 @@ def cnv_coverage_calls(
476457 # Get the relevant sample metadata.
477458 df_samples = self .sample_metadata (sample_sets = prepared_sample_set )
478459
479- # Determine which samples match the sample query.
480- if prepared_sample_query != "" :
481- loc_samples = df_samples .eval (prepared_sample_query )
482- else :
483- loc_samples = pd .Series (True , index = df_samples .index )
484-
485- # Raise an error if no samples match the sample query.
486- if not loc_samples .any ():
487- raise ValueError (f"No samples found for query { prepared_sample_query !r} " )
488-
489- # Get the relevant sample ids from the sample metadata DataFrame, using the boolean mask.
490- relevant_sample_ids = df_samples .loc [loc_samples , "sample_id" ].values
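460+ # If there is no sample query, then default to an empty string (the replaced code treated "" as "select all samples" without calling eval; NOTE(review): confirm _filter_sample_dataset does the same).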
461+ prepared_sample_query = prepared_sample_query or ""
491462
492- # Get all the sample ids from the unfiltered CNV coverage calls Dataset.
493- ds_sample_ids = ds .coords ["sample_id" ].values
494-
495- # Get the indices of samples in the CNV coverage calls Dataset that match the relevant sample ids.
496- # Note: we use `[0]` to get the first element of the tuple returned by `np.where`.
497- relevant_sample_indices = np .where (np .isin (ds_sample_ids , relevant_sample_ids ))[
498- 0
499- ]
500-
501- # Select only the relevant samples from the CNV coverage calls Dataset.
502- ds = ds .isel (samples = relevant_sample_indices )
463+ ds = self ._filter_sample_dataset (
464+ ds = ds ,
465+ df_samples = df_samples ,
466+ sample_query = prepared_sample_query ,
467+ sample_query_options = {},
468+ )
503469
504470 return ds
505471
@@ -690,29 +656,12 @@ def cnv_discordant_read_calls(
690656 # If there are no sample query options, then default to an empty dict.
691657 sample_query_options = sample_query_options or {}
692658
693- # Determine which samples match the sample query.
694- loc_samples = df_samples .eval (prepared_sample_query , ** sample_query_options )
695-
696- # Raise an error if no samples match the sample query.
697- if not loc_samples .any ():
698- raise ValueError (
699- f"No samples found for query { prepared_sample_query !r} "
700- )
701-
702- # Get the relevant sample ids from the sample metadata DataFrame, using the boolean mask.
703- relevant_sample_ids = df_samples .loc [loc_samples , "sample_id" ].values
704-
705- # Get all the sample ids from the unfiltered CNV discordant reads Dataset.
706- ds_sample_ids = ds .coords ["sample_id" ].values
707-
708- # Get the indices of samples in the CNV discordant reads Dataset that match the relevant sample ids.
709- # Note: we use `[0]` to get the first element of the tuple returned by `np.where`.
710- relevant_sample_indices = np .where (
711- np .isin (ds_sample_ids , relevant_sample_ids )
712- )[0 ]
713-
714- # Select only the relevant samples from the CNV discordant reads Dataset.
715- ds = ds .isel (samples = relevant_sample_indices )
659+ ds = self ._filter_sample_dataset (
660+ ds = ds ,
661+ df_samples = df_samples ,
662+ sample_query = prepared_sample_query ,
663+ sample_query_options = sample_query_options ,
664+ )
716665
717666 return ds
718667
0 commit comments