@@ -886,14 +886,24 @@ def _gene_cnv(
886886 chunks ,
887887 inline_array ,
888888 ):
889- debug = self ._log .debug
890-
891- debug ("sanity check" )
889+ # Sanity check.
892890 assert isinstance (region , Region )
893891
894- debug ("access HMM data" )
892+ # Access genes within the region of interest.
893+ df_genome_features = self .genome_features (region = region )
894+ sample_query_options = sample_query_options or {}
895+ df_genes = df_genome_features .query (
896+ f"type == '{ self ._gff_gene_type } '" , ** sample_query_options
897+ )
898+
899+ # Refine the region for CNV data to ensure coverage of all requested genes.
900+ cnv_region = Region (
901+ region .contig , df_genes ["start" ].min (), df_genes ["end" ].max ()
902+ )
903+
904+ # Access HMM data.
895905 ds_hmm = self .cnv_hmm (
896- region = region . contig ,
906+ region = cnv_region ,
897907 sample_sets = sample_sets ,
898908 sample_query = sample_query ,
899909 sample_query_options = sample_query_options ,
@@ -907,45 +917,38 @@ def _gene_cnv(
907917 with self ._dask_progress (desc = "Load CNV HMM data" ):
908918 pos , end , cn = dask .compute (pos , end , cn )
909919
910- debug ("access genes" )
911- df_genome_features = self .genome_features (region = region )
912- sample_query_options = sample_query_options or {}
913- df_genes = df_genome_features .query (
914- f"type == '{ self ._gff_gene_type } '" , ** sample_query_options
915- )
916-
917- debug ("setup intermediates" )
920+ # Set up intermediates.
918921 windows = []
919922 modes = []
920923 counts = []
921924
922- debug ( "iterate over genes" )
925+ # Iterate over genes.
923926 genes_iterator = self ._progress (
924927 df_genes .itertuples (),
925928 desc = "Compute modal gene copy number" ,
926929 total = len (df_genes ),
927930 )
928931 for gene in genes_iterator :
929- # locate windows overlapping the gene
932+ # Locate windows overlapping the gene.
930933 loc_gene_start = bisect_left (end , gene .start )
931934 loc_gene_stop = bisect_right (pos , gene .end )
932935 w = loc_gene_stop - loc_gene_start
933936 windows .append (w )
934937
935- # slice out copy number data for the given gene
938+ # Slice out copy number data for the given gene.
936939 cn_gene = cn [loc_gene_start :loc_gene_stop ]
937940
938- # compute the modes
941+ # Compute the modes.
939942 m , c = _cn_mode (cn_gene , vmax = 12 )
940943 modes .append (m )
941944 counts .append (c )
942945
943- debug ( "combine results" )
946+ # Combine results.
944947 windows = np .array (windows )
945948 modes = np .vstack (modes )
946949 counts = np .vstack (counts )
947950
948- debug ( "build dataset" )
951+ # Build dataset.
949952 ds_out = xr .Dataset (
950953 coords = {
951954 "gene_id" : (["genes" ], df_genes ["ID" ].values ),
0 commit comments