Skip to content

Commit 2d0c75f

Browse files
author
suhr25
committed
fix: restore files incorrectly modified during master merge conflict resolution
1 parent (16252c1); commit 2d0c75f

File tree

15 files changed: 350 additions and 68 deletions.

malariagen_data/anoph/aim_data.py

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -254,14 +254,24 @@ def plot_aim_heatmap(
254254
gn = np.take(gn, ix_sorted, axis=1)
255255
samples = np.take(samples, ix_sorted, axis=0)
256256

257+
species = aims.split("_vs_")
258+
257259
# Set up colors for genotypes
258260
if palette is None:
259-
assert self._aim_palettes is not None
261+
if self._aim_palettes is None:
262+
raise RuntimeError(
263+
"AIM palettes are not available for this data resource. "
264+
"Please provide the 'palette' parameter explicitly (4 colors)."
265+
)
260266
palette = self._aim_palettes[aims]
261-
assert len(palette) == 4
267+
if len(palette) != 4:
268+
raise RuntimeError(
269+
"Expected AIM palette to have 4 colors "
270+
f"(missing, {species[0]}/{species[0]}, {species[0]}/{species[1]}, {species[1]}/{species[1]}), "
271+
f"got {len(palette)}"
272+
)
262273
# Expect 4 colors, in the order:
263274
# missing, hom taxon 1, het, hom taxon 2
264-
species = aims.split("_vs_")
265275

266276
# Create subplots.
267277
fig = go_make_subplots(

malariagen_data/anoph/cnv_frq.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -597,7 +597,11 @@ def _gene_cnv_frequencies_advanced(
597597
if nobs_mode == "called":
598598
nobs[:, cohort_index] = np.repeat(cohort_n_called, 2)
599599
else:
600-
assert nobs_mode == "fixed"
600+
if nobs_mode != "fixed":
601+
raise RuntimeError(
602+
f"Internal error: expected nobs_mode='fixed', got {nobs_mode!r}. "
603+
"This should not happen; please open a GitHub issue."
604+
)
601605
nobs[:, cohort_index] = cohort.size
602606

603607
debug("compute frequency")

malariagen_data/anoph/frq_base.py

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -417,14 +417,6 @@ def plot_frequencies_heatmap(
417417
`aa_allele_frequencies_advanced()` or
418418
`gene_cnv_frequencies_advanced()`.
419419
""",
420-
taxa="""
421-
Taxon or list of taxa to include in the plot. If None,
422-
all taxa are shown.
423-
""",
424-
areas="""
425-
Area or list of areas to include in the plot. If None,
426-
all areas are shown.
427-
""",
428420
kwargs="Passed through to `px.line()`.",
429421
),
430422
returns="""

malariagen_data/anoph/genome_features.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,11 @@ def _genome_features_for_contig(self, *, contig: str, attributes: Tuple[str, ...
110110

111111
# Handle normal contigs in the reference genome.
112112
else:
113-
assert contig in self.contigs
113+
if contig not in self.contigs:
114+
raise ValueError(
115+
f"Contig {contig!r} not found. "
116+
f"Available contigs: {self.contigs}"
117+
)
114118
df = self._genome_features(attributes=attributes)
115119

116120
# Apply contig query.
@@ -561,7 +565,8 @@ def plot_genes(
561565

562566
# Increase the figure height by a certain factor, to accommodate labels.
563567
height_increase_factor = 1.3
564-
assert fig.height is not None
568+
if fig.height is None:
569+
raise RuntimeError("Figure height is unexpectedly None")
565570
fig.height = int(fig.height * height_increase_factor)
566571

567572
# Get the original y_range.

malariagen_data/anoph/genome_sequence.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,11 @@ def _genome_sequence_for_contig(self, *, contig, inline_array, chunks):
8686

8787
# Handle normal contigs in the reference genome.
8888
else:
89-
assert contig in self.contigs
89+
if contig not in self.contigs:
90+
raise ValueError(
91+
f"Contig {contig!r} not found. "
92+
f"Available contigs: {self.contigs}"
93+
)
9094
root = self.open_genome()
9195
z = root[contig]
9296
d = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)

malariagen_data/anoph/h1x.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -403,8 +403,17 @@ def _moving_h1x(ha, hb, size, start=0, stop=None, step=None):
403403
H1X values (sum of squares of joint haplotype frequencies).
404404
"""
405405

406-
assert ha.ndim == hb.ndim == 2
407-
assert ha.shape[0] == hb.shape[0]
406+
if ha.ndim != 2 or hb.ndim != 2:
407+
raise ValueError(
408+
"Expected both haplotype arrays to be 2-dimensional "
409+
"(n_variants, n_haplotypes), "
410+
f"got ndim=({ha.ndim}, {hb.ndim})"
411+
)
412+
if ha.shape[0] != hb.shape[0]:
413+
raise ValueError(
414+
"Expected both haplotype arrays to have the same number of variants "
415+
f"(axis 0), got ({ha.shape[0]}, {hb.shape[0]})"
416+
)
408417

409418
# Construct moving windows.
410419
windows = allel.index_windows(ha, size, start, stop, step)

malariagen_data/anoph/hap_data.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,11 @@ def phasing_analysis_ids(self) -> Tuple[str, ...]:
5858
def _prep_phasing_analysis_param(self, *, analysis: hap_params.analysis) -> str:
5959
if analysis == base_params.DEFAULT:
6060
# Use whatever is the default phasing analysis for this data resource.
61-
assert self._default_phasing_analysis is not None
61+
if self._default_phasing_analysis is None:
62+
raise RuntimeError(
63+
"No default phasing analysis configured. "
64+
"Please specify the 'analysis' parameter explicitly."
65+
)
6266
return self._default_phasing_analysis
6367
elif analysis in self.phasing_analysis_ids:
6468
return analysis
@@ -118,7 +122,11 @@ def _haplotype_sites_for_contig(
118122

119123
# Handle contig in the reference genome.
120124
else:
121-
assert contig in self.contigs
125+
if contig not in self.contigs:
126+
raise ValueError(
127+
f"Contig {contig!r} not found. "
128+
f"Available contigs: {self.contigs}"
129+
)
122130
root = self.open_haplotype_sites(analysis=analysis)
123131
z = root[f"{contig}/variants/{field}"]
124132
ret = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
@@ -251,7 +259,11 @@ def _haplotypes_for_contig(
251259

252260
# Handle contig in the reference genome.
253261
else:
254-
assert contig in self.contigs
262+
if contig not in self.contigs:
263+
raise ValueError(
264+
f"Contig {contig!r} not found. "
265+
f"Available contigs: {self.contigs}"
266+
)
255267

256268
# Open haplotypes zarr.
257269
root = self.open_haplotypes(sample_set=sample_set, analysis=analysis)

malariagen_data/anoph/hap_frq.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,11 @@ def haplotypes_frequencies(
100100
hap_dict = {k: 0 for k in f_all.keys()}
101101

102102
n_samples = np.count_nonzero(loc_coh)
103-
assert n_samples >= min_cohort_size
103+
if n_samples < min_cohort_size:
104+
raise ValueError(
105+
f"Not enough samples ({n_samples}) for minimum "
106+
f"cohort size ({min_cohort_size})"
107+
)
104108
gt_coh = gt.compress(loc_coh, axis=1)
105109
gt_hap = gt_coh.to_haplotypes()
106110
f, _, _ = _haplotype_frequencies(gt_hap)
@@ -224,7 +228,11 @@ def haplotypes_frequencies_advanced(
224228
hap_freq = {k: 0 for k in f_all.keys()}
225229
hap_count = {k: 0 for k in f_all.keys()}
226230
hap_nob = {k: 2 * n_samples for k in f_all.keys()}
227-
assert n_samples >= min_cohort_size
231+
if n_samples < min_cohort_size:
232+
raise ValueError(
233+
f"Not enough samples ({n_samples}) for minimum "
234+
f"cohort size ({min_cohort_size})"
235+
)
228236
sample_indices = group_samples_by_cohort.indices[cohort_key]
229237
gt_coh = gt.take(sample_indices, axis=1)
230238
gt_hap = gt_coh.to_haplotypes()

malariagen_data/anoph/sample_metadata.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -594,8 +594,16 @@ def _aim_analysis(self):
594594
def _parse_aim_metadata(
595595
self, sample_set: str, data: Union[bytes, Exception]
596596
) -> pd.DataFrame:
597-
assert self._aim_metadata_columns is not None
598-
assert self._aim_metadata_dtype is not None
597+
if self._aim_metadata_columns is None:
598+
raise RuntimeError(
599+
"Internal error: AIM metadata columns are not configured. "
600+
"This should not happen; please open a GitHub issue."
601+
)
602+
if self._aim_metadata_dtype is None:
603+
raise RuntimeError(
604+
"Internal error: AIM metadata dtypes are not configured. "
605+
"This should not happen; please open a GitHub issue."
606+
)
599607
if isinstance(data, bytes):
600608
# Parse CSV data but don't apply the dtype yet.
601609
df = pd.read_csv(io.BytesIO(data), na_values="")
@@ -971,6 +979,8 @@ def plot_samples_interactive_map(
971979
fill_value=0,
972980
)
973981

982+
taxa = df_pivot.columns.dropna().sort_values().unique()
983+
974984
# Append aggregations to pivot.
975985
df_location_aggs = df_samples.groupby(location_composite_key).agg(
976986
{
@@ -1015,7 +1025,6 @@ def plot_samples_interactive_map(
10151025
samples_map.layout.width = width
10161026

10171027
# Add markers.
1018-
count_factors = df_samples[count_by].dropna().sort_values().unique()
10191028
for _, row in df_pivot.reset_index().iterrows():
10201029
title = (
10211030
f"Location: {row.location} ({row.latitude:.3f}, {row.longitude:.3f})"
@@ -1028,13 +1037,13 @@ def plot_samples_interactive_map(
10281037
title += f"\nContributors: {row.contributor}"
10291038
title += "\nNo. specimens: "
10301039
all_n = 0
1031-
for factor in count_factors:
1040+
for taxon in taxa:
10321041
# Get the number of samples in this taxon
1033-
n = row[factor]
1042+
n = int(row[taxon])
10341043
# Count the number of samples in all taxa
10351044
all_n += n
10361045
if n > 0:
1037-
title += f"{n} {factor}; "
1046+
title += f"{n} {taxon}; "
10381047
# Only show a marker when there are enough samples
10391048
if all_n >= min_samples:
10401049
marker = ipyleaflet.Marker(

0 commit comments

Comments
 (0)