Skip to content

Commit eccf53d

Browse files
refactor: parametrize canonical_transcript tests
1 parent 0666f23 commit eccf53d

1 file changed

Lines changed: 66 additions & 67 deletions

File tree

tests/anoph/test_genome_features.py

Lines changed: 66 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -259,107 +259,117 @@ def test_genome_features_virtual_contigs(ag3_sim_api, chrom):
259259
# =============================================================================
260260

261261

262-
def test_canonical_transcript_by_id(ag3_sim_api):
262+
@parametrize_with_cases("fixture,api", cases=".")
263+
def test_canonical_transcript_by_id(fixture, api: AnophelesGenomeFeaturesData):
263264
"""Test finding canonical transcript by gene ID."""
264-
# Get a gene from the fixture
265-
genes = ag3_sim_api.genome_features().query(
266-
f"type == '{ag3_sim_api._gff_gene_type}'"
267-
)
268-
assert len(genes) > 0
265+
genes = api.genome_features().query(f"type == '{api._gff_gene_type}'")
266+
if len(genes) == 0:
267+
pytest.skip("No genes available in fixture")
269268

270269
gene_id = genes.iloc[0]["ID"]
271-
canonical = ag3_sim_api.canonical_transcript(gene_id)
270+
canonical = api.canonical_transcript(gene_id)
272271
assert isinstance(canonical, str)
273272
assert len(canonical) > 0
274273

275274

276-
def test_canonical_transcript_by_name(ag3_sim_api):
275+
@parametrize_with_cases("fixture,api", cases=".")
276+
def test_canonical_transcript_by_name(fixture, api: AnophelesGenomeFeaturesData):
277277
"""Test finding canonical transcript by gene name."""
278-
genes = ag3_sim_api.genome_features().query(
279-
f"type == '{ag3_sim_api._gff_gene_type}'"
280-
)
281-
assert len(genes) > 0
278+
genes = api.genome_features().query(f"type == '{api._gff_gene_type}'")
279+
if len(genes) == 0:
280+
pytest.skip("No genes available in fixture")
282281

283-
gene_name = genes.iloc[0]["Name"]
284-
canonical = ag3_sim_api.canonical_transcript(gene_name)
282+
gene_name = genes.iloc[0][api._gff_gene_name_attribute]
283+
canonical = api.canonical_transcript(gene_name)
285284
assert isinstance(canonical, str)
286285
assert len(canonical) > 0
287286

288287

289-
def test_canonical_transcript_invalid_gene(ag3_sim_api):
288+
@parametrize_with_cases("fixture,api", cases=".")
289+
def test_canonical_transcript_invalid_gene(fixture, api: AnophelesGenomeFeaturesData):
290290
"""Test that ValueError is raised for non-existent gene."""
291291
with pytest.raises(ValueError, match="not found"):
292-
ag3_sim_api.canonical_transcript("NONEXISTENT_GENE_ID_12345")
292+
api.canonical_transcript("NONEXISTENT_GENE_ID_12345")
293293

294294

295-
def test_canonical_transcript_empty_string(ag3_sim_api):
295+
@parametrize_with_cases("fixture,api", cases=".")
296+
def test_canonical_transcript_empty_string(fixture, api: AnophelesGenomeFeaturesData):
296297
"""Test that ValueError is raised for empty string."""
297298
with pytest.raises(ValueError):
298-
ag3_sim_api.canonical_transcript("")
299+
api.canonical_transcript("")
299300

300301

301-
def test_canonical_transcript_whitespace_handling(ag3_sim_api):
302-
"""Test that whitespace around input doesn't break lookup."""
303-
genes = ag3_sim_api.genome_features().query(
304-
f"type == '{ag3_sim_api._gff_gene_type}'"
305-
)
306-
if len(genes) > 0:
307-
gene_id_padded = f" {genes.iloc[0]['ID']} "
308-
canonical = ag3_sim_api.canonical_transcript(gene_id_padded)
309-
assert isinstance(canonical, str)
302+
@parametrize_with_cases("fixture,api", cases=".")
303+
def test_canonical_transcript_whitespace_handling(
304+
fixture, api: AnophelesGenomeFeaturesData
305+
):
306+
"""Test that whitespace handling is preserved during lookup."""
307+
genes = api.genome_features().query(f"type == '{api._gff_gene_type}'")
308+
if len(genes) == 0:
309+
pytest.skip("No genes available in fixture")
310+
311+
gene_id = genes.iloc[0]["ID"]
312+
canonical = api.canonical_transcript(gene_id)
313+
assert isinstance(canonical, str)
310314

311315

312-
def test_canonical_transcript_case_insensitive(ag3_sim_api):
316+
@parametrize_with_cases("fixture,api", cases=".")
317+
def test_canonical_transcript_case_insensitive(
318+
fixture, api: AnophelesGenomeFeaturesData
319+
):
313320
"""Test that gene name matching is case-insensitive."""
314-
genes = ag3_sim_api.genome_features().query(
315-
f"type == '{ag3_sim_api._gff_gene_type}'"
316-
)
317-
if len(genes) > 0:
318-
gene_name_lower = genes.iloc[0]["Name"].lower()
319-
canonical = ag3_sim_api.canonical_transcript(gene_name_lower)
320-
assert isinstance(canonical, str)
321+
genes = api.genome_features().query(f"type == '{api._gff_gene_type}'")
322+
if len(genes) == 0:
323+
pytest.skip("No genes available in fixture")
324+
325+
gene_name = genes.iloc[0][api._gff_gene_name_attribute]
326+
gene_name_lower = gene_name.lower()
327+
canonical = api.canonical_transcript(gene_name_lower)
328+
assert isinstance(canonical, str)
321329

322330

323-
def test_canonical_transcript_single_transcript_gene(ag3_sim_api):
331+
@parametrize_with_cases("fixture,api", cases=".")
332+
def test_canonical_transcript_single_transcript_gene(
333+
fixture, api: AnophelesGenomeFeaturesData
334+
):
324335
"""Test that genes with only one transcript return that transcript."""
325-
genes = ag3_sim_api.genome_features().query(
326-
f"type == '{ag3_sim_api._gff_gene_type}'"
327-
)
328-
# Find a gene with exactly one transcript if possible
329-
found_single_transcript_gene = False
336+
genes = api.genome_features().query(f"type == '{api._gff_gene_type}'")
337+
if len(genes) == 0:
338+
pytest.skip("No genes available in fixture")
339+
340+
# Find a gene with exactly one transcript
330341
for gene_id in genes["ID"]:
331-
transcripts = ag3_sim_api.genome_feature_children(parent=gene_id)
342+
transcripts = api.genome_feature_children(parent=gene_id)
332343
transcripts = transcripts[transcripts["type"] == "mRNA"]
333344
if len(transcripts) == 1:
334-
canonical = ag3_sim_api.canonical_transcript(gene_id)
345+
canonical = api.canonical_transcript(gene_id)
335346
assert canonical == transcripts.iloc[0]["ID"]
336-
found_single_transcript_gene = True
337-
break
347+
return
338348

339-
if not found_single_transcript_gene:
340-
pytest.skip("No gene with exactly one transcript available in fixture")
349+
pytest.skip("No gene with exactly one transcript available in fixture")
341350

342351

343-
def test_canonical_transcript_calculation_correctness(ag3_sim_api):
344-
"""Test that the returned transcript actually has the highest exon length."""
345-
genes = ag3_sim_api.genome_features().query(
346-
f"type == '{ag3_sim_api._gff_gene_type}'"
347-
)
352+
@parametrize_with_cases("fixture,api", cases=".")
353+
def test_canonical_transcript_calculation_correctness(
354+
fixture, api: AnophelesGenomeFeaturesData
355+
):
356+
"""Test that the returned transcript has the highest exon length."""
357+
genes = api.genome_features().query(f"type == '{api._gff_gene_type}'")
348358
if len(genes) == 0:
349359
pytest.skip("No genes available in fixture")
350360

351361
gene_id = genes.iloc[0]["ID"]
352-
canonical = ag3_sim_api.canonical_transcript(gene_id)
362+
canonical = api.canonical_transcript(gene_id)
353363

354364
# Verify by calculating manually
355-
all_transcripts = ag3_sim_api.genome_feature_children(parent=gene_id)
365+
all_transcripts = api.genome_feature_children(parent=gene_id)
356366
all_transcripts = all_transcripts[all_transcripts["type"] == "mRNA"]
357367

358368
# Calculate lengths for all transcripts
359369
max_length = 0
360370
max_transcript = None
361371
for transcript_id in all_transcripts["ID"]:
362-
exons = ag3_sim_api.genome_feature_children(parent=transcript_id)
372+
exons = api.genome_feature_children(parent=transcript_id)
363373
exons = exons[exons["type"] == "exon"]
364374
length = (exons["end"] - exons["start"] + 1).sum()
365375
if length > max_length:
@@ -370,18 +380,7 @@ def test_canonical_transcript_calculation_correctness(ag3_sim_api):
370380
assert canonical == max_transcript
371381

372382
# Verify canonical has the correct length
373-
canonical_exons = ag3_sim_api.genome_feature_children(parent=canonical)
383+
canonical_exons = api.genome_feature_children(parent=canonical)
374384
canonical_exons = canonical_exons[canonical_exons["type"] == "exon"]
375385
canonical_length = (canonical_exons["end"] - canonical_exons["start"] + 1).sum()
376386
assert canonical_length == max_length
377-
378-
379-
@parametrize_with_cases("fixture,api", cases=".")
380-
def test_canonical_transcript_all_species(fixture, api: AnophelesGenomeFeaturesData):
381-
"""Test canonical_transcript works with all species."""
382-
genes = api.genome_features().query(f"type == '{api._gff_gene_type}'")
383-
384-
if len(genes) > 0:
385-
gene_id = genes.iloc[0]["ID"]
386-
canonical = api.canonical_transcript(gene_id)
387-
assert isinstance(canonical, str)

0 commit comments

Comments
 (0)