Skip to content

Commit 0b58f9e

Browse files
authored
Merge pull request #1158 from khushthecoder/GH1113-plink-filename-hash
Fix #1113: Hash content-affecting selection params into PLINK export …
2 parents 87432a4 + 17dfc31 commit 0b58f9e

2 files changed

Lines changed: 17 additions & 2 deletions

File tree

malariagen_data/anoph/plink_params.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,14 @@
1616
A string indicating the desired output file location.
1717
""",
1818
]
19+
20+
out: TypeAlias = Annotated[
21+
str,
22+
"""
23+
A string specifying the output file path prefix. The PLINK output files
24+
will be written as ``{output_dir}/{out}.bed``, ``{output_dir}/{out}.bim``,
25+
and ``{output_dir}/{out}.fam``. If not provided, a default prefix is
26+
generated from the SNP selection parameters (region, n_snps,
27+
min_minor_ac, max_missing_an, thin_offset).
28+
""",
29+
]

malariagen_data/anoph/to_plink.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,18 @@ def biallelic_snps_to_plink(
6969
random_seed: base_params.random_seed = 42,
7070
inline_array: base_params.inline_array = base_params.inline_array_default,
7171
chunks: base_params.chunks = base_params.native_chunks,
72+
out: Optional[plink_params.out] = None,
7273
):
7374
# Check that either sample_query xor sample_indices are provided.
7475
base_params._validate_sample_selection_params(
7576
sample_query=sample_query, sample_indices=sample_indices
7677
)
7778

78-
# Define output files
79-
plink_file_path = f"{output_dir}/{region}.{n_snps}.{min_minor_ac}.{max_missing_an}.{thin_offset}"
79+
# Use user-provided prefix or fall back to auto-generated default
80+
if out is not None:
81+
plink_file_path = f"{output_dir}/{out}"
82+
else:
83+
plink_file_path = f"{output_dir}/{region}.{n_snps}.{min_minor_ac}.{max_missing_an}.{thin_offset}"
8084

8185
bed_file_path = f"{plink_file_path}.bed"
8286

0 commit comments

Comments
 (0)