Skip to content

Commit 8dd775b

Browse files
Bugfixes, all tests pass. Need tests for new loci now.
1 parent 0755490 commit 8dd775b

13 files changed

Lines changed: 36 additions & 26 deletions

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ simple_comparison(
2929
interlocus2=True # doesn't matter for class I alleles
3030
)
3131
```
32-
In the `output.csv` file created in the current directory, you will find two rows: "In A\*68:02 but not in A\*68:01" and
33-
"In A*68:01 but not in A*68:02"
32+
In the `output.csv` file created in the current directory, you will find two rows: "In A*68:02 but not in
33+
A*68:01" and "In A*68:01 but not in A*68:02".
3434

3535
##### b. Batch mode
3636
Here is a minimal example with the file [Template.xlsx](https://github.com/MICS-Lab/pelc/raw/main/Template.xlsx)

pelc/_input_sanity_check.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@ def _equal_amount_of_unknown_alleles(
88
:param input_df_donor: Input pandas.DataFrame with the donor alleles
99
:param input_df_recipient: Input pandas.DataFrame with the recipient alleles
1010
11-
:return: True if the amount of unknown alleles is equal in both dataframes, False otherwise
11+
:return: True if the amount of unknown* alleles is equal in both dataframes, False otherwise
1212
13-
Also tests if the unknown alleles are either for both the alleles of the locus or for none of them
13+
Also tests if the unknown* alleles are either for both the alleles of the locus or for none of them.
14+
* unknown alleles are the ones that were inputted as "A*", "B*", "C*", "DRB1*", "DQA1*", "DQB1*", "DPA1*" and/or
15+
"DPB1*". Here we are not talking about the alleles that are unknown to the database.
1416
"""
1517

1618
unknown_alleles: list[str] = [

pelc/_open_epregistry_databases.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
def _open_epregistry_database(
66
path_to_csv: str,
7-
ghost_alleles: str | list[str],
7+
ghost_alleles: list[str],
88
no_eplets: bool = False
99
) -> pd.DataFrame:
1010
"""
@@ -17,7 +17,7 @@ def _open_epregistry_database(
1717
df_db: pd.DataFrame
1818

1919
file_name_no_extension: str = path_to_csv.split('.csv')[0]
20-
suffix: str = ghost_alleles if isinstance(ghost_alleles, str) else ghost_alleles[0]
20+
suffix: str = ghost_alleles[0]
2121
pickle_file_name: str = f"{file_name_no_extension}_{no_eplets}_{suffix}.pickle"
2222

2323
# replace * by _ in the pickle file name to avoid problems when * is in the file name

pelc/batch_eplet_comp.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ def compute_epletic_load(
3030
class_ii: bool = True,
3131
verifiedonly: bool = False,
3232
exclude: list[int | str] | None = None,
33-
interlocus2: bool = True
33+
interlocus2: bool = True,
34+
simple_comparison: bool = False,
3435
) -> None | pd.DataFrame | pd.Series | tuple[pd.DataFrame, pd.DataFrame]:
3536
"""
3637
:param input_df_donor: Input Donors Typing (pandas.DataFrame)
@@ -42,6 +43,9 @@ def compute_epletic_load(
4243
:param verifiedonly: How should the epletic charge be computed? Verified eplets only? Or all eplets?
4344
:param exclude: list of indices to exclude
4445
:param interlocus2: whether or not to take into account interlocus eplets for HLA of class II
46+
:param simple_comparison: whether or not it's a simple allele-to-allele comparison, in which case the function
47+
_equal_amount_of_unknown_alleles is not called (checks are already made in simple_comparison.py and would not
48+
pass here given the column names).
4549
4650
:return: None (if output_type is not None, the result will be saved on disk as a csv), or pandas.DataFrame
4751
(OutputType.COUNT_AND_DETAILS) or pandas.Series (OutputType.COUNT, or OutputType.ONLY_DETAILS) or
@@ -62,12 +66,15 @@ def compute_epletic_load(
6266
)
6367
return None
6468

65-
if not _equal_amount_of_unknown_alleles(input_df_donor, input_df_recipient):
66-
logging.error(
67-
"Either the number of unknown alleles is different for one donor and recipient pair or one allele is "
68-
"uknown whilst the other of the same locus isn't."
69-
)
70-
return None
69+
if not simple_comparison:
70+
if not _equal_amount_of_unknown_alleles(input_df_donor, input_df_recipient):
71+
# unknown alleles are the ones that were inputted as "A*", "B*", "C*", "DRB1*", "DRB345*", "DQA1*", "DQB1*",
72+
# "DPA1*" and/or "DPB1*". Here we are not talking about the alleles that are unknown to the database.
73+
logging.error(
74+
"Either the number of unknown alleles is different for one donor and recipient pair or one allele is "
75+
"uknown whilst the other of the same locus isn't."
76+
)
77+
return None
7178

7279

7380
df_a: pd.DataFrame
@@ -83,17 +90,17 @@ def compute_epletic_load(
8390

8491
this_file_directory_path: str = os.path.dirname(os.path.realpath(__file__))
8592
if class_i and class_ii:
86-
df_a = _open_epregistry_database(f"{this_file_directory_path}/data/A.csv", "A*")
87-
df_b = _open_epregistry_database(f"{this_file_directory_path}/data/B.csv", "B*")
88-
df_c = _open_epregistry_database(f"{this_file_directory_path}/data/C.csv", "C*")
93+
df_a = _open_epregistry_database(f"{this_file_directory_path}/data/A.csv", ["A*"])
94+
df_b = _open_epregistry_database(f"{this_file_directory_path}/data/B.csv", ["B*"])
95+
df_c = _open_epregistry_database(f"{this_file_directory_path}/data/C.csv", ["C*"])
8996
df_dr = _open_epregistry_database(f"{this_file_directory_path}/data/DR.csv", ["DRB1*", "DRB345*"])
9097
df_dq = _open_epregistry_database(f"{this_file_directory_path}/data/DQ.csv", ["DQB1*", "DQA1*"])
9198
df_dp = _open_epregistry_database(f"{this_file_directory_path}/data/DP.csv", ["DPB1*", "DPA1*"])
9299
else:
93100
if class_i:
94-
df_a = _open_epregistry_database(f"{this_file_directory_path}/data/A.csv", "A*")
95-
df_b = _open_epregistry_database(f"{this_file_directory_path}/data/B.csv", "B*")
96-
df_c = _open_epregistry_database(f"{this_file_directory_path}/data/C.csv", "C*")
101+
df_a = _open_epregistry_database(f"{this_file_directory_path}/data/A.csv", ["A*"])
102+
df_b = _open_epregistry_database(f"{this_file_directory_path}/data/B.csv", ["B*"])
103+
df_c = _open_epregistry_database(f"{this_file_directory_path}/data/C.csv", ["C*"])
97104
# we don't want to load .csv files if they are not needed
98105
df_dr = _open_epregistry_database(
99106
f"{this_file_directory_path}/data/DR.csv", ["DRB1*", "DRB345*"], no_eplets=True
@@ -110,13 +117,13 @@ def compute_epletic_load(
110117
df_dp = _open_epregistry_database(f"{this_file_directory_path}/data/DP.csv", ["DPB1*", "DPA1*"])
111118
# we don't want to load .csv files if they are not needed
112119
df_a = _open_epregistry_database(
113-
f"{this_file_directory_path}/data/A.csv", "A*", no_eplets=True
120+
f"{this_file_directory_path}/data/A.csv", ["A*"], no_eplets=True
114121
)
115122
df_b = _open_epregistry_database(
116-
f"{this_file_directory_path}/data/B.csv", "B*", no_eplets=True
123+
f"{this_file_directory_path}/data/B.csv", ["B*"], no_eplets=True
117124
)
118125
df_c = _open_epregistry_database(
119-
f"{this_file_directory_path}/data/C.csv", "C*", no_eplets=True
126+
f"{this_file_directory_path}/data/C.csv", ["C*"], no_eplets=True
120127
)
121128

122129

pelc/data/A_False_A_.pickle

-390 Bytes
Binary file not shown.

pelc/data/A_True_A_.pickle

-3 Bytes
Binary file not shown.

pelc/data/B_False_B_.pickle

-345 Bytes
Binary file not shown.

pelc/data/B_True_B_.pickle

-3 Bytes
Binary file not shown.

pelc/data/C_False_C_.pickle

-300 Bytes
Binary file not shown.

pelc/data/C_True_C_.pickle

-3 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)