Skip to content

Commit df207c8

Browse files
committed
WIP: dev support for surveillance_use_only, unrestricted_use_only params
1 parent 7fce9bb commit df207c8

1 file changed

Lines changed: 18 additions & 3 deletions

File tree

malariagen_data/anoph/pca.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -142,14 +142,29 @@ def pca(
142142
# Create a new DataFrame containing the PCA coords data.
143143
df_pca = pd.DataFrame(coords, index=samples)
144144

145-
# Name the DataFrame's columns PC1, PC2, etc.
145+
# Name the index of the PCA data and set it to a string type.
146+
df_pca.index.name = "sample_id"
147+
# df_pca.index = df_pca.index.astype(str)
148+
149+
# Name the DataFrame's columns as PC1, PC2, etc.
146150
df_pca.columns = pd.Index([f"PC{i+1}" for i in range(coords.shape[1])])
147151

152+
# Load the sample metadata.
153+
df_samples = self.sample_metadata(
154+
sample_sets=prepared_sample_sets,
155+
)
156+
157+
# Set the index of the sample metadata.
158+
df_samples.set_index("sample_id", inplace=True)
159+
160+
# Join the relevant sample metadata.
161+
df_pca = df_pca.join(df_samples, how="left", on="sample_id")
162+
148163
# Add a column to indicate which samples were included in fitting.
149164
df_pca["pca_fit"] = loc_keep_fit
150165

151-
# Name the index.
152-
df_pca.index.name = "sample_id"
166+
# Keep "sample_id" as a column, so that it can be specified as a `hover_name` in `plot_pca_coords`, etc.
167+
df_pca.reset_index(inplace=True)
153168

154169
return df_pca, evr
155170

0 commit comments

Comments
 (0)