@@ -252,22 +252,93 @@ def _create_phenotype_dataset(
252252 ds = xr .Dataset (data_vars , coords = coords )
253253
254254 if variant_data is not None :
255- if "samples" not in variant_data .dims :
255+ # Find the sample dimension and coordinate
256+ sample_dim = None
257+ sample_coord = None
258+
259+ # Check for sample dimension
260+ for dim_name in ["samples" , "sample_id" ]:
261+ if dim_name in variant_data .dims :
262+ sample_dim = dim_name
263+ break
264+
265+ if sample_dim is None :
256266 warnings .warn (
257- "Variant data does not contain 'samples' dimension, cannot merge."
267+ f"Variant data does not contain 'samples' or 'sample_id' dimension. "
268+ f"Available dimensions: { list (variant_data .dims )} "
258269 )
259- else :
260- common_samples = list (
261- set (sample_ids ) & set (variant_data .coords ["samples" ].values )
270+ return ds
271+
272+ for coord_name in ["sample_id" , "samples" ]:
273+ if coord_name in variant_data .coords :
274+ sample_coord = coord_name
275+ break
276+
277+ if sample_coord is None :
278+ warnings .warn (
279+ f"Variant data does not contain 'samples' or 'sample_id' coordinate. "
280+ f"Available coordinates: { list (variant_data .coords .keys ())} "
262281 )
263- if not common_samples :
264- warnings .warn (
265- "No common samples found between phenotype and variant data"
282+ return ds
283+
284+ # Get variant sample IDs - use the correct coordinate name
285+ variant_sample_ids = variant_data .coords [sample_coord ].values
286+
287+ # Find common samples
288+ common_samples = list (set (sample_ids ) & set (variant_sample_ids ))
289+ print (f"DEBUG: Found { len (common_samples )} common samples" )
290+
291+ if not common_samples :
292+ warnings .warn (
293+ "No common samples found between phenotype and variant data"
294+ )
295+ return ds
296+ else :
297+ # Select common samples from phenotype dataset
298+ ds_common = ds .sel (samples = common_samples )
299+
300+ # Select common samples from variant dataset
301+ try :
302+ # NEW APPROACH: Use isel with boolean indexing instead of sel
303+ # This avoids the "no index found" error
304+
305+ sample_mask = pd .Series (variant_sample_ids ).isin (common_samples )
306+ sample_indices = sample_mask [sample_mask ].index .tolist ()
307+ variant_data_common = variant_data .isel (
308+ {sample_dim : sample_indices }
309+ )
310+
311+ # Rename dimension to "samples" if it's not already
312+ if sample_dim != "samples" :
313+ variant_data_common = variant_data_common .rename (
314+ {sample_dim : "samples" }
315+ )
316+
317+ if (
318+ sample_coord != "samples"
319+ and sample_coord in variant_data_common .coords
320+ ):
321+ variant_data_common = variant_data_common .rename (
322+ {sample_coord : "samples" }
323+ )
324+
325+ variant_samples_selected = variant_data_common .coords [
326+ "samples"
327+ ].values
328+
329+ ds_common_reordered = ds_common .sel (
330+ samples = variant_samples_selected
266331 )
267- else :
268- ds_common = ds .sel (samples = common_samples )
269- variant_data_common = variant_data .sel (samples = common_samples )
270- ds = xr .merge ([ds_common , variant_data_common ])
332+
333+ # Merge the datasets
334+ ds = xr .merge ([ds_common_reordered , variant_data_common ])
335+
336+ except Exception as e :
337+ warnings .warn (f"Error selecting/merging variant data: { e } " )
338+ import traceback
339+
340+ traceback .print_exc ()
341+ return ds
271342
272343 return ds
273344
@@ -466,6 +537,7 @@ def phenotypes(
466537 hap_error_message = None
467538
468539 sample_sets_norm = self ._prep_sample_sets_param (sample_sets = sample_sets )
540+
469541 try :
470542 if hasattr (self , "snp_calls" ) and callable (self .snp_calls ):
471543 variant_data = self .snp_calls (
@@ -519,16 +591,23 @@ def phenotypes(
519591 )
520592
521593 # 3. Merge into an xarray Dataset
522- ds = self ._create_phenotype_dataset (df_phenotypes , variant_data )
523- return ds
594+ try :
595+ ds = self ._create_phenotype_dataset (df_phenotypes , variant_data )
596+ return ds
597+ except Exception :
598+ import traceback
599+
600+ traceback .print_exc ()
601+ # Return phenotype-only dataset as fallback
602+ return self ._create_phenotype_dataset (df_phenotypes , None )
524603
525604 def phenotype_sample_sets (self ) -> List [str ]:
526605 """
527606 Get list of sample sets that have phenotypic data available.
528607 """
529- all_sample_sets = self .sample_sets
608+ all_sample_sets = self .sample_sets ()[ "sample_set" ]. tolist ()
530609 phenotype_sample_sets = []
531- base_phenotype_path = f"{ self ._url } / v3.2/phenotypes/all"
610+ base_phenotype_path = f"{ self ._url } v3.2/phenotypes/all"
532611 for sample_set in all_sample_sets :
533612 try :
534613 phenotype_path = f"{ base_phenotype_path } /{ sample_set } /phenotypes.csv"
0 commit comments