@@ -210,23 +210,24 @@ def v3_wild(self):
210210 3.0 release, excluding the lab crosses."""
211211 return [
212212 x
213- for x in self .sample_sets (release = "3.0" )["sample_set" ].tolist ()
213+ for x in self ._available_sample_sets (release = "3.0" )["sample_set" ].tolist ()
214214 if x != "AG1000G-X"
215215 ]
216216
217217 def __repr__ (self ):
218218 text = (
219219 f"<MalariaGEN Ag3 API client>\n "
220- f"Storage URL : { self ._url } \n "
221- f"Data releases available : { ', ' .join (self .releases )} \n "
222- f"Results cache : { self ._results_cache } \n "
223- f"Cohorts analysis : { self ._cohorts_analysis } \n "
224- f"AIM analysis : { self ._aim_analysis } \n "
225- f"Site filters analysis : { self ._site_filters_analysis } \n "
226- f"Software version : malariagen_data { malariagen_data .__version__ } \n "
227- f"Client location : { self .client_location } \n "
220+ f"Storage URL : { self ._url } \n "
221+ f"Data releases available : { ', ' .join (self ._available_releases )} \n "
222+ f"Results cache : { self ._results_cache } \n "
223+ f"Cohorts analysis : { self ._cohorts_analysis } \n "
224+ f"AIM analysis : { self ._aim_analysis } \n "
225+ f"Site filters analysis : { self ._site_filters_analysis } \n "
226+ f"Software version : malariagen_data { malariagen_data .__version__ } \n "
227+ f"Client location : { self .client_location } \n "
228228 f"Data filtered to unrestricted use only: { self ._unrestricted_use_only } \n "
229229 f"Data filtered to surveillance use only: { self ._surveillance_use_only } \n "
230+ f"Relevant data releases : { ', ' .join (self .releases )} \n "
230231 f"---\n "
231232 f"Please note that data are subject to terms of use,\n "
232233 f"for more information see https://www.malariagen.net/data\n "
@@ -260,7 +261,7 @@ def _repr_html_(self):
260261 <th style="text-align: left">
261262 Data releases available
262263 </th>
263- <td>{ ', ' .join (self .releases )} </td>
264+ <td>{ ', ' .join (self ._available_releases )} </td>
264265 </tr>
265266 <tr>
266267 <th style="text-align: left">
@@ -310,6 +311,12 @@ def _repr_html_(self):
310311 </th>
311312 <td>{ self ._surveillance_use_only } </td>
312313 </tr>
314+ <tr>
315+ <th style="text-align: left">
316+ Relevant data releases
317+ </th>
318+ <td>{ ', ' .join (self .releases )} </td>
319+ </tr>
313320 </tbody>
314321 </table>
315322 """
@@ -357,6 +364,34 @@ def cross_metadata(self):
357364 debug ("drop 'phenotype' column, not used" )
358365 df .drop ("phenotype" , axis = "columns" , inplace = True )
359366
367+ # Identify the crosses sample set.
368+ # Note: this sample set identifier is also hard-coded in `v3_wild()`.
369+ crosses_sample_set = "AG1000G-X"
370+
371+ # If `_unrestricted_use_only` is `True`, then only return data if the crosses sample set has `unrestricted_use` set to `True`.
372+ if (
373+ self ._unrestricted_use_only
374+ and not self ._sample_set_has_unrestricted_use (
375+ sample_set = crosses_sample_set
376+ )
377+ ):
378+ # Remove all the data from the DataFrame and reset its index.
379+ df = df .iloc [0 :0 ].reset_index (drop = True )
380+
381+ # If `_surveillance_use_only` is `True`, then only return samples that have `is_surveillance` set to `True`.
382+ if self ._surveillance_use_only :
383+ crosses_surveillance_flags_df = self ._surveillance_flags (
384+ sample_sets = [crosses_sample_set ]
385+ )
386+ df = df .merge (
387+ crosses_surveillance_flags_df [["sample_id" , "is_surveillance" ]],
388+ on = "sample_id" ,
389+ how = "left" ,
390+ )
391+ df = df [df ["is_surveillance" ]]
392+ df = df .drop (columns = ["is_surveillance" ])
393+
394+ # Cache the cross metadata.
360395 self ._cache_cross_metadata = df
361396
362397 return self ._cache_cross_metadata .copy ()
0 commit comments