@@ -697,8 +697,57 @@ def clear_extra_metadata(self):
697697
698698 @_check_types
699699 @doc (
700- summary = "Access sample metadata for one or more sample sets." ,
701- returns = "A dataframe of sample metadata, one row per sample." ,
700+ summary = """
701+ Access sample-level metadata for one or more sample sets.
702+ This method returns a pandas DataFrame where each row corresponds
703+ to a single sample. The metadata is assembled by merging multiple
704+ sources including general metadata, sequence quality control (QC)
705+ metadata, surveillance flags, and—when available—ancestry-informative
706+ marker (AIM) and cohort metadata.
707+ """ ,
708+ parameters = dict (
709+ sample_sets = """
710+ Sample set identifier(s), e.g. ``'AG1000G-AO'``. If None, all
711+ available sample sets are used.
712+ """ ,
713+ sample_query = """
714+ A pandas query string to filter samples, e.g.
715+ ``"country == 'Uganda' and sex_call == 'F'"``.
716+ """ ,
717+ sample_query_options = """
718+ Additional keyword arguments passed to :meth:`pandas.DataFrame.query`.
719+ """ ,
720+ sample_indices = """
721+ Integer indices of samples to select. Cannot be used together
722+ with ``sample_query``.
723+ """ ,
724+ ),
725+ returns = """
726+ A DataFrame with one row per sample. Columns include:
727+
728+ - **sample_id** (*str*) - Unique sample identifier.
729+ - **partner_sample_id** (*str*) - Sample ID assigned by the contributing partner.
730+ - **contributor** (*str*) - Name of the contributing institution or individual.
731+ - **country** (*str*) - Country where the sample was collected.
732+ - **location** (*str*) - Specific collection location (e.g. village or site name).
733+ - **year** (*int*) - Year of collection.
734+ - **month** (*int*) - Month of collection, if available.
735+ - **latitude** (*float*) - GPS latitude of the collection site.
736+ - **longitude** (*float*) - GPS longitude of the collection site.
737+ - **sex_call** (*str*) - Sex determination call; ``'F'`` for female, ``'M'`` for male.
738+ - **taxon** (*str*) - Species or taxon assignment.
739+ - **mean_cov** (*float*) - Mean sequencing coverage across the genome.
740+ - **median_cov** (*float*) - Median sequencing coverage.
741+ - **frac_reads_mapped** (*float*) - Fraction of reads mapped to the reference genome.
742+ - **contam_pct** (*float*) - Estimated contamination percentage.
743+ - **pass_qc** (*bool*) - Whether the sample passed quality control filters.
744+ - **cohort_admin1_year** (*str*) - Cohort label combining admin level 1 region and year (if available).
745+ - **cohort_admin2_year** (*str*) - Cohort label combining admin level 2 region and year (if available).
746+ - **aim_species** (*str*) - Species assignment from ancestry-informative markers (if available).
747+
748+ The returned DataFrame is a copy and can be safely modified
749+ without affecting internal caches.
750+ """ ,
702751 )
703752 def sample_metadata (
704753 self ,
0 commit comments