@@ -564,7 +564,10 @@ def _sample_set_has_unrestricted_use(self, *, sample_set: str):
564564 )
565565
566566 if "unrestricted_use" not in release_manifest_df .columns :
567- return False
567+ raise ValueError (
568+ f"Column 'unrestricted_use' missing from manifest for sample set '{ sample_set } '. "
569+ "This indicates a data integrity issue in the release manifest."
570+ )
568571
569572 sample_set_records_srs = release_manifest_df .loc [
570573 release_manifest_df ["sample_set" ] == sample_set , "unrestricted_use"
@@ -833,22 +836,15 @@ def lookup_terms_of_use_info(self, sample_set: base_params.sample_set) -> dict:
833836 "terms_of_use_url" ,
834837 "unrestricted_use" ,
835838 ]
836- placeholder_values = {
837- "terms_of_use_expiry_date" : "2099-12-31" ,
838- "terms_of_use_url" : float ("nan" ),
839- "unrestricted_use" : False ,
840- }
841- available_cols = [c for c in expected_cols if c in df_sample_sets .columns ]
842- if available_cols :
843- lookup = df_sample_sets [available_cols ].to_dict (orient = "index" )
844- missing_cols = set (expected_cols ) - set (available_cols )
845- if missing_cols :
846- for ss in lookup :
847- for mc in missing_cols :
848- lookup [ss ][mc ] = placeholder_values [mc ]
849- else :
850- lookup = {ss : dict (placeholder_values ) for ss in df_sample_sets .index }
851- self ._cache_sample_set_to_terms_of_use_info = lookup
839+ missing_cols = [c for c in expected_cols if c not in df_sample_sets .columns ]
840+ if missing_cols :
841+ raise ValueError (
842+ f"Terms-of-use columns missing from manifest: { missing_cols } . "
843+ "This indicates a data integrity issue in the release manifest."
844+ )
845+ self ._cache_sample_set_to_terms_of_use_info = df_sample_sets [
846+ expected_cols
847+ ].to_dict (orient = "index" )
852848 try :
853849 return self ._cache_sample_set_to_terms_of_use_info [sample_set ]
854850 except KeyError as e :
0 commit comments