File tree Expand file tree Collapse file tree
Deployment/data_migration
Expand file tree | Collapse file tree
Original file line number | Diff line number | Diff line change
@@ -789,15 +789,18 @@ def export_blob_storage(export_dir: Path, account_name: str) -> None:
789789def _export_container (
790790 blob_service , container_name : str , blob_dir : Path
791791) -> None :
792- """Download all blobs from a single container."""
792+ """Download PDF and JSON blobs from a single container."""
793+ ALLOWED_EXTENSIONS = (".pdf" , ".json" )
794+
793795 container_client = blob_service .get_container_client (container_name )
794796 container_dir = blob_dir / container_name
795797 _long_path (container_dir ).mkdir (parents = True , exist_ok = True )
796798
797799 # Collect blob names and content types to preserve metadata
798800 logger .info (" Listing blobs in container '%s'..." , container_name )
799- blob_list = list (container_client .list_blobs (include = ["metadata" ]))
800- logger .info (" Found %d blobs." , len (blob_list ))
801+ all_blobs = list (container_client .list_blobs (include = ["metadata" ]))
802+ blob_list = [b for b in all_blobs if b .name .lower ().endswith (ALLOWED_EXTENSIONS )]
803+ logger .info (" Found %d blobs total, %d PDF/JSON files to export." , len (all_blobs ), len (blob_list ))
801804
802805 content_type_map : dict = {}
803806 blob_count = 0
You can’t perform that action at this time.
0 commit comments