11import copy
22import json
33import os
4- import polars as pl
54from schemas import enrichment_print_schema
65from utils import (
76 convert_to_dataframe ,
1110import xlsxwriter # type: ignore [import-untyped]
1211
1312
14- # Deals with list columns data that CSV cannot deal with.
def _stringify_list_columns(df: pl.DataFrame) -> pl.DataFrame:
    """Serialize every List-typed column of ``df`` into a JSON string column.

    Flat output formats such as CSV/Excel have no representation for nested
    list cells, so each such cell is rendered as JSON text instead.

    Args:
        df: The frame to inspect.

    Returns:
        ``df`` itself when it has no List columns; otherwise a new frame in
        which each List column is replaced, under the same name, by a
        ``pl.String`` column holding the JSON encoding of each cell.
    """
    list_column_names = [
        name for name, dtype in zip(df.columns, df.dtypes) if dtype.base_type() == pl.List
    ]
    # Nothing to convert: hand the caller back the very same frame.
    if not list_column_names:
        return df

    def _cell_to_json(cell) -> str:
        # ``default=str`` makes json fall back to ``str()`` for any item it
        # cannot encode natively, rather than raising.
        return json.dumps(cell.to_list(), default=str)

    json_exprs = [
        pl.col(name).map_elements(_cell_to_json, return_dtype=pl.String).alias(name)
        for name in list_column_names
    ]
    return df.with_columns(json_exprs)
28-
29-
3013def export_to_file (
3114 facilities_data : dict ,
3215 filename : str = "ice_detention_facilities_enriched" ,
@@ -40,13 +23,12 @@ def export_to_file(
4023 writer = convert_to_dataframe (facilities_data ["facilities" ])
4124 match file_type :
4225 case "xlsx" :
26+ # Excel doesn't support timezones properly, so...
4327 with xlsxwriter .Workbook (full_name , {"remove_timezone" : True }) as wb :
44- _ = _stringify_list_columns (writer ).write_excel (workbook = wb , include_header = True , autofit = True )
45- # _ = writer.write_excel(workbook=wb, include_header=True, autofit=True)
28+ _ = writer .write_excel (workbook = wb , include_header = True , autofit = True )
4629 case "csv" :
4730 with open (full_name , "w" , newline = "" , encoding = "utf-8" ) as f_out :
48- # writer.write_csv(file=f_out, include_header=True)
49- _stringify_list_columns (writer ).write_csv (file = f_out , include_header = True )
31+ _ = writer .write_csv (file = f_out , include_header = True )
5032 case "parquet" :
5133 writer .write_parquet (full_name , use_pyarrow = True )
5234 case _:
0 commit comments