Skip to content

Commit c81fd3e

Browse files
committed
update spreadsheet header work somewhat
Signed-off-by: John Seekins <john@robot-house.us>
1 parent 0979ee5 commit c81fd3e

1 file changed

Lines changed: 7 additions & 4 deletions

File tree

ice_scrapers/spreadsheet_load.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939
"AOR",
4040
"Type Detailed",
4141
"Male/Female",
42-
"FY26 ALOS",
42+
"YEAR ALOS",
4343
"Level A",
4444
"Level B",
4545
"Level C",
@@ -56,7 +56,7 @@
5656
"Guaranteed Minimum",
5757
"Last Inspection Type",
5858
"Last Inspection End Date",
59-
# "Pending FY25 Inspection", # this was removed from the source sheet in late 2025.
59+
# "Pending YEAR Inspection",  # this column was removed from the source sheet in late 2025.
6060
"Last Inspection Standard",
6161
"Last Final Rating",
6262
]
@@ -73,6 +73,7 @@ def _download_sheet(keep_sheet: bool = True, force_download: bool = True) -> tup
7373
# this is _usually_ the most recently uploaded sheet...
7474
actual_link = links[0]["href"]
7575
cur_year = int(datetime.datetime.now().strftime("%y"))
76+
fy = f"FY{cur_year}"
7677
# try to find the most recent
7778
for link in links:
7879
match = fy_re.search(link["href"])
@@ -83,6 +84,7 @@ def _download_sheet(keep_sheet: bool = True, force_download: bool = True) -> tup
8384
actual_link = link["href"]
8485
# NOTE: the linked sheet's fiscal year can apparently run ahead of the calendar year, so adopt the sheet's year
8586
cur_year = year
87+
fy = f"FY{cur_year}"
8688
logger.debug("Found sheet at: %s", actual_link)
8789
if force_download or not os.path.exists(filename):
8890
logger.info("Downloading detention stats sheet from %s", actual_link)
@@ -92,8 +94,8 @@ def _download_sheet(keep_sheet: bool = True, force_download: bool = True) -> tup
9294
has_header=False,
9395
raise_if_empty=True,
9496
# because we're manually defining the header...
95-
read_options={"skip_rows": 7, "column_names": facility_sheet_header},
96-
sheet_name=f"Facilities FY{cur_year}",
97+
read_options={"skip_rows": 9, "column_names": [f.replace("YEAR", fy) for f in facility_sheet_header]},
98+
sheet_name=f"Facilities {fy}",
9799
source=open(filename, "rb"),
98100
)
99101
if not keep_sheet:
@@ -110,6 +112,7 @@ def load_sheet(keep_sheet: bool = True, force_download: bool = True) -> dict:
110112
# let's capture it
111113
phone_re = re.compile(r".+(\d{3}\s\d{3}\s\d{4})$")
112114
for row in df.iter_rows(named=True):
115+
# logger.debug("processing %s", row)
113116
details = copy.deepcopy(facility_schema)
114117
zcode, cleaned, other_zips = repair_zip(row["Zip"], row["City"])
115118
details["address"]["other_postal_codes"].extend(other_zips)

0 commit comments

Comments
 (0)