Skip to content

Commit 4351ec7

Browse files
committed
better solution for spreadsheet row skipping
Signed-off-by: John Seekins <john@robot-house.us>
1 parent c81fd3e commit 4351ec7

1 file changed

Lines changed: 17 additions & 3 deletions

File tree

ice_scrapers/spreadsheet_load.py

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,15 @@
6060
"Last Inspection Standard",
6161
"Last Final Rating",
6262
]
63+
required_cols = [
64+
"Name",
65+
"Address",
66+
"City",
67+
"State",
68+
"Zip",
69+
"AOR",
70+
"Type Detailed",
71+
]
6372

6473

6574
def _download_sheet(keep_sheet: bool = True, force_download: bool = True) -> tuple[polars.DataFrame, str]:
@@ -91,10 +100,11 @@ def _download_sheet(keep_sheet: bool = True, force_download: bool = True) -> tup
91100
download_file(actual_link, filename)
92101
df = polars.read_excel(
93102
drop_empty_rows=True,
103+
drop_empty_cols=False,
94104
has_header=False,
95105
raise_if_empty=True,
96-
# because we're manually defining the header...
97-
read_options={"skip_rows": 9, "column_names": [f.replace("YEAR", fy) for f in facility_sheet_header]},
106+
# because we're manually defining the column headers...
107+
read_options={"column_names": [f.replace("YEAR", fy) for f in facility_sheet_header]},
98108
sheet_name=f"Facilities {fy}",
99109
source=open(filename, "rb"),
100110
)
@@ -112,6 +122,10 @@ def load_sheet(keep_sheet: bool = True, force_download: bool = True) -> dict:
112122
# let's capture it
113123
phone_re = re.compile(r".+(\d{3}\s\d{3}\s\d{4})$")
114124
for row in df.iter_rows(named=True):
125+
# skip all rows that don't manage to populate all required headers
126+
if not all(row[k] is not None for k in required_cols):
127+
logger.debug("Skipping bad row in spreadsheet: %s", row)
128+
continue
115129
# logger.debug("processing %s", row)
116130
details = copy.deepcopy(facility_schema)
117131
zcode, cleaned, other_zips = repair_zip(row["Zip"], row["City"])
@@ -165,7 +179,7 @@ def load_sheet(keep_sheet: bool = True, force_download: bool = True) -> dict:
165179
if "/" in row["Male/Female"]:
166180
details["population"]["female"]["allowed"] = True
167181
details["population"]["male"]["allowed"] = True
168-
elif "Female" in row["Male/Female"]:
182+
elif row["Male/Female"] == "Female":
169183
details["population"]["female"]["allowed"] = True
170184
else:
171185
details["population"]["male"]["allowed"] = True

0 commit comments

Comments
 (0)