Skip to content

Commit 0c25f4a

Browse files
committed
slightly nicer typing
Signed-off-by: John Seekins <john@robot-house.us>
1 parent 27c0213 commit 0c25f4a

4 files changed

Lines changed: 9 additions & 9 deletions

File tree

file_utils.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,12 +23,14 @@ def export_to_file(
2323
match file_type:
2424
case "xlsx":
2525
with xlsxwriter.Workbook(full_name, {"remove_timezone": True}) as wb:
26-
writer.write_excel(workbook=wb, include_header=True, autofit=True)
26+
_ = writer.write_excel(workbook=wb, include_header=True, autofit=True)
2727
case "csv":
2828
with open(full_name, "w", newline="", encoding="utf-8") as f_out:
2929
writer.write_csv(file=f_out, include_header=True)
3030
case "parquet":
3131
writer.write_parquet(full_name, use_pyarrow=True)
32+
case _:
33+
logger.warning("Invalid dataframe output type %s", file_type)
3234
elif file_type == "json":
3335
with open(full_name, "w", encoding="utf-8") as f_out:
3436
json.dump(facilities_data, f_out, indent=2, sort_keys=True, default=str)
@@ -103,7 +105,7 @@ def print_summary(facilities_data: dict) -> None:
103105
false_positives = 0
104106
errors = 0
105107
for facility in facilities_data["facilities"].values():
106-
query = facility.get("wikipedia", {}).get("search_query", "")
108+
query: str = facility.get("wikipedia", {}).get("search_query", "")
107109
if "REJECTED" in query:
108110
false_positives += 1
109111
elif "ERROR" in query:

ice_scrapers/custom_facilities.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
"""
44
Handle manually discovered/managed facilities
55
"""
6-
custom_facilities = {
6+
custom_facilities: dict = {
77
"2309 North Highway 83,McCook,NE,69001": {
88
"_repaired_record": False,
99
"address": {

ice_scrapers/facilities_scraper.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ def scrape_facilities(facilities_data: dict) -> dict:
8383
return facilities_data
8484

8585

86-
def _scrape_updated(url: str):
86+
def _scrape_updated(url: str) -> datetime.datetime:
8787
"""
8888
Scrape url to get "last updated" time
8989
Is specifically oriented around ice.gov facility pages
@@ -97,7 +97,7 @@ def _scrape_updated(url: str):
9797
response.raise_for_status()
9898
except Exception as e:
9999
logger.error(" Error parsing %s: %s", url, e)
100-
return []
100+
return datetime.datetime.strptime(default_timestamp, timestamp_format)
101101
soup = BeautifulSoup(response.content, "html.parser")
102102
times = soup.findAll("time")
103103
if not times:
@@ -181,7 +181,6 @@ def _scrape_page(page_url: str) -> list:
181181
facilities.append(facility_data)
182182

183183
logger.info(" Extracted %s facilities from page", len(facilities))
184-
185184
return facilities
186185

187186

@@ -194,7 +193,6 @@ def _find_facility_patterns(container):
194193
r"([A-Z][^|]+(?:\|[^|]+)?)\s*([A-Z][^A-Z]*Field Office)",
195194
r"([^-]+)\s*-\s*([A-Z][^A-Z]*Field Office)",
196195
]
197-
198196
text_content = container.get_text()
199197

200198
for pattern in facility_patterns:

schemas.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import copy
22
import datetime
33

4-
facilities_schema = {
4+
facilities_schema: dict = {
55
"enrich_runtime": 0,
66
"facilities": {},
77
"scrape_runtime": 0,
@@ -107,7 +107,7 @@
107107

108108

109109
# enrichment response object
110-
enrich_resp_schema = {
110+
enrich_resp_schema: dict = {
111111
"cleaned_name": "",
112112
"details": {},
113113
"enrichment_type": "",

0 commit comments

Comments
 (0)