|
| 1 | +from enrichers import Enrichment |
| 2 | +from utils import logger |
| 3 | + |
| 4 | + |
class OpenStreetMap(Enrichment):
    """Enrichment that looks a facility up through the Nominatim search endpoint.

    ``search_args``, ``resp_info``, ``_req`` and ``_clean_facility_name`` are
    not defined in this class; they are expected to come from ``Enrichment``.
    """

    # Coordinates substituted when a search result carries no "lat"/"lon" key.
    # default to Washington, D.C.?
    default_coords: dict = {
        "latitude": 38.89511000,
        "longitude": -77.03637000,
    }

    def search(self) -> dict:
        """Query Nominatim for the facility and record the best hit in ``resp_info``.

        With no address, a single query for the cleaned facility name is sent.
        With an address, up to three queries are tried from most to least
        specific (name + full address, full address, locality) and the first
        one that yields results is used, so a precise hit is never replaced by
        a vaguer one.

        Among the returned results, the first whose ``type`` or
        ``display_name`` mentions a detention-related term is preferred;
        otherwise the first result is used as a fallback.

        Returns:
            ``self.resp_info``. ``"enrichment_type"`` is always set and every
            query (and failure) is appended to ``"search_query_steps"``. When a
            result is found, ``"url"``, ``"title"`` and the ``"latitude"`` /
            ``"longitude"`` entries of ``"details"`` are filled in.
            NOTE(review): assumes the base class pre-populates
            ``resp_info["search_query_steps"]`` (list) and
            ``resp_info["details"]`` (dict) -- confirm against ``Enrichment``.

        Raises:
            KeyError: if ``search_args`` has no ``"facility_name"``, or if a
                non-empty address lacks ``street``, ``locality``,
                ``administrative_area`` or ``postal_code``.
        """
        facility_name = self.search_args["facility_name"]
        address = self.search_args.get("address", {})
        search_name = self._clean_facility_name(facility_name)
        search_url = "https://nominatim.openstreetmap.org/search"
        self.resp_info["enrichment_type"] = "openstreetmap"

        if not address:
            logger.debug("No address for %s, simply searching for name", facility_name)
            queries = [search_name]
        else:
            full_address = (
                f"{address['street']} {address['locality']}, {address['administrative_area']} {address['postal_code']}"
            )
            locality = f"{address['locality']}, {address['administrative_area']} {address['postal_code']}"
            # Ordered from most to least specific.
            queries = [f"{search_name} {full_address}", full_address, locality]

        data: list = []
        for query in queries:
            params = {
                "q": query,
                "format": "json",
                "limit": 5,
                "dedupe": 1,
            }
            logger.debug("Searching OSM for %s", query)
            self.resp_info["search_query_steps"].append(query)  # type: ignore [attr-defined]
            try:
                response = self._req(search_url, params=params, timeout=15)
                logger.debug("Response: %s", response.text)
                payload = response.json()
            except Exception as e:
                # Best effort: a failed query must not abort the enrichment,
                # so record the failure and move on to the next query (if any).
                logger.debug(" OSM search error for '%s': %s", facility_name, e)
                self.resp_info["search_query_steps"].append(f"(Failed -> {e})")  # type: ignore [attr-defined]
                continue
            # Anything other than a list of results is treated as "nothing found".
            if isinstance(payload, list) and payload:
                data = payload
                # Stop here so a less specific query cannot overwrite this hit.
                break

        if not data:
            return self.resp_info

        # when the URL result is a "way" this is usually correct.
        # checks top five results.
        match_terms = ("prison", "detention", "correctional", "jail")
        for result in data:
            osm_type = result.get("type", "").lower()
            display_name = result.get("display_name", "").lower()
            if not any(term in osm_type or term in display_name for term in match_terms):
                continue
            # todo courthouse could be added, or other tags such as "prison:for=migrant" as a clear positive search result.
            osm_id = result.get("osm_id", "")
            osm_type_prefix = result.get("osm_type", "")
            # A direct object link needs both the OSM element type and its id.
            if osm_id and osm_type_prefix:
                self.resp_info["url"] = f"https://www.openstreetmap.org/{osm_type_prefix}/{osm_id}"
            self.resp_info["details"]["latitude"] = result.get("lat", self.default_coords["latitude"])  # type: ignore [index]
            self.resp_info["details"]["longitude"] = result.get("lon", self.default_coords["longitude"])  # type: ignore [index]
            self.resp_info["title"] = result.get("display_name", "")
            return self.resp_info

        # fallback to first result
        first_result = data[0]
        logger.debug("Address searches didn't directly find anything, just using the first result: %s", first_result)
        title = first_result.get("display_name", "")
        lat = first_result.get("lat", self.default_coords["latitude"])
        lon = first_result.get("lon", self.default_coords["longitude"])
        self.resp_info["search_query_steps"].append(f"{lat}&{lon}")  # type: ignore [attr-defined]
        # Only build a map-marker link when both coordinates are usable.
        if lat and lon:
            self.resp_info["url"] = f"https://www.openstreetmap.org/?mlat={lat}&mlon={lon}&zoom=15"
        self.resp_info["details"]["latitude"] = lat  # type: ignore [index]
        self.resp_info["details"]["longitude"] = lon  # type: ignore [index]
        self.resp_info["title"] = title
        return self.resp_info
0 commit comments