Skip to content

Commit fb843ee

Browse files
committed
remove input arg
1 parent b0835d8 commit fb843ee

1 file changed

Lines changed: 82 additions & 89 deletions

File tree

process.py

Lines changed: 82 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -3,113 +3,106 @@
33
from datetime import datetime
44
import os
55
import requests
6-
import sys
76

87
URL = os.environ["REGALYTICS_API_BASE_URL"]
98
HEADERS = {
109
'Content-Type': 'application/json'
1110
}
1211
ARTICLE_PATH = pathlib.Path('/temp-output-directory/alternative/regalytics/articles')
1312

14-
def main(process_date):
15-
# objectives: download data from API -> temp folder or in memory. Output processed data to /temp-output-directory/alternative/regalytics/articles/yyyyMMdd.json
16-
ARTICLE_PATH.mkdir(parents=True, exist_ok=True)
17-
articles_by_date = {}
18-
19-
process_datetime = datetime.strptime(process_date, '%Y%m%d').date()
20-
process_date = process_datetime.strftime('%Y-%m-%d')
13+
# objectives: download data from API -> temp folder or in memory. Output processed data to /temp-output-directory/alternative/regalytics/articles/yyyyMMdd.json
14+
ARTICLE_PATH.mkdir(parents=True, exist_ok=True)
15+
articles_by_date = {}
16+
17+
process_datetime = datetime.strptime(os.environ['QC_DATAFLEET_DEPLOYMENT_DATE'], '%Y%m%d').date()
18+
process_date = process_datetime.strftime('%Y-%m-%d')
2119

22-
url = f"{URL}/search"
23-
payload = json.dumps({
24-
"apikey": os.environ["REGALYTICS_API_KEY"],
25-
"search_options": {
26-
"created_at": {
27-
"start": process_date,
28-
"end": process_date
29-
}
20+
url = f"{URL}/search"
21+
payload = json.dumps({
22+
"apikey": os.environ["REGALYTICS_API_KEY"],
23+
"search_options": {
24+
"created_at": {
25+
"start": process_date,
26+
"end": process_date
3027
}
31-
})
28+
}
29+
})
30+
31+
response = requests.post(url, headers=HEADERS, data=payload).json()
32+
articles = response['articles']
3233

33-
response = requests.post(url, headers=HEADERS, data=payload).json()
34-
articles = response['articles']
35-
36-
# "agencies": [
37-
# {
38-
# "name": "Iowa Department of Human Services",
39-
# "states": [
40-
# {
41-
# "name": "Iowa"
42-
# }
43-
# ],
44-
# "countries": [
45-
# {
46-
# "name": "United States"
47-
# }
48-
# ]
49-
# }
50-
# ]
51-
# if states is more than 0
52-
# loop into state and get the state name
53-
# 1. query all data, -> /api/v2/.../get-all; 2. look at latest_update, add delta of 1/2 days;
54-
# 3. write data to date of latest_update + delta. This date must be on the date we published the article on Regalytics
34+
# "agencies": [
35+
# {
36+
# "name": "Iowa Department of Human Services",
37+
# "states": [
38+
# {
39+
# "name": "Iowa"
40+
# }
41+
# ],
42+
# "countries": [
43+
# {
44+
# "name": "United States"
45+
# }
46+
# ]
47+
# }
48+
# ]
49+
# if states is more than 0
50+
# loop into state and get the state name
51+
# 1. query all data, -> /api/v2/.../get-all; 2. look at latest_update, add delta of 1/2 days;
52+
# 3. write data to date of latest_update + delta. This date must be on the date we published the article on Regalytics
5553

56-
for article in articles:
57-
article['in_federal_register'] = 'yes' in article['in_federal_register'].lower()
58-
# State -> Dictionary<string, List<string>>
59-
states = {}
60-
for agency in article['agencies']:
61-
state = agency['states']
62-
63-
if 'states' not in agency or state is None:
64-
continue
54+
for article in articles:
55+
article['in_federal_register'] = 'yes' in article['in_federal_register'].lower()
56+
# State -> Dictionary<string, List<string>>
57+
states = {}
58+
for agency in article['agencies']:
59+
state = agency['states']
60+
61+
if 'states' not in agency or state is None:
62+
continue
6563

66-
if 'countries' not in agency:
67-
continue
64+
if 'countries' not in agency:
65+
continue
6866

69-
countries = agency['countries']
70-
if countries is None:
71-
continue
67+
countries = agency['countries']
68+
if countries is None:
69+
continue
70+
71+
for country in countries:
72+
name = country['name']
7273

73-
for country in countries:
74-
name = country['name']
75-
76-
if not name in states:
77-
country_states = []
78-
states[name] = country_states
79-
else:
80-
country_states = states[name]
81-
82-
country_states.extend([x['name'] for x in state])
74+
if not name in states:
75+
country_states = []
76+
states[name] = country_states
77+
else:
78+
country_states = states[name]
8379

84-
article['states'] = states
85-
article['agencies'] = [agency['name'] for agency in article['agencies']]
86-
87-
# remove timezone info (-04:00) [NewYork]
88-
article['created_at'] = article['created_at'][:-6]
89-
90-
# all data received during day T is assigned to day T+1 00:00
91-
date = datetime.strptime(article['created_at'], '%Y-%m-%dT%H:%M:%S.%f').date()
92-
date_key = date.strftime('%Y%m%d')
80+
country_states.extend([x['name'] for x in state])
9381

94-
if date_key not in articles_by_date:
95-
date_articles = []
96-
articles_by_date[date_key] = date_articles
97-
else:
98-
date_articles = articles_by_date[date_key]
82+
article['states'] = states
83+
article['agencies'] = [agency['name'] for agency in article['agencies']]
84+
85+
# remove timezone info (-04:00) [NewYork]
86+
article['created_at'] = article['created_at'][:-6]
87+
88+
# all data received during day T is assigned to day T+1 00:00
89+
date = datetime.strptime(article['created_at'], '%Y-%m-%dT%H:%M:%S.%f').date()
90+
date_key = date.strftime('%Y%m%d')
9991

100-
date_articles.append(article)
92+
if date_key not in articles_by_date:
93+
date_articles = []
94+
articles_by_date[date_key] = date_articles
95+
else:
96+
date_articles = articles_by_date[date_key]
10197

102-
for date, articles in articles_by_date.items():
103-
lines = []
104-
for article in articles:
105-
lines.append(json.dumps(article, indent=None))
98+
date_articles.append(article)
10699

107-
article_lines = '\n'.join(lines)
100+
for date, articles in articles_by_date.items():
101+
lines = []
102+
for article in articles:
103+
lines.append(json.dumps(article, indent=None))
108104

109-
with open(ARTICLE_PATH / f'{date}.json', 'w') as article_file:
110-
article_file.write(article_lines)
105+
article_lines = '\n'.join(lines)
111106

112-
if __name__ == '__main__':
113-
if len(sys.argv) != 2:
114-
raise ValueError("process.py only takes 1 argument.")
115-
main(sys.argv[-1])
107+
with open(ARTICLE_PATH / f'{date}.json', 'w') as article_file:
108+
article_file.write(article_lines)

0 commit comments

Comments
 (0)