33from datetime import datetime
44import os
55import requests
6- import sys
76
87URL = os .environ ["REGALYTICS_API_BASE_URL" ]
98HEADERS = {
109 'Content-Type' : 'application/json'
1110}
1211ARTICLE_PATH = pathlib .Path ('/temp-output-directory/alternative/regalytics/articles' )
1312
14- def main (process_date ):
15- # objectives:# download data from API -> temp folder or in memory. Output processed datat to /temp-output-directory/alternative/regalytics/articles/yyyyMMdd.json
16- ARTICLE_PATH .mkdir (parents = True , exist_ok = True )
17- articles_by_date = {}
18-
19- process_datetime = datetime .strptime (process_date , '%Y%m%d' ).date ()
20- process_date = process_datetime .strftime ('%Y-%m-%d' )
13+ # objectives: download data from API -> temp folder or in memory. Output processed data to /temp-output-directory/alternative/regalytics/articles/yyyyMMdd.json
14+ ARTICLE_PATH .mkdir (parents = True , exist_ok = True )
15+ articles_by_date = {}
16+
17+ process_datetime = datetime .strptime (os .environ ['QC_DATAFLEET_DEPLOYMENT_DATE' ], '%Y%m%d' ).date ()
18+ process_date = process_datetime .strftime ('%Y-%m-%d' )
2119
22- url = f"{ URL } /search"
23- payload = json .dumps ({
24- "apikey" : os .environ ["REGALYTICS_API_KEY" ],
25- "search_options" : {
26- "created_at" : {
27- "start" : process_date ,
28- "end" : process_date
29- }
20+ url = f"{ URL } /search"
21+ payload = json .dumps ({
22+ "apikey" : os .environ ["REGALYTICS_API_KEY" ],
23+ "search_options" : {
24+ "created_at" : {
25+ "start" : process_date ,
26+ "end" : process_date
3027 }
31- })
28+ }
29+ })
30+
31+ response = requests .post (url , headers = HEADERS , data = payload ).json ()
32+ articles = response ['articles' ]
3233
33- response = requests .post (url , headers = HEADERS , data = payload ).json ()
34- articles = response ['articles' ]
35-
36- # "agencies": [
37- # {
38- # "name": "Iowa Department of Human Services",
39- # "states": [
40- # {
41- # "name": "Iowa"
42- # }
43- # ],
44- # "countries": [
45- # {
46- # "name": "United States"
47- # }
48- # ]
49- # }
50- # ]
51- # if states is more than 0
52- # loop into state and get the state name
53- # 1. query all data, -> /api/v2/.../get-all; 2. look at latest_update, add delta of 1/2 days;
54- # 3. write data to date of latest_update + delta. This date must be on the date we published the article on Regalytics
34+ # "agencies": [
35+ # {
36+ # "name": "Iowa Department of Human Services",
37+ # "states": [
38+ # {
39+ # "name": "Iowa"
40+ # }
41+ # ],
42+ # "countries": [
43+ # {
44+ # "name": "United States"
45+ # }
46+ # ]
47+ # }
48+ # ]
49+ # if states is more than 0
50+ # loop into state and get the state name
51+ # 1. query all data, -> /api/v2/.../get-all; 2. look at latest_update, add delta of 1/2 days;
52+ # 3. write data to date of latest_update + delta. This date must be on the date we published the article on Regalytics
5553
56- for article in articles :
57- article ['in_federal_register' ] = 'yes' in article ['in_federal_register' ].lower ()
58- # State -> Dictionary<string, List<string>>
59- states = {}
60- for agency in article ['agencies' ]:
61- state = agency ['states' ]
62-
63- if 'states' not in agency or state is None :
64- continue
54+ for article in articles :
55+ article ['in_federal_register' ] = 'yes' in article ['in_federal_register' ].lower ()
56+ # State -> Dictionary<string, List<string>>
57+ states = {}
58+ for agency in article ['agencies' ]:
59+ state = agency ['states' ]
60+
61+ if 'states' not in agency or state is None :
62+ continue
6563
66- if 'countries' not in agency :
67- continue
64+ if 'countries' not in agency :
65+ continue
6866
69- countries = agency ['countries' ]
70- if countries is None :
71- continue
67+ countries = agency ['countries' ]
68+ if countries is None :
69+ continue
70+
71+ for country in countries :
72+ name = country ['name' ]
7273
73- for country in countries :
74- name = country ['name' ]
75-
76- if not name in states :
77- country_states = []
78- states [name ] = country_states
79- else :
80- country_states = states [name ]
81-
82- country_states .extend ([x ['name' ] for x in state ])
74+ if not name in states :
75+ country_states = []
76+ states [name ] = country_states
77+ else :
78+ country_states = states [name ]
8379
84- article ['states' ] = states
85- article ['agencies' ] = [agency ['name' ] for agency in article ['agencies' ]]
86-
87- # remove timezone info (-04:00) [NewYork]
88- article ['created_at' ] = article ['created_at' ][:- 6 ]
89-
90- # all data received during day T would confer into day T+1 00:00
91- date = datetime .strptime (article ['created_at' ], '%Y-%m-%dT%H:%M:%S.%f' ).date ()
92- date_key = date .strftime ('%Y%m%d' )
80+ country_states .extend ([x ['name' ] for x in state ])
9381
94- if date_key not in articles_by_date :
95- date_articles = []
96- articles_by_date [date_key ] = date_articles
97- else :
98- date_articles = articles_by_date [date_key ]
82+ article ['states' ] = states
83+ article ['agencies' ] = [agency ['name' ] for agency in article ['agencies' ]]
84+
85+ # remove timezone info (-04:00) [NewYork]
86+ article ['created_at' ] = article ['created_at' ][:- 6 ]
87+
88+ # all data received during day T is attributed to day T+1 00:00
89+ date = datetime .strptime (article ['created_at' ], '%Y-%m-%dT%H:%M:%S.%f' ).date ()
90+ date_key = date .strftime ('%Y%m%d' )
9991
100- date_articles .append (article )
92+ if date_key not in articles_by_date :
93+ date_articles = []
94+ articles_by_date [date_key ] = date_articles
95+ else :
96+ date_articles = articles_by_date [date_key ]
10197
102- for date , articles in articles_by_date .items ():
103- lines = []
104- for article in articles :
105- lines .append (json .dumps (article , indent = None ))
98+ date_articles .append (article )
10699
107- article_lines = '\n ' .join (lines )
100+ for date , articles in articles_by_date .items ():
101+ lines = []
102+ for article in articles :
103+ lines .append (json .dumps (article , indent = None ))
108104
109- with open (ARTICLE_PATH / f'{ date } .json' , 'w' ) as article_file :
110- article_file .write (article_lines )
105+ article_lines = '\n ' .join (lines )
111106
112- if __name__ == '__main__' :
113- if len (sys .argv ) != 2 :
114- raise ValueError ("process.py only takes 1 argument." )
115- main (sys .argv [- 1 ])
107+ with open (ARTICLE_PATH / f'{ date } .json' , 'w' ) as article_file :
108+ article_file .write (article_lines )
0 commit comments