Skip to content

Commit cbc237f

Browse files
committed
-Initial pipeline setup for regalytics articles.
1 parent c0ec32b commit cbc237f

10 files changed

Lines changed: 183 additions & 35 deletions

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,4 +276,5 @@ QuantConnect.Lean.sln.DotSettings*
276276
Research/Notebooks
277277

278278
#Docker result files
279-
Results/
279+
Results/
280+
/venv/

Demonstration.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* limitations under the License.
1414
*
1515
*/
16-
16+
/*
1717
using QuantConnect.Data;
1818
using QuantConnect.Util;
1919
using QuantConnect.Orders;
@@ -75,3 +75,4 @@ public override void OnOrderEvent(OrderEvent orderEvent)
7575
}
7676
}
7777
}
78+
*/

QuantConnect.DataSource.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
<PropertyGroup>
44
<TargetFramework>net5.0</TargetFramework>
55
<RootNamespace>QuantConnect.DataSource</RootNamespace>
6+
<AssemblyName>QuantConnect.DataSource.RegalyticsArticles</AssemblyName>
67
</PropertyGroup>
7-
88
<ItemGroup>
99
<PackageReference Include="QuantConnect.Common" Version="2.5.11800" />
1010
<PackageReference Include="protobuf-net" Version="3.0.29" />
Lines changed: 92 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,69 @@
2020
using System.IO;
2121
using QuantConnect.Data;
2222
using System.Collections.Generic;
23+
using Newtonsoft.Json;
2324

2425
namespace QuantConnect.DataSource
2526
{
2627
/// <summary>
27-
/// Example custom data type
28+
/// Regalytics Regulatory articles
2829
/// </summary>
2930
[ProtoContract(SkipConstructor = true)]
30-
public class MyCustomDataType : BaseData
31+
public class RegalyticsRegulatoryArticle : BaseData
3132
{
32-
/// <summary>
33-
/// Some custom data property
34-
/// </summary>
35-
[ProtoMember(2000)]
36-
public string SomeCustomProperty { get; set; }
33+
[JsonProperty(PropertyName = "id")]
34+
public int Id { get; set; }
35+
36+
[JsonProperty(PropertyName = "title")]
37+
public string Title { get; set; }
38+
39+
[JsonProperty(PropertyName = "summary")]
40+
public string Summary { get; set; }
41+
42+
[JsonProperty(PropertyName = "status")]
43+
public string Status { get; set; }
44+
45+
[JsonProperty(PropertyName = "classification")]
46+
public string Classificaiton { get; set; }
47+
48+
[JsonProperty(PropertyName = "filing_type")]
49+
public string FilingType { get; set; }
50+
51+
[JsonProperty(PropertyName = "in_federal_register")]
52+
public bool InFederalRegister { get; set; }
53+
54+
[JsonProperty(PropertyName = "federal_register_number")]
55+
public string FederalRegisterNumber { get; set; }
56+
57+
//[JsonProperty(PropertyName = "regalytics_alert_id")]
58+
//public string AlertId { get; set; }
59+
60+
[JsonProperty(PropertyName = "proposed_comments_due_date")]
61+
public DateTime? ProposedCommentsDueDate { get; set; }
62+
63+
[JsonProperty(PropertyName = "original_publication_date")]
64+
public DateTime? OriginalPublicationDate { get; set; }
65+
66+
[JsonProperty(PropertyName = "federal_register_publication_date")]
67+
public DateTime? FederalRegisterPublicationDate { get; set; }
68+
69+
[JsonProperty(PropertyName = "rule_effective_date")]
70+
public DateTime? RuleEffectiveDate { get; set; }
71+
72+
[JsonProperty(PropertyName = "latest_update")]
73+
public DateTime LatestUpdate { get; set; }
74+
75+
[JsonProperty(PropertyName = "alert_type")]
76+
public string AlertType { get; set; }
77+
78+
[JsonProperty(PropertyName = "states")]
79+
public Dictionary<string, List<string>> States { get; set; }
80+
81+
[JsonProperty(PropertyName = "agencies")]
82+
public List<string> Agencies { get; set; }
83+
84+
[JsonProperty(PropertyName = "pdf_url")]
85+
public string AnnouncementUrl { get; set; }
3786

3887
/// <summary>
3988
/// Return the URL string source of the file. This will be converted to a stream
@@ -48,8 +97,9 @@ public override SubscriptionDataSource GetSource(SubscriptionDataConfig config,
4897
Path.Combine(
4998
Globals.DataFolder,
5099
"alternative",
51-
"mycustomdatatype",
52-
$"{config.Symbol.Value.ToLowerInvariant()}.csv"
100+
"regalytics",
101+
"articles",
102+
$"{date:yyyyMMdd}.json"
53103
),
54104
SubscriptionTransportMedium.LocalFile
55105
);
@@ -65,16 +115,16 @@ public override SubscriptionDataSource GetSource(SubscriptionDataConfig config,
65115
/// <returns>New instance</returns>
66116
public override BaseData Reader(SubscriptionDataConfig config, string line, DateTime date, bool isLiveMode)
67117
{
68-
var csv = line.Split(',');
118+
var article = JsonConvert.DeserializeObject<RegalyticsRegulatoryArticle>(line);
69119

70-
var parsedDate = Parse.DateTimeExact(csv[0], "yyyyMMdd");
71-
return new MyCustomDataType
72-
{
73-
Symbol = config.Symbol,
74-
SomeCustomProperty = csv[1],
75-
Time = parsedDate,
76-
EndTime = parsedDate + TimeSpan.FromDays(1)
77-
};
120+
// date == the day that the data was published (2021-05-21)
121+
// 2021-05-21 for example, contains aggregated data from 2021-05-19, 2021-05-20.
122+
// Regalytics publishes at 07:30:00 Eastern time, EndTime should be at that time.
123+
124+
article.Symbol = config.Symbol;
125+
article.EndTime = date.Date.AddHours(7).AddMinutes(30);
126+
127+
return article;
78128
}
79129

80130
/// <summary>
@@ -83,12 +133,30 @@ public override BaseData Reader(SubscriptionDataConfig config, string line, Date
83133
/// <returns>A clone of the object</returns>
84134
public override BaseData Clone()
85135
{
86-
return new MyCustomDataType
136+
return new RegalyticsRegulatoryArticle
87137
{
88138
Symbol = Symbol,
89139
Time = Time,
90140
EndTime = EndTime,
91-
SomeCustomProperty = SomeCustomProperty,
141+
142+
Id = Id,
143+
Title = Title,
144+
Summary = Summary,
145+
Status = Status,
146+
Classificaiton = Classificaiton,
147+
FilingType = FilingType,
148+
InFederalRegister = InFederalRegister,
149+
FederalRegisterNumber = FederalRegisterNumber,
150+
// AlertId = AlertId,
151+
ProposedCommentsDueDate = ProposedCommentsDueDate,
152+
OriginalPublicationDate = OriginalPublicationDate,
153+
FederalRegisterPublicationDate = FederalRegisterPublicationDate,
154+
RuleEffectiveDate = RuleEffectiveDate,
155+
LatestUpdate = LatestUpdate,
156+
AlertType = AlertType,
157+
States = States,
158+
Agencies = Agencies,
159+
AnnouncementUrl = AnnouncementUrl,
92160
};
93161
}
94162

@@ -98,7 +166,7 @@ public override BaseData Clone()
98166
/// <returns>false</returns>
99167
public override bool RequiresMapping()
100168
{
101-
return true;
169+
return false;
102170
}
103171

104172
/// <summary>
@@ -116,7 +184,7 @@ public override bool IsSparseData()
116184
/// </summary>
117185
public override string ToString()
118186
{
119-
return $"{Symbol} - {SomeCustomProperty}";
187+
return $"ID: {Id} - Title: {Title} - Summary: {Summary}";
120188
}
121189

122190
/// <summary>
@@ -141,7 +209,7 @@ public override List<Resolution> SupportedResolutions()
141209
/// <returns>The <see cref="T:NodaTime.DateTimeZone" /> of this data type</returns>
142210
public override DateTimeZone DataTimeZone()
143211
{
144-
return DateTimeZone.Utc;
212+
return TimeZones.NewYork;
145213
}
146214
}
147-
}
215+
}

output/alternative/mycustomdatatype/spy.csv

Lines changed: 0 additions & 6 deletions
This file was deleted.

process.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import json
2+
import pathlib
3+
from datetime import datetime, timedelta
4+
import os
5+
import requests
6+
7+
url = os.environ["REGALYTICS_API_BASE_URL"] + "/all"
8+
9+
payload = json.dumps({
10+
"apikey": os.environ["REGALYTICS_API_KEY"]
11+
})
12+
headers = {
13+
'Content-Type': 'application/json'
14+
}
15+
16+
response = requests.post(url, headers=headers, data=payload).json()
17+
articles_path = pathlib.Path('/temp-output-directory/alternative/regalytics/articles')
18+
# objectives: download data from API -> temp folder or in memory. Output processed data to /temp-output-directory/alternative/regalytics/articles/yyyyMMdd.json
19+
articles_path.mkdir(parents=True, exist_ok=True)
20+
# "states": [
21+
# {
22+
# "name": "United States",
23+
# "country": {
24+
# "name": "United States"
25+
# }
26+
# }
27+
# ],
28+
# if states is more than 0
29+
# loop into state and get the state name
30+
# 1. query all data, -> /api/.../all; 2. look at latest_update, add delta of 1/2 days;
31+
# 3. write data to date of latest_update + delta. This date must be on the date we published the article on Regalytics
32+
articles = response['articles']
33+
articles_by_date = {}
34+
35+
for article in articles:
36+
article['in_federal_register'] = 'yes' in article['in_federal_register'].lower()
37+
# State -> Dictionary<string, List<string>>
38+
states = {}
39+
if 'states' not in article or article['states'] is None:
40+
continue
41+
42+
for state in article['states']:
43+
if 'country' not in state:
44+
continue
45+
46+
country = state['country']
47+
if country is None:
48+
continue
49+
50+
if not country['name'] in states:
51+
country_states = []
52+
states[country['name']] = country_states
53+
else:
54+
country_states = states[country['name']]
55+
56+
country_states.append(state['name'])
57+
58+
agencies = [agency['name'] for agency in article['agencies']]
59+
article['states'] = states
60+
article['agencies'] = agencies
61+
# If Thursday, Friday, or weekend (weekday >= 3), the delta will make the date land on the following Tuesday; otherwise the date is shifted by 2 days
62+
date = datetime.strptime(article['latest_update'], '%Y-%m-%d')
63+
weekday = date.weekday()
64+
delta = (8 - weekday) if weekday >= 3 else 2
65+
date = date + timedelta(days=delta)
66+
date_key = date.strftime('%Y%m%d')
67+
68+
if date_key not in articles_by_date:
69+
date_articles = []
70+
articles_by_date[date_key] = date_articles
71+
else:
72+
date_articles = articles_by_date[date_key]
73+
74+
date_articles.append(article)
75+
76+
for date, articles in articles_by_date.items():
77+
lines = []
78+
for article in articles:
79+
lines.append(json.dumps(article, indent=None))
80+
81+
article_lines = '\n'.join(lines)
82+
83+
with open(articles_path / f'{date}.json', 'w') as article_file:
84+
article_file.write(article_lines)

process.sample.ipynb

Whitespace-only changes.

process.sample.py

Whitespace-only changes.

process.sample.sh

Whitespace-only changes.

tests/MyCustomDataTypeTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* limitations under the License.
1414
*
1515
*/
16-
16+
/*
1717
using System;
1818
using ProtoBuf;
1919
using System.IO;
@@ -96,4 +96,4 @@ private BaseData CreateNewInstance()
9696
};
9797
}
9898
}
99-
}
99+
}*/

0 commit comments

Comments
 (0)