Skip to content

Commit bb9c09b

Browse files
committed
Rerun token preprocessing and redo manual token edits
1 parent 8bbbff1 commit bb9c09b

7 files changed

Lines changed: 4729 additions & 902 deletions
Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
1-
,doc_offset,corpus_span,correct_span,error_type
2-
0,15.0,"[41, 51): 'CUNNINGHAM'","(33, 51]: 'RANDALL CUNNINGHAM'",Token
3-
1,15.0,"[41, 51): 'CUNNINGHAM'","(33,51] 'RANDALL CUNNINGHAM'",Token
4-
2,15.0,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'","[15, 32): 'AMERICAN FOOTBALL'",Token
5-
3,15.0,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'","[33, 40): 'RANDALL'",Token
6-
4,15.0,"[41, 51): 'CUNNINGHAM'","[33,51): 'RANDALL CUNNINGHAM'",Token
7-
5,39.0,"[11, 23): 'Boxing-Bruno'","[18, 23): 'Bruno'",Token
8-
6,39.0,,"[18, 23): 'Bruno' ",Token
9-
7,60.0,"[1358, 1371): 'Tripoli-based'","[1358, 1365): 'Tripoli'",Token
10-
8,65.0,"[1125, 1134): 'asset-St.'","[1131, 1146): 'St. Louis based'",Token
11-
9,65.0,"[592, 607): 'St. Louis-based'","[592, 601): 'St. Louis'",Token
12-
10,65.0,"[1125, 1134): 'asset-St.'",,Token
13-
11,175.0,"[252, 264): 'London-based'","[252, 258): 'London'",Token
14-
12,181.0,"[1761, 1774): 'Moscow-backed'","[1761, 1767): 'Moscow'",Token
15-
13,198.0,"[39, 47): 'aid-U.N.'","[43, 47): 'U.N.'",Token
1+
,doc_offset,corpus_span,correct_span,correct_ent_type,error_type
2+
0,15,"[41, 51): 'CUNNINGHAM'","[33, 51): 'RANDALL CUNNINGHAM'",PER,Token
3+
1,39,"[11, 23): 'Boxing-Bruno'","[18, 23): 'Bruno'",PER,Token
4+
2,60,"[1358, 1371): 'Tripoli-based'","[1358, 1365): 'Tripoli'",LOC,Token
5+
3,65,"[1125, 1134): 'asset-St.'","[1131, 1140): 'St. Louis'",LOC,Token
6+
4,65,"[1359, 1376): 'Minneapolis-based'","[1359, 1370): 'Minneapolis'",LOC,Token
7+
5,65,"[1419, 1429): 'Ohio-based'","[1419, 1423): 'Ohio'",LOC,Token
8+
6,65,"[592, 607): 'St. Louis-based'","[592, 601): 'St. Louis'",LOC,Token
9+
7,65,"[793, 802): 'Mo.-based'","[793, 796): 'Mo.'",LOC,Token
10+
8,175,"[252, 264): 'London-based'","[252, 258): 'London'",LOC,Token
11+
9,181,"[1761, 1774): 'Moscow-backed'","[1761, 1767): 'Moscow'",LOC,Token
12+
10,198,"[39, 47): 'aid-U.N.'","[43, 47): 'U.N.'",ORG,Token
Lines changed: 29 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,29 @@
1-
,doc_offset,corpus_span,correct_span,error_type
2-
0,3.0,"[21, 37): 'SKIING-WORLD CUP'","[28, 37): 'WORLD CUP'",Token
3-
1,3.0,"[21, 37): 'SKIING-WORLD CUP'","[28,37)'WORLD CUP'",Token
4-
2,22.0,"[19, 23): 'ARAB'","[19, 35): 'ARAB CONTRACTORS'",Token
5-
3,27.0,"[565, 573): 'X-DENVER'","[567, 573): 'DENVER'",Token
6-
4,27.0,,"[567, 573): 'DENVER'",Token
7-
5,27.0,"[889, 900): 'Y-GREEN BAY'","[891, 900): 'GREEN BAY'",Token
8-
6,29.0,"[25, 44): 'FOOTBALL-OHIO STATE'","[34, 44): 'OHIO STATE'",Token
9-
7,29.0,"[25, 44): 'FOOTBALL-OHIO STATE'","[34,44): 'OHIO STATE'",Token
10-
8,39.0,"[1158, 1175): 'AbelardoFernandez'","[1158, 1175): 'Abelardo Fernandez'",Token
11-
9,54.0,"[1145, 1152): 'Boxmeer'","[1141, 1152): 'van Boxmeer'",Token
12-
10,54.0,"[11, 27): 'INTERVIEW-ZYWIEC'","[21, 27): 'ZYWIEC'",Token
13-
11,54.0,"[2594, 2601): 'Boxmeer'","[2590, 2601): 'van Boxmeer'",Token
14-
12,54.0,"[3231, 3241): 'Full Light'","[3224, 3241): 'Zywiec Full Light'",Token
15-
13,54.0,"[3421, 3428): 'Boxmeer'","[3417, 3428): 'van Boxmeer'",Token
16-
14,54.0,,"[?, 27): 'ZYWIEC'",Token
17-
15,56.0,"[11, 16): 'UK-US'","[11, 13): 'UK'",Token
18-
16,56.0,"[11, 16): 'UK-US'","[11,13) 'UK'",Token
19-
17,56.0,"[11, 16): 'UK-US'","[14, 16): 'US'",Token
20-
18,56.0,"[11, 16): 'UK-US'","[14,16) 'UK'",Token
21-
19,60.0,"[345, 363): 'Trade and Industry'","[345, 373): 'Trade and Industry Secretary'",Token
22-
20,63.0,"[19, 39): 'office-Conservatives'","[26, 39): 'Conservatives' ",Token
23-
21,63.0,"[19, 39): 'office-Conservatives'","[27, 39): 'Conservatives'",Token
24-
22,68.0,"[11, 19): 'Canadian'","[11, 30): 'Canadian West Coast'",Token
25-
23,68.0,"[157, 165): 'Canadian'",,Token
26-
24,70.0,"[177, 197): 'New York Commodities'","[177, 202): 'New York Commodities Desk'",Token
27-
25,71.0,"[153, 173): 'New York Commodities'","[153, 178): 'New York Commodities Desk'",Token
28-
26,75.0,"[2736, 2752): 'Newmont-Santa Fe'","[2736, 2743): 'Newmont'",Token
29-
27,75.0,"[2736, 2752): 'Newmont-Santa Fe'","[2744, 2752): 'Santa Fe'",Token
30-
28,114.0,"[11, 17): 'Iowa-S'","[11, 15): 'Iowa'",Token
31-
29,114.0,"[11, 17): 'Iowa-S'","[11, 15): 'Iowa' ",Token
32-
30,114.0,"[51, 61): 'sales-USDA'","[57, 61): 'USDA'",Token
33-
31,114.0,"[51, 61): 'sales-USDA'",,Token
34-
32,123.0,"[11, 17): 'Iowa-S'","[11, 15): 'Iowa' ",Token
35-
33,178.0,"[951, 960): 'then-U.S.'","[956, 960): 'U.S.'",Token
36-
34,183.0,"[18, 35): 'SKIING-GLADISHIVA'","[25, 35): 'GLADISHIVA'",Token
37-
35,186.0,,"[1398, 1405): 'Austria'",Token
38-
36,188.0,"[18, 34): 'SKIING-WORLD CUP'","[25, 34): 'WORLD CUP'",Token
39-
37,190.0,"[11, 27): 'BOBSLEIGH-SHIMER'","[21, 27): 'SHIMER'",Token
40-
38,190.0,"[11, 27): 'BOBSLEIGH-SHIMER'",,Token
41-
39,191.0,"[11, 25): 'SKIING-CHINESE'","[18, 25): 'CHINESE'",Token
42-
40,192.0,"[11, 30): 'BOBSLEIGH-WORLD CUP'","[21, 30): 'WORLD CUP'",Token
43-
41,194.0,"[21, 37): 'SKIING-WORLD CUP'","[28, 37): 'WORLD CUP'",Token
44-
42,195.0,"[21, 37): 'SKIING-WORLD CUP'","[28, 37): 'WORLD CUP'",Token
45-
43,214.0,"[243, 262): 'Saturday'sWorld Cup'","[253, 262): 'World Cup'",Token
1+
,doc_offset,corpus_span,correct_span,correct_ent_type,error_type
2+
0,3,"[21, 37): 'SKIING-WORLD CUP'","[28, 37): 'WORLD CUP'",MISC,Token
3+
1,25,"[24, 38): 'FOOTBALL-COLTS'","[33, 38): 'COLTS'",ORG,Token
4+
2,27,"[565, 573): 'X-DENVER'","[567, 573): 'DENVER'",ORG,Token
5+
3,27,"[889, 900): 'Y-GREEN BAY'","[891, 900): 'GREEN BAY'",ORG,Token
6+
4,29,"[25, 44): 'FOOTBALL-OHIO STATE'","[34, 44): 'OHIO STATE'",ORG,Token
7+
5,39,"[1158, 1175): 'AbelardoFernandez'","[1158, 1175): 'Abelardo Fernandez'",PER,Token
8+
6,40,"[215, 236): 'Real Madrid-Barcelona'","[215, 226): 'Real Madrid'",ORG,Token
9+
7,40,"[215, 236): 'Real Madrid-Barcelona'","[227, 236): 'Barcelona'",ORG,Token
10+
8,54,"[11, 27): 'INTERVIEW-ZYWIEC'","[21, 27): 'ZYWIEC'",ORG,Token
11+
9,56,"[11, 16): 'UK-US'","[11, 13): 'UK'",LOC,Token
12+
10,56,"[11, 16): 'UK-US'","[14, 16): 'US'",LOC,Token
13+
11,63,"[19, 39): 'office-Conservatives'","[26, 39): 'Conservatives'",ORG,Token
14+
12,75,"[2736, 2752): 'Newmont-Santa Fe'","[2736, 2743): 'Newmont'",ORG,Token
15+
13,75,"[2736, 2752): 'Newmont-Santa Fe'","[2744, 2752): 'Santa Fe'",ORG,Token
16+
14,114,"[11, 17): 'Iowa-S'","[11, 15): 'Iowa'",LOC,Token
17+
15,114,"[18, 22): 'Minn'","[16, 22): 'S Minn'",LOC,Token
18+
16,114,"[51, 61): 'sales-USDA'","[57, 61): 'USDA'",ORG,Token
19+
17,123,"[11, 17): 'Iowa-S'","[11, 15): 'Iowa'",LOC,Token
20+
18,123,"[18, 22): 'Minn'","[16, 22): 'S Minn'",LOC,Token
21+
19,178,"[951, 960): 'then-U.S.'","[956, 960): 'U.S.'",LOC,Token
22+
20,183,"[18, 35): 'SKIING-GLADISHIVA'","[25, 35): 'GLADISHIVA'",PER,Token
23+
21,186,"[1398, 1409): 'Austria)118'","[1398, 1405): 'Austria'",LOC,Token
24+
22,188,"[18, 34): 'SKIING-WORLD CUP'","[25, 34): 'WORLD CUP'",MISC,Token
25+
23,190,"[11, 27): 'BOBSLEIGH-SHIMER'","[21, 27): 'SHIMER'",PER,Token
26+
24,191,"[11, 25): 'SKIING-CHINESE'","[18, 25): 'CHINESE'",LOC,Token
27+
25,192,"[11, 30): 'BOBSLEIGH-WORLD CUP'","[21, 30): 'WORLD CUP'",,Token
28+
26,194,"[21, 37): 'SKIING-WORLD CUP'","[28, 37): 'WORLD CUP'",,Token
29+
27,214,"[243, 262): 'Saturday'sWorld Cup'","[253, 262): 'World Cup'",MISC,Token
Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
1-
,doc_offset,corpus_span,correct_span,error_type
2-
0,80.0,,"[44, 59): 'Interfax'",Token
3-
1,80.0,,"[51, 59): 'Interfax'",Token
4-
2,115.0,,"[23, 27): 'News'",Token
5-
3,163.0,"[220, 232): 'x-AEK Athens'","[222, 232): 'AEK Athens'",Token
6-
4,163.0,"[271, 283): 'x-Olympiakos'","[273, 283): 'Olympiakos'",Token
7-
5,163.0,"[308, 313): 'x-PAO'","[310, 313): 'PAO'",Token
8-
6,169.0,"[50, 61): 'trip-Canada'","[55, 61): 'Canada'",Token
9-
7,298.0,"[49, 60): '1997--Ruehe'","[55, 60): 'Ruehe'",Token
10-
8,343.0,"[11, 31): 'AUSTRALIAN RULES-AFL'","[11, 21): 'AUSTRALIAN'",Token
11-
9,343.0,"[11, 31): 'AUSTRALIAN RULES-AFL'","[11, 27): 'AUSTRALIAN RULES'",Token
12-
10,343.0,"[11, 31): 'AUSTRALIAN RULES-AFL'","[28, 31): 'AFL'",Token
13-
11,343.0,"[11, 31): 'AUSTRALIAN RULES-AFL'","[11, 21): 'AUSTRALIAN'",Token
14-
12,422.0,"[236, 246): 'Videoton(*'","[236, 244): 'Videoton'",Token
15-
13,593.0,"[45, 57): 'France-Juppe'","[45, 51): 'France'",Token
16-
14,593.0,"[45, 57): 'France-Juppe'","[52, 57): 'Juppe'",Token
17-
15,626.0,"[42, 59): 'disarmament-China'","[42, 59): 'disarmament-China'",Token
18-
16,701.0,"[17, 30): 'union-England'","[23, 30): 'England'",Token
19-
17,918.0,"[11, 24): 'INTERVIEW-T&N'","[11, 24): 'INTERVIEW-T&N'",Token
20-
18,918.0,"[11, 24): 'INTERVIEW-T&N'","[21, 24): 'T&N'",Token
1+
,doc_offset,corpus_span,correct_span,correct_ent_type,error_type
2+
0,80,"[44, 59): 'rebels-Interfax'","[51, 59): 'Interfax'",ORG,Token
3+
1,115,"[17, 27): 'FOCUS-News'","[23, 27): 'News'",ORG,Token
4+
2,163,"[220, 232): 'x-AEK Athens'","[222, 232): 'AEK Athens'",ORG,Token
5+
3,163,"[271, 283): 'x-Olympiakos'","[273, 283): 'Olympiakos'",ORG,Token
6+
4,163,"[308, 313): 'x-PAO'","[310, 313): 'PAO'",ORG,Token
7+
5,169,"[50, 61): 'trip-Canada'","[55, 61): 'Canada'",LOC,Token
8+
6,198,"[11, 20): 'WSC-India'","[11, 14): 'WSC'",ORG,Token
9+
7,198,"[11, 20): 'WSC-India'","[15, 20): 'India'",LOC,Token
10+
8,298,"[49, 60): '1997--Ruehe'","[55, 60): 'Ruehe'",PER,Token
11+
9,343,"[11, 31): 'AUSTRALIAN RULES-AFL'","[11, 21): 'AUSTRALIAN'",MISC,Token
12+
10,343,"[11, 31): 'AUSTRALIAN RULES-AFL'","[28, 31): 'AFL'",MISC,Token
13+
11,422,"[236, 246): 'Videoton(*'","[236, 244): 'Videoton'",,Token
14+
12,444,"[20, 40): 'SCOREBOARD-AUSTRALIA'","[31, 40): 'AUSTRALIA'",ORG,Token
15+
13,593,"[45, 57): 'France-Juppe'","[45, 51): 'France'",LOC,Token
16+
14,593,"[45, 57): 'France-Juppe'","[52, 57): 'Juppe'",PER,Token
17+
15,626,"[42, 59): 'disarmament-China'","[54, 59): 'China'",LOC,Token
18+
16,701,"[17, 30): 'union-England'","[23, 30): 'England'",ORG,Token
19+
17,734,"[11, 31): 'BALANCE-Water Dist 1'","[19, 31): 'Water Dist 1'",ORG,Token
20+
18,918,"[11, 24): 'INTERVIEW-T&N'","[21, 24): 'T&N'",ORG,Token

0 commit comments

Comments
 (0)