Skip to content

Commit 579fb17

Browse files
committed
tokens relabelled
1 parent 951a55c commit 579fb17

2 files changed

Lines changed: 229 additions & 113 deletions

File tree

corrected_labels/token_corrections.csv

Lines changed: 40 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -2,195 +2,122 @@
22
0,test,1131.0,3.0,"[21, 37): 'SKIING-WORLD CUP'","[28, 37): 'WORLD CUP'","SKIING NNP I-NP I-MISC
33
- : OI-MISC
44
WORLD NNP I-NP I-MISC"
5-
1,test,1131.0,3.0,"[21, 37): 'SKIING-WORLD CUP'","[28,37)'WORLD CUP'",
6-
2,test,5174.0,22.0,"[19, 23): 'ARAB'","[19, 35): 'ARAB CONTRACTORS'",
7-
3,test,6545.0,27.0,"[565, 573): 'X-DENVER'","[567, 573): 'DENVER'","X IN I-PP O
5+
2,test,6545.0,27.0,"[565, 573): 'X-DENVER'","[567, 573): 'DENVER'","X IN I-PP O
86
- : 0 0
97
DENVER NNP I-NP I-MISC"
10-
4,test,-2.0,27.0,,"[567, 573): 'DENVER'",
11-
5,test,6640.0,27.0,"[889, 900): 'Y-GREEN BAY'","[891, 900): 'GREEN BAY'","YNNP I-NP O
8+
4,test,6640.0,27.0,"[889, 900): 'Y-GREEN BAY'","[891, 900): 'GREEN BAY'","YNNP I-NP O
129
- : O O
1310
GREEN NNP I-NP I-MISC"
14-
6,test,6851.0,29.0,"[25, 44): 'FOOTBALL-OHIO STATE'","[34, 44): 'OHIO STATE'","FOOTBALL NNP I-NP O
11+
5,test,6851.0,29.0,"[25, 44): 'FOOTBALL-OHIO STATE'","[34, 44): 'OHIO STATE'","FOOTBALL NNP I-NP O
1512
- : O O
1613
OHIO NNP I-NP I-MISC"
17-
7,test,6851.0,29.0,"[25, 44): 'FOOTBALL-OHIO STATE'","[34,44): 'OHIO STATE'","FOOTBALL NNP I-NP O
18-
-: O O
19-
OHIO NNP I-NP I-MISC"
20-
8,test,8644.0,39.0,"[1158, 1175): 'AbelardoFernandez'","[1158, 1175): 'Abelardo Fernandez'","Abelardo NNS I-NP I-PER
14+
6,test,8644.0,39.0,"[1158, 1175): 'AbelardoFernandez'","[1158, 1175): 'Abelardo Fernandez'","Abelardo NNS I-NP I-PER
2115
Fernandez NNS I-NP I-PER"
22-
9,test,12284.0,54.0,"[1145, 1152): 'Boxmeer'","[1141, 1152): 'van Boxmeer'",
23-
10,test,12068.0,54.0,"[11, 27): 'INTERVIEW-ZYWIEC'","[21, 27): 'ZYWIEC'","INTERVIEW NNP I-NP O
16+
8,test,12068.0,54.0,"[11, 27): 'INTERVIEW-ZYWIEC'","[21, 27): 'ZYWIEC'","INTERVIEW NNP I-NP O
2417
- : O O
2518
ZYWIEC NNP I-NP I-MISC"
26-
11,test,12555.0,54.0,"[2594, 2601): 'Boxmeer'","[2590, 2601): 'van Boxmeer'",
27-
12,test,12682.0,54.0,"[3231, 3241): 'Full Light'","[3224, 3241): 'Zywiec Full Light'",
28-
13,test,12720.0,54.0,"[3421, 3428): 'Boxmeer'","[3417, 3428): 'van Boxmeer'",
29-
14,test,-2.0,54.0,,"[?, 27): 'ZYWIEC'",
30-
15,test,13001.0,56.0,"[11, 16): 'UK-US'","[11, 13): 'UK'","UK NNP I-NP I-MISC
19+
13,test,13001.0,56.0,"[11, 16): 'UK-US'","[11, 13): 'UK'","UK NNP I-NP I-MISC
3120
- : O O
3221
US NNP I-NP I-MISC"
33-
16,test,13001.0,56.0,"[11, 16): 'UK-US'","[11,13) 'UK'","UK NNP I-NP I-MISC
34-
- : O O
35-
US NNP I-NP I-MISC"
36-
17,test,13001.0,56.0,"[11, 16): 'UK-US'","[14, 16): 'US'","UK NNP I-NP I-MISC
37-
- : O O
38-
US NNP I-NP I-MISC"
39-
18,test,13001.0,56.0,"[11, 16): 'UK-US'","[14,16) 'UK'","UK NNP I-NP I-MISC
40-
- : O O
41-
US NNP I-NP I-MISC"
42-
19,test,13447.0,60.0,"[345, 363): 'Trade and Industry'","[345, 373): 'Trade and Industry Secretary'",
43-
20,test,14467.0,63.0,"[19, 39): 'office-Conservatives'","[26, 39): 'Conservatives' ","office NNP I-NP O
22+
15,test,14467.0,63.0,"[19, 39): 'office-Conservatives'","[26, 39): 'Conservatives' ","office NNP I-NP O
4423
- : O O
4524
Conservatives JJ I-NP I-MISC"
46-
21,test,14467.0,63.0,"[19, 39): 'office-Conservatives'","[27, 39): 'Conservatives'","office NNP I-NP O
25+
20,test,16319.0,75.0,"[2736, 2752): 'Newmont-Santa Fe'","[2736, 2743): 'Newmont'","NewmontNNP I-NP I-ORG
4726
- : O O
48-
Conservatives JJ I-NP I-MISC"
49-
22,test,15155.0,68.0,"[11, 19): 'Canadian'","[11, 30): 'Canadian West Coast'",
50-
23,test,15183.0,68.0,"[157, 165): 'Canadian'",,
51-
24,test,15337.0,70.0,"[177, 197): 'New York Commodities'","[177, 202): 'New York Commodities Desk'",
52-
25,test,15380.0,71.0,"[153, 173): 'New York Commodities'","[153, 178): 'New York Commodities Desk'",
53-
26,test,16319.0,75.0,"[2736, 2752): 'Newmont-Santa Fe'","[2736, 2743): 'Newmont'","NewmontNNP I-NP I-ORG
54-
- : O O
55-
Santa NNP I-NP I-ORG"
56-
27,test,16319.0,75.0,"[2736, 2752): 'Newmont-Santa Fe'","[2744, 2752): 'Santa Fe'","Newmont NNP I-NP I-ORG
57-
- :O O
5827
Santa NNP I-NP I-ORG"
59-
28,test,26720.0,114.0,"[11, 17): 'Iowa-S'","[11, 15): 'Iowa'","Iowa I-NP I-LOC
60-
- : O O
61-
S NNP I-NP I-LOC"
62-
29,test,26720.0,114.0,"[11, 17): 'Iowa-S'","[11, 15): 'Iowa' ","Iowa I-NP I-LOC
28+
21,test,26720.0,114.0,"[11, 17): 'Iowa-S'","[11, 15): 'Iowa'","Iowa I-NP I-LOC
6329
- : O O
6430
S NNP I-NP I-LOC"
65-
30,test,26728.0,114.0,"[51, 61): 'sales-USDA'","[57, 61): 'USDA'","sales NNP I-NP O
31+
22,test,26728.0,114.0,"[51, 61): 'sales-USDA'","[57, 61): 'USDA'","sales NNP I-NP O
6632
- : O O
6733
USDA NN I-NP I-MISC"
68-
31,test,26728.0,114.0,"[51, 61): 'sales-USDA'",,"sales NNP I-NP O
69-
- : O O
70-
USDA NN I-NP I-MISC"
71-
32,test,28718.0,123.0,"[11, 17): 'Iowa-S'","[11, 15): 'Iowa' ","Iowa NNP I-NP I-LOC
34+
23,test,28718.0,123.0,"[11, 17): 'Iowa-S'","[11, 15): 'Iowa' ","Iowa NNP I-NP I-LOC
7235
- : O O
7336
S NNP I-NP I-LOC"
74-
33,test,38688.0,178.0,"[951, 960): 'then-U.S.'","[956, 960): 'U.S.'","then RB I-ADVP O
37+
24,test,38688.0,178.0,"[951, 960): 'then-U.S.'","[956, 960): 'U.S.'","then RB I-ADVP O
7538
- : O O
7639
U.S. JJ I-NP I-MISC"
77-
34,test,39979.0,183.0,"[18, 35): 'SKIING-GLADISHIVA'","[25, 35): 'GLADISHIVA'","SKIING NNP I-NP O
40+
25,test,39979.0,183.0,"[18, 35): 'SKIING-GLADISHIVA'","[25, 35): 'GLADISHIVA'","SKIING NNP I-NP O
7841
- : O O
7942
GLADISHIVA NNP I-NP I-MISC"
80-
35,test,-2.0,186.0,,"[1398, 1405): 'Austria'",
81-
36,test,41169.0,188.0,"[18, 34): 'SKIING-WORLD CUP'","[25, 34): 'WORLD CUP'","SKIING NNP I-NP O
43+
27,test,41169.0,188.0,"[18, 34): 'SKIING-WORLD CUP'","[25, 34): 'WORLD CUP'","SKIING NNP I-NP O
8244
- : O O
8345
WORLD NNP I-NP I-MISC"
84-
37,test,41413.0,190.0,"[11, 27): 'BOBSLEIGH-SHIMER'","[21, 27): 'SHIMER'","BOBSLEIGH NNP I-NP O
85-
- : O O
86-
SHIMER NNP I-NP I-MISC"
87-
38,test,41413.0,190.0,"[11, 27): 'BOBSLEIGH-SHIMER'",,"BOBSLEIGH NNP I-NP O
46+
28,test,41413.0,190.0,"[11, 27): 'BOBSLEIGH-SHIMER'","[21, 27): 'SHIMER'","BOBSLEIGH NNP I-NP O
8847
- : O O
8948
SHIMER NNP I-NP I-MISC"
90-
39,test,41595.0,191.0,"[11, 25): 'SKIING-CHINESE'","[18, 25): 'CHINESE'","SKIING NNP I-NP O
49+
29,test,41595.0,191.0,"[11, 25): 'SKIING-CHINESE'","[18, 25): 'CHINESE'","SKIING NNP I-NP O
9150
- : O O
9251
CHINESE JJ I-NP I-MISC"
93-
40,test,41850.0,192.0,"[11, 30): 'BOBSLEIGH-WORLD CUP'","[21, 30): 'WORLD CUP'","BOBSLEIGH NNP I-NP O
52+
30,test,41850.0,192.0,"[11, 30): 'BOBSLEIGH-WORLD CUP'","[21, 30): 'WORLD CUP'","BOBSLEIGH NNP I-NP O
9453
- : O O
9554
WORLD NNP I-NP I-MISC"
96-
41,test,42385.0,194.0,"[21, 37): 'SKIING-WORLD CUP'","[28, 37): 'WORLD CUP'","SKIING NNP I-NP O
55+
31,test,42385.0,194.0,"[21, 37): 'SKIING-WORLD CUP'","[28, 37): 'WORLD CUP'","SKIING NNP I-NP O
9756
-: O O
9857
WORLD NNP I-NP I-MISC"
99-
42,test,-1.0,195.0,"[21, 37): 'SKIING-WORLD CUP'","[28, 37): 'WORLD CUP'",
100-
43,test,46987.0,214.0,"[243, 262): 'Saturday'sWorld Cup'","[253, 262): 'World Cup'","Saturday NNP I-NP O
58+
33,test,46987.0,214.0,"[243, 262): 'Saturday'sWorld Cup'","[253, 262): 'World Cup'","Saturday NNP I-NP O
10159
's POS B-NP O
10260
World NNP B-NP I-MISC"
103-
44,dev,4120.0,15.0,"[41, 51): 'CUNNINGHAM'","(33, 51]: 'RANDALL CUNNINGHAM'","FOOTBALL NNP I-NP O
61+
34,dev,4120.0,15.0,"[41, 51): 'CUNNINGHAM'","(33, 51]: 'RANDALL CUNNINGHAM'","FOOTBALL NNP I-NP O
10462
- : O O
10563
RANDALL NNP I-NP I-MISC"
106-
45,dev,4120.0,15.0,"[41, 51): 'CUNNINGHAM'","(33,51] 'RANDALL CUNNINGHAM'","FOOTBALL NNP I-NP O
64+
35,dev,4118.0,15.0,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'","[15, 32): 'AMERICAN FOOTBALL'","FOOTBALL NNP I-NP O
10765
- : O O
10866
RANDALL NNP I-NP I-MISC"
109-
46,dev,4118.0,15.0,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'","[15, 32): 'AMERICAN FOOTBALL'","FOOTBALL NNP I-NP O
110-
- : O O
111-
RANDALL NNP I-NP I-MISC"
112-
47,dev,4118.0,15.0,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'","[33, 40): 'RANDALL'","FOOTBALL NNP I-NP O
113-
- : O O
114-
RANDALL NNP I-NP I-MISC"
115-
48,dev,4120.0,15.0,"[41, 51): 'CUNNINGHAM'","[33,51): 'RANDALL CUNNINGHAM'","FOOTBALL NNP I-NP O
116-
- : O O
117-
RANDALL NNP I-NP I-MISC"
118-
49,dev,10619.0,39.0,"[11, 23): 'Boxing-Bruno'","[18, 23): 'Bruno'","Boxing NNP I-NP O
67+
36,dev,10619.0,39.0,"[11, 23): 'Boxing-Bruno'","[18, 23): 'Bruno'","Boxing NNP I-NP O
11968
- : O O
12069
Bruno NN I-NP I-MISC"
121-
50,dev,-2.0,39.0,,"[18, 23): 'Bruno' ",
122-
51,dev,15911.0,60.0,"[1358, 1371): 'Tripoli-based'","[1358, 1365): 'Tripoli'","Tripoli NNP I-NP I-LOC
70+
38,dev,15911.0,60.0,"[1358, 1371): 'Tripoli-based'","[1358, 1365): 'Tripoli'","Tripoli NNP I-NP I-LOC
12371
- : O O
12472
based JJ I-NP I-MISC"
125-
52,dev,19233.0,65.0,"[1125, 1134): 'asset-St.'","[1131, 1146): 'St. Louis based'","asset NNP I-NP O
73+
39,dev,19233.0,65.0,"[1125, 1134): 'asset-St.'","[1131, 1146): 'St. Louis based'","asset NNP I-NP O
12674
- : O O
12775
St NN I-NP I-MISC"
128-
53,dev,19130.0,65.0,"[592, 607): 'St. Louis-based'","[592, 601): 'St. Louis'","Louis NNP I-NP I-LOC
76+
40,dev,19130.0,65.0,"[592, 607): 'St. Louis-based'","[592, 601): 'St. Louis'","Louis NNP I-NP I-LOC
12977
- : O O
13078
based JJ I-NP I-MISC"
131-
54,dev,19233.0,65.0,"[1125, 1134): 'asset-St.'",,"asset NNP I-NP O
132-
- : O O
133-
St NN I-NP I-MISC"
134-
55,dev,44837.0,175.0,"[252, 264): 'London-based'","[252, 258): 'London'","London NNP I_NP I-LOC
79+
41,dev,44837.0,175.0,"[252, 264): 'London-based'","[252, 258): 'London'","London NNP I_NP I-LOC
13580
- : O O
13681
based JJ I-NP I-MISC"
137-
56,dev,46457.0,181.0,"[1761, 1774): 'Moscow-backed'","[1761, 1767): 'Moscow'","Moscow NNP I-NP I-ORG
82+
42,dev,46457.0,181.0,"[1761, 1774): 'Moscow-backed'","[1761, 1767): 'Moscow'","Moscow NNP I-NP I-ORG
13883
- : O O
13984
backed JJ I-NP I-MISC"
140-
57,dev,50964.0,198.0,"[39, 47): 'aid-U.N.'","[43, 47): 'U.N.'","aid NN I-NP O
85+
43,dev,50964.0,198.0,"[39, 47): 'aid-U.N.'","[43, 47): 'U.N.'","aid NN I-NP O
14186
- . O O
14287
U.N. NN I-NP I-ORG"
143-
58,train,17719.0,80.0,,"[44, 59): 'Interfax'","rebels NNP I-PP O
88+
44,train,17719.0,80.0,,"[44, 59): 'Interfax'","rebels NNP I-PP O
14489
- . O O
14590
Interfax NN I-NP O"
146-
59,train,-2.0,80.0,,"[51, 59): 'Interfax'",
147-
60,train,-2.0,115.0,,"[23, 27): 'News'",
148-
61,train,37914.0,163.0,"[220, 232): 'x-AEK Athens'","[222, 232): 'AEK Athens'","x NNP I-NP O
91+
47,train,37914.0,163.0,"[220, 232): 'x-AEK Athens'","[222, 232): 'AEK Athens'","x NNP I-NP O
14992
- . O O
15093
AEK IN I-NP I-ORG"
151-
62,train,37926.0,163.0,"[271, 283): 'x-Olympiakos'","[273, 283): 'Olympiakos'","x NNP I-NP O
94+
48,train,37926.0,163.0,"[271, 283): 'x-Olympiakos'","[273, 283): 'Olympiakos'","x NNP I-NP O
15295
- . O O
15396
Olympiakos NNS I-NP I-ORG"
154-
63,train,37933.0,163.0,"[308, 313): 'x-PAO'","[310, 313): 'PAO'","x NNP I-NP O
97+
49,train,37933.0,163.0,"[308, 313): 'x-PAO'","[310, 313): 'PAO'","x NNP I-NP O
15598
- . O O
15699
PAO JJ I-NP I-ORG"
157-
64,train,38844.0,169.0,"[50, 61): 'trip-Canada'","[55, 61): 'Canada'","trip NNP I-NP O
100+
50,train,38844.0,169.0,"[50, 61): 'trip-Canada'","[55, 61): 'Canada'","trip NNP I-NP O
158101
- . O O
159102
Canada NN I-NP I-MISC"
160-
65,train,65636.0,298.0,"[49, 60): '1997--Ruehe'","[55, 60): 'Ruehe'","1997CD I-NP O
103+
51,train,65636.0,298.0,"[49, 60): '1997--Ruehe'","[55, 60): 'Ruehe'","1997CD I-NP O
161104
-- : O O
162105
Ruehe CD I-NP I-MISC"
163-
66,train,75410.0,343.0,"[11, 31): 'AUSTRALIAN RULES-AFL'","[11, 21): 'AUSTRALIAN'","RULES NNP I-NP O
106+
52,train,75410.0,343.0,"[11, 31): 'AUSTRALIAN RULES-AFL'","[11, 21): 'AUSTRALIAN'","RULES NNP I-NP O
164107
- : O O
165108
AFL NNP I-NP I-MISC"
166-
67,train,75410.0,343.0,"[11, 31): 'AUSTRALIAN RULES-AFL'","[11, 27): 'AUSTRALIAN RULES'","RULES NNP I-NP O
167-
- . O O
168-
AFL NNP I-NP I-MISC"
169-
68,train,75410.0,343.0,"[11, 31): 'AUSTRALIAN RULES-AFL'","[28, 31): 'AFL'","RULES NNP I-NP O
170-
- : O O
171-
AFL NNP I-NP I-MISC"
172-
69,train,75410.0,343.0,"[11, 31): 'AUSTRALIAN RULES-AFL'","[11, 21): 'AUSTRALIAN'","RULES NNP I-NP O
173-
- : O O
174-
AFL NNP I-NP I-MISC"
175-
70,train,95553.0,422.0,"[236, 246): 'Videoton(*'","[236, 244): 'Videoton'","Videoton NN I-NP I-ORG
109+
53,train,95553.0,422.0,"[236, 246): 'Videoton(*'","[236, 244): 'Videoton'","Videoton NN I-NP I-ORG
176110
( ( O O
177111
* SYM O O"
178-
71,train,139024.0,593.0,"[45, 57): 'France-Juppe'","[45, 51): 'France'","France NNP I-NP I-LOC
112+
54,train,139024.0,593.0,"[45, 57): 'France-Juppe'","[45, 51): 'France'","France NNP I-NP I-LOC
179113
- . O O
180114
Juppe JJ I-NP I-PER"
181-
72,train,139024.0,593.0,"[45, 57): 'France-Juppe'","[52, 57): 'Juppe'","France NNP I-NP I-LOC
182-
- . O O
183-
Juppe JJ I-NP I-PER"
184-
73,train,146605.0,626.0,"[42, 59): 'disarmament-China'","[42, 59): 'disarmament-China'","disarmament NNP I-NP O
115+
55,train,146605.0,626.0,"[42, 59): 'disarmament-China'","[42, 59): 'disarmament-China'","disarmament NNP I-NP O
185116
- . O O
186117
China NN I-NP I-LOC"
187-
74,train,164709.0,701.0,"[17, 30): 'union-England'","[23, 30): 'England'","union NNP I-NP O
118+
56,train,164709.0,701.0,"[17, 30): 'union-England'","[23, 30): 'England'","union NNP I-NP O
188119
- . O O
189120
England JJ I-NP I-ORG"
190-
75,train,213462.0,918.0,"[11, 24): 'INTERVIEW-T&N'","[11, 24): 'INTERVIEW-T&N'","INTERVIEW NNP I-NP O
191-
- . O O
192-
T&N NNP I-NP I-MISC"
193-
76,train,213462.0,918.0,"[11, 24): 'INTERVIEW-T&N'","[21, 24): 'T&N'"," INTERVIEW NNP I-NP O
121+
57,train,213462.0,918.0,"[11, 24): 'INTERVIEW-T&N'","[11, 24): 'INTERVIEW-T&N'","INTERVIEW NNP I-NP O
194122
- . O O
195123
T&N NNP I-NP I-MISC"
196-
77,,,,,,

0 commit comments

Comments
 (0)