Skip to content

Commit 2d0a22a

Browse files
committed
Fix various data entry errors
1 parent e8e5b03 commit 2d0a22a

10 files changed

Lines changed: 118 additions & 82 deletions

corrected_labels/human_labels_audited/CoNLL_2_in_gold.csv

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,9 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
9595
0,test,27,"[565, 573): 'X-DENVER'",MISC,Token,"[567, 573): 'DENVER'",ORG,"split on '-', ""X"" is an annotation",,,
9696
0,test,27,"[889, 900): 'Y-GREEN BAY'",MISC,Token,"[891, 900): 'GREEN BAY'",ORG,"split on '-', ""Y"" is an annotation",,,
9797
,test,27,"[410, 412): 'PA'",ORG,Wrong,,,Points Allowed,,,
98-
0,test,28,"[82, 90): 'National'",ORG,Sentence,"[82, 106): 'National Football League'",,Also sentence boundary after National,,,
99-
0,test,28,"[91, 99): 'Football'",LOC,Sentence,"[82, 106): 'National Football League'",ORG,,,,
100-
0,test,28,"[100, 106): 'League'",LOC,Sentence,"[82, 106): 'National Football League'",ORG,,,,
98+
0,test,28,"[82, 90): 'National'",ORG,Sentence,"[82, 106): 'National Football League'",ORG,Also entity boundary after National,,,
99+
0,test,28,"[91, 99): 'Football'",LOC,Tag,"[91, 99): 'Football'",ORG,,,,
100+
0,test,28,"[100, 106): 'League'",LOC,Tag,"[100, 106): 'League'",ORG,,,,
101101
0,test,29,"[25, 44): 'FOOTBALL-OHIO STATE'",MISC,Token,"[34, 44): 'OHIO STATE'",ORG,"Need to split on '-' ""FOOTBALL-OHIO""",,,
102102
0,test,29,"[47, 51): 'PACE'",PER,None,,,last name,,,
103103
0,test,29,"[65, 79): 'LOMBARDI AWARD'",MISC,None,,,Award,,,
@@ -11626,4 +11626,4 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
1162611626
16,test,230,"[1108, 1115): 'Germany'",LOC,,,,,,,
1162711627
16,test,230,"[1127, 1132): 'Irish'",MISC,,,,,,,
1162811628
16,test,230,"[1153, 1160): 'England'",LOC,,,,,,,
11629-
16,test,230,"[1252, 1259): 'England'",LOC,,,,,,,
11629+
16,test,230,"[1252, 1259): 'England'",LOC,,,,,,,

corrected_labels/human_labels_audited/CoNLL_2_not_in_gold.csv

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,9 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
137137
13,test,19,"[1107, 1111): 'Tour'",MISC,None,,,,,,,,
138138
13,test,25,"[1996, 1999): 'NFC'",ORG,None,,,,,Conferences/divisions of leagues not considered entities,,,
139139
13,test,25,"[194, 197): 'AFC'",ORG,None,,,,,Conferences/divisions of leagues not considered entities,,,
140-
,test,26,,,Wrong,"[236, 244): 'ATLANTIC'",LOC,,,Conferences/divisions of leagues not considered entities,,,
140+
,test,26,,,None,"[236, 244): 'ATLANTIC'",LOC,,,Conferences/divisions of leagues not considered entities,,,
141141
13,test,26,"[419, 435): 'CENTRAL DIVISION'",MISC,None,,,,,Conferences/divisions of leagues not considered entities,,,
142-
,test,26,,,Wrong,"[822, 829): 'PACIFIC'",LOC,,MISC,Conferences/divisions of leagues not considered entities,,,
142+
,test,26,,,None,"[822, 829): 'PACIFIC'",LOC,,,Conferences/divisions of leagues not considered entities,,,
143143
,test,26,,,Tag,"[1042, 1048): 'BOSTON'",LOC,,ORG,Boston basketball team,,,
144144
,test,26,,,Tag,"[1062, 1069): 'DETROIT'",LOC,,ORG,Detroit basketball team,,,
145145
,test,26,,,Tag,"[1082, 1087): 'MIAMI'",LOC,,ORG,Miami basketball team,,,
@@ -289,8 +289,8 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
289289
11,test,18,"[112, 128): 'Sheffield Shield'",ORG,None,,MISC,,,,,,
290290
11,test,25,"[740, 750): 'Cincinnati'",ORG,None,,LOC,,,,,,
291291
11,test,28,"[82, 99): 'National Football'",ORG,Sentence,"[82, 90): 'National'",ORG,"[82, 106): 'National Football League'",ORG,"""National Football League"" with sentence boundary and tag issues",,,
292-
,test,28,,,Sentence,"[91, 99): 'Football'",LOC,"[82, 106): 'National Football League'",ORG,"""National Football League"" with sentence boundary and tag issues",,,
293-
,test,28,,,Sentence,"[100, 106): 'League'",LOC,"[82, 106): 'National Football League'",ORG,"""National Football League"" with sentence boundary and tag issues",,,
292+
,test,28,,,Tag,"[91, 99): 'Football'",LOC,"[91, 99): 'Football'",ORG,"""National Football League"" with sentence boundary and tag issues",,,
293+
,test,28,,,Tag,"[100, 106): 'League'",LOC,"[100, 106): 'League'",ORG,"""National Football League"" with sentence boundary and tag issues",,,
294294
11,test,40,"[215, 236): 'Real Madrid-Barcelona'",ORG,None,,MISC,,,"""Saturday's big Real Madrid-Barcelona game""",,,
295295
11,test,50,"[1262, 1267): 'Czech'",MISC,Tag,,LOC,"[1262, 1267): 'Czech'",MISC,,,,
296296
11,test,51,"[17, 24): 'ROMANIA'",LOC,None,"[11, 24): 'RADIO ROMANIA'",ORG,,,,,,
@@ -350,7 +350,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
350350
10,dev,155,"[2049, 2055): 'German'",MISC,None,"[2049, 2061): 'German Villa'",PER,,,,,,
351351
,dev,165,,,Span,"[25, 45): 'TATTERSALLS BREEDERS' ",MISC,"[25, 52): 'TATTERSALLS BREEDERS STAKES' ",,,,,
352352
10,dev,165,"[93, 113): 'Tattersalls Breeders'",ORG,Sentence,"[93, 113): 'Tattersalls Breeders'",MISC,"[93, 120): 'Tattersalls Breeders Stakes'",MISC,"Sentence boundary between ""Breeders"" and ""Stakes""",,,
353-
,dev,165,,,Wrong,"[114, 120): 'Stakes'",,,,See previous row,,,
353+
,dev,165,,,Sentence,"[114, 120): 'Stakes'",MISC,"[93, 120): 'Tattersalls Breeders Stakes'",MISC,See previous row,,,
354354
10,dev,170,"[770, 774): 'Kehl'",PER,None,,LOC,,,,,,
355355
10,dev,170,"[1926, 1930): 'Alex'",PER,None,"[1914, 1930): 'Ice Cold in Alex'",MISC,,,,,,
356356
10,dev,170,"[1965, 1980): 'Cardinal Wolsey'",PER,None,"[1974, 1980): 'Wolsey'",,,,,,,

corrected_labels/human_labels_audited/CoNLL_3_in_gold.csv

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -282,9 +282,9 @@ X,0,test,27,"[1434, 1441): 'NY JETS'",ORG,None,,,,,,
282282
X,0,test,27,"[1468, 1475): 'SEATTLE'",LOC,Tag,,ORG,football team,,,
283283
X,0,test,27,"[1476, 1484): 'CAROLINA'",ORG,None,,,,,,
284284
X,0,test,28,"[11, 14): 'NFL'",ORG,None,,,,,,
285-
X,0,test,28,"[82, 90): 'National'",ORG,Sentence,"[82, 106): 'National Football League'",,,,,
286-
X,0,test,28,"[91, 99): 'Football'",LOC,Sentence,"[82, 106): 'National Football League'",ORG,,,,
287-
X,0,test,28,"[100, 106): 'League'",LOC,Sentence,"[82, 106): 'National Football League'",ORG,,,,
285+
X,0,test,28,"[82, 90): 'National'",ORG,Sentence,"[82, 106): 'National Football League'",ORG,,,,
286+
X,0,test,28,"[91, 99): 'Football'",LOC,Tag,"[91, 99): 'Football'",ORG,,,,
287+
X,0,test,28,"[100, 106): 'League'",LOC,Tag,"[100, 106): 'League'",ORG,,,,
288288
X,0,test,29,"[25, 44): 'FOOTBALL-OHIO STATE'",MISC,Token,"[34, 44): 'OHIO STATE'",ORG,,,,
289289
X,0,test,29,"[47, 51): 'PACE'",PER,None,,,,,,
290290
X,0,test,29,"[65, 79): 'LOMBARDI AWARD'",MISC,None,,,,,,
@@ -444,8 +444,8 @@ X,0,test,137,"[296, 301): 'Minas'",MISC,None,,,Zach: Brand of oil; Fred: Ambiguo
444444
X,0,test,137,"[356, 361): 'Dubai'",MISC,None,,,Zach: Brand of oil; Fred: Ambiguous; a grade of oil named for the location from which it is extracted,,,
445445
X,0,test,137,"[386, 395): 'Tia Juana'",MISC,None,,,Zach: Brand of oil; Fred: Ambiguous; a grade of oil named for the location from which it is extracted,,,
446446
X,0,test,137,"[409, 416): 'Isthmus'",MISC,None,,,Zach: Brand of oil; Fred: Ambiguous; a grade of oil named for the location from which it is extracted,,,
447-
X,0,test,142,"[342, 348): 'Kesers'",PER,Tag,,MISC,Two rival clans ,,,
448-
X,,test,142,"[353, 363): 'Karabuluts'",PER,Tag,,MISC,Two rival clans ,,,
447+
X,0,test,142,"[342, 348): 'Kesers'",PER,Tag,,ORG,Two rival clans ,,,
448+
X,,test,142,"[353, 363): 'Karabuluts'",PER,Tag,,ORG,Two rival clans ,,,
449449
X,0,test,144,"[48, 58): 'DODGE CITY'",LOC,None,,,,,,
450450
X,0,test,146,"[11, 17): 'ACCESS'",MISC,Tag,,ORG,Short for NYMEX ACCESS,,,
451451
X,0,test,146,"[143, 155): 'NYMEX ACCESS'",MISC,Tag,,ORG,Stock market ,,,
@@ -541,7 +541,7 @@ X,0,test,192,"[808, 815): 'Garrett'",PER,None,,,,,,
541541
X,0,test,192,"[816, 821): 'Hines'",PER,None,,,,,,
542542
X,0,test,192,"[850, 861): 'Austria III'",ORG,None,,,,,,
543543
X,0,test,193,"[20, 27): 'WOOLMER'",PER,None,,,,,,
544-
X,0,test,193,"[744, 759): 'United Province'",LOC,Tag,,ORG,"Fred: Somewhat ambiguous, but appears to refer to the United Province cricket team",,,
544+
X,0,test,193,"[744, 759): 'United Province'",LOC,None,,,"Fred: Ambiguous: ""His father Clarence Woolmer represented United Province, now renamed Uttar Pradesh""",,,
545545
X,0,test,193,"[1412, 1430): 'India-South Africa'",MISC,None,,,,,,
546546
X,0,test,196,"[22, 38): 'WORLD GRAND PRIX'",MISC,None,,,,,,
547547
X,0,test,199,"[27, 52): 'SCOTTISH PREMIER DIVISION'",MISC,Span,"[27, 35): 'SCOTTISH'",Span,Divisions of leagues not considered entities,,,
@@ -11635,4 +11635,4 @@ X,4,test,225,"[34, 41): 'RIBALTA'",PER,None,,,,,,
1163511635
,17,test,230,"[1153, 1160): 'England'",LOC,,,,,,,
1163611636
,17,test,230,"[1213, 1225): 'Leeds United'",ORG,,,,,,,
1163711637
,17,test,230,"[1252, 1259): 'England'",LOC,,,,,,,
11638-
,17,test,230,"[1395, 1400): 'Bobby'",PER,,,,,,,
11638+
,17,test,230,"[1395, 1400): 'Bobby'",PER,,,,,,,

corrected_labels/human_labels_audited/CoNLL_3_not_in_gold.csv

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
1818
17,dev,11,"[1961, 1975): 'Czech Republic'",LOC,Sentence,"[1967, 1975): 'Republic'",LOC,,,"Sentence boundary between ""Czech"" and ""Republic""",,,
1919
14,dev,12,"[1290, 1310): 'MONTREAL LOS ANGELES'",LOC,None,,,,,,,,
2020
16,dev,12,"[1366, 1383): 'NEW YORK COLORADO'",LOC,None,,,,,,,,
21-
17,dev,13,"[83, 104): 'Major League Baseball'",MISC,Sentence,"[83, 95): 'Major League'",MISC,,,"Sentence boundary between ""League"" and ""Baseball""",,6/15/20 15:01,
21+
17,dev,13,"[83, 104): 'Major League Baseball'",MISC,Sentence,"[83, 95): 'Major League'",MISC,"[83, 104): 'Major League Baseball'",MISC,"Sentence boundary between ""League"" and ""Baseball""",,6/15/20 15:01,
22+
,dev,13,,,Missing,,,"[96, 104): 'Baseball'",MISC,"Sentence boundary between ""League"" and ""Baseball""",,,
2223
15,dev,14,"[3579, 3583): 'Rios'",ORG,None,,PER,,,,6/15/20 15:54,,
2324
16,dev,14,"[452, 459): 'Stadium'",LOC,Missing,,,,,"""Stadium"" with a capital ""S"", referring to Arthur Ashe Stadium",,,
2425
16,dev,15,"[109, 133): 'National Football League'",MISC,None,,ORG,,,,,,
@@ -166,6 +167,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
166167
,dev,148,,,Tag,"[1385, 1393): 'MONTREAL'",LOC,,ORG,Baseball team,,,
167168
,dev,148,,,Tag,"[1406, 1414): 'ST LOUIS'",LOC,,ORG,Baseball team,,,
168169
17,dev,149,"[81, 102): 'Major League Baseball'",MISC,Sentence,"[81, 93): 'Major League'",MISC,,ORG,"Sentence boundary between ""League"" and ""Baseball""",,6/19/20 15:49,
170+
,dev,149,,,Missing,,,"[94, 102): 'Baseball'",MISC,"Sentence boundary between ""League"" and ""Baseball""",,,
169171
16,dev,150,"[1872, 1883): 'Los Angeles'",LOC,None,,ORG,,,,6/19/20 19:39,,
170172
17,dev,150,"[3115, 3123): 'Montreal'",ORG,Tag,,LOC,,MISC,"""one Montreal hit""",,,
171173
17,dev,150,"[3142, 3150): 'Montreal'",ORG,Tag,,LOC,,,,,,
@@ -625,9 +627,9 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
625627
13,test,9,"[774, 785): 'Efes Pilsen'",PER,None,,ORG,,,,,,
626628
13,test,11,"[17, 25): 'ZIMBABWE'",LOC,None,"[17, 30): 'ZIMBABWE OPEN'",MISC,,,,,,
627629
13,test,27,"[1196, 1204): 'CLINCHED'",ORG,None,,,,,,,,
628-
,test,28,,,Sentence,"[82, 90): 'National'",ORG,"[82, 106): 'National Football League'",MISC,See two lines down,,,
629-
,test,28,,,Sentence,"[91, 99): 'Football'",LOC,"[82, 106): 'National Football League'",MISC,See next line,,,
630-
13,test,28,"[82, 106): 'National Football League'",MISC,Sentence,"[100, 106): 'League'",LOC,"[82, 106): 'National Football League'",MISC,"Sentence boundary between ""Football"" and ""League""",,,
630+
,test,28,,,Sentence,"[82, 90): 'National'",ORG,"[82, 106): 'National Football League'",ORG,See two lines down,,,
631+
,test,28,,,Tag,"[91, 99): 'Football'",LOC,"[91, 99): 'Football'",ORG,See next line,,,
632+
13,test,28,"[82, 106): 'National Football League'",MISC,Tag,"[100, 106): 'League'",LOC,"[100, 106): 'League'",ORG,"Sentence boundary between ""Football"" and ""League""",,,
631633
13,test,43,"[143, 163): 'AC Milan George Weah'",ORG,None,"[143, 151): 'AC Milan'",,,,,,,
632634
13,test,44,"[260, 268): 'Mercedes'",ORG,None,,MISC,,,,,,
633635
13,test,52,"[11, 16): 'CZECH'",PER,None,,MISC,,,,,,
@@ -794,7 +796,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
794796
10,dev,124,"[412, 419): 'Reynard'",ORG,None,"[412, 433): 'Reynard Mercedes-Benz'",,,,,,,
795797
10,dev,124,"[420, 433): 'Mercedes-Benz'",ORG,None,"[412, 433): 'Reynard Mercedes-Benz'",,,,,,,
796798
10,dev,125,"[133, 141): 'CONCACAF'",MISC,None,,ORG,,,,,,
797-
10,dev,125,"[183, 187): '1998'",MISC,Span,"[188, 197): 'World Cup'",MISC,"[183, 187): '1998 World Cup'",,,,,
799+
10,dev,125,"[183, 187): '1998'",MISC,Span,"[188, 197): 'World Cup'",MISC,"[183, 197): '1998 World Cup'",MISC,,,,
798800
10,dev,126,"[3225, 3238): 'Alama Ieremia'",ORG,None,,PER,,,,,,
799801
10,dev,144,"[68, 89): 'Richard Finn NEW YORK'",PER,None,"[68, 80): 'Richard Finn'",,,,,,,
800802
10,dev,146,"[246, 272): 'Netherlands Czech Republic'",LOC,None,"[246, 257): 'Netherlands'",,,,,,,

0 commit comments

Comments
 (0)