Skip to content

Commit 7947cff

Browse files
committed
Additional corrections
1 parent 2d9d797 commit 7947cff

8 files changed

Lines changed: 15 additions & 15 deletions

corrected_labels/human_labels_audited/CoNLL_2_in_gold.csv

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,correct_ent_type,notes,time_started,time_stopped,time_elapsed
22
0,dev,2,"[25, 30): 'ASHES'",MISC,None,,,teams label as ORG ,,,
33
0,dev,15,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'",MISC,Wrong,,, divisions of leagues not entities,,,
4-
,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"(33, 51]: 'RANDALL CUNNINGHAM'",,"need to split on '-' ""FOOTBALL-RANDALL""",,,
4+
,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"[33, 51): 'RANDALL CUNNINGHAM'",PER,"need to split on '-' ""FOOTBALL-RANDALL""",,,
55
0,dev,20,"[90, 96): 'Berlin'",MISC,Sentence,"[90, 107): 'Berlin Grand Prix'",,,,,
66
0,dev,22,"[213, 244): 'Solidarity Meeting for Sarajevo'",MISC,None,,,,,,
77
0,dev,22,"[826, 847): 'IAAF Grand Prix Final'",MISC,None,,,,,,
88
0,dev,34,"[2269, 2291): 'Jackson Hole symposium'",MISC,None,,,teams label as PER,,,
99
0,dev,38,"[624, 635): 'Chicago PMI'",MISC,None,,,Purchase Managers Index,,,
10-
0,dev,39,"[11, 23): 'Boxing-Bruno'",MISC,Span,"[18, 23): 'Bruno'",PER,TODO cleanup,,,
10+
0,dev,39,"[11, 23): 'Boxing-Bruno'",MISC,Token,"[18, 23): 'Bruno'",PER,Tokenizer treated 'Boxing-Bruno' as one token,,,
1111
,dev,42,,,Missing,"[476, 500): 'Driefontein Consolidated'",ORG,Description of two companies jointly owning a third company,,,
1212
,dev,42,,,Missing,"[505, 516): 'Gold Fields'",ORG,Description of two companies jointly owning a third company,,,
1313
0,dev,42,"[476, 539): 'Driefontein Consolidated and Gold Fields ' Kloof Gold Mining Co'",ORG,Span,"[519, 539): 'Kloof Gold Mining Co'",ORG,Description of two companies jointly owning a third company,,,
@@ -125,7 +125,7 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
125125
0,test,54,"[1717, 1723): 'Okocim'",ORG,None,,,brewery,,,
126126
,test,54,"[3224, 3230): 'Zywiec'",ORG,None,,,"Ambiguous: Is [3224, 3241): 'Zywiec Full Light' the brand, or is ""Full Light"" the brand?",,,
127127
0,test,54,"[3231, 3241): 'Full Light'",MISC,Tag,,ORG,"See previous line – type of beer, brand",,,
128-
,test,63,"[19, 39): 'office-Conservatives' ",MISC,Token,"[26, 39): 'Conservatives' ",ORG,political party,,,
128+
,test,63,"[19, 39): 'office-Conservatives'",MISC,Token,"[26, 39): 'Conservatives'",ORG,political party,,,
129129
0,test,63,"[148, 160): 'Conservative'",MISC,Tag,,ORG,political party,,,
130130
0,test,70,"[79, 93): 'Maritime Queen'",MISC,None,,,carrier,,,
131131
0,test,70,"[177, 197): 'New York Commodities'",ORG,Span,"[177, 202): 'New York Commodities Desk'",,"??? team labeled ""New York Commodities Desk""",,,

corrected_labels/human_labels_audited/CoNLL_2_not_in_gold.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
363363
10,test,18,"[290, 298): 'Victoria'",LOC,None,,ORG,,,,,,
364364
10,test,19,"[38, 53): 'AUSTRALIAN TOUR'",MISC,None,,,,,,,,
365365
10,test,19,"[1611, 1632): 'Sydney Cricket Ground'",ORG,None,,LOC,,,,,,
366-
10,test,27,"[565, 573): 'X-DENVER'",ORG,Token,,,"[567, 573): 'DENVER'",,"""X-"" prefix is a footnote, meaning ""CLINCHED DIVISION TITLE""",,,
366+
10,test,27,"[565, 573): 'X-DENVER'",ORG,Token,"[565, 573): 'X-DENVER'",,"[567, 573): 'DENVER'",ORG,"""X-"" prefix is a footnote, meaning ""CLINCHED DIVISION TITLE""",,,
367367
10,test,29,"[65, 73): 'LOMBARDI'",PER,None,"[65, 79): 'LOMBARDI AWARD'",MISC,,,,,,
368368
10,test,31,"[561, 568): 'Schalke'",ORG,None,"[561, 571): 'Schalke 04'",,,,,,,
369369
10,test,43,"[317, 322): 'Sport'",ORG,None,"[302, 322): 'Gazzetta dello Sport'",,,,,,,

corrected_labels/human_labels_audited/CoNLL_3_in_gold.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ X,0,test,28,"[11, 14): 'NFL'",ORG,None,,,,,,
286286
X,0,test,28,"[82, 90): 'National'",ORG,Sentence,"[82, 106): 'National Football League'",,,,,
287287
X,0,test,28,"[91, 99): 'Football'",LOC,Sentence,"[82, 106): 'National Football League'",ORG,,,,
288288
X,0,test,28,"[100, 106): 'League'",LOC,Sentence,"[82, 106): 'National Football League'",ORG,,,,
289-
X,0,test,29,"[25, 44): 'FOOTBALL-OHIO STATE'",MISC,Token,"[34,44): 'OHIO STATE'",ORG,,,,
289+
X,0,test,29,"[25, 44): 'FOOTBALL-OHIO STATE'",MISC,Token,"[34, 44): 'OHIO STATE'",ORG,,,,
290290
X,0,test,29,"[47, 51): 'PACE'",PER,None,,,,,,
291291
X,0,test,29,"[65, 79): 'LOMBARDI AWARD'",MISC,None,,,,,,
292292
X,0,test,29,"[220, 231): 'Rotary Club'",ORG,None,,,,,,
@@ -523,7 +523,7 @@ X,0,test,186,"[1275, 1283): 'Florence'",LOC,Wrong,,,First name of Florence Masn
523523
X,0,test,186,"[1284, 1291): 'Masnada'",PER,Span,"[1275, 1291): 'Florence Masnada'",PER,Combined with above,,,
524524
X,0,test,186,"[1607, 1614): 'Super G'",MISC,Wrong,,,"Fred: Short for ""super giant slalom"", which is a generic term for an event; also, the ""super g"" is mentioned elsewhere in the same doc and not tagged there",,,
525525
X,0,test,187,"[33, 42): 'WORLD CUP'",MISC,None,,,,,,
526-
X,0,test,188,"[18, 34): 'SKIING-WORLD CUP'",MISC,Token,"[25, 34): 'WORLD CUP'",,,,,
526+
X,0,test,188,"[18, 34): 'SKIING-WORLD CUP'",MISC,Token,"[25, 34): 'WORLD CUP'",MISC,,,,
527527
X,0,test,190,"[875, 880): 'USA I'",ORG,None,,,,,,
528528
X,,test,190,"[440, 445): 'Italy'",LOC,Both,"[440, 447): 'Italy I'",ORG,Bobsled team,,,
529529
X,0,test,191,"[11, 25): 'SKIING-CHINESE'",MISC,Token,"[18, 25): 'CHINESE'",ORG,,,,

corrected_labels/human_labels_audited/CoNLL_3_not_in_gold.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
6363
14,dev,38,"[624, 631): 'Chicago'",ORG,None,"[624, 635): 'Chicago PMI'",MISC,,,,,,
6464
17,dev,38,"[632, 635): 'PMI'",ORG,None,"[624, 635): 'Chicago PMI'",MISC,,,,,,
6565
17,dev,38,"[735, 738): 'PMI'",ORG,None,,,,,"Ambiguous: ""Chicago PMI"" tagged as MISC earlier in the same sentence",,,
66-
17,dev,39,"[11, 23): 'Boxing-Bruno'",PER,Token,,MISC,"[18, 23): 'Bruno' ",PER,"Tokenizer treats ""Boxing-Bruno"" as one token",,,
66+
17,dev,39,"[11, 23): 'Boxing-Bruno'",PER,Token,"[11, 23): 'Boxing-Bruno'",MISC,"[18, 23): 'Bruno'",PER,"Tokenizer treats ""Boxing-Bruno"" as one token",,,
6767
17,dev,39,"[340, 360): 'World Boxing Council'",MISC,None,,ORG,,,,,,
6868
16,dev,42,"[476, 516): 'Driefontein Consolidated and Gold Fields'",ORG,Missing,,,"[476, 500): 'Driefontein Consolidated'",ORG,Two companies that own a third company,,,
6969
16,dev,42,"[476, 516): 'Driefontein Consolidated and Gold Fields'",ORG,Missing,,,"[505, 516): 'Gold Fields'",ORG,Two companies that own a third company,,,
@@ -492,7 +492,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
492492
17,test,185,"[333, 342): 'World Cup'",MISC,None,"[328, 342): '1993 World Cup'",,,,,,,
493493
17,test,186,"[1275, 1291): 'Florence Masnada'",PER,Wrong,"[1275, 1283): 'Florence'",LOC,,,,,,
494494
17,test,186,"[1275, 1291): 'Florence Masnada'",PER,Span,"[1284, 1291): 'Masnada'",PER,"[1275, 1291): 'Florence Masnada'",PER,,,,
495-
17,test,186,"[1398, 1409): 'Austria)118'",LOC,Token,,,"[1398, 1405): 'Austria'",LOC,Token and Missing ,,,
495+
17,test,186,"[1398, 1409): 'Austria)118'",LOC,Token,"[1398, 1409): 'Austria)118'",,"[1398, 1405): 'Austria'",LOC,Token and Missing ,,,
496496
14,test,186,"[432, 454): 'Ingeborg Helen Markein'",PER,Span,"[432, 446): 'Ingeborg Helen'",PER,,,,,,
497497
17,test,186,"[533, 549): 'Florence Masnada'",PER,Wrong,"[533, 541): 'Florence'",,,,,,,
498498
17,test,186,"[533, 549): 'Florence Masnada'",PER,Span,"[542, 549): 'Masnada'",PER,"[533, 549): 'Florence Masnada'",PER,,,,

corrected_labels/human_labels_audited/CoNLL_3_train_not_in_gold.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
4848
17,train,58,"[751, 765): 'Derrick Cooper'",PER,Sentence,"[759, 765): 'Cooper'",PER,,,Sentence boundary between first and last name,,,
4949
17,train,58,"[816, 829): 'Michael Welch'",PER,Sentence,"[816, 823): 'Michael'",PER,,,Sentence boundary between first and last name,,,
5050
17,train,58,"[816, 829): 'Michael Welch'",PER,Sentence,"[824, 829): 'Welch'",PER,,,Sentence boundary between first and last name,,,
51-
17,train,80,"[44, 59): 'rebels-Interfax'",ORG,Token,,,"[44, 59): 'Interfax'",ORG,"Missing annotation, and 'rebels-Interfax' treated as a single token",,,
51+
17,train,80,"[44, 59): 'rebels-Interfax'",ORG,Token,"[44, 59): 'rebels-Interfax'",,"[51, 59): 'Interfax'",ORG,"Missing annotation, and 'rebels-Interfax' treated as a single token",,,
5252
15,train,94,"[1046, 1066): 'Israel-PLO self-rule'",MISC,None,"[1046, 1056): 'Israel-PLO'",,,,,,,
5353
16,train,98,"[81, 85): 'N.M.'",LOC,Missing,,,,,New Mexico,,,
5454
15,train,112,"[80, 105): 'The Reserve bank of India'",ORG,None,"[84, 105): 'Reserve bank of India'",,,,,,,
55-
17,train,115,"[17, 27): 'FOCUS-News'",ORG,Token,,,"[23, 27): 'News'",,"FOCUS-News' treated as a single token; ""News"" is a reference to News Corp Ltd",,,
55+
17,train,115,"[17, 27): 'FOCUS-News'",ORG,Token,"[17, 27): 'FOCUS-News'",,"[23, 27): 'News'",ORG,"'FOCUS-News' treated as a single token; ""News"" is a reference to News Corp Ltd",,,
5656
17,train,119,"[11, 16): 'Thais'",MISC,Missing,,,,,,,,
5757
,train,119,,,Missing,,,"[826, 831): 'Vivit'",PER,,,,
5858
17,train,130,"[166, 184): 'Belgian Grand Prix'",MISC,None,"[174, 184): 'Grand Prix'",MISC,,,"Ambiguous: Should the ""Belgian"" in ""Belgian Grand Prix"" be considered part of the event's name?",,,

corrected_labels/human_labels_audited/CoNLL_4_in_gold.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
4949
1,dev,14,"[989, 998): 'Wimbledon'",MISC,None,,,,,,
5050
0,dev,15,"[109, 133): 'National Football League'",ORG,Tag,,MISC,"Leagues should be tagged MISC, per the rubric",,,
5151
1,dev,15,"[11, 14): 'NFL'",ORG,Tag,,MISC,"Leagues should be tagged MISC, per the rubric",,,
52-
0,dev,15,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'",MISC,Token,"[32, 51): 'RANDALL CUNNINGHAM'",,"FOOTBALL-RANDALL treated as a single token; ""AMERICAN FOOTBALL"" is the generic name of a sport, not a named entity",,,
53-
1,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"[32, 51): 'RANDALL CUNNINGHAM'",,See previous line,,,
52+
0,dev,15,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'",MISC,Wrong,,,"FOOTBALL-RANDALL treated as a single token; ""AMERICAN FOOTBALL"" is the generic name of a sport, not a named entity",,,
53+
1,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"[33, 51): 'RANDALL CUNNINGHAM'",PER,See previous line,,,
5454
4,dev,16,"[175, 202): 'Brown Deer Park Golf Course'",LOC,None,,,,,,
5555
0,dev,16,"[407, 413): 'Willie'",PER,Sentence,"[407, 418): 'Willie Wood'",PER,,,,
5656
0,dev,16,"[414, 418): 'Wood'",PER,Sentence,"[407, 418): 'Willie Wood'",PER,,,,

corrected_labels/human_labels_audited/CoNLL_4_not_in_gold.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
3737
13,dev,14,"[3579, 3583): 'Rios'",ORG,None,"[3579, 3583): 'Rios'",PER,,,"Ambiguous: ""a Rios rooting section"" -- MISC?",,,
3838
15,dev,14,"[452, 459): 'Stadium'",LOC,Missing,,,,,a two hour 51 minute struggle on the Stadium court.,,,
3939
17,dev,15,"[109, 133): 'National Football League'",MISC,None,"[109, 133): 'National Football League'",ORG,,,,,,
40-
14,dev,15,"[24, 40): 'FOOTBALL-RANDALL'",PER,Token,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'",MISC,"[32, 51): 'RANDALL CUNNINGHAM'",PER,"""FOOTBALL-RANDALL"" treated as a single token",,,
40+
14,dev,15,"[24, 40): 'FOOTBALL-RANDALL'",PER,Token,"[41, 51): 'CUNNINGHAM'",PER,"[33, 51): 'RANDALL CUNNINGHAM'",PER,"""FOOTBALL-RANDALL"" treated as a single token",,,
4141
17,dev,16,"[407, 418): 'Willie Wood'",PER,Sentence,"[407, 413): 'Willie'",PER,,,Sentence boundary between first and last name,,,
4242
17,dev,16,"[407, 418): 'Willie Wood'",PER,Sentence,"[414, 418): 'Wood'",PER,,,Sentence boundary between first and last name,,,
4343
17,dev,16,"[529, 538): 'Ken Green'",PER,Sentence,"[529, 532): 'Ken'",PER,,,Sentence boundary between first and last name,,,
@@ -573,7 +573,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
573573
16,test,185,"[333, 342): 'World Cup'",MISC,None,"[328, 342): '1993 World Cup'",MISC,,,,,,
574574
17,test,186,"[1275, 1291): 'Florence Masnada'",PER,Wrong,"[1275, 1283): 'Florence'",LOC,,,,,,
575575
17,test,186,"[1275, 1291): 'Florence Masnada'",PER,Span,"[1284, 1291): 'Masnada'",PER,"[1275, 1291): 'Florence Masnada'",PER,,,,
576-
17,test,186,"[1398, 1409): 'Austria)118'",LOC,Token,,,"[1398, 1405): 'Austria'",LOC,Tokenizer treated 'Austria)118' as a single token,,,
576+
17,test,186,"[1398, 1409): 'Austria)118'",LOC,Token,"[1398, 1409): 'Austria)118'",,"[1398, 1405): 'Austria'",LOC,Tokenizer treated 'Austria)118' as a single token,,,
577577
14,test,186,"[432, 454): 'Ingeborg Helen Markein'",PER,Span,"[432, 446): 'Ingeborg Helen'",PER,,,,,,
578578
17,test,186,"[533, 549): 'Florence Masnada'",PER,Wrong,"[533, 541): 'Florence'",LOC,,,,,,
579579
17,test,186,"[533, 549): 'Florence Masnada'",PER,Span,"[542, 549): 'Masnada'",PER,"[533, 549): 'Florence Masnada'",PER,,,,

corrected_labels/human_labels_audited/CoNLL_4_train_not_in_gold.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
110110
15,train,99,"[61, 66): 'Mich.'",LOC,None,"[61, 65): 'Mich'",LOC,,,Ambiguous: Sentence boundary in the middle of the dateline; otherwise period would be unambiguously part of the abbreviation,,,
111111
12,train,103,"[528, 549): 'French Roman Catholic'",MISC,None,"[528, 534): 'French'",MISC,,,,,,
112112
12,train,107,"[867, 874): 'Yevgeny'",PER,None,"[867, 883): 'Yevgeny Primakov'",PER,,,,,,
113-
17,train,115,"[17, 27): 'FOCUS-News'",ORG,Token,,,"[23, 27): 'News'",ORG,"Tokenizer treats ""FOCUS-News"" as a single token; ""News"" == ""News Corp""",,,
113+
17,train,115,"[17, 27): 'FOCUS-News'",ORG,Token,"[17, 27): 'FOCUS-News'",,"[23, 27): 'News'",ORG,"Tokenizer treats ""FOCUS-News"" as a single token; ""News"" == ""News Corp""",,,
114114
17,train,119,"[11, 16): 'Thais'",MISC,Missing,,,,,,,,
115115
12,train,119,"[826, 831): 'Vivit'",PER,Missing,,,,,,,,
116116
14,train,120,"[158, 166): 'YEAR-AGO'",ORG,None,,,,,,,,

0 commit comments

Comments
 (0)