Skip to content

Commit e7dd9d4

Browse files
committed
Correct additional data entry errors
1 parent dd01375 commit e7dd9d4

6 files changed

Lines changed: 14 additions & 16 deletions

File tree

corrected_labels/human_labels_audited/CoNLL_2_in_gold.csv

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,correct_ent_type,notes,time_started,time_stopped,time_elapsed
22
0,dev,2,"[25, 30): 'ASHES'",MISC,None,,,teams label as ORG ,,,
33
0,dev,15,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'",MISC,Wrong,,, divisions of leagues not entities,,,
4-
,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"[32, 51): 'RANDALL CUNNINGHAM'",PER,"need to split on '-' ""FOOTBALL-RANDALL""",,,
4+
,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"[33, 51): 'RANDALL CUNNINGHAM'",PER,"need to split on '-' ""FOOTBALL-RANDALL""",,,
55
0,dev,20,"[90, 96): 'Berlin'",MISC,Sentence,"[90, 107): 'Berlin Grand Prix'",,,,,
66
0,dev,22,"[213, 244): 'Solidarity Meeting for Sarajevo'",MISC,None,,,,,,
77
0,dev,22,"[826, 847): 'IAAF Grand Prix Final'",MISC,None,,,,,,
@@ -125,7 +125,6 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
125125
0,test,54,"[1717, 1723): 'Okocim'",ORG,None,,,brewery,,,
126126
,test,54,"[3224, 3230): 'Zywiec'",ORG,None,,,"Ambiguous: Is [3224, 3241): 'Zywiec Full Light' the brand, or is ""Full Light"" the brand?",,,
127127
0,test,54,"[3231, 3241): 'Full Light'",MISC,Tag,,ORG,"See previous line – type of beer, brand",,,
128-
,test,63,"[19, 39): 'office-Conservatives' ",MISC,Token,"[26, 39): 'Conservatives'",ORG,political party,,,
129128
0,test,63,"[148, 160): 'Conservative'",MISC,Tag,,ORG,political party,,,
130129
0,test,70,"[79, 93): 'Maritime Queen'",MISC,None,,,carrier,,,
131130
0,test,70,"[177, 197): 'New York Commodities'",ORG,Span,"[177, 202): 'New York Commodities Desk'",,"??? team labeled ""New York Commodities Desk""",,,
@@ -451,7 +450,7 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
451450
2,test,54,"[3522, 3528): 'Zywiec'",ORG,None,,,brewery,,,
452451
2,test,60,"[2313, 2329): 'London-to-Boston'",MISC,None,,,airplane route,,,
453452
2,test,63,"[11, 16): 'Major'",PER,None,,,last name,,,
454-
2,test,63,"[19, 39): 'office-Conservatives'",MISC,Token,"[26, 39): 'Conservatives' ",ORG,political party,,,
453+
2,test,63,"[19, 39): 'office-Conservatives'",MISC,Token,"[26, 39): 'Conservatives'",ORG,political party,,,
455454
2,test,67,"[682, 690): 'Manitoba'",ORG,None,,,pork company/council,,,
456455
2,test,75,"[207, 215): 'Santa Fe'",ORG,None,,,company name,,,
457456
2,test,75,"[455, 463): 'Santa Fe'",ORG,None,,,company name,,,
@@ -11627,4 +11626,4 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
1162711626
16,test,230,"[1108, 1115): 'Germany'",LOC,,,,,,,
1162811627
16,test,230,"[1127, 1132): 'Irish'",MISC,,,,,,,
1162911628
16,test,230,"[1153, 1160): 'England'",LOC,,,,,,,
11630-
16,test,230,"[1252, 1259): 'England'",LOC,,,,,,,
11629+
16,test,230,"[1252, 1259): 'England'",LOC,,,,,,,

corrected_labels/human_labels_audited/CoNLL_3_in_gold.csv

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ X,0,dev,14,"[2213, 2222): 'Wimbledon'",MISC,None,,,,,,
2727
X,0,dev,14,"[3579, 3583): 'Rios'",PER,None,,,,,,
2828
X,0,dev,15,"[11, 14): 'NFL'",ORG,None,,,,,,
2929
X,0,dev,15,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'",MISC,Token,,,"Tokenizer treated ""FOOTBALL-RANDALL"" as a single token. Fred: I don't think ""AMERICAN FOOTBALL"" qualifies as a MISC entity.",,,
30-
X,,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"[33,51): 'RANDALL CUNNINGHAM'",MISC,"Tokenizer treated ""FOOTBALL-RANDALL"" as a single token",,,
3130
X,0,dev,15,"[109, 133): 'National Football League'",ORG,None,,,,,,
3231
X,0,dev,16,"[407, 413): 'Willie'",PER,Sentence,"[407, 418): 'Willie Wood'",,,,,
3332
X,0,dev,16,"[414, 418): 'Wood'",PER,Sentence,"[407, 418): 'Willie Wood'",,,,,
@@ -639,7 +638,7 @@ X,3,dev,12,"[691, 698): 'TORONTO'",LOC,Tag,,ORG,baseball team ,,,
639638
X,3,dev,12,"[699, 708): 'MINNESOTA'",ORG,None,,,,,,
640639
X,3,dev,12,"[735, 740): 'TEXAS'",LOC,Tag,,ORG,baseball team ,,,
641640
X,3,dev,12,"[88, 109): 'Major League Baseball'",MISC,Tag,,ORG,organization ,,,
642-
X,3,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"(33,51] 'RANDALL CUNNINGHAM'",,,,,
641+
X,3,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"[33, 51): 'RANDALL CUNNINGHAM'",,,,,
643642
X,3,dev,16,"[175, 202): 'Brown Deer Park Golf Course'",LOC,None,,,,,,
644643
X,2,dev,22,"[32, 39): 'EDWARDS'",PER,None,,,,,,
645644
X,1,dev,23,"[38, 42): 'NICE'",ORG,None,,,,,,
@@ -866,7 +865,7 @@ X,1,test,59,"[137, 158): 'Irish Republican Army'",ORG,None,,,,,,
866865
X,3,test,60,"[2076, 2090): 'trans-Atlantic'",MISC,None,,,,,,
867866
X,2,test,61,"[1052, 1060): 'American'",MISC,None,,,,,,
868867
X,2,test,61,"[1535, 1538): 'Elf'",ORG,None,,,,,,
869-
X,3,test,63,"[19, 39): 'office-Conservatives'",MISC,Token,"[27, 39): 'Conservatives'",ORG,conservative party,,,
868+
X,3,test,63,"[19, 39): 'office-Conservatives'",MISC,Token,"[26, 39): 'Conservatives'",ORG,conservative party,,,
870869
X,1,test,72,"[197, 204): 'Augusta'",LOC,None,,,Zach: boat names?; Fred: Location of a port -- https://en.wikipedia.org/wiki/Port_of_Augusta,,,
871870
X,1,test,72,"[235, 245): 'Sidi Kreir'",LOC,None,,,Fred: Offshore oil terminal in Egypt; see https://www.marinetraffic.com/en/ais/details/ports/21191,,,
872871
X,3,test,72,"[296, 299): 'Fos'",LOC,None,,,"Zach: boat names?; Fred: A port in France, see https://www.findaport.com/port-of-fos ",,,
@@ -11636,4 +11635,4 @@ X,4,test,225,"[34, 41): 'RIBALTA'",PER,None,,,,,,
1163611635
,17,test,230,"[1153, 1160): 'England'",LOC,,,,,,,
1163711636
,17,test,230,"[1213, 1225): 'Leeds United'",ORG,,,,,,,
1163811637
,17,test,230,"[1252, 1259): 'England'",LOC,,,,,,,
11639-
,17,test,230,"[1395, 1400): 'Bobby'",PER,,,,,,,
11638+
,17,test,230,"[1395, 1400): 'Bobby'",PER,,,,,,,

corrected_labels/human_labels_audited/CoNLL_3_train_in_gold.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
761761
2,train,734,"[11, 31): 'BALANCE-Water Dist 1'",MISC,Token,"[19, 31): 'Water Dist 1'",ORG,,,,
762762
1,train,734,"[189, 196): 'MOODY'S'",ORG,None,,,,,,
763763
0,train,734,"[45, 48): 'Kan'",LOC,Sentence,"[45, 49): 'Kan.'",,Incorrect sentence boundary led to period not being included in abbreviation,,,
764-
0,train,734,"[64, 80): 'WATER DISTRICT 1'",MISC,None,,,,,,
764+
0,train,734,"[64, 80): 'WATER DISTRICT 1'",MISC,Tag,"[64, 80): 'WATER DISTRICT 1'",ORG,,,,
765765
2,train,734,"[84, 94): 'JOHNSON CO'",ORG,Sentence,"[84, 95): 'JOHNSON CO.'",,Incorrect sentence boundary led to period not being included in abbreviation,,,
766766
0,train,734,"[98, 100): 'KS'",LOC,None,,,,,,
767767
3,train,736,"[134, 138): 'U.S.'",LOC,None,,,,,,
@@ -23624,4 +23624,4 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
2362423624
17,train,945,"[119, 127): 'Plymouth'",ORG,,,,,,,
2362523625
17,train,945,"[130, 137): 'Preston'",ORG,,,,,,,
2362623626
17,train,945,"[155, 162): 'Swansea'",ORG,,,,,,,
23627-
17,train,945,"[165, 172): 'Lincoln'",ORG,,,,,,,
23627+
17,train,945,"[165, 172): 'Lincoln'",ORG,,,,,,,

corrected_labels/human_labels_audited/CoNLL_4_in_gold.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
5050
0,dev,15,"[109, 133): 'National Football League'",ORG,Tag,,MISC,"Leagues should be tagged MISC, per the rubric",,,
5151
1,dev,15,"[11, 14): 'NFL'",ORG,Tag,,MICC,"Leagues should be tagged MISC, per the rubric",,,
5252
0,dev,15,"[15, 40): 'AMERICAN FOOTBALL-RANDALL'",MISC,Wrong,,,"FOOTBALL-RANDALL treated as a single token; ""AMERICAN FOOTBALL"" is the generic name of a sport, not a named entity",,,
53-
1,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"[32, 51): 'RANDALL CUNNINGHAM'",PER,See previous line,,,
53+
1,dev,15,"[41, 51): 'CUNNINGHAM'",PER,Token,"[33, 51): 'RANDALL CUNNINGHAM'",PER,See previous line,,,
5454
4,dev,16,"[175, 202): 'Brown Deer Park Golf Course'",LOC,None,,,,,,
5555
0,dev,16,"[407, 413): 'Willie'",PER,Sentence,"[407, 418): 'Willie Wood'",PER,,,,
5656
0,dev,16,"[414, 418): 'Wood'",PER,Sentence,"[407, 418): 'Willie Wood'",PER,,,,
@@ -11742,4 +11742,4 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
1174211742
17,test,230,"[1153, 1160): 'England'",LOC,,,,,,,
1174311743
17,test,230,"[1213, 1225): 'Leeds United'",ORG,,,,,,,
1174411744
17,test,230,"[1252, 1259): 'England'",LOC,,,,,,,
11745-
17,test,230,"[1395, 1400): 'Bobby'",PER,,,,,,,
11745+
17,test,230,"[1395, 1400): 'Bobby'",PER,,,,,,,

corrected_labels/human_labels_audited/CoNLL_4_not_in_gold.csv

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
3737
13,dev,14,"[3579, 3583): 'Rios'",ORG,None,"[3579, 3583): 'Rios'",PER,,,"Ambiguous: ""a Rios rooting section"" -- MISC?",,,
3838
15,dev,14,"[452, 459): 'Stadium'",LOC,Missing,,,,,a two hour 51 minute struggle on the Stadium court.,,,
3939
17,dev,15,"[109, 133): 'National Football League'",MISC,None,"109, 133): 'National Football League'",ORG,,,,,,
40-
14,dev,15,"[24, 40): 'FOOTBALL-RANDALL'",PER,Token,"[41, 51): 'CUNNINGHAM'",PER,"[32, 51): 'RANDALL CUNNINGHAM'",PER,"""FOOTBALL-RANDALL"" treated as a single token",,,
40+
14,dev,15,"[24, 40): 'FOOTBALL-RANDALL'",PER,Token,"[41, 51): 'CUNNINGHAM'",PER,"[33, 51): 'RANDALL CUNNINGHAM'",PER,"""FOOTBALL-RANDALL"" treated as a single token",,,
4141
17,dev,16,"[407, 418): 'Willie Wood'",PER,Sentence,"[407, 413): 'Willie'",PER,,,Sentence boundary between first and last name,,,
4242
17,dev,16,"[407, 418): 'Willie Wood'",PER,Sentence,"[414, 418): 'Wood'",PER,,,Sentence boundary between first and last name,,,
4343
17,dev,16,"[529, 538): 'Ken Green'",PER,Sentence,"[529, 532): 'Ken'",PER,,,Sentence boundary between first and last name,,,
@@ -597,7 +597,7 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
597597
17,test,193,"[1412, 1430): 'India-South Africa'",LOC,None,"[1412, 1430): 'India-South Africa'",MISC,,,,,,
598598
17,test,193,"[744, 759): 'United Province'",ORG,None,"[744, 759): 'United Province'",LOC,,,,,,
599599
16,test,194,"[11, 20): 'FREESTYLE'",PER,None,,,,,,,,
600-
13,test,195,"[31, 36): 'WORLD'",MISC,Token,"[21, 37): 'SKIING-WORLD CUP'",MISC,"[28, 37): 'WORLD CUP'",MISC,"Tokenizer treated ""SKIING-WORLD"" as a single token",,,
600+
13,test,195,"[31, 36): 'WORLD'",MISC,None,"[31, 40): 'WORLD CUP'",MISC,,,,,,
601601
,test,199,,,Span,"[27, 52): 'SCOTTISH PREMIER DIVISION'",MISC,"[27, 35): 'SCOTTISH'",MISC,Divisions of leagues not considered entities,,,
602602
17,test,199,"[108, 116): 'Scottish'",MISC,Span,"[108, 124): 'Scottish premier'",MISC,"[108, 116): 'Scottish'",MISC,Divisions of leagues not considered entities,,,
603603
17,test,199,"[484, 498): 'Robert Winters'",PER,Sentence,"[484, 490): 'Robert'",PER,"[484, 498): 'Robert Winters'",PER,Sentence boundary between first and last name,,,
@@ -4498,4 +4498,4 @@ num_models,fold,doc_offset,model_span,model_ent_type,error_type,corpus_span,corp
44984498
1,test,228,"[831, 845): 'Super Peasants'",MISC,,,,,,,,,
44994499
1,test,228,"[801, 811): 'Doetinchem'",MISC,,,,,,,,,
45004500
1,test,230,"[19, 29): 'ENGLISHMAN'",LOC,,,,,,,,,
4501-
1,test,230,"[19, 38): 'ENGLISHMAN CHARLTON'",PER,,,,,,,,,
4501+
1,test,230,"[19, 38): 'ENGLISHMAN CHARLTON'",PER,,,,,,,,,

corrected_labels/human_labels_audited/CoNLL_4_train_in_gold.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,7 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
873873
1,train,734,"[11, 31): 'BALANCE-Water Dist 1'",MISC,Token,"[19, 31): 'Water Dist 1'",ORG,,,,
874874
0,train,734,"[189, 196): 'MOODY'S'",ORG,None,,,,,,
875875
2,train,734,"[45, 48): 'Kan'",LOC,Sentence,"[45, 49): 'Kan.'",,Incorrect sentence boundary led to period not being included in abbreviation,,,
876-
0,train,734,"[64, 80): 'WATER DISTRICT 1'",MISC,None,,,,,,
876+
0,train,734,"[64, 80): 'WATER DISTRICT 1'",MISC,Tag,,ORG,,,,
877877
0,train,734,"[84, 94): 'JOHNSON CO'",ORG,Sentence,"[84, 95): 'JOHNSON CO.'",,Incorrect sentence boundary led to period not being included in abbreviation,,,
878878
0,train,734,"[98, 100): 'KS'",LOC,None,,,,,,
879879
2,train,735,"[736, 743): 'Edouard'",MISC,None,,,Name of hurricane,,,

0 commit comments

Comments
 (0)