Skip to content

Commit 7202008

Browse files
authored
Merge pull request #39 from frreiss/MIC
Fix data entry errors pointed out in issues #37 and #38
2 parents dd8f886 + 2035e84 commit 7202008

3 files changed

Lines changed: 21 additions & 5 deletions

File tree

corrected_labels/all_conll_corrections_combined.csv

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -351,11 +351,19 @@ test,0,"[1072, 1077): 'Syria'",LOC,Tag,"[1072, 1077): 'Syria'",ORG,Soccer team
351 351
test,0,"[118, 123): 'Japan'",LOC,Tag,"[118, 123): 'Japan'",ORG,Soccer team
352 352
test,0,"[1328, 1333): 'Syria'",LOC,Tag,"[1328, 1333): 'Syria'",ORG,Soccer team
353 353
test,0,"[1528, 1533): 'Syria'",LOC,Tag,"[1528, 1533): 'Syria'",ORG,Soccer team
354+
test,0,"[1592, 1597): 'Japan'",LOC,Tag,"[1592, 1597): 'Japan'",ORG,Soccer team; see issue #38
355+
test,0,"[1749, 1754): 'Japan'",LOC,Tag,"[1749, 1754): 'Japan'",ORG,Soccer team; see issue #38
354 356
test,0,"[19, 24): 'JAPAN'",LOC,Tag,"[19, 24): 'JAPAN'",ORG,Soccer team
357+
test,0,"[1928, 1933): 'Japan'",LOC,Tag,"[1928, 1933): 'Japan'",ORG,Soccer team; see issue #38
355 358
test,0,"[196, 201): 'Syria'",LOC,Tag,"[196, 201): 'Syria'",ORG,Soccer team
359+
test,0,"[2056, 2059): 'UAE'",LOC,Tag,"[2056, 2059): 'UAE'",ORG,Soccer team; see issue #38
360+
test,0,"[2065, 2071): 'Kuwait'",LOC,Tag,"[2065, 2071): 'Kuwait'",ORG,Soccer team; see issue #38
361+
test,0,"[2076, 2087): 'South Korea'",LOC,Tag,"[2076, 2087): 'South Korea'",ORG,Soccer team; see issue #38
362+
test,0,"[2096, 2105): 'Indonesia'",LOC,Tag,"[2096, 2105): 'Indonesia'",ORG,Soccer team; see issue #38
356 363
test,0,"[249, 254): 'China'",LOC,Tag,"[249, 254): 'China'",ORG,Soccer team
364+
test,0,"[363, 373): 'Uzbekistan'",LOC,Tag,"[363, 373): 'Uzbekistan'",ORG,Soccer team; see issue #38
357 365
test,0,"[375, 380): 'China'",LOC,Tag,"[375, 380): 'China'",ORG,Soccer team
358-
test,0,"[40, 45): 'CHINA'",PER,Tag,"[40, 45): 'CHINA'",LOC,
366+
test,0,"[40, 45): 'CHINA'",PER,Tag,"[40, 45): 'CHINA'",ORG,Fred: Soccer team
359 367
test,0,"[875, 885): 'Uzbekistan'",LOC,Tag,"[875, 885): 'Uzbekistan'",ORG,Soccer team
360 368
test,0,"[982, 987): 'Japan'",LOC,Tag,"[982, 987): 'Japan'",ORG,Soccer team
361 369
test,1,"[148, 156): 'Scotland'",LOC,Tag,"[148, 156): 'Scotland'",ORG,Rugby team
@@ -506,7 +514,7 @@ test,41,"[674, 682): 'Sporting'",ORG,Span,"[674, 688): 'Sporting Gijon'",ORG,
506 514
test,42,"[19, 24): 'SPAIN'",LOC,Tag,"[19, 24): 'SPAIN'",ORG,Soccer team
507 515
test,42,"[86, 91): 'Spain'",LOC,Tag,"[86, 91): 'Spain'",ORG,Soccer team
508 516
test,44,"[260, 268): 'Mercedes'",MISC,Tag,"[260, 268): 'Mercedes'",ORG,brand
509-
test,45,"[1361, 1366): 'Czech'",LOC,Tag,"[1361, 1366): 'Czech'",MIC,
517+
test,45,"[1361, 1366): 'Czech'",LOC,Tag,"[1361, 1366): 'Czech'",MISC,
510 518
test,49,"[31, 39): 'S.AFRICA'",MISC,Tag,"[31, 39): 'S.AFRICA'",LOC,
511 519
test,49,"[57, 63): 'DURBAN'",PER,Tag,"[57, 63): 'DURBAN'",LOC,city
512 520
test,50,"[1083, 1093): 'Communists'",MISC,Tag,"[1083, 1093): 'Communists'",ORG,Communist party

corrected_labels/human_labels_audited/CoNLL_2_in_gold.csv

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,15 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
77 77
0,dev,206,"[2399, 2406): 'Marines'",MISC,None,,,"ambiguous maybe ORG, but refs individuals",,,
78 78
0,dev,206,"[2472, 2483): 'Carl Vinson'",MISC,None,,,,,,
79 79
0,dev,214,"[2740, 2755): 'Michael Collins'",MISC,None,,,Film,,,
80-
0,test,0,"[40, 45): 'CHINA'",PER,Tag,,LOC,,,,
80+
0,test,0,"[40, 45): 'CHINA'",PER,Tag,,ORG,Fred: Soccer team,,,
81+
,test,0,"[363, 373): 'Uzbekistan'",LOC,Tag,,ORG,Soccer team; see issue #38,,,
82+
,test,0,"[1592, 1597): 'Japan'",LOC,Tag,,ORG,Soccer team; see issue #38,,,
83+
,test,0,"[1749, 1754): 'Japan'",LOC,Tag,,ORG,Soccer team; see issue #38,,,
84+
,test,0,"[1928, 1933): 'Japan'",LOC,Tag,,ORG,Soccer team; see issue #38,,,
85+
,test,0,"[2056, 2059): 'UAE'",LOC,Tag,,ORG,Soccer team; see issue #38,,,
86+
,test,0,"[2065, 2071): 'Kuwait'",LOC,Tag,,ORG,Soccer team; see issue #38,,,
87+
,test,0,"[2076, 2087): 'South Korea'",LOC,Tag,,ORG,Soccer team; see issue #38,,,
88+
,test,0,"[2096, 2105): 'Indonesia'",LOC,Tag,,ORG,Soccer team; see issue #38,,,
81 89
0,test,8,"[239, 249): 'McDonald's'",ORG,None,,,"some teams label PER, some teams ORG but McDonald""""",,,
82 90
0,test,8,"[697, 706): 'Yorkshire'",LOC,None,,,??? Yorkshire club – ref soccer,,,
83 91
0,test,9,"[706, 711): 'Split'",ORG,None,,,"??? split is a city in croatia, but ref a basketball team here, barcelona same thing",,,

corrected_labels/human_labels_audited/CoNLL_4_in_gold.csv

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -752,7 +752,7 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
752 752
0,test,43,"[49, 53): 'WEAH'",PER,None,,,,,,
753 753
3,test,44,"[260, 268): 'Mercedes'",MISC,None,,,Ambiguous: Brand used as adjective,,,
754 754
0,test,44,"[28, 45): 'MANCHESTER UNITED'",ORG,None,,,,,,
755-
0,test,45,"[1361, 1366): 'Czech'",LOC,Tag,,MIC,,,,
755+
0,test,45,"[1361, 1366): 'Czech'",LOC,Tag,,MISC,,,,
756 756
3,test,45,"[1653, 1662): 'Norwegian'",MISC,None,,,,,,
757 757
2,test,45,"[179, 182): 'GMT'",MISC,None,,,,,,
758 758
3,test,45,"[2082, 2091): 'Argentine'",MISC,None,,,,,,
@@ -11744,4 +11744,4 @@ num_models,fold,doc_offset,corpus_span,corpus_ent_type,error_type,correct_span,c
11744 11744
17,test,230,"[1153, 1160): 'England'",LOC,,,,,,,
11745 11745
17,test,230,"[1213, 1225): 'Leeds United'",ORG,,,,,,,
11746 11746
17,test,230,"[1252, 1259): 'England'",LOC,,,,,,,
11747-
17,test,230,"[1395, 1400): 'Bobby'",PER,,,,,,,
11747+
17,test,230,"[1395, 1400): 'Bobby'",PER,,,,,,,

0 commit comments

Comments
 (0)