Skip to content

Commit a46ae9e

Browse files
authored
Delete all specific skipping (#27)
1 parent f3684a0 commit a46ae9e

1 file changed

Lines changed: 2 additions & 14 deletions

File tree

scripts/download_and_correct_corpus.py

Lines changed: 2 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -92,12 +92,12 @@ def find(self, span, doc_num):
9292
return -1, -2
9393
found_df = df[df["span"].values.begin == begin]
9494
if found_df.shape[0] == 0:
95-
print(f"[WARNING] Could not find {span}", file=sys.stderr)
95+
print(f"[WARNING] Could not find {span}: No span begins with {begin}", file=sys.stderr)
9696
return -1, -2
9797
begin_linum = found_df.iloc[0]['line_num']
9898
found_df = df[df["span"].values.end == end]
9999
if found_df.shape[0] == 0:
100-
print(f"[WARNING] Could not find {span}", file=sys.stderr)
100+
print(f"[WARNING] Could not find {span}: No span ends with {end}", file=sys.stderr)
101101
return -1, -2
102102
end_linum = found_df.iloc[0]['line_num']
103103
return begin_linum, end_linum
@@ -248,9 +248,6 @@ def process_label_file(dataset_fold, dataset_file, csv_patch_file, csv_encoding=
248248
continue
249249

250250
if row['error_type'] == 'Missing':
251-
if row['correct_span'] == "[53,64) 'West Indes'":
252-
print(f"Skip span error for {row['correct_span']}. Please correct it by hand.", file=sys.stderr)
253-
continue
254251
if isinstance(row['correct_ent_type'], float) and math.isnan(row['correct_ent_type']):
255252
print(f'[WARNING] correct ent type for line {index} are empty. row: {row}. Skipping...',
256253
file=sys.stderr)
@@ -264,15 +261,6 @@ def process_label_file(dataset_fold, dataset_file, csv_patch_file, csv_encoding=
264261

265262
for index, row in csv_patch.iterrows():
266263
if row['error_type'] == 'Span':
267-
if row['corpus_span'].endswith("'Minn'"):
268-
print("Skip span error for '(Iowa-S) Minn'. Please correct it by hand.", file=sys.stderr)
269-
continue
270-
if row['corpus_span'].endswith("'Boxing-Bruno'"):
271-
print(f"Skip span error for '{row['corpus_span']}'. Please correct it by hand.", file=sys.stderr)
272-
continue
273-
if row['correct_span'] == "[43, 47): 'U.N.'":
274-
print(f"Skip span error for '{row['correct_span']}'. Please correct it by hand.", file=sys.stderr)
275-
continue
276264
if isinstance(row['correct_span'], float) and math.isnan(row['correct_span']):
277265
print(f'[WARNING] Correct span for line {index} is empty. Skipping...', file=sys.stderr)
278266
continue

0 commit comments

Comments
 (0)