@@ -237,6 +237,7 @@ def process_label_file(dataset_fold, dataset_file, csv_patch_file, csv_encoding=
237237 csv_patch = csv_patch [csv_patch ['error_type' ] != 'None' ]
238238
239239 dataset = Dataset (dataset_file )
240+
240241 for index , row in csv_patch .iterrows ():
241242 # A lot of rows misplace correct_span and corpus_span. If corpus_span
242243 # is empty, use correct_span as the corpus_span for Missing, Tag, and Wrong.
@@ -247,13 +248,6 @@ def process_label_file(dataset_fold, dataset_file, csv_patch_file, csv_encoding=
247248 file = sys .stderr )
248249 continue
249250
250- if row ['error_type' ] == 'Missing' :
251- if isinstance (row ['correct_ent_type' ], float ) and math .isnan (row ['correct_ent_type' ]):
252- print (f'[WARNING] correct ent type for line { index } are empty. row: { row } . Skipping...' ,
253- file = sys .stderr )
254- continue
255- dataset .correct_missing (corpus_span , row ['correct_ent_type' ], int (row ['doc_offset' ]))
256- continue
257251 elif row ['error_type' ] == 'Tag' :
258252 dataset .correct_tag (corpus_span , row ['correct_ent_type' ], int (row ['doc_offset' ]))
259253 elif row ['error_type' ] == 'Wrong' :
@@ -272,6 +266,17 @@ def process_label_file(dataset_fold, dataset_file, csv_patch_file, csv_encoding=
272266 dataset .correct_tag (row ['corpus_span' ], row ['correct_ent_type' ], int (row ['doc_offset' ]))
273267 dataset .correct_span (row ['corpus_span' ], row ['correct_span' ], int (row ['doc_offset' ]))
274268
269+ for index , row in csv_patch .iterrows ():
270+ if row ['error_type' ] == 'Missing' :
271+ if isinstance (row ['correct_span' ], float ) and math .isnan (row ['correct_span' ]):
272+ print (f'[WARNING] Correct span for line { index } is empty. Skipping...' , file = sys .stderr )
273+ continue
274+ if isinstance (row ['correct_ent_type' ], float ) and math .isnan (row ['correct_ent_type' ]):
275+ print (f'[WARNING] correct ent type for line { index } are empty. row: { row } . Skipping...' ,
276+ file = sys .stderr )
277+ continue
278+ dataset .correct_missing (row ['correct_span' ], row ['correct_ent_type' ], int (row ['doc_offset' ]))
279+
275280 result = dataset .save ()
276281
277282 with open (target_file , mode = "w" ) as f :
@@ -309,7 +314,7 @@ def process_token_file(dataset_fold, dataset_file, sentence_json_file, token_edi
309314 if l in sentence_deletes [dataset_fold ]:
310315 removed += 1
311316 if l in edits .index :
312- file_lines [l - removed ] = edits .at [l , 'correct_line' ]
317+ file_lines [l - removed ] = edits .at [l , 'correct_line' ] + ' \n '
313318 with open (target_file , "w+" ) as new_file :
314319 for l in file_lines :
315320 new_file .write (l )
0 commit comments