Skip to content

Commit

Permalink
Do not include items marked as illformed/ungrammatical in the accuracy …
Browse files Browse the repository at this point in the history
…computation denominator
  • Loading branch information
olzama committed May 13, 2024
1 parent c9d840a commit 0b14312
Showing 1 changed file with 22 additions and 6 deletions.
28 changes: 22 additions & 6 deletions util/treebanking-scripts/report_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ def report_stats(treebanks_path):
all_sentences = []
all_accepted = []
all_rejected = []
all_overgenerated = []
all_illformed = []
for i, tsuite in enumerate(sorted(glob.iglob(treebanks_path + '/**'))):
sentences = []
accepted = []
rejected = []
overgenerated = []
illformed = []
ts = itsdb.TestSuite(tsuite)
items = list(ts.processed_items())
#print("{} sentences in corpus {} including possible sentences with no parse.".format(len(items), ts.path.stem))
Expand All @@ -19,17 +23,29 @@ def report_stats(treebanks_path):
sentences.append(response['i-input'])
# In a thinned parsed forest, results will be empty if the item was not accepted as correct in treebanking.
if len(response['results']) > 0:
accepted.append(response['i-input'])
all_accepted.append(response['i-input'])
if response['i-wf'] == 1:
accepted.append(response['i-input'])
all_accepted.append(response['i-input'])
else:
overgenerated.append(response['i-input'])
all_overgenerated.append(response['i-input'])
illformed.append(response['i-input'])
#deriv = response.result(0).derivation()
else:
#print('Rejected: {}'.format(response['i-input']))
if response['i-wf'] == 0:
illformed.append(response['i-input'])
all_illformed.append(response['i-input'])
rejected.append(response['i-input'])
all_rejected.append(response['i-input'])
acc = len(accepted)/len(sentences)
print('Corpus {} accuracy {} out of {} ({:.4f})'.format(ts.path.stem, len(accepted), len(sentences), acc))
acc = len(all_accepted) / len(all_sentences)
print('Total accuracy: {} out of {} ({:.4f})'.format(len(all_accepted), len(all_sentences), acc))
acc = len(accepted)/(len(sentences) - len(illformed))
overgen = len(overgenerated)/len(illformed) if len(illformed) > 0 else 0
print('Corpus {} accuracy {} out of {} ({:.4f})'.format(ts.path.stem, len(accepted), len(sentences)-len(illformed), acc))
#print('Corpus {} overgeneration {} out of {} ({:.4f})'.format(ts.path.stem, len(overgenerated), len(illformed), overgen))
acc = len(all_accepted) / (len(all_sentences) - len(all_illformed))
overgen = len(all_overgenerated) / len(all_illformed) if len(all_illformed) > 0 else 0
print('Total accuracy: {} out of {} ({:.4f})'.format(len(all_accepted), len(all_sentences)-len(all_illformed), acc))
#print('Total overgeneration: {} out of {} ({:.4f})'.format(len(all_overgenerated), len(all_illformed), overgen))


if __name__ == '__main__':
Expand Down

0 comments on commit 0b14312

Please sign in to comment.