Skip to content

Commit

Permalink
Use NFKD Unicode normalization for Dict metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
einspunktnull committed Jun 20, 2023
1 parent 64a1d13 commit 740d6b7
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/digital_eval/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
OCRDifferenceMetric,
accuracy_for_bow,
error_for_bow,
MetricDictionaryLangTool, MetricDictionary,
MetricDictionaryLangTool, MetricDictionary, UC_NORMALIZATION_NFKD,
)

# script constants
Expand Down Expand Up @@ -106,6 +106,8 @@ def _initialize_metrics(
calc_func = CALC_DICT[calc]
if m == 'BoWs' or m == 'BagOfWords':
calc_func = CALC_DICT_BOW[calc]
if 'Dict' in m:
norm = UC_NORMALIZATION_NFKD
metric_inst: OCRDifferenceMetric = clazz(normalization=norm, calc_func=calc_func)
metric_objects.append(metric_inst)
return metric_objects
Expand Down Expand Up @@ -221,7 +223,7 @@ def start():
required=False,
help=f"UTF-8 Unicode Python Normalization (optional; default: '{DEFAULT_UTF8_NORM}'; available: 'NFC','NFKC','NFD','NFKD')",
)
PARSER.add_argument("-s","--sequential",
PARSER.add_argument("-s", "--sequential",
action='store_true',
required=False,
help="Execute calculations sequentially (optional; default: 'False')",
Expand Down

0 comments on commit 740d6b7

Please sign in to comment.