From 723eef06dda70b35e6e50ada8435f75371ffef94 Mon Sep 17 00:00:00 2001 From: Uwe Hartwig Date: Fri, 24 May 2024 19:20:48 +0200 Subject: [PATCH] [app][rfct] mjr re-organization --- README.md | 23 ++-- pyproject.toml | 33 ++++++ requirements.txt | 6 - setup.cfg | 36 ------ src/digital_eval/VERSION | 1 - src/digital_eval/__init__.py | 6 +- src/digital_eval/cli.py | 189 ++++++++++++++----------------- src/digital_eval/evaluation.py | 197 +++++---------------------------- src/digital_eval/metrics.py | 172 +++++++++++++++++++++++++--- src/ocr_util/cli.py | 18 ++- tests/test_dict_metric.py | 37 +++---- tests/test_digital_eval_cli.py | 20 ++-- tests/test_ocr_evaluate.py | 94 ++++++++-------- tests/test_ocr_metrics.py | 50 ++++----- tests/test_ocr_metrics_base.py | 34 +++--- 15 files changed, 434 insertions(+), 482 deletions(-) delete mode 100644 requirements.txt delete mode 100644 setup.cfg delete mode 100644 src/digital_eval/VERSION diff --git a/README.md b/README.md index eff5d84..377f02c 100644 --- a/README.md +++ b/README.md @@ -7,16 +7,16 @@ Python3 Tool to report evaluation outcomes from mass digitalization workflows. ## Features -* match automatically groundtruth (i.e. reference data) and candidates by filename +* match groundtruth (i.e. reference data) and candidates by filename start * use geometric information to evaluate only specific frame (i.e. specific column or region from large page) of candidates (requires ALTO or PAGE format) -* aggregate evaluation outcome on domain range (with multiple subdomains) +* aggregate evaluation outcomes on domain range (with multiple subdomains) according to folder layout * choose from textual metrics based on characters or words plus common Information Retrieval -* choose between accuracy / error rate and different UTF-8 Python norms +* choose from different UTF-8 Python norms * formats: ALTO, PAGE or plain text for both groundtruth and candidates * speedup with parallel execution * additional OCR util: - * filter custom areas of single OCR files + * filter custom areas of single OCR files (ALTO format) ## Installation ```bash pip install digital-eval ``` ## Usage ### Metrics -Calculate similarity (`acc`) or difference (`err`) ratios between single reference/groundtruth and test/candidate item. +#### Edit-Distance based String Similarity -#### Edit-Distance based - -Character-based text string minus whitechars (`Cs`, `Characters`) or Letter-based (`Ls`, `Letters`) minus whites, -punctuation and digits. -Word/Token-based edit-distance of single tokens identified by whitespaces. +Calculate similarity for each reference/groundtruth and test/candidate pair. +Complete character-based text string (`Cs`, `Characters`) or Letter-based (`Ls`, `Letters`) minus whitespaces, +punctuation and common digits (Arabic, Persian). +Word/Token-based edit-distance of single tokens, identified by Word or String elements or whitespaces, depending on the data format. #### Set based @@ -141,8 +140,8 @@ digital-eval --help Contributions, suggestions and proposals welcome! -## Licence +## License Under terms of the [MIT license](https://opensource.org/licenses/MIT). -**NOTE**: This software depends on other packages that _may_ be licensed under different open source licenses. +**NOTE**: This software depends on packages that _may_ be licensed under different terms.
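[editor's note] For readers of the README hunk above: a minimal sketch of what the edit-distance based character similarity boils down to, using `rapidfuzz` (declared as a dependency in the pyproject.toml diff that follows) and Python's `unicodedata` normalization. This only illustrates the idea in the README text, it is not the exact implementation inside `digital_eval.metrics`; the helper name `char_similarity` is made up for this example.

```python
import string
import unicodedata

from rapidfuzz.distance import Levenshtein


def char_similarity(reference: str, candidate: str, codec: str = "NFC") -> float:
    """Illustrative sketch: normalized edit-distance similarity in percent,
    computed on unicode-normalized text with whitespace stripped out --
    roughly what the `Cs`/`Characters` metric described above measures."""
    drop_ws = str.maketrans("", "", string.whitespace)
    ref = unicodedata.normalize(codec, reference).translate(drop_ws)
    can = unicodedata.normalize(codec, candidate).translate(drop_ws)
    return 100 * Levenshtein.normalized_similarity(ref, can)


if __name__ == "__main__":
    # two near-identical strings yield a similarity close to 100
    print(f"{char_similarity('the lazy brown fox', 'the lazy brawn fox'):.2f}")
```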
diff --git a/pyproject.toml b/pyproject.toml index 638dd9c..31be912 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,36 @@ [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" + +[tool.setuptools.dynamic] +version = {attr = "digital_eval.__version__"} + +[tool.setuptools.packages.find] +where = ["src"] + +[project] +name = "digital-eval" +dynamic = ["version"] +description = "Evaluate Digitalization Data" +readme = "README.md" +requires-python = ">=3.8" +authors = [{name = "Universitäts- und Landesbibliothek Sachsen-Anhalt",email = "development@bibliothek.uni-halle.de"}] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License" +] +dependencies = [ + "rapidfuzz>3", + "nltk", + "requests", + "docker", + "numpy", + "digital-object==0.2.0", +] + +[project.urls] +Homepage = "https://github.com/ulb-sachsen-anhalt/digital-eval" + +[project.scripts] +digital-eval = "digital_eval.cli:start" +ocr-util = "ocr_util.cli:start" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index da93842..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -rapidfuzz -numpy -nltk -requests -docker -digital-object==0.2.0 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index a582a59..0000000 --- a/setup.cfg +++ /dev/null @@ -1,36 +0,0 @@ -[metadata] -name = digital-eval -version = file:src/digital_eval/VERSION -description = Evaluate Mass Digitalization Data -long_description = file:README.md -long_description_content_type = text/markdown -author = Universitäts- und Landesbibliothek Sachsen-Anhalt -author_email = development@bibliothek.uni-halle.de -maintainer = Uwe Hartwig -maintainer_email = uwe.hartwig@bibliothek.uni-halle.de -classifiers = - Programming Language :: Python :: 3 - License :: OSI Approved :: MIT License - Operating System :: OS Independent -project_urls = - Homepage = https://github.com/ulb-sachsen-anhalt/digital-eval - -[options] -python_requires = >=3.8 -package_dir = - =src -packages = find: -include_package_data = True -install_requires = - rapidfuzz>3 - numpy - nltk - shapely - -[options.packages.find] -where = src - -[options.entry_points] -console_scripts = - digital-eval = digital_eval.cli:start - ocr-util = ocr_util.cli:start diff --git a/src/digital_eval/VERSION b/src/digital_eval/VERSION deleted file mode 100644 index dc1e644..0000000 --- a/src/digital_eval/VERSION +++ /dev/null @@ -1 +0,0 @@ -1.6.0 diff --git a/src/digital_eval/__init__.py b/src/digital_eval/__init__.py index e0f0165..6a10bbd 100644 --- a/src/digital_eval/__init__.py +++ b/src/digital_eval/__init__.py @@ -1,6 +1,5 @@ -# -# provided API exports -# +"""digital eval main API""" +__version__ = '1.6.0' from .evaluation import ( Evaluator, find_groundtruth, @@ -18,4 +17,3 @@ MetricIRRec, MetricIRFM, ) - diff --git a/src/digital_eval/cli.py b/src/digital_eval/cli.py index a92a8a2..5e45e29 100644 --- a/src/digital_eval/cli.py +++ b/src/digital_eval/cli.py @@ -2,12 +2,9 @@ """OCR QA Evaluation CLI""" import argparse -import datetime as dt import os import sys -from typing import ( - List, Type -) +import typing import digital_eval as digev import digital_eval.dictionary_metrics.common as digev_cm @@ -46,29 +43,18 @@ } -def _get_info(): - here = os.path.abspath(os.path.dirname(__file__)) - _v = '' - _t = '' - _fp = os.path.join(here, 'VERSION') - with open(_fp) as fp: - _v = fp.read() - _t = dt.datetime.fromtimestamp(os.stat(_fp).st_mtime).strftime("%Y-%m-%d") - return 
f'v{_v}/{_t}' - - def _initialize_metrics( the_metrics, norm, -) -> List[digem.SimilarityMetric]: +) -> typing.List[digem.SimilarityMetric]: _tokens = the_metrics.split(',') try: - metric_objects: List[digem.SimilarityMetric] = [] + metric_objects: typing.List[digem.SimilarityMetric] = [] for m in _tokens: - clazz: Type[digem.SimilarityMetric] = METRIC_DICT[m] + clazz: typing.Type[digem.SimilarityMetric] = METRIC_DICT[m] if 'Dict' in m: norm = digem.UC_NORMALIZATION_NFKD - metric_inst: digem.SimilarityMetric = clazz(normalization=norm) + metric_inst: digem.SimilarityMetric = clazz(normalization=norm) metric_objects.append(metric_inst) return metric_objects except KeyError as _err: @@ -78,30 +64,66 @@ def _initialize_metrics( sys.exit(1) -######## -# MAIN # -######## -def _main( - path_candidates, - path_reference, - metrics, - utf8norm, - # calc, - xtra, - is_sequential=False, -): +######### +# START # +######### +def start_evaluation(parse_args: typing.Dict): + """Main workflow""" + + path_candidates = parse_args["candidates"] + path_reference = parse_args["reference"] + metrics: str = parse_args["metrics"] + utf8norm = parse_args["utf8"] + verbosity = parse_args["verbosity"] + is_seq = parse_args["sequential"] if "sequential" in parse_args else False + xtra = parse_args["extra"] if "extra" in parse_args else None + + if "language" in parse_args: + digem.MetricDictionary.LANGUAGE = parse_args["language"] + uses_lang_tool: bool = 'DictLT' in metrics or "DictionaryLangTool" in metrics + if uses_lang_tool: + lt_url: str = parse_args["lt_api_url"] if "lt_api_url" in parse_args else LanguageTool.DEFAULT_URL + LanguageTool.initialize(lt_url) + + # go on with basic validation + if not os.path.isdir(path_candidates): + print(f'[ERROR] input "{path_candidates}": invalid directory! exit!') + sys.exit(1) + if path_reference and not os.path.isdir(path_reference): + print(f'[ERROR] reference "{path_reference}": invalid directory! exit!') + sys.exit(1) + + # sanitize trailing slash + if not isinstance(path_candidates, str): + path_candidates = str(path_candidates) + if not isinstance(path_reference, str): + path_reference = str(path_reference) + path_candidates = path_candidates[:-1] if path_candidates.endswith('/') else path_candidates + path_reference = path_reference[:-1] if path_reference.endswith('/') else path_reference + + # if both candidates and reference provided: do domains match? 
+ if path_candidates and path_reference: + _base_can = os.path.basename(path_candidates) + _base_ref = os.path.basename(path_reference) + if _base_can != _base_ref: + print(f"[WARN ] start domains '{_base_can}' and '{_base_ref}' mismatch, summary might be inaccurate!") + + # some diagnostics + if verbosity >= 2: + args = f"{path_candidates}, {path_reference}, {verbosity}, {xtra}" + print(f'[DEBUG] called with {args}') + # create basic evaluator instance evaluator = digev.Evaluator( path_candidates, - verbosity=VERBOSITY, + verbosity=verbosity, extras=xtra, ) evaluator.metrics = _initialize_metrics(metrics, norm=utf8norm)#, calc=calc) - # evaluator.calc = calc - if VERBOSITY >= 1: - print(f"[DEBUG] text normalized using '{utf8norm}' values for '{metrics}'") + if verbosity >= 1: + print(f"[DEBUG] text normalized using '{utf8norm}' code points for '{metrics}'") - evaluator.is_sequential = is_sequential + evaluator.is_sequential = is_seq evaluator.domain_reference = path_reference # gather structure information @@ -122,7 +144,7 @@ def _main( n_diff = n_entries - len(gt_entries) gt_missing = set(gt_entries) ^ set(candidates) rnd_str = f" ({gt_missing})" if gt_missing else "" - if VERBOSITY >= 1: + if verbosity >= 1: print(f'[DEBUG] from "{n_entries}" filtered "{n_diff}" candidates missing groundtruth{rnd_str}') # trigger actual evaluation @@ -135,110 +157,67 @@ def _main( evaluator.eval_map() # serialize stdout report - if VERBOSITY >= 0: - digev.report_stdout(evaluator, VERBOSITY) - + if verbosity >= 0: + digev.report_stdout(evaluator, verbosity) + # for testing purposes - return evaluator.get_results() + eval_results = evaluator.get_results() + + # final clean-up + if uses_lang_tool: + LanguageTool.deinitialize() + + return eval_results def start(): - PARSER = argparse.ArgumentParser(description=f""" - Evaluate Mass Digital Data. ({_get_info()}) - """) - PARSER.add_argument( - "candidates", - help="Root Directory for evaluation candidates" - ) - PARSER.add_argument("-ref", "--reference", + """Wrap argparsing""" + parser = argparse.ArgumentParser(description=f"Evaluate Mass Digitalization Data {digev.__version__}") + parser.add_argument("candidates", + help="Root Directory for evaluation candidates" + ) + parser.add_argument("-ref", "--reference", required=False, help="Root directory for Reference/Groundtruth data (optional, but necessary for most metrics)" ) - PARSER.add_argument("-v", "--VERBOSITY", + parser.add_argument("-v", "--verbosity", action='count', default=DEFAULT_VERBOSITY, required=False, help=f"Verbosity flag. 
To increase, append multiple 'v's (optional; default: '{DEFAULT_VERBOSITY}')" ) - PARSER.add_argument("--metrics", + parser.add_argument("--metrics", default=DEFAULT_OCR_METRICS, required=False, help=f"List of metrics to use (optional, default: '{DEFAULT_OCR_METRICS}'; available: '{','.join(METRIC_DICT.keys())}')" ) - PARSER.add_argument("--utf8", + parser.add_argument("--utf8", default=DEFAULT_UTF8_NORM, required=False, help=f"UTF-8 Unicode Python Normalization (optional; default: '{DEFAULT_UTF8_NORM}'; available: 'NFC','NFKC','NFD','NFKD')", ) - PARSER.add_argument("-s", "--sequential", + parser.add_argument("-s", "--sequential", action='store_true', required=False, help="Execute calculations sequentially (optional; default: 'False')", ) - PARSER.add_argument("-x", "--extra", + parser.add_argument("-x", "--extra", required=False, help="pass additional information to evaluation, like 'ignore_geometry' (compare only text, ignore coords)" ) - PARSER.add_argument('-l', "--language", + parser.add_argument('-l', "--language", default=digev_cm.LANGUAGE_KEY_DEFAULT, choices=digev_cm.LANGUAGE_KEYS, required=False, help=f"Language code for LanguagTool according to ISO 639-2 (optional; default: '{digev_cm.LANGUAGE_KEY_DEFAULT}')", ) - PARSER.add_argument('-u', "--lt-api-url", + parser.add_argument('-u', "--lt-api-url", default=LanguageTool.DEFAULT_URL, required=False, help=f"Language Tool Api URL (optional; default: '{LanguageTool.DEFAULT_URL}')", ) - PARSER.set_defaults(sequential=False) - - ARGS = vars(PARSER.parse_args()) - path_candidates = ARGS["candidates"] - path_reference = ARGS["reference"] - global VERBOSITY - VERBOSITY = ARGS["VERBOSITY"] - IS_SEQUENTIAL = ARGS["sequential"] - xtra = ARGS["extra"] - metrics: str = ARGS["metrics"] - utf8norm = ARGS["utf8"] - digem.MetricDictionary.LANGUAGE = ARGS["language"] - lt_api_url = ARGS["lt_api_url"] - - uses_lang_tool: bool = 'DictLT' in metrics or "DictionaryLangTool" in metrics - - if uses_lang_tool: - lt_url: str = lt_api_url if LanguageTool.DEFAULT_URL != lt_api_url else LanguageTool.DEFAULT_URL - LanguageTool.initialize(lt_url) - # go on - # basic validation - if not os.path.isdir(path_candidates): - print(f'[ERROR] input "{path_candidates}": invalid directory! exit!') - sys.exit(1) - if path_reference and not os.path.isdir(path_reference): - print(f'[ERROR] reference "{path_reference}": invalid directory! exit!') - sys.exit(1) - - # sanitize trailing slash - path_candidates = path_candidates[:-1] if path_candidates.endswith('/') else path_candidates - path_reference = path_reference[:-1] if path_reference.endswith('/') else path_reference - - # if candidates and both reference provided: do domains match? 
- if path_candidates and path_reference: - _base_can = os.path.basename(path_candidates) - _base_ref = os.path.basename(path_reference) - if _base_can != _base_ref: - print(f"[WARN ] start domains '{_base_can}' and '{_base_ref}' mismatch, summary might be inaccurate!") - - # some diagnostics - if VERBOSITY >= 2: - args = f"{path_candidates}, {path_reference}, {VERBOSITY}, {xtra}" - print(f'[DEBUG] called with {args}') - - # here we go - _main(path_candidates, path_reference, metrics, utf8norm, xtra, is_sequential=IS_SEQUENTIAL) - - if uses_lang_tool: - LanguageTool.deinitialize() + main_args = vars(parser.parse_args()) + start_evaluation(main_args) if __name__ == "__main__": diff --git a/src/digital_eval/evaluation.py b/src/digital_eval/evaluation.py index ef7f3d1..19a3758 100644 --- a/src/digital_eval/evaluation.py +++ b/src/digital_eval/evaluation.py @@ -4,35 +4,23 @@ import concurrent.futures import copy +import datetime +import math +import multiprocessing import os import re import sys +import typing import xml.dom.minidom import xml.etree.ElementTree as ET -from datetime import ( - date -) -from math import ( - floor -) -from multiprocessing import ( - cpu_count -) + from pathlib import ( Path ) -from typing import ( - List, - Tuple, -) import numpy as np -from digital_object import ( - DigitalObjectTree, - DigitalObjectLevel, - to_digital_object, -) +import digital_eval.metrics as digem PAGE_2013 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15' XML_NS = {'alto': 'http://www.loc.gov/standards/alto/ns-v3#', @@ -77,7 +65,7 @@ def get_statistics(data_points): return (the_mean, the_deviation, the_median) -def gather_candidates(start_path) -> List[EvalEntry]: +def gather_candidates(start_path) -> typing.List[EvalEntry]: candidates = [] if os.path.isdir(start_path): for curr_dir, _, files in os.walk(start_path): @@ -118,10 +106,9 @@ def match_candidates(path_candidates, path_gt_file): '''Find candidates that match groundtruth''' if not os.path.isdir(path_candidates): - raise IOError('invalid ocr result path "{}"'.format(path_candidates)) + raise IOError(f'invalid ocr result path "{path_candidates}"') if not os.path.exists(path_gt_file): - raise IOError( - 'invalid groundtruth data path "{}"'.format(path_gt_file)) + raise IOError(f'invalid groundtruth data path "{path_gt_file}"') gt_filename = os.path.basename(path_gt_file) @@ -205,7 +192,7 @@ def get_bbox_data(file_path): '''Get Bounding Box Data from given resource, if any exists''' if not os.path.exists(file_path): - raise IOError('{} not existing!'.format(file_path)) + raise IOError(f'{file_path} not existing!') # 1: inspect filename file_name = os.path.basename(file_path) @@ -266,7 +253,7 @@ def get_bbox_data(file_path): return None -def _map_alto(e: ET.Element) -> Tuple[str, int, int, int, int]: +def _map_alto(e: ET.Element) -> typing.Tuple[str, int, int, int, int]: i = e.attrib['ID'] x0 = int(e.attrib['HPOS']) y0 = int(e.attrib['VPOS']) @@ -275,25 +262,14 @@ def _map_alto(e: ET.Element) -> Tuple[str, int, int, int, int]: return (i, x0, y0, x1, y1) -def _map_page2013(elem: ET.Element) -> Tuple[str, int, int, int, int]: +def _map_page2013(elem: ET.Element) -> typing.Tuple[str, int, int, int, int]: points = elem.attrib['points'].strip().split(' ') _xs = [int(p.split(',')[0]) for p in points] _ys = [int(p.split(',')[1]) for p in points] return (NOT_SET, min(_xs), min(_ys), max(_xs), max(_ys)) -def _get_line_digos_from_digo(digo: DigitalObjectTree, lines: List[DigitalObjectTree] = None) -> List[DigitalObjectTree]: - if 
lines is None: - lines = [] - if digo.level == DigitalObjectLevel.LINE and digo.transcription: - lines.append(digo) - return lines - for child in digo.children: - _get_line_digos_from_digo(child, lines) - return lines - - -def calculate_bounding_box(elements: List[ET.Element], map_func) -> Tuple[int, int, int, int]: +def calculate_bounding_box(elements: typing.List[ET.Element], map_func) -> typing.Tuple[int, int, int, int]: """Review element's points to get points for minimum (top-left) and maximum (bottom-right)""" @@ -305,99 +281,6 @@ def calculate_bounding_box(elements: List[ET.Element], map_func) -> Tuple[int, i return ((min(all_x1), min(all_y1)), (max(all_x2), max(all_y2))) -def digital_object_to_text(file_path, frame=None, oneliner=True) -> Tuple[str | List[str], int]: - """Wrap OCR-Data Comparison""" - - try: - top_digo: DigitalObjectTree = to_digital_object(file_path) - # explicit filter frame? - if not frame: - frame = top_digo.dimensions - elif len(frame) == 2: - frame = [[frame[0][0], frame[0][1]], - [frame[1][0], frame[0][1]], - [frame[1][0], frame[1][1]], - [frame[0][0], frame[1][1]]] - frame_digo = DigitalObjectTree() - frame_digo.dimensions = frame - filter_word_pieces(frame_digo, top_digo) - the_lines = _get_line_digos_from_digo(top_digo) - if oneliner: - return top_digo.transcription, len(the_lines) - else: - return [line.transcription for line in the_lines], len(the_lines) - except xml.parsers.expat.ExpatError as _: - with open(file_path, mode='r', encoding='utf-8') as fhandle: - text_lines = fhandle.readlines() - if oneliner: - text_lines = ' '.join([l.strip() for l in text_lines]) - return text_lines, len(text_lines) - except RuntimeError as exc: - raise RuntimeError(f"{file_path}: {exc}") from exc - - -def digital_object_to_dict_text(file_path: str, frame=None, oneliner=False) -> Tuple[str | List[str], int]: - line_texts: List[str] - len_lines: int - line_texts, len_lines = digital_object_to_text(file_path=file_path, frame=frame, oneliner=False) - non_empty_lines: List[str] = [line_text for line_text in line_texts if len(line_text) > 0] - lines_sanitized_wraps: List[str] = _sanitize_wraps(non_empty_lines) - lines_sanitized_chars: List[str] = _sanitize_chars(lines_sanitized_wraps) - text = ' '.join(lines_sanitized_chars) if oneliner else lines_sanitized_chars - return text, len_lines - - -_HYPHENS: List[str] = [ - "⸗", - "-", - "—", -] - - -def _sanitize_wraps(lines: List[str]) -> List[str]: - """Sanitize word wraps if - * last word token ends with '-', "⸗" or "—" - * another line following - * following line not empty - """ - - normalized_lines: List[str] = [] - for i, line in enumerate(lines): - if i < len(lines) - 1: - for hyphen in _HYPHENS: - if line.endswith(hyphen): - next_line = lines[i + 1] - if len(next_line.strip()) == 0: - # encountered empty next line, no merge possible - continue - next_line_tokens = next_line.split() - nextline_first_token = next_line_tokens.pop(0) - # join the rest of valid next line - lines[i + 1] = ' '.join(next_line_tokens) - line = line[:-1] + nextline_first_token - break - normalized_lines.append(line) - return normalized_lines - - -def _sanitize_chars(lines: List[str]) -> List[str]: - """Replace or remove nonrelevant chars for current german word error rate""" - - sanitized: List[str] = [] - for line in lines: - text = line.strip() - bad_chars = '0123456789“„"\'?!*.;:-=[]()|' - text = ''.join([c for c in text if c not in bad_chars]) - if '..' 
in text: - text = text.replace('..', '') - if ' ' in text: - text = text.replace(' ', ' ') - text = ' '.join([t for t in text.split() if len(t) > 1]) - sanitized.append(text) - - return sanitized - - def _get_groundtruth_from_filename(file_path) -> str: _file_name = os.path.basename(file_path) result = re.match(r'.*gt.(\w{3,}).xml$', _file_name) @@ -411,36 +294,6 @@ def _get_groundtruth_from_filename(file_path) -> str: return NOT_SET -def filter_word_pieces(frame, current) -> int: - _filtered = 0 - _tmp_stack = [] - _total_stack = [] - # stack all items - _total_stack.append(current) - _tmp_stack.append(current) - while _tmp_stack: - _current: DigitalObjectTree = _tmp_stack.pop() - if _current.children: - _tmp_stack += _current.children - _total_stack += _current.children - # now pick words - _words = [_p for _p in _total_stack if _p.level == DigitalObjectLevel.WORD] - - # check for each word piece - for _word in _words: - if _word not in frame: - _filtered += 1 - _uplete(_word) - return _filtered - - -def _uplete(curr: DigitalObjectTree): - if len(curr.children) == 0 and curr.level < DigitalObjectLevel.PAGE: - _pa: DigitalObjectTree = curr.parent - _pa.remove_children(curr) - _uplete(_pa) - - def _normalize_gt_type(label) -> str: if label.startswith('art'): return 'article' @@ -505,7 +358,7 @@ def __str__(self) -> str: _val = m.value _ref = m.n_ref if _ref > 10000: - _ref_fmt = f'{(floor(float(m.n_ref) / 1000)):>2}K+' + _ref_fmt = f'{(math.floor(float(m.n_ref) / 1000)):>2}K+' else: _ref_fmt = f'{m.n_ref:>4}' _raw = f'{m.label}:{_val:>5.2f}({_ref_fmt})' @@ -519,7 +372,7 @@ def __str__(self) -> str: return ', '.join(_raws) def __repr__(self) -> str: - return '{} {}'.format(self.gt_type, self.path_c) + return f'{self.gt_type} {self.path_c}' class Evaluator: @@ -552,17 +405,17 @@ def __init__( self.evaluation_map = {} self.text_mode = extras == EVAL_EXTRA_IGNORE_GEOMETRY self.is_sequential = False - self.metrics: List = [] + self.metrics: typing.List[digem.SimilarityMetric] = [] self.evaluation_report = {} - def eval_all(self, entries: List[EvalEntry], sequential=False) -> None: + def eval_all(self, entries: typing.List[EvalEntry], sequential=False) -> None: """evaluate all pairs groundtruth-candidate""" _entries = [] if sequential or self.is_sequential: _entries = [self._wrap_eval_entry(e) for e in entries] else: - cpus = cpu_count() + cpus = multiprocessing.cpu_count() n_executors = cpus // 2 if cpus > 3 else 1 if self.verbosity == 1: print(f"[DEBUG] use {n_executors} executors ({cpus}) to create evaluation data") @@ -714,10 +567,14 @@ def eval_map(self): # re-order self.evaluation_results = sorted(self.evaluation_results, key=lambda e: e.eval_key) - def aggregate(self, by_type=False, by_metrics=[0, 1, 2, 3]): + def aggregate(self, by_type=False, by_metrics=None): + """Aggregate item's metrics for domain/directory + and/or annotated type (if present)""" # precheck - having root dir self._check_aggregate_preconditions() + if by_metrics is None: + by_metrics = [0, 1, 2, 3] root_base = Path(self.domain_reference).parts[-1] @@ -780,16 +637,16 @@ def report_stdout(evaluator: Evaluator, verbosity): results = evaluator.get_results() _path_can = evaluator.domain_candidate _path_ref = evaluator.domain_reference - evaluation_date = date.today().isoformat() + evaluation_date = datetime.date.today().isoformat() print(f'[INFO ] Evaluation Summary (candidates: "{_path_can}" vs. 
reference: "{_path_ref}" ({evaluation_date})') for result in results: (gt_type, n_total, mean_total, med, _n_refs) = result.get_defaults() - add_stats = f', std: {result.std:.2f}, median: {med:.2f}' if n_total > 1 else '' - print(f'[INFO ] "{gt_type}"\t∅: {mean_total:.2f}\t{n_total} items, {_n_refs} refs{add_stats}') + add_stats = f', std: {result.std:5.2f}, median: {med:5.2f}' if n_total > 1 else '' + print(f'[INFO ] "{gt_type}"\t∅: {mean_total:5.2f}\t{n_total: 3d} items, {_n_refs:_} refs{add_stats}') if result.cleared_result: (_, n_t2, mean2, med2, n_c2) = result.cleared_result.get_defaults() ccr_std = result.cleared_result.std drops = n_total - n_t2 if drops > 0: print( - f'[INFO ] "{gt_type}"\t∅: {mean2:.2f}\t{n_t2} items (-{drops}), {n_c2} refs, std: {ccr_std:.2f}, median: {med2:.2f}') + f'[INFO ] "{gt_type}(-{drops})"\t∅: {mean2:5.2f}\t{n_t2: 3d} items, {n_c2:_} refs, std: {ccr_std:5.2f}, median: {med2:5.2f}') diff --git a/src/digital_eval/metrics.py b/src/digital_eval/metrics.py index 0861522..c829201 100644 --- a/src/digital_eval/metrics.py +++ b/src/digital_eval/metrics.py @@ -8,6 +8,7 @@ import string import typing import unicodedata +import xml.dom.minidom import nltk import nltk.corpus as nltk_corp @@ -18,9 +19,10 @@ ) import rapidfuzz.distance.Levenshtein as rfls +import digital_object as do + from digital_eval.dictionary_metrics.common import LANGUAGE_KEY_DEFAULT from digital_eval.dictionary_metrics.language_tool.LanguageTool import LanguageTool -from digital_eval.evaluation import digital_object_to_text, digital_object_to_dict_text # Python3 standard Unicode Normalization # @@ -32,7 +34,7 @@ # usual spatium and special control sequences WHITESPACES = string.whitespace -WHITESPACES_EXCLUDING_BLANK_CHARS = WHITESPACES[1:] +WHITESPACES_EXCL_BLANK_CHARS = WHITESPACES[1:] # punctuations # @@ -64,26 +66,26 @@ # filter mechanics # # via Python3 string translation maps -WHITESPACE_TRANSLATOR = str.maketrans('', '', WHITESPACES) -WHITESPACE_EXCLUDING_BLANK_CHARS_TRANSLATOR = str.maketrans('', '', WHITESPACES_EXCLUDING_BLANK_CHARS) -PUNCT_TRANLATOR = str.maketrans('', '', PUNCTUATIONS) -DIGIT_TRANSLATOR = str.maketrans('', '', DIGITS) +WHITESPACE_TRNSL = str.maketrans('', '', WHITESPACES) +WHITESPACE_EXCL_BLANK_CHARS_TRNSL = str.maketrans('', '', WHITESPACES_EXCL_BLANK_CHARS) +PUNCT_TRNSL = str.maketrans('', '', PUNCTUATIONS) +DIGIT_TRNSL = str.maketrans('', '', DIGITS) def _filter_whitespaces(a_str) -> str: - return a_str.translate(WHITESPACE_TRANSLATOR) + return a_str.translate(WHITESPACE_TRNSL) def _filter_whitespaces_excluding_blank_chars(a_str) -> str: - return a_str.translate(WHITESPACE_EXCLUDING_BLANK_CHARS_TRANSLATOR) + return a_str.translate(WHITESPACE_EXCL_BLANK_CHARS_TRNSL) def _filter_puncts(a_str) -> str: - return a_str.translate(PUNCT_TRANLATOR) + return a_str.translate(PUNCT_TRNSL) def _filter_digits(a_str) -> str: - return a_str.translate(DIGIT_TRANSLATOR) + return a_str.translate(DIGIT_TRNSL) def _tokenize(a_str) -> typing.List[str]: @@ -109,11 +111,13 @@ def _tokenize_to_sorted_set(a_str) -> typing.Set[str]: STOPWORDS_DEFAULT = ['german', 'english', 'arabic', 'russian'] -def get_stopwords(nltk_mappings=NLTK_STOPWORDS, languages=None) -> typing.Set[str]: +def get_stopwords(nltk_mappings=None, languages=None) -> typing.Set[str]: """Helper Function to gather NLTK stopword data * ensure stopwords files are locally available * extract them as set """ + if nltk_mappings is None: + nltk_mappings = NLTK_STOPWORDS try: for mapping in nltk_mappings: 
nltk_corp.stopwords.words(mapping) @@ -155,6 +159,140 @@ def transform_string(the_content): return the_content +def digital_object_to_dict_text(file_path: str, frame=None, oneliner=False) -> typing.Tuple: + line_texts: typing.List[str] + len_lines: int + line_texts, len_lines = digital_object_to_text(file_path=file_path, frame=frame, oneliner=False) + non_empty_lines: typing.List[str] = [line_text for line_text in line_texts if len(line_text) > 0] + lines_sanitized_wraps: typing.List[str] = _sanitize_wraps(non_empty_lines) + lines_sanitized_chars: typing.List[str] = _sanitize_chars(lines_sanitized_wraps) + text = ' '.join(lines_sanitized_chars) if oneliner else lines_sanitized_chars + return text, len_lines + + +def digital_object_to_text(file_path, frame=None, oneliner=True) -> typing.Tuple: + """Wrap OCR-Data Comparison""" + + try: + top_digo: do.DigitalObjectTree = do.to_digital_object(file_path) + # explicit filter frame? + if not frame: + frame = top_digo.dimensions + elif len(frame) == 2: + frame = [[frame[0][0], frame[0][1]], + [frame[1][0], frame[0][1]], + [frame[1][0], frame[1][1]], + [frame[0][0], frame[1][1]]] + frame_digo = do.DigitalObjectTree() + frame_digo.dimensions = frame + filter_word_pieces(frame_digo, top_digo) + the_lines = _get_line_digos_from_digo(top_digo) + if oneliner: + return top_digo.transcription, len(the_lines) + else: + return [line.transcription for line in the_lines], len(the_lines) + except xml.parsers.expat.ExpatError as _: + with open(file_path, mode='r', encoding='utf-8') as fhandle: + text_lines = fhandle.readlines() + if oneliner: + text_lines = ' '.join([l.strip() for l in text_lines]) + return text_lines, len(text_lines) + except RuntimeError as exc: + raise RuntimeError(f"{file_path}: {exc}") from exc + + +def filter_word_pieces(frame, current) -> int: + _filtered = 0 + _tmp_stack = [] + _total_stack = [] + # stack all items + _total_stack.append(current) + _tmp_stack.append(current) + while _tmp_stack: + _current: do.DigitalObjectTree = _tmp_stack.pop() + if _current.children: + _tmp_stack += _current.children + _total_stack += _current.children + # now pick words + _words = [_p for _p in _total_stack if _p.level == do.DigitalObjectLevel.WORD] + + # check for each word piece + for _word in _words: + if _word not in frame: + _filtered += 1 + _uplete(_word) + return _filtered + + +def _uplete(curr: do.DigitalObjectTree): + if len(curr.children) == 0 and curr.level < do.DigitalObjectLevel.PAGE: + _pa: do.DigitalObjectTree = curr.parent + _pa.remove_children(curr) + _uplete(_pa) + + +def _get_line_digos_from_digo(digo: do.DigitalObjectTree, lines: typing.List = None) -> typing.List[do.DigitalObjectTree]: + if lines is None: + lines = [] + if digo.level == do.DigitalObjectLevel.LINE and digo.transcription: + lines.append(digo) + return lines + for child in digo.children: + _get_line_digos_from_digo(child, lines) + return lines + + +_HYPHENS: typing.List[str] = [ + "⸗", + "-", + "—", +] + + +def _sanitize_wraps(lines: typing.List[str]) -> typing.List[str]: + """Sanitize word wraps if + * last word token ends with '-', "⸗" or "—" + * another line following + * following line not empty + """ + + normalized_lines: typing.List[str] = [] + for i, line in enumerate(lines): + if i < len(lines) - 1: + for hyphen in _HYPHENS: + if line.endswith(hyphen): + next_line = lines[i + 1] + if len(next_line.strip()) == 0: + # encountered empty next line, no merge possible + continue + next_line_tokens = next_line.split() + nextline_first_token = next_line_tokens.pop(0) 
+ # join the rest of valid next line + lines[i + 1] = ' '.join(next_line_tokens) + line = line[:-1] + nextline_first_token + break + normalized_lines.append(line) + return normalized_lines + + +def _sanitize_chars(lines: typing.List[str]) -> typing.List[str]: + """Replace or remove nonrelevant chars for current german word error rate""" + + sanitized: typing.List[str] = [] + for line in lines: + text = line.strip() + bad_chars = '0123456789“„"\'?!*.;:-=[]()|' + text = ''.join([c for c in text if c not in bad_chars]) + if '..' in text: + text = text.replace('..', '') + if ' ' in text: + text = text.replace(' ', ' ') + text = ' '.join([t for t in text.split() if len(t) > 1]) + sanitized.append(text) + + return sanitized + + class DigitalEvalMetricException(Exception): """Mark Exception during validation/calculating metrics""" @@ -163,7 +301,9 @@ def __init__(self, *args: object) -> None: class SimilarityMetric: - """Basic definition of a OCRDifferenceMetric""" + """Basic definition of OCR Similarity Metric, + expressed in percent (0 - 100) + """ def __init__( self, @@ -186,9 +326,6 @@ def __init__( self._data_candidate = None self.languages = None - def norm_percentual(self): - self._value = self._value * 100 - @property def reference(self): """Reference/Groundtruth data""" @@ -319,6 +456,7 @@ def __init__(self, precision=2, normalization=UC_NORMALIZATION_NFKD, preprocessings=preprocessings, ) self._label = 'DictLT' + self.diff = 0 def _forward(self): text: str = self._data_candidate @@ -476,7 +614,7 @@ def ir_fmeasure(reference_data, candidate_data) -> float: # diacritica to take care of -_COMBINING_SMALL_E = u'\u0364' +_COMBINING_SMALL_E = '\u0364' def _normalize_vocal_ligatures(a_string) -> str: """Replace vocal ligatures, which otherwise @@ -505,4 +643,4 @@ def _normalize_vocal_ligatures(a_string) -> str: _out.append(_c) # strip all combining e's anyway - return ''.join(_out).replace(_COMBINING_SMALL_E, '') \ No newline at end of file + return ''.join(_out).replace(_COMBINING_SMALL_E, '') diff --git a/src/ocr_util/cli.py b/src/ocr_util/cli.py index f116f71..1c7b9a3 100644 --- a/src/ocr_util/cli.py +++ b/src/ocr_util/cli.py @@ -4,19 +4,17 @@ import argparse import re from pathlib import PurePath -from typing import Final -from digital_object import DigitalObject, from_digital_objects -from digital_object.filter import PolygonFrameFilterUtil, PolygonFrameFilter +import digital_object as do +import digital_object.filter as dofi # script constants - -DEFAULT_VERBOSITY: int = 0 -SUB_CMD_FRAME: Final[str] = 'frame' +DEFAULT_VERBOSITY = 0 +SUB_CMD_FRAME = 'frame' def points_type(points: str) -> str: - match: re.Match = re.match(PolygonFrameFilterUtil.POINT_LIST_PATTERN, points) + match: re.Match = re.match(dofi.PolygonFrameFilterUtil.POINT_LIST_PATTERN, points) if not match: raise argparse.ArgumentTypeError(f"Invalid point coordinates: '{points}'") return points @@ -72,13 +70,13 @@ def start() -> None: points: str = args.points if verbosity > 1: print(f"[DEBUG] args: {input_ocr_file}, {output_ocr_file}, {points}, {verbosity}") - polygon_frame_filter: PolygonFrameFilter = PolygonFrameFilter( + polygon_frame_filter: dofi.PolygonFrameFilter = dofi.PolygonFrameFilter( input_ocr_file, points, verbosity ) - piece_result: DigitalObject = polygon_frame_filter.process() - file_result: PurePath = from_digital_objects(piece_result, output_ocr_file) + piece_result: do.DigitalObjectTree = polygon_frame_filter.process() + file_result: PurePath = do.from_digital_object(piece_result, output_ocr_file) if 
verbosity > 0: print('[INFO ] file_result', file_result) diff --git a/tests/test_dict_metric.py b/tests/test_dict_metric.py index 638ca73..501af91 100644 --- a/tests/test_dict_metric.py +++ b/tests/test_dict_metric.py @@ -1,8 +1,5 @@ -from digital_eval.evaluation import ( - digital_object_to_dict_text, - digital_object_to_text -) -from digital_eval.metrics import normalize_unicode, UC_NORMALIZATION_NFKD, _normalize_vocal_ligatures +import digital_eval.metrics as digem + from .conftest import TEST_RES_DIR @@ -10,12 +7,12 @@ def test_piece_to_dict_text_alto(): alto_path = f'{TEST_RES_DIR}/dict_metric/alto.xml' # act - alto_text_no_sanit, _ = digital_object_to_text(alto_path, oneliner=True) + alto_text_no_sanit, _ = digem.digital_object_to_text(alto_path, oneliner=True) alto_words_no_sanit = alto_text_no_sanit.split() - alto_text, _ = digital_object_to_dict_text(alto_path, oneliner=True) - alto_lines, alto_num_lines = digital_object_to_dict_text(alto_path, oneliner=False) - alto_lines_norm_vocal_ligatures = [_normalize_vocal_ligatures(line) for line in alto_lines] - alto_lines_norm = [normalize_unicode(line, UC_NORMALIZATION_NFKD) for line in alto_lines_norm_vocal_ligatures] + alto_text, _ = digem.digital_object_to_dict_text(alto_path, oneliner=True) + alto_lines, alto_num_lines = digem.digital_object_to_dict_text(alto_path, oneliner=False) + alto_lines_norm_vocal_ligatures = [digem._normalize_vocal_ligatures(line) for line in alto_lines] + alto_lines_norm = [digem.normalize_unicode(line, digem.UC_NORMALIZATION_NFKD) for line in alto_lines_norm_vocal_ligatures] alto_text_norm = " ".join(alto_lines_norm) alto_words = alto_text_norm.split() @@ -31,12 +28,12 @@ def test_piece_to_dict_text_page2019(): page_path = f'{TEST_RES_DIR}/dict_metric/page2019.xml' # act - page_text_no_sanit, _ = digital_object_to_text(page_path, oneliner=True) + page_text_no_sanit, _ = digem.digital_object_to_text(page_path, oneliner=True) page_words_no_sanit = page_text_no_sanit.split() - page_text, _ = digital_object_to_dict_text(page_path, oneliner=True) - page_lines, alto_num_lines = digital_object_to_dict_text(page_path, oneliner=False) - page_lines_norm_vocal_ligatures = [_normalize_vocal_ligatures(line) for line in page_lines] - page_lines_norm = [normalize_unicode(line, UC_NORMALIZATION_NFKD) for line in page_lines_norm_vocal_ligatures] + page_text, _ = digem.digital_object_to_dict_text(page_path, oneliner=True) + page_lines, alto_num_lines = digem.digital_object_to_dict_text(page_path, oneliner=False) + page_lines_norm_vocal_ligatures = [digem._normalize_vocal_ligatures(line) for line in page_lines] + page_lines_norm = [digem.normalize_unicode(line, digem.UC_NORMALIZATION_NFKD) for line in page_lines_norm_vocal_ligatures] page_text_norm = " ".join(page_lines_norm) page_words = page_text_norm.split() @@ -52,12 +49,12 @@ def test_piece_to_dict_text_page2013(): page_path = f'{TEST_RES_DIR}/dict_metric/page2013.xml' # act - page_text_no_sanit, _ = digital_object_to_text(page_path, oneliner=True) + page_text_no_sanit, _ = digem.digital_object_to_text(page_path, oneliner=True) page_words_no_sanit = page_text_no_sanit.split() - page_text, _ = digital_object_to_dict_text(page_path, oneliner=True) - page_lines, alto_num_lines = digital_object_to_dict_text(page_path, oneliner=False) - page_lines_norm_vocal_ligatures = [_normalize_vocal_ligatures(line) for line in page_lines] - page_lines_norm = [normalize_unicode(line, UC_NORMALIZATION_NFKD) for line in page_lines_norm_vocal_ligatures] + page_text, _ = 
digem.digital_object_to_dict_text(page_path, oneliner=True) + page_lines, alto_num_lines = digem.digital_object_to_dict_text(page_path, oneliner=False) + page_lines_norm_vocal_ligatures = [digem._normalize_vocal_ligatures(line) for line in page_lines] + page_lines_norm = [digem.normalize_unicode(line, digem.UC_NORMALIZATION_NFKD) for line in page_lines_norm_vocal_ligatures] page_text_norm = " ".join(page_lines_norm) page_words = page_text_norm.split() diff --git a/tests/test_digital_eval_cli.py b/tests/test_digital_eval_cli.py index ab15336..ccbfd80 100644 --- a/tests/test_digital_eval_cli.py +++ b/tests/test_digital_eval_cli.py @@ -5,7 +5,7 @@ from pathlib import Path -import digital_eval.cli as dival +import digital_eval.cli as dig from .conftest import TEST_RES_DIR @@ -25,7 +25,7 @@ def test_mwe_cli(tmp_path, capsys): """ # arrange - dival.VERBOSITY = 1 + dig.VERBOSITY = 1 src_candidates = TEST_RES_DIR / 'candidate' / 'frk_alto' src_reference = TEST_RES_DIR / 'groundtruth' / 'page' dst_candidates = tmp_path / 'candidate' / _DOMAIN_LABEL @@ -38,16 +38,18 @@ def test_mwe_cli(tmp_path, capsys): assert _DOMAIN_LABEL == tmp_reference.name # act - _results = dival._main(dst_candidates, dst_reference, - dival.DEFAULT_OCR_METRICS, dival.DEFAULT_UTF8_NORM, None) + cli_args = {"candidates": dst_candidates, "reference": dst_reference, + "metrics": dig.DEFAULT_OCR_METRICS, + "verbosity": 1, + "utf8": dig.DEFAULT_UTF8_NORM, + "sequential": True} + eval_results = dig.start_evaluation(cli_args) # assert + assert len(eval_results) == 4 captured = capsys.readouterr().out - assert captured.startswith("[DEBUG] text normalized using 'NFC'") - assert len(captured) == 1027 std_lines = captured.split('\n') assert len(std_lines) == 11 - assert std_lines[1].startswith('[DEBUG] from "5" filtered "3" candidates') + assert std_lines[0] == "[DEBUG] text normalized using 'NFC' code points for 'Cs,Ls'" + assert str(std_lines[1]).startswith('[DEBUG] from "5" filtered "3" candidates') assert std_lines[4] == "[DEBUG] [1667522809_J_0001_0002](art) [Cs:39.20(5309), Ls:38.54(4383)(- 0.66)]" - assert len(_results) == 4 - assert _results[0] diff --git a/tests/test_ocr_evaluate.py b/tests/test_ocr_evaluate.py index 42c1051..14a3917 100644 --- a/tests/test_ocr_evaluate.py +++ b/tests/test_ocr_evaluate.py @@ -12,22 +12,16 @@ approx ) -from digital_eval.evaluation import ( - EvalEntry, - Evaluator, - match_candidates, - digital_object_to_text, - get_bbox_data, - _get_groundtruth_from_filename, -) -from digital_eval.metrics import MetricIRFM, MetricIRPre, MetricIRRec, MetricChars, SimilarityMetric +import digital_eval.evaluation as digev +import digital_eval.metrics as digem + from .conftest import ( TEST_RES_DIR ) def test_match_candidates_alto_candidate_with_coords(): - actual_matches = match_candidates(f'{TEST_RES_DIR}/candidate/frk_alto', + actual_matches = digev.match_candidates(f'{TEST_RES_DIR}/candidate/frk_alto', f'{TEST_RES_DIR}/groundtruth/alto/1667522809_J_0073_0001_375x2050_2325x9550.xml') assert f'{TEST_RES_DIR}/candidate/frk_alto/1667522809_J_0073_0001_part.xml' == actual_matches[0] @@ -35,13 +29,13 @@ def test_match_candidates_alto_candidate_with_coords(): def test_match_candidates_both_txt_files(): path_candidates = f'{TEST_RES_DIR}/candidate/txt' path_gt = f'{TEST_RES_DIR}/groundtruth/txt/1246734.gt.txt' - actual_matches = match_candidates(path_candidates, path_gt) + actual_matches = digev.match_candidates(path_candidates, path_gt) assert f'{TEST_RES_DIR}/candidate/txt/OCR-Fraktur_1246734.txt' == 
actual_matches[0] def test_match_candidates_fails_no_groundtruth(): with pytest.raises(IOError) as exc: - match_candidates( + digev.match_candidates( f'{TEST_RES_DIR}/candidate/txt', './test/sresources/txt/no_gt.txt') assert "invalid groundtruth data path" in str(exc) @@ -49,7 +43,7 @@ def test_match_candidates_fails_no_groundtruth(): def test_match_candidates_fails_no_candidates(): with pytest.raises(IOError) as exc: - match_candidates( + digev.match_candidates( './text/no_results', f'{TEST_RES_DIR}/txt/gt/1246734.txt') assert "invalid ocr result path" in str(exc) @@ -60,7 +54,7 @@ def test_match_candidates_groundtruth_txt_candidate_alto(): path_gt = f'{TEST_RES_DIR}/groundtruth/txt/217745.gt.txt' # act - actual_matches = match_candidates(path_cd, path_gt) + actual_matches = digev.match_candidates(path_cd, path_gt) # assert assert actual_matches[0] == f'{TEST_RES_DIR}/candidate/ara_alto/217745.xml' @@ -74,8 +68,8 @@ def test_piece_to_text_alto_candidate_with_coords(): p2 = (6200, 3425) # act - _as_lines, _ = digital_object_to_text(alto_path, frame=(p1, p2), oneliner=False) - _gt_type = _get_groundtruth_from_filename(alto_path) + _as_lines, _ = digem.digital_object_to_text(alto_path, frame=(p1, p2), oneliner=False) + _gt_type = digev._get_groundtruth_from_filename(alto_path) # assert assert _gt_type == 'n.a.' @@ -91,8 +85,8 @@ def test_evaluate_single_alto_candidate_with_page_groundtruth(tmp_path): eval_domain.mkdir(parents=True) gt_domain = tmp_path / 'groundtruth' / '1667522809_J_0001' gt_domain.mkdir(parents=True) - evaluator = Evaluator(eval_domain) - evaluator.metrics = [MetricChars()] + evaluator = digev.Evaluator(eval_domain) + evaluator.metrics = [digem.MetricChars()] # required for directory-like aggregation evaluator.domain_reference = gt_domain _candidate_src = os.path.join(f'{TEST_RES_DIR}/candidate/frk_alto/1667522809_J_0001_0002.xml') @@ -102,7 +96,7 @@ def test_evaluate_single_alto_candidate_with_page_groundtruth(tmp_path): shutil.copy(_gt_src, _gt_dst) # act - eval_entry = EvalEntry(str(eval_domain / '1667522809_J_0001_0002.xml')) + eval_entry = digev.EvalEntry(str(eval_domain / '1667522809_J_0001_0002.xml')) eval_entry.path_g = _gt_dst evaluator.eval_all([eval_entry], sequential=True) evaluator.aggregate(by_type=True) @@ -136,8 +130,8 @@ def test_evaluate_page_groundtruth_with_itself(tmp_path): eval_domain.mkdir(parents=True) gt_domain = tmp_path / 'groundtruth' / '1667522809_J_0001' gt_domain.mkdir(parents=True) - evaluator = Evaluator(eval_domain) - evaluator.metrics = [MetricChars()] + evaluator = digev.Evaluator(eval_domain) + evaluator.metrics = [digem.MetricChars()] evaluator.domain_reference = gt_domain _candidate_src = os.path.join(f'{TEST_RES_DIR}/groundtruth/page/1667522809_J_0001_0002.art.gt.xml') _candidate_dst = str(eval_domain / '1667522809_J_0001_0002.xml') @@ -147,7 +141,7 @@ def test_evaluate_page_groundtruth_with_itself(tmp_path): shutil.copy(_gt_src, _gt_dst) # act - eval_entry = EvalEntry(str(eval_domain / '1667522809_J_0001_0002.xml')) + eval_entry = digev.EvalEntry(str(eval_domain / '1667522809_J_0001_0002.xml')) eval_entry.path_g = _gt_dst evaluator.eval_all([eval_entry], sequential=True) evaluator.aggregate(by_type=True) @@ -188,44 +182,44 @@ def test_evaluate_set_with_5_entries(tmp_path): path_dir_gt.mkdir() path_dir_c = tmp_path / 'media' / 'jpg' / 'odem' path_dir_c.mkdir(parents=True) - evaluator = Evaluator(path_dir_c) + evaluator = digev.Evaluator(path_dir_c) evaluator.domain_reference = path_dir_gt - _metric_ca1 = MetricChars() + _metric_ca1 = 
digem.MetricChars() _metric_ca1._value = 95.70 _metric_ca1._data_reference = 't' * 810 - _metric_ca2 = MetricChars() + _metric_ca2 = digem.MetricChars() _metric_ca2._value = 96.53 _metric_ca2._data_reference = 't' * 675 - _metric_ca3 = MetricChars() + _metric_ca3 = digem.MetricChars() _metric_ca3._value = 94.91 _metric_ca3._data_reference = 't' * 1395 - _metric_ca4 = MetricChars() + _metric_ca4 = digem.MetricChars() _metric_ca4._value = 94.40 _metric_ca4._data_reference = 't' * 1466 # outlier ! - _metric_ca5 = MetricChars() + _metric_ca5 = digem.MetricChars() _metric_ca5._value = 86.44 _metric_ca5._data_reference = 't' * 1520 - _metric_ca6 = MetricChars() + _metric_ca6 = digem.MetricChars() _metric_ca6._value = 93.44 _metric_ca6._data_reference = 't' * 1520 - entry1 = EvalEntry(path_dir_c / 'eng' / 'urn+nbn+de+gbv+3+1-135654-p0403-5_eng.xml') + entry1 = digev.EvalEntry(path_dir_c / 'eng' / 'urn+nbn+de+gbv+3+1-135654-p0403-5_eng.xml') entry1.path_g = str(path_dir_gt / 'eng' / 'urn+nbn+de+gbv+3+1-135654-p0403-5_eng.gt.xml') entry1.metrics = [_metric_ca1] - entry2 = EvalEntry(path_dir_c / 'ger' / 'urn+nbn+de+gbv+3+1-816198-p0493-2_ger.xml') + entry2 = digev.EvalEntry(path_dir_c / 'ger' / 'urn+nbn+de+gbv+3+1-816198-p0493-2_ger.xml') entry2.path_g = str('/data/ocr/groundtruth/odem/ger/urn+nbn+de+gbv+3+1-816198-p0493-2_ger.gt.xml') entry2.metrics = [_metric_ca2] - entry3 = EvalEntry(path_dir_c / 'ger' / 'urn+nbn+de+gbv+3+1-818383-p0034-5_ger.xml') + entry3 = digev.EvalEntry(path_dir_c / 'ger' / 'urn+nbn+de+gbv+3+1-818383-p0034-5_ger.xml') entry3.path_g = '/data/ocr/groundtruth/odem/ger/urn+nbn+de+gbv+3+1-818383-p0034-5_ger.gt.xml' entry3.metrics = [_metric_ca3] - entry4 = EvalEntry(path_dir_c / 'ger' / 'urn+nbn+de+gbv+3+1-822479-p1119-4_ger.xml') + entry4 = digev.EvalEntry(path_dir_c / 'ger' / 'urn+nbn+de+gbv+3+1-822479-p1119-4_ger.xml') entry4.path_g = '/data/ocr/groundtruth/odem/ger/urn+nbn+de+gbv+3+1-822479-p1119-4_ger.gt.xml' entry4.metrics = [_metric_ca4] - entry5 = EvalEntry(path_dir_c / 'ger' / 'urn+nbn+de+gbv+3+1-828020-p0173-6_ger.xml') + entry5 = digev.EvalEntry(path_dir_c / 'ger' / 'urn+nbn+de+gbv+3+1-828020-p0173-6_ger.xml') entry5.path_g = '/data/ocr/groundtruth/odem/ger/urn+nbn+de+gbv+3+1-828020-p0173-6_ger.gt.xml' entry5.metrics = [_metric_ca5] - entry6 = EvalEntry(path_dir_c / 'ger' / 'urn+nbn+de+gbv+3+1-125584-p0314-6_ger.xml') + entry6 = digev.EvalEntry(path_dir_c / 'ger' / 'urn+nbn+de+gbv+3+1-125584-p0314-6_ger.xml') entry6.path_g = '/data/ocr/groundtruth/odem/ger/urn+nbn+de+gbv+3+1-125584-p0314-6_ger.gt.xml' entry6.metrics = [_metric_ca6] evaluator.evaluation_entries = [entry1, entry2, entry3, entry4, entry5, entry6] @@ -250,7 +244,7 @@ def test_no_groundtruth_at_all(tmp_path): doesn't make any sense so far """ - evaluator = Evaluator(tmp_path) + evaluator = digev.Evaluator(tmp_path) evaluator.eval_all([]) with pytest.raises(RuntimeError) as err: @@ -271,12 +265,12 @@ def test_handle_exception_invalid_literal_for_int(): # arrange path_gt = f'{TEST_RES_DIR}/groundtruth/page/urn+nbn+de+gbv+3+1-792101-p0667-5_ger.gt.xml' - eval_entry = EvalEntry('dummy_candidate') + eval_entry = digev.EvalEntry('dummy_candidate') eval_entry.path_g = path_gt # act - evaluator = Evaluator('dummy_path') - evaluator.metrics = [SimilarityMetric()] + evaluator = digev.Evaluator('dummy_path') + evaluator.metrics = [digem.SimilarityMetric()] with pytest.raises(RuntimeError) as err: evaluator.eval_entry(eval_entry) @@ -295,10 +289,10 @@ def test_handle_empty_candidate_information_retrival(): # arrange 
path_gt = f'{TEST_RES_DIR}/groundtruth/page/urn+nbn+de+gbv+3+1-138193-p0904-0_ger.gt.xml' path_cd = f'{TEST_RES_DIR}/candidate/frk_page/urn+nbn+de+gbv+3+1-138193-p0904-0_ger.xml' - eval_entry = EvalEntry(path_cd) + eval_entry = digev.EvalEntry(path_cd) eval_entry.path_g = path_gt - evaluator = Evaluator('/data') - evaluator.metrics = [MetricIRPre(), MetricIRRec(), MetricIRFM()] + evaluator = digev.Evaluator('/data') + evaluator.metrics = [digem.MetricIRPre(), digem.MetricIRRec(), digem.MetricIRFM()] evaluator.verbosity = 1 # act @@ -323,12 +317,12 @@ def test_handle_table_text_groundtruth(): # arrange path_gt = f'{TEST_RES_DIR}/groundtruth/page/urn+nbn+de+gbv+3+1-126343-p0285-7_ger.gt.xml' path_cd = f'{TEST_RES_DIR}/candidate/frk_page/urn+nbn+de+gbv+3+1-126343-p0285-7_ger.xml' - eval_entry = EvalEntry(path_cd) + eval_entry = digev.EvalEntry(path_cd) eval_entry.path_g = path_gt # act - evaluator = Evaluator('/data') - evaluator.metrics = [MetricChars()] + evaluator = digev.Evaluator('/data') + evaluator.metrics = [digem.MetricChars()] evaluator._wrap_eval_entry(eval_entry) # assert / legacy: 5.825 , actual 4.0 @@ -343,7 +337,7 @@ def test_get_box_from_empty_page(): _path_gt = f'{TEST_RES_DIR}/groundtruth/page/urn+nbn+de+gbv+3+1-201080-p0034-8_ger.gt.xml' # act - _p1, _p2 = get_bbox_data(_path_gt) + _p1, _p2 = digev.get_bbox_data(_path_gt) # assert assert _p1 == (77, 58) @@ -359,7 +353,7 @@ def test_get_box_when_line_points_messy(): _path_gt = f'{TEST_RES_DIR}/groundtruth/page/rahbar-1771946695-00000040.xml' # act - _p1, _p2 = get_bbox_data(_path_gt) + _p1, _p2 = digev.get_bbox_data(_path_gt) # assert assert _p1 == (368, 619) @@ -375,12 +369,12 @@ def test_handle_exception_invalid_alto_xml(): # arrange path_gt = f'{TEST_RES_DIR}/candidate/frk_alto/1667522809_J_0001_0256_corrupt.xml' - eval_entry = EvalEntry('dummy_candidate') + eval_entry = digev.EvalEntry('dummy_candidate') eval_entry.path_g = path_gt # act - evaluator = Evaluator('dummy_path') - evaluator.metrics = [SimilarityMetric()] + evaluator = digev.Evaluator('dummy_path') + evaluator.metrics = [digem.SimilarityMetric()] with pytest.raises(ParseError) as err: evaluator.eval_entry(eval_entry) diff --git a/tests/test_ocr_metrics.py b/tests/test_ocr_metrics.py index 096b41c..80cbc12 100644 --- a/tests/test_ocr_metrics.py +++ b/tests/test_ocr_metrics.py @@ -5,7 +5,7 @@ import pytest -import digital_eval.metrics as deme +import digital_eval.metrics as digem # default reference THE_COMBINED_A_FOX = 'the á lazy brown fox jumps over the hump' @@ -20,7 +20,7 @@ def test_metric_unicode_normalization_textual_metric(): """ # arrange - char_metric = deme.MetricChars() + char_metric = digem.MetricChars() char_metric.reference = THE_LAZY_FOX char_metric.candidate = THE_COMBINED_A_FOX @@ -32,7 +32,7 @@ def test_metric_characters_from_empty_gt(): """Total un-similarity""" # arrange - _metric = deme.MetricChars() + _metric = digem.MetricChars() # _metric.preprocessings = [_filter_whitespaces] _metric.reference = '' _metric.candidate = THE_LAZY_FOX @@ -45,7 +45,7 @@ def test_metric_letter_from_empty_gt_and_empty_candidate(): """Behavor: Similarity of empty strings""" # arrange - _metric = deme.MetricLetters() + _metric = digem.MetricLetters() _metric.reference = '' _metric.candidate = '' @@ -57,7 +57,7 @@ def test_metric_words_with_only_slight_difference(): """simple word accurracy test""" # arrange - _metric = deme.MetricWords() + _metric = digem.MetricWords() _metric.reference = THE_LAZY_FOX _metric.candidate = THE_FOX_LAZY @@ -75,7 +75,7 @@ def 
test_metric_wa_with_identical_data(): """simple word similarity for similar inputs""" # arrange - _metric = deme.MetricWords() + _metric = digem.MetricWords() _metric.reference = THE_LAZY_FOX _metric.candidate = THE_LAZY_FOX @@ -87,7 +87,7 @@ def test_metric_bow_from_reasonable_input(): """simple bag of words test""" # arrange - _metric = deme.MetricBoW() + _metric = digem.MetricBoW() _metric.reference = THE_LAZY_FOX _metric.candidate = THE_FOX_LAZY @@ -99,7 +99,7 @@ def test_metric_bow_from_empty_gt_and_empty_candidate(): """how to handle empty data - means: no errors""" # arrange - _metric = deme.MetricBoW() + _metric = digem.MetricBoW() _metric.reference = '' _metric.candidate = '' @@ -117,7 +117,7 @@ def test_bow_ocrd_similarity_rate(): """ # arrange - _metric = deme.MetricBoW() + _metric = digem.MetricBoW() _metric.reference = "der Mann steht an der Ampel" _metric.candidate = "cer Mann fteht an der Ampel" @@ -135,7 +135,7 @@ def test_bow_ocrd_spec_similarity_rate_ref_contains_more_data(): """ # arrange - _metric = deme.MetricBoW() + _metric = digem.MetricBoW() _metric.reference = "der Mann steht an der roten Ampel" _metric.candidate = "cer Mann fteht an der Ampel" @@ -153,7 +153,7 @@ def test_bow_ocrd_spec_similarity_rate_ref_contains_less_data(): """ # arrange - _metric = deme.MetricBoW() + _metric = digem.MetricBoW() _metric.reference = "der Mann steht an der Ampel" _metric.candidate = "cer Mann fteht an der schönen roten Ampel" @@ -171,7 +171,7 @@ def test_metric_character_accuracy(): str2 = 'fthe lazy brown fox jumps ouer the hump' # arrange - char_metric = deme.MetricChars() + char_metric = digem.MetricChars() char_metric.reference = str1 char_metric.candidate = str2 @@ -187,7 +187,7 @@ def test_metric_bot_ident(): random.shuffle(list2) str2 = ' '.join(list2) - result = deme.bag_of_tokens(gt1.split(), str2.split()) + result = digem.bag_of_tokens(gt1.split(), str2.split()) assert result == 1.0 assert len(gt1.split()) == len(str2.split()) @@ -201,7 +201,7 @@ def test_metric_bot_candidate_with_only_repetitions(): str2 = "the dizzy brown fox fox fox jumps" # actsert - assert 0.83 == pytest.approx(deme.bag_of_tokens(gt1.split(), str2.split()), abs=1e-2) + assert 0.83 == pytest.approx(digem.bag_of_tokens(gt1.split(), str2.split()), abs=1e-2) def test_metric_bot_miss_tokens(): @@ -211,7 +211,7 @@ def test_metric_bot_miss_tokens(): str2 = "the brown fux jumps" # acsert - assert 0.66 == pytest.approx(deme.bag_of_tokens(gt1.split(), str2.split()), abs=1e-2) + assert 0.66 == pytest.approx(digem.bag_of_tokens(gt1.split(), str2.split()), abs=1e-2) def test_ir_metric_precision_fox(): @@ -219,7 +219,7 @@ def test_ir_metric_precision_fox(): having all tokens included (minus stopwords)""" # arrange - m_prec = deme.MetricIRPre() + m_prec = digem.MetricIRPre() m_prec.reference = THE_LAZY_FOX m_prec.candidate = THE_FOX_INPUT_IR @@ -236,7 +236,7 @@ def test_ir_metric_recall_fox(): (minus stoppwords)""" # arrange - m_prec = deme.MetricIRRec() + m_prec = digem.MetricIRRec() m_prec.reference = THE_LAZY_FOX m_prec.candidate = THE_FOX_INPUT_IR @@ -256,7 +256,7 @@ def test_ir_metrics_precision_english_poor_candidate(): a rather poor candidate""" # arrange - pre = deme.MetricIRPre() + pre = digem.MetricIRPre() pre.reference = THE_LAZY_FOX pre.candidate = IR_CANDIDATE_TEXT @@ -271,7 +271,7 @@ def test_ir_metrics_recall_english_poor_candidate(): a rather poor candidate""" # arrange - rec = deme.MetricIRRec() + rec = digem.MetricIRRec() rec.reference = THE_LAZY_FOX rec.candidate = IR_CANDIDATE_TEXT @@ -284,7 
+284,7 @@ def test_ir_metrics_fmeasure_english_poor_candidate(): a rather poor candidate""" # arrange - metric_fm = deme.MetricIRFM() + metric_fm = digem.MetricIRFM() metric_fm.reference = THE_LAZY_FOX metric_fm.candidate = IR_CANDIDATE_TEXT @@ -302,7 +302,7 @@ def test_ir_metrics_precision_german(): and very nice candidate precision""" # arrange - prec = deme.MetricIRPre(languages=['german']) + prec = digem.MetricIRPre(languages=['german']) prec.reference = IR_REFERENCE_TEXT_GERMAN prec.candidate = IR_CANDIDATE_TEXT_GERMAN @@ -315,7 +315,7 @@ def test_ir_metrics_recall_german(): and very nice candidate recall""" # arrange - rec = deme.MetricIRRec(languages=['german']) + rec = digem.MetricIRRec(languages=['german']) rec.reference = IR_REFERENCE_TEXT_GERMAN rec.candidate = IR_CANDIDATE_TEXT_GERMAN @@ -328,7 +328,7 @@ def test_ir_metrics_precision_german_poor_candidate(): and rather poor candidate""" # arrange - metric_pre = deme.MetricIRPre(languages=['german']) + metric_pre = digem.MetricIRPre(languages=['german']) metric_pre.reference = IR_CANDIDATE_TEXT_GERMAN metric_pre.candidate = IR_REFERENCE_TEXT_GERMAN_POOR @@ -341,7 +341,7 @@ def test_ir_metrics_recall_german_poor_candidate(): and rather poor candidate""" # arrange - metric_rec = deme.MetricIRRec(languages=['german']) + metric_rec = digem.MetricIRRec(languages=['german']) metric_rec.reference = IR_CANDIDATE_TEXT_GERMAN metric_rec.candidate = IR_REFERENCE_TEXT_GERMAN_POOR @@ -367,7 +367,7 @@ def test_metrics_token_based_more_gt_than_tc(): cand = "faule springt Fuchs Hecke".split() # act - m_word = deme.MetricWords() + m_word = digem.MetricWords() m_word._data_reference = gt1 m_word._data_candidate = cand diff --git a/tests/test_ocr_metrics_base.py b/tests/test_ocr_metrics_base.py index 4f86d56..75baad4 100644 --- a/tests/test_ocr_metrics_base.py +++ b/tests/test_ocr_metrics_base.py @@ -5,7 +5,7 @@ import pytest -import digital_eval.metrics as deme +import digital_eval.metrics as digem # default reference THE_COMBINED_A_FOX = 'the á lazy brown fox jumps over the hump' @@ -23,11 +23,11 @@ def test_metric_unicode_normalization_happens(): # arrange raw1 = 'the á lazy brown fox jumps over the hump' raw2 = THE_COMBINED_A_FOX - norm1 = deme.normalize_unicode(raw1, uc_norm_by=deme.UC_NORMALIZATION_NFKD) - norm2 = deme.normalize_unicode(raw2, uc_norm_by=deme.UC_NORMALIZATION_NFKD) + norm1 = digem.normalize_unicode(raw1, uc_norm_by=digem.UC_NORMALIZATION_NFKD) + norm2 = digem.normalize_unicode(raw2, uc_norm_by=digem.UC_NORMALIZATION_NFKD) # act - similarity = deme.levenshtein_norm(norm1, norm2) + similarity = digem.levenshtein_norm(norm1, norm2) assert 1.0 == pytest.approx(similarity, abs=1e-6) # assert @@ -54,14 +54,14 @@ def test_metric_unicode_normalization_not_happens(): # arrange raw1 = THE_LAZY_FOX raw2 = THE_COMBINED_A_FOX - norm1_nfc = deme.normalize_unicode(raw1, uc_norm_by=deme.UC_NORMALIZATION_DEFAULT) - norm1_nfkd = deme.normalize_unicode(raw1, uc_norm_by=deme.UC_NORMALIZATION_NFKD) - norm2_nfc = deme.normalize_unicode(raw2, uc_norm_by=deme.UC_NORMALIZATION_DEFAULT) - norm2_nfkd = deme.normalize_unicode(raw2, uc_norm_by=deme.UC_NORMALIZATION_NFKD) + norm1_nfc = digem.normalize_unicode(raw1, uc_norm_by=digem.UC_NORMALIZATION_DEFAULT) + norm1_nfkd = digem.normalize_unicode(raw1, uc_norm_by=digem.UC_NORMALIZATION_NFKD) + norm2_nfc = digem.normalize_unicode(raw2, uc_norm_by=digem.UC_NORMALIZATION_DEFAULT) + norm2_nfkd = digem.normalize_unicode(raw2, uc_norm_by=digem.UC_NORMALIZATION_NFKD) # act - sim_nfc = 
deme.levenshtein_norm(norm1_nfc, norm2_nfc) - sim_nfkd = deme.levenshtein_norm(norm1_nfkd, norm2_nfkd) + sim_nfc = digem.levenshtein_norm(norm1_nfc, norm2_nfc) + sim_nfkd = digem.levenshtein_norm(norm1_nfkd, norm2_nfkd) # assert assert 0.95 == sim_nfc @@ -72,7 +72,7 @@ def test_metric_calculate_character_edit_distance(): """explore edit-distance""" str1 = 'sthe lazy brown fox jumps overthe hump' str2 = 'fthe lazy brown fox jumps ouer the hump' - distance = deme.levenshtein_norm(str1, str2) + distance = digem.levenshtein_norm(str1, str2) assert 0.923 == pytest.approx(distance, 1e-4) @@ -84,7 +84,7 @@ def test_metric_bot_ident(): random.shuffle(list2) str2 = ' '.join(list2) - similarity = deme.bag_of_tokens(gt1.split(), str2.split()) + similarity = digem.bag_of_tokens(gt1.split(), str2.split()) assert similarity == 1.0 assert len(gt1.split()) == len(str2.split()) @@ -98,7 +98,7 @@ def test_metric_bot_candidate_with_only_repetitions(): str2 = "the dizzy brown fox fox fox jumps" # actsert - assert 0.833 == pytest.approx(deme.bag_of_tokens(gt1.split(), str2.split()), 1e-3) + assert 0.833 == pytest.approx(digem.bag_of_tokens(gt1.split(), str2.split()), 1e-3) def test_metric_bot_miss_tokens(): @@ -108,7 +108,7 @@ def test_metric_bot_miss_tokens(): str2 = "the brown fux jumps" # acsert - assert 0.66 == pytest.approx(deme.bag_of_tokens(gt1.split(), str2.split()), abs=1e-2) + assert 0.66 == pytest.approx(digem.bag_of_tokens(gt1.split(), str2.split()), abs=1e-2) def test_metrics_token_based_more_gt_than_tc(): @@ -129,7 +129,7 @@ def test_metrics_token_based_more_gt_than_tc(): cand = "faule springt Fuchs Hecke".split() # act - result = deme.levenshtein_norm(gt1, cand) + result = digem.levenshtein_norm(gt1, cand) # assert assert 0.2857 == pytest.approx(result, rel=1e-4) @@ -144,7 +144,7 @@ def test_metrics_token_based_equal(): cand = "der fahle Fuchs springt über die Hecke" # act - sim = deme.levenshtein_norm(gt1.split(), cand.split()) + sim = digem.levenshtein_norm(gt1.split(), cand.split()) # assert assert 1.0 == sim @@ -159,7 +159,7 @@ def test_metrics_token_based_no_test_candidate(): gt1 = "ein Dachs springt die Hecke" # act - diff = deme.levenshtein_norm(gt1.split(), [], inverse=True) + diff = digem.levenshtein_norm(gt1.split(), [], inverse=True) # assert assert diff == 1.0
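[editor's note] Since this patch replaces the old `_main(...)` signature with `start_evaluation(parse_args: typing.Dict)`, here is a short sketch of driving the workflow without argparse. The dict keys mirror what `start()` collects and what `tests/test_digital_eval_cli.py` passes; the directory paths below are placeholders, not paths from this repository.

```python
"""Hedged sketch: programmatic use of the refactored evaluation workflow."""
import digital_eval.cli as dig

cli_args = {
    "candidates": "/data/ocr/candidates/domain_a",   # placeholder path
    "reference": "/data/ocr/groundtruth/domain_a",   # placeholder path
    "metrics": dig.DEFAULT_OCR_METRICS,              # e.g. "Cs,Ls"
    "utf8": dig.DEFAULT_UTF8_NORM,                   # e.g. "NFC"
    "verbosity": 0,
    "sequential": True,
}

# prints the summary report to stdout and returns the aggregated results
results = dig.start_evaluation(cli_args)
```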