Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: Consolidated pronom modules into pronom package and created a utils package #227

Open
wants to merge 5 commits into
base: dev/1.8/enhance-tests
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 3 additions & 19 deletions fido/fido.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@
import tempfile
import zipfile
from contextlib import closing
from time import perf_counter
from typing import Optional
from xml.etree import cElementTree as ET

from fido import CONFIG_DIR, __version__
from fido.char_handler import escape
from fido.cli_args import parse_cli_args
from fido.package import OlePackage, ZipPackage
from fido.versions import get_local_versions, sig_file_actions
from fido.pronom.versions import get_local_versions, sig_file_actions
from fido.utils.char_handler import escape
from fido.utils.timer import PerfTimer

defaults = {
"config_dir": CONFIG_DIR,
Expand All @@ -50,22 +50,6 @@
}


class PerfTimer:
    """Utility class that carries out simple process timings.

    The timer is running as soon as the instance is created. Call
    ``start()`` to restart it and ``duration()`` to read the elapsed time.
    """

    def __init__(self) -> None:
        """New instance with start time running."""
        # perf_counter() reading taken when the timer was last (re)started.
        self.start_time: float = perf_counter()

    def start(self) -> None:
        """Start new timer."""
        self.start_time = perf_counter()

    def duration(self) -> float:
        """Return the duration since instantiation or start() was last called.

        The value is the difference between two perf_counter() readings,
        i.e. fractional seconds.
        """
        return perf_counter() - self.start_time


class Fido:
"""Main FIDO application class."""

Expand Down
196 changes: 46 additions & 150 deletions fido/prepare.py → fido/pronom/prepare.py

Large diffs are not rendered by default.

29 changes: 10 additions & 19 deletions fido/update_signatures.py → fido/pronom/update_signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@
from shutil import rmtree
from xml.etree import ElementTree as CET

from . import CONFIG_DIR, __version__
from .prepare import run as prepare_pronom_to_fido
from .pronom.soap import (
# prepare lives at fido/pronom/prepare.py, so import it through the fido
# package, consistent with the other fido.pronom.* imports in this module.
from fido.pronom.prepare import run as prepare_pronom_to_fido

from fido import CONFIG_DIR, __version__
from fido.pronom.soap import (
NS,
get_droid_signatures,
get_pronom_sig_version,
get_sig_xml_for_puid,
)
from .versions import get_local_versions
from fido.pronom.versions import get_local_versions

ABORT_MSG = "Aborting update..."

Expand Down Expand Up @@ -117,9 +118,7 @@ def sig_version_check(version="latest"):
print("Getting latest version number from PRONOM...")
version = get_pronom_sig_version()
if not version:
sys.exit(
"Failed to obtain PRONOM signature file version number, please try again."
)
sys.exit("Failed to obtain PRONOM signature file version number, please try again.")

print("Querying PRONOM for signaturefile version {}.".format(version))
sig_file_name = _sig_file_name(version)
Expand Down Expand Up @@ -159,9 +158,7 @@ def init_sig_download(defaults):
resume = False
if os.path.isdir(tmpdir):
print("Found previously created temporary folder for download:", tmpdir)
resume = query_yes_no(
"Do you want to resume download (yes) or start over (no)?"
)
resume = query_yes_no("Do you want to resume download (yes) or start over (no)?")
if resume:
print("Resuming download...")
else:
Expand All @@ -171,9 +168,7 @@ def init_sig_download(defaults):
except OSError:
pass
if not os.path.isdir(tmpdir):
sys.stderr.write(
"Failed to create temporary folder for PUID's, using: " + tmpdir
)
sys.stderr.write("Failed to create temporary folder for PUID's, using: " + tmpdir)
return tmpdir, resume


Expand All @@ -187,9 +182,7 @@ def download_signatures(defaults, format_eles, resume, tmpdir):
download_sig(format_ele, tmpdir, resume, defaults)
numfiles += 1
print(
r"Downloaded {}/{} files [{}%]".format(
numfiles, puid_count, int(float(numfiles) / one_percent)
),
r"Downloaded {}/{} files [{}%]".format(numfiles, puid_count, int(float(numfiles) / one_percent)),
end="\r",
)
print("100%")
Expand Down Expand Up @@ -258,9 +251,7 @@ def update_versions_xml(version):

def main():
"""Main CLI entrypoint."""
parser = ArgumentParser(
description="Download and convert the latest PRONOM signatures"
)
parser = ArgumentParser(description="Download and convert the latest PRONOM signatures")
parser.add_argument(
"-tmpdir",
default=OPTIONS["tmp_dir"],
Expand Down
File renamed without changes.
11 changes: 4 additions & 7 deletions fido/toxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@
import csv
import sys

from fido.pronom.versions import get_local_versions

from . import __version__
from .versions import get_local_versions


def main():
Expand All @@ -34,9 +35,7 @@ def main():
<versions>
<fido_version>{0}</fido_version>
<signature_version>{1}</signature_version>
</versions>""".format(
__version__, get_local_versions().pronom_version
)
</versions>""".format(__version__, get_local_versions().pronom_version)
)

reader = csv.reader(sys.stdin)
Expand All @@ -54,9 +53,7 @@ def main():
<formatname>{6}</formatname>
<signaturename>{7}</signaturename>
<filesize>{8}</filesize>
</file>""".format(
row[6], row[0], row[8], row[1], row[2], row[7], row[3], row[4], row[5]
)
</file>""".format(row[6], row[0], row[8], row[1], row[2], row[7], row[3], row[4], row[5])
)

sys.stdout.write("\n</fido_output>\n")
Expand Down
Empty file added fido/utils/__init__.py
Empty file.
File renamed without changes.
17 changes: 17 additions & 0 deletions fido/utils/timer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from time import perf_counter


class PerfTimer:
    """Utility class that carries out simple process timings.

    The timer is running as soon as the instance is created. Call
    ``start()`` to restart it and ``duration()`` to read the elapsed time.
    """

    def __init__(self) -> None:
        """New instance with start time running."""
        # perf_counter() reading taken when the timer was last (re)started.
        self.start_time: float = perf_counter()

    def start(self) -> None:
        """Start new timer."""
        self.start_time = perf_counter()

    def duration(self) -> float:
        """Return the duration since instantiation or start() was last called.

        The value is the difference between two perf_counter() readings,
        i.e. fractional seconds.
        """
        return perf_counter() - self.start_time
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ testing = [

[project.scripts]
fido = "fido.fido:main"
fido-prepare = "fido.prepare:main"
fido-prepare = "fido.pronom.prepare:main"
fido-toxml = "fido.toxml:main"

fido-update-signatures = "fido.pronom.update_signatures:run"

[tool.setuptools.package-data]
"fido" = ["*.*", "conf/*.*", "pronom/*.*"]
Expand Down
3 changes: 2 additions & 1 deletion tests/test_fido.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

import pytest

from fido.fido import Fido, PerfTimer
from fido.fido import Fido
from fido.utils.timer import PerfTimer


def test_perf_timer():
Expand Down
16 changes: 8 additions & 8 deletions tests/test_prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from fido.prepare import convert_to_regex
from fido.pronom.prepare import convert_to_regex


def binrep_convert(byt):
Expand Down Expand Up @@ -64,17 +64,17 @@ def test_bitmasks(pronom_bytesequence, matches_predicate):
("pronom_bytesequence", "input_", "matches_bool"),
(
# These are good:
("ab{3}cd(01|02|03)~07ff", "\xAB\xDD\xDD\xDD\xCD\x02\x11\xFF", True),
("ab{3}cd(01|02|03)~07ff", "\xAB\xDD\xDD\xDD\xCD\x03\x11\xFF", True),
("ab{3}cd(01|02|03)~07ff", "\xAB\xDD\xDD\xDD\xCD\x02\xFE\xFF", True),
("ab{3}cd(01|02|03)~07ff", "\xab\xdd\xdd\xdd\xcd\x02\x11\xff", True),
("ab{3}cd(01|02|03)~07ff", "\xab\xdd\xdd\xdd\xcd\x03\x11\xff", True),
("ab{3}cd(01|02|03)~07ff", "\xab\xdd\xdd\xdd\xcd\x02\xfe\xff", True),
# Bad because missing three anythings between AB and CD
("ab{3}cd(01|02|03)~07ff", "\xAB\xDD\xDD\xCD\x02\x11\xFF", False),
("ab{3}cd(01|02|03)~07ff", "\xab\xdd\xdd\xcd\x02\x11\xff", False),
# Bad because not at start of string
("ab{3}cd(01|02|03)~07ff", "\xDA\xAB\xDD\xDD\xDD\xCD\x02\x11\xFF", False),
("ab{3}cd(01|02|03)~07ff", "\xda\xab\xdd\xdd\xdd\xcd\x02\x11\xff", False),
# Bad because 04 is not in (01|02|03)
("ab{3}cd(01|02|03)~07ff", "\xAB\xDD\xDD\xDD\xCD\x04\x11\xFF", False),
("ab{3}cd(01|02|03)~07ff", "\xab\xdd\xdd\xdd\xcd\x04\x11\xff", False),
# Bad because 18 is not in ~07
("ab{3}cd(01|02|03)~07ff", "\xAB\xDD\xDD\xDD\xCD\x02\x18\xFF", False),
("ab{3}cd(01|02|03)~07ff", "\xab\xdd\xdd\xdd\xcd\x02\x18\xff", False),
),
)
def test_heterogenous_sequences(pronom_bytesequence, input_, matches_bool):
Expand Down
Loading