Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: Consolidated pronom modules into pronom package and created a utils package #227

Open
wants to merge 5 commits into
base: dev/1.8/enhance-tests
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 5 additions & 20 deletions fido/fido.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,16 @@
import tempfile
import zipfile
from contextlib import closing
from time import perf_counter
from typing import Optional
from xml.etree import cElementTree as ET

from defusedxml import ElementTree as ET

from fido import CONFIG_DIR, __version__
from fido.char_handler import escape
from fido.cli_args import parse_cli_args
from fido.package import OlePackage, ZipPackage
from fido.versions import get_local_versions, sig_file_actions
from fido.pronom.versions import get_local_versions, sig_file_actions
from fido.utils.char_handler import escape
from fido.utils.timer import PerfTimer

defaults = {
"config_dir": CONFIG_DIR,
Expand All @@ -50,22 +51,6 @@
}


class PerfTimer:
    """Minimal stopwatch built on ``time.perf_counter``.

    The reference point is kept in ``start_time``. It is set when the
    instance is created and can be reset by calling ``start()``.
    """

    def __init__(self):
        """Create the timer and record the current counter as its start."""
        self.start_time = perf_counter()

    def start(self):
        """Reset the reference point to the current counter value."""
        now = perf_counter()
        self.start_time = now

    def duration(self):
        """Return the counter delta since creation or the latest ``start()``."""
        elapsed = perf_counter() - self.start_time
        return elapsed


class Fido:
"""Main FIDO application class."""

Expand Down
263 changes: 80 additions & 183 deletions fido/prepare.py → fido/pronom/prepare.py

Large diffs are not rendered by default.

35 changes: 12 additions & 23 deletions fido/pronom/soap.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@

PRONOM format signatures SOAP calls.
"""

import sys
import urllib
import xml.etree.ElementTree as ET
from urllib.error import HTTPError, URLError
from xml.etree import ElementTree as ET

from defusedxml.ElementTree import fromstring

from fido import __version__

Expand Down Expand Up @@ -50,9 +53,7 @@

def get_sig_xml_for_puid(puid):
    """Return the full PRONOM signature XML for the passed PUID.

    The PUID is substituted into
    ``http://www.nationalarchives.gov.uk/pronom/{puid}.xml`` and the
    response body is returned exactly as read from the HTTP response; it is
    neither decoded nor parsed here. Errors raised by ``urllib`` while
    opening or reading the URL are not caught and propagate to the caller.
    """
    req = urllib.request.Request("http://www.nationalarchives.gov.uk/pronom/{}.xml".format(puid))
    # Use the response as a context manager so the underlying connection is
    # closed even if read() raises, instead of leaking it until garbage
    # collection.
    with urllib.request.urlopen(req) as response:
        return response.read()
Expand Down Expand Up @@ -82,16 +83,12 @@ def get_droid_signatures(version):
format_count = False
try:
with urllib.request.urlopen(
"https://www.nationalarchives.gov.uk/documents/DROID_SignatureFile_V{}.xml".format(
version
)
"https://www.nationalarchives.gov.uk/documents/DROID_SignatureFile_V{}.xml".format(version)
) as f:
xml = f.read().decode("utf-8")
root_ele = ET.fromstring(xml)
root_ele = fromstring(xml)
format_count = len(
root_ele.findall(
".//{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat"
)
root_ele.findall(".//{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat")
)
except HTTPError as httpe:
sys.stderr.write(
Expand All @@ -105,27 +102,19 @@ def get_droid_signatures(version):
def _get_soap_ele_tree(soap_action):
    """Build a SOAP envelope for ``soap_action`` and parse the service reply.

    The envelope is assembled from the module constants ``XML_PROC``, ``NS``
    and ``PRONOM_NS``, encoded with ``ENCODING`` and handed to
    ``_get_soap_response`` together with the quoted action string. Every
    prefix in ``NS`` is registered with ``ET`` before the reply is parsed.

    Returns the result of ``fromstring`` applied to the response XML.
    """
    envelope_template = '{}<soap:Envelope xmlns:xsi="{}" xmlns:xsd="{}" xmlns:soap="{}"><soap:Body><{} xmlns="{}" /></soap:Body></soap:Envelope>'
    envelope = envelope_template.format(
        XML_PROC, NS.get("xsi"), NS.get("xsd"), NS.get("soap"), soap_action, PRONOM_NS
    )
    payload = envelope.encode(ENCODING)
    action_header = '"{}:{}In"'.format(PRONOM_NS, soap_action)
    response_xml = _get_soap_response(action_header, payload)
    for ns_prefix, ns_uri in NS.items():
        ET.register_namespace(ns_prefix, ns_uri)
    return fromstring(response_xml)


def _get_soap_response(soap_action, soap_string):
try:
req = urllib.request.Request(
"http://{}/pronom/service.asmx".format(PRONOM_HOST), data=soap_string
)
req = urllib.request.Request("http://{}/pronom/service.asmx".format(PRONOM_HOST), data=soap_string)
except URLError:
print(
"There was a problem contacting the PRONOM service at http://{}/pronom/service.asmx.".format(
PRONOM_HOST
)
)
print("There was a problem contacting the PRONOM service at http://{}/pronom/service.asmx.".format(PRONOM_HOST))
print("Please check your network connection and try again.")
sys.exit(1)
for key, value in HEADERS.items():
Expand Down
31 changes: 11 additions & 20 deletions fido/update_signatures.py → fido/pronom/update_signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,18 @@
import zipfile
from argparse import ArgumentParser
from shutil import rmtree
from xml.etree import ElementTree as CET

from . import CONFIG_DIR, __version__
from .prepare import run as prepare_pronom_to_fido
from .pronom.soap import (
from defusedxml import ElementTree as CET
from pronom.prepare import run as prepare_pronom_to_fido

from fido import CONFIG_DIR, __version__
from fido.pronom.soap import (
NS,
get_droid_signatures,
get_pronom_sig_version,
get_sig_xml_for_puid,
)
from .versions import get_local_versions
from fido.pronom.versions import get_local_versions

ABORT_MSG = "Aborting update..."

Expand Down Expand Up @@ -117,9 +118,7 @@ def sig_version_check(version="latest"):
print("Getting latest version number from PRONOM...")
version = get_pronom_sig_version()
if not version:
sys.exit(
"Failed to obtain PRONOM signature file version number, please try again."
)
sys.exit("Failed to obtain PRONOM signature file version number, please try again.")

print("Querying PRONOM for signaturefile version {}.".format(version))
sig_file_name = _sig_file_name(version)
Expand Down Expand Up @@ -159,9 +158,7 @@ def init_sig_download(defaults):
resume = False
if os.path.isdir(tmpdir):
print("Found previously created temporary folder for download:", tmpdir)
resume = query_yes_no(
"Do you want to resume download (yes) or start over (no)?"
)
resume = query_yes_no("Do you want to resume download (yes) or start over (no)?")
if resume:
print("Resuming download...")
else:
Expand All @@ -171,9 +168,7 @@ def init_sig_download(defaults):
except OSError:
pass
if not os.path.isdir(tmpdir):
sys.stderr.write(
"Failed to create temporary folder for PUID's, using: " + tmpdir
)
sys.stderr.write("Failed to create temporary folder for PUID's, using: " + tmpdir)
return tmpdir, resume


Expand All @@ -187,9 +182,7 @@ def download_signatures(defaults, format_eles, resume, tmpdir):
download_sig(format_ele, tmpdir, resume, defaults)
numfiles += 1
print(
r"Downloaded {}/{} files [{}%]".format(
numfiles, puid_count, int(float(numfiles) / one_percent)
),
r"Downloaded {}/{} files [{}%]".format(numfiles, puid_count, int(float(numfiles) / one_percent)),
end="\r",
)
print("100%")
Expand Down Expand Up @@ -258,9 +251,7 @@ def update_versions_xml(version):

def main():
"""Main CLI entrypoint."""
parser = ArgumentParser(
description="Download and convert the latest PRONOM signatures"
)
parser = ArgumentParser(description="Download and convert the latest PRONOM signatures")
parser.add_argument(
"-tmpdir",
default=OPTIONS["tmp_dir"],
Expand Down
51 changes: 12 additions & 39 deletions fido/versions.py → fido/pronom/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,14 @@
PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
"""


import importlib.resources
import os
import re
import sys
from xml.etree import ElementTree as ET
from xml.etree.ElementTree import ParseError, parse

import requests
from defusedxml import ElementTree as ET
from defusedxml.ElementTree import ParseError, parse

from fido import CONFIG_DIR

Expand Down Expand Up @@ -87,9 +86,7 @@ def __setattr__(self, name, value):

def get_zip_file(self):
    """Return the path of the PRONOM XML zip for the current PRONOM version.

    The file name is ``pronom-xml-v<pronom_version>.zip`` and it is joined
    onto ``self.conf_dir``.
    """
    zip_name = "pronom-xml-v{}.zip".format(self.pronom_version)
    return os.path.join(self.conf_dir, zip_name)

def get_signature_file(self):
"""Obtain location to the current PRONOM signature file."""
Expand All @@ -101,9 +98,7 @@ def write(self):
for key, value in self.PROPS_MAPPING.items():
if self.root.find(value) is None:
raise ValueError("Field {} has not been defined!".format(key))
self.tree.write(
self.versions_file, xml_declaration=True, method="xml", encoding="utf-8"
)
self.tree.write(self.versions_file, xml_declaration=True, method="xml", encoding="utf-8")


def get_local_versions(config_dir=CONFIG_DIR):
Expand Down Expand Up @@ -147,19 +142,11 @@ def _list_available_versions(update_url):
def _check_update_signatures(sig_vers, update_url, versions, is_update=False):
    """Report whether newer signatures are available, then exit.

    ``_version_check`` supplies a flag saying whether a newer version
    exists and the latest version string. A status line is written to
    stdout either way. When a newer version exists and ``is_update`` is
    true, ``_output_details`` is called for the latest version. The
    function always finishes by calling ``sys.exit(0)``.
    """
    newer_available, latest = _version_check(sig_vers, update_url)
    if not newer_available:
        sys.stdout.write("Your signature files are up to date, current version is v{}\n".format(sig_vers))
    else:
        sys.stdout.write("Updated signatures v{} are available, current version is v{}\n".format(latest, sig_vers))
        if is_update:
            _output_details(latest, update_url, versions)
    sys.exit(0)


Expand All @@ -169,33 +156,23 @@ def _download_sig_version(sig_act, update_url, versions):

if not match:
sys.exit(
'{} is not a valid version number, to download a sig file try "-sig v104" or "-sig 104".'.format(
sig_act
)
'{} is not a valid version number, to download a sig file try "-sig v104" or "-sig 104".'.format(sig_act)
)
ver = sig_act
if not ver.startswith("v"):
ver = "v" + sig_act
resp = requests.get(update_url + "format/" + ver + "/")
if resp.status_code != 200:
sys.exit(
"No signature files found for {}, REST status {}".format(
sig_act, resp.status_code
)
)
_output_details(
re.search(r"\d+|$", ver).group(), update_url, versions
) # noqa: W605
sys.exit("No signature files found for {}, REST status {}".format(sig_act, resp.status_code))
_output_details(re.search(r"\d+|$", ver).group(), update_url, versions) # noqa: W605


def _get_version(ver_string):
    """Parse a PRONOM version number from a string.

    Accepts a run of digits with an optional leading ``v`` in either case
    (``"104"``, ``"v104"``, ``"V104"``) and returns just the digits as a
    string.

    Exits through ``sys.exit`` with an explanatory message when the string
    does not have that shape.
    """
    match = re.search(r"^v?(\d+)$", ver_string, re.IGNORECASE)
    if not match:
        sys.exit(
            '{} is not a valid version number, to download a sig file try "-sig v104" or "-sig 104".'.format(ver_string)
        )
    # Return the captured digits rather than slicing on startswith("v"):
    # the pattern is case-insensitive, so an upper-case "V" prefix matches
    # but would otherwise be left in place and break int() in callers.
    return match.group(1)
Expand All @@ -214,18 +191,14 @@ def _output_details(version, update_url, versions):
def _version_check(sig_ver, update_url):
    """Compare ``sig_ver`` with the latest version offered at ``update_url``.

    Requests ``<update_url>format/latest/``, reads the ``version``
    attribute of the root element of the response and normalises it with
    ``_get_version``.

    Returns a ``(is_newer, latest)`` tuple, where ``is_newer`` is true when
    the latest version is numerically greater than ``sig_ver``. Exits
    through ``sys.exit`` with a message when the HTTP status is not 200.
    """
    latest_url = update_url + "format/latest/"
    resp = requests.get(latest_url)
    status = resp.status_code
    if status != 200:
        sys.exit("Error getting latest version info: HTTP Status {}".format(status))
    latest = _get_version(ET.fromstring(resp.text).get("version"))
    is_newer = int(latest) > int(sig_ver)
    return is_newer, latest


def _write_sigs(latest, update_url, type, name_template):
sig_out = str(
importlib.resources.files("fido").joinpath("conf", name_template.format(latest))
)
sig_out = str(importlib.resources.files("fido").joinpath("conf", name_template.format(latest)))
if os.path.exists(sig_out):
return
resp = requests.get(update_url + "format/{0}/{1}/".format(latest, type))
Expand Down
11 changes: 4 additions & 7 deletions fido/toxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@
import csv
import sys

from fido.pronom.versions import get_local_versions

from . import __version__
from .versions import get_local_versions


def main():
Expand All @@ -34,9 +35,7 @@ def main():
<versions>
<fido_version>{0}</fido_version>
<signature_version>{1}</signature_version>
</versions>""".format(
__version__, get_local_versions().pronom_version
)
</versions>""".format(__version__, get_local_versions().pronom_version)
)

reader = csv.reader(sys.stdin)
Expand All @@ -54,9 +53,7 @@ def main():
<formatname>{6}</formatname>
<signaturename>{7}</signaturename>
<filesize>{8}</filesize>
</file>""".format(
row[6], row[0], row[8], row[1], row[2], row[7], row[3], row[4], row[5]
)
</file>""".format(row[6], row[0], row[8], row[1], row[2], row[7], row[3], row[4], row[5])
)

sys.stdout.write("\n</fido_output>\n")
Expand Down
Empty file added fido/utils/__init__.py
Empty file.
File renamed without changes.
17 changes: 17 additions & 0 deletions fido/utils/timer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from time import perf_counter


class PerfTimer:
    """Minimal stopwatch built on ``time.perf_counter``.

    The reference point is kept in ``start_time``. It is set when the
    instance is created and can be reset by calling ``start()``.
    """

    def __init__(self):
        """Create the timer and record the current counter as its start."""
        self.start_time = perf_counter()

    def start(self):
        """Reset the reference point to the current counter value."""
        now = perf_counter()
        self.start_time = now

    def duration(self):
        """Return the counter delta since creation or the latest ``start()``."""
        elapsed = perf_counter() - self.start_time
        return elapsed
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ classifiers = [

dependencies = [
"olefile >= 0.46, < 1",
"requests",
"requests >= 2",
"defusedxml >= 0.7"
]

[project.urls]
Expand All @@ -45,9 +46,9 @@ testing = [

[project.scripts]
fido = "fido.fido:main"
fido-prepare = "fido.prepare:main"
fido-prepare = "fido.pronom.prepare:main"
fido-toxml = "fido.toxml:main"

fido-update-signatures = "fido.pronom.update_signatures:run"

[tool.setuptools.package-data]
"fido" = ["*.*", "conf/*.*", "pronom/*.*"]
Expand Down
Loading
Loading