Skip to content

Commit

Permalink
[app][feat] introduce record service
Browse files Browse the repository at this point in the history
  • Loading branch information
M3ssman committed Aug 21, 2024
1 parent 8765042 commit 90216e1
Show file tree
Hide file tree
Showing 8 changed files with 568 additions and 43 deletions.
2 changes: 1 addition & 1 deletion src/digiflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,4 @@
validate_tiff,
)

from .common import UNSET_LABEL, XMLNS
from .common import UNSET_LABEL, XMLNS, FallbackLogger
21 changes: 21 additions & 0 deletions src/digiflow/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""common constants"""

import logging
import sys

XMLNS = {
'alto': 'http://www.loc.gov/standards/alto/ns-v4#',
Expand All @@ -23,3 +25,22 @@

UNSET_LABEL = 'n.a.'


class FallbackLogger:
    """Different way to inject logging facilities

    Forwards to the injected logger if one was provided,
    otherwise prints the message to stdout or stderr.
    """

    def __init__(self, some_logger=None):
        # optional logger; if unset, log() falls back to print
        self.logger: logging.Logger = some_logger

    def log(self, message: str, *args, level=logging.INFO):
        """Encapsulate logging

        Args:
            message: text with optional %-style placeholders;
                legacy '{}' placeholders are still accepted
                by the print fallback
            *args: values to merge into the placeholders
            level: numeric logging level, defaults to INFO
        """
        if self.logger:
            self.logger.log(level, message, *args)
            return
        # leave message untouched if there is nothing to merge,
        # so a literal '%s' or '{}' is printed as-is
        if args:
            message = self._merge(message, args)
        # errors and above go to stderr, anything else to stdout
        target = sys.stderr if level >= logging.ERROR else sys.stdout
        print(message, file=target)

    @staticmethod
    def _merge(message, args):
        """Merge args into message without raising on bad placeholders"""
        try:
            # same %-style merging the logging module applies
            return message % args
        except (TypeError, ValueError):
            pass
        try:
            # legacy behavior: treat '%s' and '{}' alike
            return message.replace('%s', '{}').format(*args)
        except (AttributeError, IndexError, KeyError, ValueError):
            # literal braces or mismatched placeholders:
            # rather print unmerged than lose the entry
            return f"{message} {args}"
1 change: 1 addition & 0 deletions src/digiflow/record/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
State,
Datetime,
)
from .record_service import Client, HandlerInformation, run_server
123 changes: 105 additions & 18 deletions src/digiflow/record/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Common record attributes"""

import ast
import time
import typing

import digiflow as df
Expand All @@ -14,6 +16,8 @@
UNSET_LABEL = 'n.a.'

FIELD_IDENTIFIER = 'IDENTIFIER'
FIELD_URN = 'URN'
FIELD_SYSTEM_HANDLE = 'HANDLE'
FIELD_SPEC = 'SETSPEC'
FIELD_DATESTAMP = 'CREATED'
FIELD_INFO = 'INFO'
Expand All @@ -25,6 +29,17 @@
RECORD_HEADER = [FIELD_IDENTIFIER, FIELD_INFO,
FIELD_STATE, FIELD_STATETIME]

DEFAULT_MAPPINGS = {
'identifier': FIELD_IDENTIFIER,
'ext_urn': FIELD_URN,
'system_handle': FIELD_SYSTEM_HANDLE,
'setspec': FIELD_SPEC,
'created_time': FIELD_DATESTAMP,
'info': FIELD_INFO,
'state': FIELD_STATE,
'state_time': FIELD_STATETIME,
}


class RecordDataException(Exception):
"""Mark inconsistent record data,
Expand All @@ -38,9 +53,9 @@ class RecordDataException(Exception):

class Record:
"""
OAIRecord based on valid URN-Identifier with optional set specification data
Record based on valid OAI-URN-Identifier with optional setspec data
based on http://www.openarchives.org/OAI/2.0/guidelines-oai-identifier.htm
and commonly transported via OAI-PMH API
transported via OAI-PMH API or delivered by RecordService
Examples:
Expand All @@ -56,11 +71,13 @@ class Record:
def __init__(self, urn):
self.__urn = urn
self.__local_ident = None
self.ext_urn = UNSET_LABEL
self.system_handle = UNSET_LABEL
self.set = UNSET_LABEL
self.date_stamp = UNSET_LABEL
self.info = UNSET_LABEL
self.state = UNSET_LABEL
self.state_datetime = UNSET_LABEL
self.created_time = UNSET_LABEL
self._info = UNSET_LABEL
self._state = UNSET_LABEL
self.state_time = UNSET_LABEL

@property
def local_identifier(self):
Expand Down Expand Up @@ -89,20 +106,86 @@ def __str__(self) -> str:
the_str = f"{self.__urn}"
if self.set != df.UNSET_LABEL:
the_str = f"{the_str}\t{self.set}"
if self.date_stamp != df.UNSET_LABEL:
the_str = f"{the_str}\t{self.date_stamp}"
if self.info != df.UNSET_LABEL:
the_str = f"{the_str}\t{self.info}"
return f"{the_str}\n{self.state}\t{self.state_datetime}"
if self.created_time != df.UNSET_LABEL:
the_str = f"{the_str}\t{self.created_time}"
if self._info != df.UNSET_LABEL:
the_str = f"{the_str}\t{self._info}"
return f"{the_str}\t{self._state}\t{self.state_time}"

@staticmethod
def create(input_data):
def parse(input_data):
"""De-serialize record from different input forms"""
record = Record(UNSET_LABEL)
if isinstance(input_data, dict):
record = row_to_record(input_data)
return record

def dict(self, dict_map=None) -> typing.Dict:
    """Export Record as plain Python dict,
    e.g. as input for JSON serialization

    Keys of dict_map name Record attributes, its values
    name the fields of the resulting dict. Entries whose
    attribute is not present on the Record are skipped.
    Without dict_map the DEFAULT_MAPPINGS are applied.
    """
    mappings = DEFAULT_MAPPINGS if dict_map is None else dict_map
    return {
        field: getattr(self, label)
        for label, field in mappings.items()
        if hasattr(self, label)
    }

@property
def state(self):
    """Current state label of this Record"""
    return self._state

@state.setter
def state(self, state_label):
    """Store new state label and stamp state_time
    with the current local time, rendered
    according to STATETIME_FORMAT"""
    self._state = state_label
    self.state_time = time.strftime(STATETIME_FORMAT)

@property
def info(self):
    """Get Record Information"""
    return self._info

@info.setter
def info(self, any_value):
    """Update existing Information lazy.
    Assume info consists of at least
    a single dict or several dicts,
    in which case only the last dict
    will be updated.

    String input is parsed as Python literal.
    If parsing or merging fails for any reason,
    the existing information is replaced by the
    new value instead of raising.
    """

    try:
        if any_value == UNSET_LABEL:
            any_value = {}
        if self._info == UNSET_LABEL:
            self._info = {}
        if isinstance(any_value, str):
            any_value = ast.literal_eval(any_value)
        # parse stored information independently of the type
        # of the new value, otherwise a string update onto
        # string information would be dropped silently
        if isinstance(self._info, str):
            self._info = ast.literal_eval(self._info)
        if isinstance(self._info, dict):
            self._info.update(any_value)
        elif isinstance(self._info, tuple):
            self._info[-1].update(any_value)
        else:
            # stored value cannot be merged into: replace it
            self._info = any_value
    except (AttributeError, IndexError, SyntaxError,
            TypeError, ValueError):
        # best effort: un-parseable or un-mergeable data
        # replaces existing information
        self._info = any_value


def row_to_record(row: typing.Dict):
"""Serialize data row to Record with all
Expand All @@ -113,16 +196,20 @@ def row_to_record(row: typing.Dict):
if FIELD_IDENTIFIER not in row:
raise RecordDataException(f"Missing {FIELD_IDENTIFIER} in {row}")
record = Record(row[FIELD_IDENTIFIER])
if FIELD_URN in row and str(row[FIELD_URN]).strip():
record.ext_urn = row[FIELD_URN]
if FIELD_SYSTEM_HANDLE in row and str(row[FIELD_SYSTEM_HANDLE]).strip():
record.system_handle = row[FIELD_SYSTEM_HANDLE]
if FIELD_SPEC in row and str(row[FIELD_SPEC]).strip():
record.set = str(row[FIELD_SPEC]).strip()
if FIELD_DATESTAMP in row and str(row[FIELD_DATESTAMP]).strip():
record.date_stamp = str(row[FIELD_DATESTAMP]).strip()
record.created_time = str(row[FIELD_DATESTAMP]).strip()
if FIELD_INFO in row and str(FIELD_INFO).strip():
record.info = str(row[FIELD_INFO]).strip()
if FIELD_STATE not in row:
raise RecordDataException(f"Missing {FIELD_STATE} in {row}")
record.state = row[FIELD_STATE]
if FIELD_STATETIME not in row:
raise RecordDataException(f"Missing {FIELD_STATETIME} in {row}")
record.state_datetime = row[FIELD_STATETIME]
record.state = UNSET_LABEL
else:
record.state = row[FIELD_STATE]
if FIELD_STATETIME in row:
record.state_time = row[FIELD_STATETIME]
return record
13 changes: 6 additions & 7 deletions src/digiflow/record/record_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import time


import digiflow as df
import digiflow.record as df_r

RECORD_STATE_MASK_FRAME = 'other_load'
Expand Down Expand Up @@ -67,6 +66,7 @@ def __init__(self, data_path, data_fields=None,

@property
def total_len(self):
"""Number of records"""
return len(self.data)

def _build_data(self):
Expand Down Expand Up @@ -118,17 +118,15 @@ def _restore_header(self, first_line):
self.header = _header

def _validate_header(self, data_fields):
"""validate both occurence and order"""
"""validate header fields presence and order"""
if self.header != data_fields:
msg = "invalid fields: '{}', expect: '{}'".format(
self.header, data_fields)
msg = f"invalid fields: '{self.header}', expect: '{data_fields}'"
raise RecordHandlerException(msg)

def next_record(self, state=None):
"""
Get *NEXT* IRecord _from scratch_ with
given state if any exist, raise Exception
otherwise
Get *NEXT* Record with given state
if any exist, otherwise None
"""

if not state:
Expand All @@ -140,6 +138,7 @@ def next_record(self, state=None):
if state == row[self.state_field]:
self.position = f"{(i+1):04d}/{(self.total_len):04d}"
return self.transform_func(row)
return None

def get(self, identifier, exact_match=True):
"""Read data for first Record with
Expand Down
Loading

0 comments on commit 90216e1

Please sign in to comment.