Skip to content

Commit

Permalink
task: relation direction logic
Browse files (browse the repository at this point in the history)
  • Loading branch information
Dominick Leppich committed Nov 7, 2024
1 parent 57af637 commit 444c4a0
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 17 deletions.
4 changes: 2 additions & 2 deletions migration/lib/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,14 @@ def insert_record(self, record):
result = self.query(url, record)
return result['id']

def find_record(self, ctx, vocabulary_id, search_term, main_value_only=False):
def find_record(self, ctx, vocabulary_id, search_term, search_field=None):
url = self.urls[RECORD_SEARCH].replace('{{VOCABULARY_ID}}', str(vocabulary_id)).replace('{{SEARCH_TERM}}', search_term)
result = self.query(url, obj=None, method='GET')
if not '_embedded' in result:
raise Exception(f'Record search in vocabulary "{vocabulary_id}" for search term "{search_term}" has no results')
results = result['_embedded']['vocabularyRecordList']
# Filter for exact searches
results = [r for r in results if ctx.record_contains_value(r, search_term, main_value_only=main_value_only)]
results = [r for r in results if ctx.record_contains_value(r, search_term, search_field=search_field)]

if len(results) == 0:
raise Exception(f'Record search in vocabulary "{vocabulary_id}" for search term "{search_term}" has no results')
Expand Down
19 changes: 10 additions & 9 deletions migration/lib/mets_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
RECORD_PATTERN = re.compile('^(\\d+).*$')

class Context:
def __init__(self, api, dry, verbose, continue_on_error, metadata_directory, mapping_file, preferred_mets_main_value_language, manual_id_fix, trust):
def __init__(self, api, dry, verbose, continue_on_error, metadata_directory, mapping_file, preferred_mets_main_value_language, manual_id_fix, trust, enable_relation_vocabulary_column_logic):
self.api = api
self.dry = dry
self.verbose = verbose
Expand All @@ -16,6 +16,7 @@ def __init__(self, api, dry, verbose, continue_on_error, metadata_directory, map
self.preferred_mets_main_value_language = preferred_mets_main_value_language
self.manual_id_fix = manual_id_fix
self.trust = trust
self.enable_relation_vocabulary_column_logic = enable_relation_vocabulary_column_logic
self.vocabulary_name_id_map = {}
self.vocabulary_id_name_map = {}
self.vocabulary_id_map = {}
Expand Down Expand Up @@ -80,18 +81,18 @@ def retrieve_main_field_id(self, schema_id):
self.schema_id_main_field_id_map[schema_id] = main_definitions[0]['id']
return self.schema_id_main_field_id_map[schema_id]

def record_contains_value(self, record, value, main_value_only=False):
main_value_id = None
if main_value_only:
def record_contains_value(self, record, value, search_field=None):
field_id = None
if search_field != None:
vocabulary = self.api.lookup_vocabulary(record['vocabularyId'])
schema = self.api.lookup_schema(vocabulary['schemaId'])
mainIds = [d['id'] for d in schema['definitions'] if d['mainEntry'] == True]
if len(mainIds) != 1:
logging.critical(f'Non unique main entries: {mainIds}!')
ids = [d['id'] for d in schema['definitions'] if d['name'] == search_field]
if len(ids) != 1:
logging.critical(f'Non unique "{search_field}" fields found: {ids}!')
sys.exit(1)
main_value_id = mainIds[0]
field_id = ids[0]
for f in record['fields']:
if main_value_id == None or f['definitionId'] == main_value_id:
if field_id == None or f['definitionId'] == field_id:
for v in f['values']:
for t in v['translations']:
if t['value'] == value:
Expand Down
35 changes: 31 additions & 4 deletions migration/lib/mets_manipulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ def process_vocabulary_reference_by_id(self, node):
def process_vocabulary_reference_by_value(self, node):
try:
vocabulary_name = node.attrib['authority']

if vocabulary_name == 'geonames':
return
vocabulary_id = self.ctx.find_vocabulary_by_name(vocabulary_name)
except Exception as e:
error = f'Unable to retrieve vocabulary by name: {vocabulary_name}\n\t\t{e}'
Expand All @@ -153,10 +156,34 @@ def process_vocabulary_reference_by_value(self, node):

try:
value = node.text
try:
new_record_id = self.ctx.api.find_record(self.ctx, vocabulary_id, value, main_value_only=False)
except:
new_record_id = self.ctx.api.find_record(self.ctx, vocabulary_id, value, main_value_only=True)

search_field=None
if self.ctx.enable_relation_vocabulary_column_logic and 'Relationship' in vocabulary_name:
parent = node.getparent()
if parent == None:
logging.warn(f'No parent found!')
dump_node(node)
return

entity_type = None
for sibling in parent:
if sibling.attrib['name'] == 'RelationEntityType':
entity_type = sibling.text
break

entity_type_in_relation_count = vocabulary_name.count(entity_type)
if entity_type_in_relation_count == 1:
# Find out relation direction
separator_position = vocabulary_name.index('-')
entity_type_position = vocabulary_name.index(entity_type)

# use second column of vocabulary: `Reverse relationship` (The relation vocabulary is specified from `A->B`, the relation references an entity of type `A` and is therefore of type `B`)
if entity_type_position < separator_position:
search_field='Reverse relationship'
else:
search_field='Relationship type'

new_record_id = self.ctx.api.find_record(self.ctx, vocabulary_id, value, search_field=search_field)

# Set all attributes accordingly
node.attrib['authority'] = vocabulary_name
Expand Down
5 changes: 3 additions & 2 deletions migration/metadata-migrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def main():
args.vocabulary_server_port,
args.vocabulary_server_token
)
ctx = Context(api, args.dry, args.verbose, args.continue_on_error, args.metadata_directory, args.mapping_file, args.preferred_mets_main_value_language, args.manual_id_fix, args.trust)
ctx = Context(api, args.dry, args.verbose, args.continue_on_error, args.metadata_directory, args.mapping_file, args.preferred_mets_main_value_language, args.manual_id_fix, args.trust, args.enable_relation_vocabulary_column_logic)

try:
migrator = MetsMigrator(ctx)
Expand All @@ -39,7 +39,8 @@ def parse_args():
parser.add_argument('--vocabulary-server-port', type=str, default='8081', help='vocabulary server port')
parser.add_argument('--vocabulary-server-token', type=str, default=None, help='vocabulary server security token')
parser.add_argument('--preferred-mets-main-value-language', type=str, default='eng', help='Default language to use for mets value writing, if present and prior value invalid')
parser.add_argument('--trust', type=str, default='ID', help='Set the data source to trust for the migration. Possible values are: "ID" and "Value". If "ID" is set, the record ID is parsed from the valueURI and used to find the migrated record. If "Value" is set, the XML elements value is used to find the newly migrated record by value. Defaults to "ID".')
parser.add_argument('--trust', required=False, type=str, default='ID', help='Set the data source to trust for the migration. Possible values are: "ID" and "Value". If "ID" is set, the record ID is parsed from the valueURI and used to find the migrated record. If "Value" is set, the XML elements value is used to find the newly migrated record by value. Defaults to "ID".')
parser.add_argument('--enable-relation-vocabulary-column-logic', required=False, default=False, action='store_const', const=True, help='Activate relationship vocabulary correct column finding logic (reverse vs non-reverse, artist dictionary)')
parser.add_argument('--manual-id-fix', type=str, default=None, help='Manually fix the record ID of elements whose name attribute matches this parameter. Caution, this must not be executed twice!')
parser.add_argument('--log', required=False, default='INFO', help='logger level (possible values are: NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL)')
parser.add_argument('--verbose', required=False, default=False, action='store_const', const=True, help='verbose output')
Expand Down

0 comments on commit 444c4a0

Please sign in to comment.