From 9be121ad777d528cc599f5236b571b47c7dc0b32 Mon Sep 17 00:00:00 2001
From: Douglas Rioux
Date: Mon, 3 Jun 2024 16:00:29 -0400
Subject: [PATCH] Remove nonfunctional command

---
 .../commands/update_inserts_from_server.py | 167 ------------------
 1 file changed, 167 deletions(-)
 delete mode 100644 src/encoded/commands/update_inserts_from_server.py

diff --git a/src/encoded/commands/update_inserts_from_server.py b/src/encoded/commands/update_inserts_from_server.py
deleted file mode 100644
index f7d0ab10ee..0000000000
--- a/src/encoded/commands/update_inserts_from_server.py
+++ /dev/null
@@ -1,167 +0,0 @@
-import structlog
-import logging
-import argparse
-import json
-from os import walk
-# use ff_utils to find inserts and write data
-from dcicutils.ff_utils import search_metadata, expand_es_metadata, dump_results_to_json
-# use this function to read inserts
-from .run_upgrader_on_inserts import get_inserts
-
-logger = structlog.getLogger(__name__)
-EPILOG = __doc__
-
-
-def read_local_inserts_dir(dir_name, path, target_types=[]):
-    """
-    Given path string path, read local inserts directory and return a
-    dictionary of all inserts keyed by item type, as well as a list of all
-    found uuids
-
-    Args:
-        dir_name (str): string name of the inserts directory
-        path (str): string path to the inserts directory
-        target_types (list): list of item types to load. Empty means all types
-
-    Returns:
-        dict of inserts, list of item uuids
-    """
-    item_types = []
-    item_uuids = []
-    local_inserts = {}
-    # find item types that are represented in the given inserts path
-    for (dirpath, dirnames, filenames) in walk(path):
-        item_types = [it[:-5] for it in filenames if it.endswith('.json')]
-    if target_types:
-        bad_item_types = [it for it in target_types if it not in item_types]
-        if bad_item_types:
-            raise Exception('update_inserts: Specified item type(s) %s are not found in '
-                            'the inserts dir. Found: %s' % (bad_item_types, item_types))
-    # update item_types if user specified specific ones
-    fetch_item_types = target_types if target_types else item_types
-    # load current insert contents from json file
-    for item_type in item_types:
-        local_inserts[item_type] = {}  # key these by uuid for now
-        for it_item in get_inserts(dir_name, item_type):
-            # only fetch items for specified fetch_item_types
-            if item_type in fetch_item_types:
-                item_uuids.append(it_item['uuid'])
-            local_inserts[item_type][it_item['uuid']] = it_item
-    return local_inserts, item_uuids
-
-
-def main():
-    """
-    Use this command to update the inserts from a given fourfront env
-    """
-    logging.basicConfig()
-    # Loading app will have configured from config file. Reconfigure here:
-    logging.getLogger('encoded').setLevel(logging.DEBUG)
-
-    parser = argparse.ArgumentParser(  # noqa - PyCharm wrongly thinks the formatter_class is specified wrong here.
-        description="Update Inserts", epilog=EPILOG,
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-    )
-    parser.add_argument('--env', default='data',
-                        help='FF environment to update from. Defaults to data')
-    parser.add_argument('--dest', default='temp-local-inserts',
-                        help="destination file in inserts dir to write to")
-    parser.add_argument('--item-type', action='append', default=[],
-                        help="item type, e.g. file_fastq. Defaults to all types")
-    parser.add_argument('--ignore-field', action='append', default=["submitted_by", "date_created", "last_modified", "schema_version"],
-                        help='field name to ignore when running expand_es_metadata')
-    parser.add_argument('--from-search', help='query passed to search_metadata to find uuids')
-
-    args = parser.parse_args()
-    # this will work since bin/ commands are run from root FF directory
-    inserts_location = 'src/encoded/tests/data'
-    # hardcode these to prevent accidental creation of inserts files
-    inserts_files = ['inserts', 'master-inserts', 'perf-testing', 'workbook-inserts',
-                     'temp-local-inserts', 'deploy-inserts']
-    if args.dest not in inserts_files:
-        raise Exception('Specified inserts destination %s must be one of: %s'
-                        % (args.dest, inserts_files))
-    inserts_path = '/'.join([inserts_location, args.dest])
-
-    local_inserts, item_uuids = read_local_inserts_dir(args.dest, inserts_path, args.item_type)
-
-    # Used to preserve order of existing inserts in folder(s), if any.
-    local_inserts_ordering_map = {}
-    for item_type, local_inserts_for_type in local_inserts.items():
-        for insrt_index, insrt_uuid in enumerate(local_inserts_for_type):
-            # Duplicate insrt_index between different item types are OK and present.
-            # local_inserts_ordering_map is shallow.
-            local_inserts_ordering_map[insrt_uuid] = insrt_index
-
-    # add uuids from the input search result, if present
-    if args.from_search:
-        use_search = args.from_search
-        # get frame=object search results to keep response small
-        if 'frame=' not in use_search:
-            use_search += '&frame=object'
-        search_res = search_metadata(use_search, ff_env=args.env)
-        search_uuids = [item['uuid'] for item in search_res]
-        logger.info('update_inserts: Will update using %s items from search' % len(search_uuids))
-        item_uuids = list(set(item_uuids + search_uuids))
-
-    # now find uuids and all linked from the given server
-    svr_inserts, svr_uuids = expand_es_metadata(item_uuids, ff_env=args.env,
-                                                store_frame='raw', add_pc_wfr=True,
-                                                ignore_field=args.ignore_field)
-
-    # if we are updating `inserts`, must make sure that items don't conflict
-    # with those in `master-inserts`
-    skip_uuids = set()
-    if args.dest == 'inserts':
-        master_path = '/'.join([inserts_location, 'master-inserts'])
-        master_inserts, master_uuids = read_local_inserts_dir('master-inserts', master_path)
-        item_conflict_report = {}
-        for item_type in svr_inserts:
-            itype_err = []
-            itype_okay = []
-            conflicting_items = [item for item in svr_inserts[item_type] if item['uuid'] in master_uuids]
-            for conflict in conflicting_items:
-                # compare inserts by loading json objects
-                svr_json = json.dumps(conflict, sort_keys=True)
-                mstr_json = json.dumps(master_inserts[item_type][conflict['uuid']], sort_keys=True)
-                if svr_json != mstr_json:
-                    itype_err.append(conflict['uuid'])
-                else:
-                    # the json is the same. Remove from the `inserts` update
-                    skip_uuids.add(conflict['uuid'])
-                    itype_okay.append(conflict['uuid'])
-            item_conflict_report[item_type] = {'error': itype_err, 'okay': itype_okay}
-        if any([it for it in item_conflict_report if item_conflict_report[it]['error']]):
-            error_report = {it: item_conflict_report[it]['error'] for it in item_conflict_report}
-            logger.error('update_inserts: Cannot update the following items in "inserts" directory,'
-                         ' since there are conflicting items with different values'
-                         ' in the master-inserts. Update those first. Conflicts:\n%s' % json.dumps(error_report, indent=4))
-            raise Exception('Cannot load inserts as there are conflicting items in `master-inserts`')
-        elif any([it for it in item_conflict_report if item_conflict_report[it]['okay']]):
-            conflict_report = {it: item_conflict_report[it]['okay'] for it in item_conflict_report}
-            logger.warning('update_inserts: The following items are already in "master-inserts".'
-                           ' Will not add to "inserts". Items:\n%s' % json.dumps(conflict_report, indent=4))
-
-    # now we need to update the server inserts with contents from local inserts
-    # so that existing information is not lost
-    for item_type in svr_inserts:
-        if skip_uuids:
-            # remove items specified by skip uuids
-            svr_inserts[item_type] = [
-                insrt for insrt in svr_inserts[item_type]
-                if insrt['uuid'] not in skip_uuids
-            ]
-        svr_inserts[item_type].sort(key=lambda insrt: local_inserts_ordering_map.get(insrt["uuid"], 99999))
-        for item_uuid in local_inserts.get(item_type, {}):
-            if item_uuid not in svr_uuids and item_uuid not in skip_uuids:
-                svr_inserts[item_type].append(local_inserts[item_type][item_uuid])
-
-    dump_results_to_json(svr_inserts, inserts_path)
-    logger.info('update_inserts: Successfully wrote to %s' % inserts_path)
-    for item_type in svr_inserts:
-        logger.info('update_inserts: Wrote %s items to %s' %
-                    (len(svr_inserts[item_type]), item_type + '.json'))
-
-
-if __name__ == "__main__":
-    main()