Skip to content

Commit

Permalink
Merge pull request #398 from ror-community/dump-index-error-if-extra-dir
Browse files: browse the repository at this point in the history
add error and sys exit if dump zip contains a directory
  • Loading branch information
lizkrznarich authored Jul 25, 2024
2 parents ea1f523 + a0fe3a6 commit 3713bcc
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 20 deletions.
12 changes: 9 additions & 3 deletions rorapi/management/commands/getrordump.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_ror_dump_sha(filename, use_test_data, github_headers):
except:
return None

def get_ror_dump_zip(filename, use_test_data, github_headers):
def get_ror_dump_zip(self, filename, use_test_data, github_headers):
sha = get_ror_dump_sha(filename, use_test_data, github_headers)
if sha:
if use_test_data:
Expand All @@ -46,9 +46,14 @@ def get_ror_dump_zip(filename, use_test_data, github_headers):
file_decoded = base64.b64decode(response_json['content'])
with open(filename + '.zip', 'wb') as zip_file:
zip_file.write(file_decoded)
with zipfile.ZipFile(zip_file.name, 'r') as ror_zip:
filenames = ror_zip.namelist()
dir_names = [f for f in filenames if ('json' not in f and 'csv' not in f)]
if dir_names:
raise SystemExit(f"Dump zip has extra directory and cannot be indexed")
return zip_file.name
except:
return None
raise SystemExit(f"Something went wrong saving zip file")

class Command(BaseCommand):
help = 'Downloads a specified ROR data dump from Github'
Expand All @@ -61,4 +66,5 @@ def handle(self, *args, **options):
github_headers = AUTH_HEADERS
else:
github_headers = HEADERS
ror_dump_zip = get_ror_dump_zip(filename, use_test_data, github_headers)
ror_dump_zip = get_ror_dump_zip(self, filename, use_test_data, github_headers)

37 changes: 20 additions & 17 deletions rorapi/management/commands/indexrordump.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,23 +114,26 @@ def handle(self, *args, **options):
for file in unzipped_files:
if file.endswith(".json"):
json_files.append(file)
for json_file in json_files:
index = None
json_path = os.path.join(DATA['WORKING_DIR'], filename, '') + json_file
if 'schema_v2' in json_file and (options['schema']==2 or options['schema'] is None):
self.stdout.write('Loading JSON')
with open(json_path, 'r') as it:
dataset = json.load(it)
self.stdout.write('Indexing ROR dataset ' + json_file)
index = ES_VARS['INDEX_V2']
index_dump(self, json_file, index, dataset)
if 'schema_v2' not in json_file and (options['schema']==1 or options['schema'] is None):
self.stdout.write('Loading JSON')
with open(json_path, 'r') as it:
dataset = json.load(it)
self.stdout.write('Indexing ROR dataset ' + json_file)
index = ES_VARS['INDEX_V1']
index_dump(self, json_file, index, dataset)
if json_files:
for json_file in json_files:
index = None
json_path = os.path.join(DATA['WORKING_DIR'], filename, '') + json_file
if 'schema_v2' in json_file and (options['schema']==2 or options['schema'] is None):
self.stdout.write('Loading JSON')
with open(json_path, 'r') as it:
dataset = json.load(it)
self.stdout.write('Indexing ROR dataset ' + json_file)
index = ES_VARS['INDEX_V2']
index_dump(self, json_file, index, dataset)
if 'schema_v2' not in json_file and (options['schema']==1 or options['schema'] is None):
self.stdout.write('Loading JSON')
with open(json_path, 'r') as it:
dataset = json.load(it)
self.stdout.write('Indexing ROR dataset ' + json_file)
index = ES_VARS['INDEX_V1']
index_dump(self, json_file, index, dataset)
else:
self.stdout.write("ROR data dump does not contain any JSON files")

else:
self.stdout.write("ROR data dump zip file does not exist")

0 comments on commit 3713bcc

Please sign in to comment.