Skip to content

Commit

Permalink
Merge pull request #400 from ror-community/dev
Browse files: browse the repository at this point in the history
merge dev to staging: prevent indexing empty file
  • Loading branch information
lizkrznarich authored Jul 29, 2024
2 parents cd6ac78 + b7b9aff commit 83859c2
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 47 deletions.
29 changes: 17 additions & 12 deletions rorapi/common/csv_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,21 +54,26 @@ def new_record_from_csv(csv_data, version):
if csv_data['names.types.' + v]:
for n in csv_data['names.types.' + v].strip(';').split(';'):
if LANG_DELIMITER in n:
name_val, lang = n.split("*")
if lang:
lang_errors, lang_code = get_lang_code(lang.strip())
if lang_errors:
errors.append("Could not convert language value to ISO code: {}".format(lang))
if n.count(LANG_DELIMITER) == 1:
name_val, lang = n.split("*")
if lang:
lang_errors, lang_code = get_lang_code(lang.strip())
if lang_errors:
errors.append("Could not convert language value to ISO code: {}".format(lang))
else:
name_val = None
lang_code = None
errors.append("Could not parse name value {} in names.types.{} because it contains multiple {} lang delimiter chars.".format(n, v, LANG_DELIMITER))
else:
name_val = n
lang_code = None

name_obj = {
"types": [v],
"value": name_val.strip(),
"lang": lang_code
}
temp_names.append(name_obj)
if name_val:
name_obj = {
"types": [v],
"value": name_val.strip(),
"lang": lang_code
}
temp_names.append(name_obj)
print("temp names 1:")
print(temp_names)
name_vals = [n['value'] for n in temp_names]
Expand Down
23 changes: 13 additions & 10 deletions rorapi/common/csv_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,18 +247,21 @@ def update_record_from_csv(csv_data, version):
"lang": None
}
if LANG_DELIMITER in val:
print("has lang delim")
name_val, lang = val.split("*")
vals_obj["value"] = name_val.strip()
if lang:
lang_errors, lang_code = get_lang_code(lang.strip())
if lang_errors:
errors.append("Could not convert language value to ISO code: {}".format(lang))
else:
vals_obj["lang"] = lang_code
if val.count(LANG_DELIMITER) == 1:
name_val, lang = val.split("*")
vals_obj["value"] = name_val.strip()
if lang:
lang_errors, lang_code = get_lang_code(lang.strip())
if lang_errors:
errors.append("Could not convert language value to ISO code: {}".format(lang))
else:
vals_obj["lang"] = lang_code
else:
errors.append("Could not parse name value {} in names.types.{} because it contains multiple {} lang delimiter chars.".format(val, t, LANG_DELIMITER))
else:
vals_obj["value"] = val.strip()
vals_obj_list.append(vals_obj)
if vals_obj["value"]:
vals_obj_list.append(vals_obj)
actions_values[k] = vals_obj_list
print("updated actions values")
print(actions_values)
Expand Down
12 changes: 9 additions & 3 deletions rorapi/management/commands/getrordump.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_ror_dump_sha(filename, use_test_data, github_headers):
except:
return None

def get_ror_dump_zip(filename, use_test_data, github_headers):
def get_ror_dump_zip(self, filename, use_test_data, github_headers):
sha = get_ror_dump_sha(filename, use_test_data, github_headers)
if sha:
if use_test_data:
Expand All @@ -46,9 +46,14 @@ def get_ror_dump_zip(filename, use_test_data, github_headers):
file_decoded = base64.b64decode(response_json['content'])
with open(filename + '.zip', 'wb') as zip_file:
zip_file.write(file_decoded)
with zipfile.ZipFile(zip_file.name, 'r') as ror_zip:
filenames = ror_zip.namelist()
dir_names = [f for f in filenames if ('json' not in f and 'csv' not in f)]
if dir_names:
raise SystemExit(f"Dump zip has extra directory and cannot be indexed")
return zip_file.name
except:
return None
raise SystemExit(f"Something went wrong saving zip file")

class Command(BaseCommand):
help = 'Downloads a specified ROR data dump from Github'
Expand All @@ -61,4 +66,5 @@ def handle(self, *args, **options):
github_headers = AUTH_HEADERS
else:
github_headers = HEADERS
ror_dump_zip = get_ror_dump_zip(filename, use_test_data, github_headers)
ror_dump_zip = get_ror_dump_zip(self, filename, use_test_data, github_headers)

37 changes: 20 additions & 17 deletions rorapi/management/commands/indexrordump.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,23 +114,26 @@ def handle(self, *args, **options):
for file in unzipped_files:
if file.endswith(".json"):
json_files.append(file)
for json_file in json_files:
index = None
json_path = os.path.join(DATA['WORKING_DIR'], filename, '') + json_file
if 'schema_v2' in json_file and (options['schema']==2 or options['schema'] is None):
self.stdout.write('Loading JSON')
with open(json_path, 'r') as it:
dataset = json.load(it)
self.stdout.write('Indexing ROR dataset ' + json_file)
index = ES_VARS['INDEX_V2']
index_dump(self, json_file, index, dataset)
if 'schema_v2' not in json_file and (options['schema']==1 or options['schema'] is None):
self.stdout.write('Loading JSON')
with open(json_path, 'r') as it:
dataset = json.load(it)
self.stdout.write('Indexing ROR dataset ' + json_file)
index = ES_VARS['INDEX_V1']
index_dump(self, json_file, index, dataset)
if json_files:
for json_file in json_files:
index = None
json_path = os.path.join(DATA['WORKING_DIR'], filename, '') + json_file
if 'schema_v2' in json_file and (options['schema']==2 or options['schema'] is None):
self.stdout.write('Loading JSON')
with open(json_path, 'r') as it:
dataset = json.load(it)
self.stdout.write('Indexing ROR dataset ' + json_file)
index = ES_VARS['INDEX_V2']
index_dump(self, json_file, index, dataset)
if 'schema_v2' not in json_file and (options['schema']==1 or options['schema'] is None):
self.stdout.write('Loading JSON')
with open(json_path, 'r') as it:
dataset = json.load(it)
self.stdout.write('Indexing ROR dataset ' + json_file)
index = ES_VARS['INDEX_V1']
index_dump(self, json_file, index, dataset)
else:
self.stdout.write("ROR data dump does not contain any JSON files")

else:
self.stdout.write("ROR data dump zip file does not exist")
13 changes: 8 additions & 5 deletions rorapi/management/commands/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,14 @@ def handle(self, *args, **options):
sha = get_ror_dump_sha(filename, use_test_data)

if sha:
GetRorDumpCommand().handle(*args, **options)
DeleteIndexCommand().handle(*args, **options)
CreateIndexCommand().handle(*args, **options)
IndexRorDumpCommand().handle(*args, **options)
msg = 'SUCCESS: ROR dataset {} indexed in version {}. Using test repo: {}'.format(filename, str(options['schema']), str(use_test_data))
try:
GetRorDumpCommand().handle(*args, **options)
DeleteIndexCommand().handle(*args, **options)
CreateIndexCommand().handle(*args, **options)
IndexRorDumpCommand().handle(*args, **options)
msg = 'SUCCESS: ROR dataset {} indexed in version {}. Using test repo: {}'.format(filename, str(options['schema']), str(use_test_data))
except:
msg = 'ERROR: Could not index ROR data dump. Check API logs for details.'
else:
msg = 'ERROR: ROR dataset for file {} not found. '.format(filename) \
+'Please generate the data dump first.'
Expand Down

0 comments on commit 83859c2

Please sign in to comment.