Skip to content

Commit

Permalink
Merge pull request #418 from maxz/better-bulk-upload-error-messages
Browse files Browse the repository at this point in the history
Output a more helpful error message on malformed bulk upload CSV data.
  • Loading branch information
jjjake authored Jul 27, 2021
2 parents 901a3c6 + e7747a6 commit 7425c1e
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 12 deletions.
11 changes: 9 additions & 2 deletions internetarchive/cli/ia_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@

from internetarchive.cli.argparser import get_args_dict, convert_str_list_to_unicode
from internetarchive.session import ArchiveSession
from internetarchive.utils import validate_s3_identifier, get_s3_xml_text, InvalidIdentifierException
from internetarchive.utils import (InvalidIdentifierException, get_s3_xml_text,
is_valid_metadata_key, validate_s3_identifier)

# Only import backports.csv for Python2 (in support of FreeBSD port).
PY2 = sys.version_info[0] == 2
Expand Down Expand Up @@ -263,6 +264,11 @@ def main(argv, session):
spreadsheet = csv.DictReader(csvfp)
prev_identifier = None
for row in spreadsheet:
for metadata_key in row:
if not is_valid_metadata_key(metadata_key):
print('error: "%s" is not a valid metadata key.' % metadata_key,
file=sys.stderr)
sys.exit(1)
upload_kwargs_copy = deepcopy(upload_kwargs)
if row.get('REMOTE_NAME'):
local_file = {row['REMOTE_NAME']: row['file']}
Expand All @@ -271,7 +277,8 @@ def main(argv, session):
local_file = row['file']
identifier = row.get('item', row.get('identifier'))
if not identifier:
print('error: no identifier column on spreadsheet!')
print('error: no identifier column on spreadsheet.',
file=sys.stderr)
sys.exit(1)
del row['file']
if 'identifier' in row:
Expand Down
11 changes: 11 additions & 0 deletions internetarchive/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,3 +358,14 @@ def delete_items_from_dict(d, to_delete):
for i in d:
delete_items_from_dict(i, to_delete)
return remove_none(d)


def is_valid_metadata_key(name):
# According to the documentation a metadata key
# has to be a valid XML tag name.
#
# The actual allowed tag names (at least as tested with the metadata API),
# are way more restrictive and only allow ".-A-Za-z_", possibly followed
# by an index in square brackets e. g. [0].
# On the other hand the Archive allows tags starting with the string "xml".
return bool(re.fullmatch('[A-Za-z][.\-0-9A-Za-z_]+(?:\[[0-9]+\])?', name))
10 changes: 5 additions & 5 deletions pex-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
requests>=2.9.1,<3.0.0
jsonpatch>=0.4
backports.csv
docopt>=0.6.0,<0.7.0
tqdm>=4.0.0
six>=1.13.0,<2.0.0
jsonpatch>=0.4
requests>=2.9.1,<3.0.0
schema>=0.4.0
backports.csv
setuptools
six>=1.13.0,<2.0.0
tqdm>=4.0.0
10 changes: 5 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@
],
},
install_requires=[
'requests>=2.9.1,<3.0.0',
'jsonpatch>=0.4',
'backports.csv < 1.07;python_version<"3.4"',
'docopt>=0.6.0,<0.7.0',
'tqdm>=4.0.0',
'six>=1.13.0,<2.0.0',
'jsonpatch>=0.4',
'requests>=2.9.1,<3.0.0',
'schema>=0.4.0',
'backports.csv < 1.07;python_version<"3.4"',
'six>=1.13.0,<2.0.0',
'tqdm>=4.0.0',
],
classifiers=[
'Development Status :: 5 - Production/Stable',
Expand Down

0 comments on commit 7425c1e

Please sign in to comment.