diff --git a/internetarchive/cli/ia_upload.py b/internetarchive/cli/ia_upload.py index 26a47f7f..9eab9686 100644 --- a/internetarchive/cli/ia_upload.py +++ b/internetarchive/cli/ia_upload.py @@ -77,7 +77,8 @@ from internetarchive.cli.argparser import get_args_dict, convert_str_list_to_unicode from internetarchive.session import ArchiveSession -from internetarchive.utils import validate_s3_identifier, get_s3_xml_text, InvalidIdentifierException +from internetarchive.utils import (InvalidIdentifierException, get_s3_xml_text, + is_valid_metadata_key, validate_s3_identifier) # Only import backports.csv for Python2 (in support of FreeBSD port). PY2 = sys.version_info[0] == 2 @@ -263,6 +264,11 @@ def main(argv, session): spreadsheet = csv.DictReader(csvfp) prev_identifier = None for row in spreadsheet: + for metadata_key in row: + if not is_valid_metadata_key(metadata_key): + print('error: "%s" is not a valid metadata key.' % metadata_key, + file=sys.stderr) + sys.exit(1) upload_kwargs_copy = deepcopy(upload_kwargs) if row.get('REMOTE_NAME'): local_file = {row['REMOTE_NAME']: row['file']} @@ -271,7 +277,8 @@ def main(argv, session): local_file = row['file'] identifier = row.get('item', row.get('identifier')) if not identifier: - print('error: no identifier column on spreadsheet!') + print('error: no identifier column on spreadsheet.', + file=sys.stderr) sys.exit(1) del row['file'] if 'identifier' in row: diff --git a/internetarchive/utils.py b/internetarchive/utils.py index dc973f02..d0d61deb 100644 --- a/internetarchive/utils.py +++ b/internetarchive/utils.py @@ -358,3 +358,14 @@ def delete_items_from_dict(d, to_delete): for i in d: delete_items_from_dict(i, to_delete) return remove_none(d) + + +def is_valid_metadata_key(name): + # According to the documentation a metadata key + # has to be a valid XML tag name.
+ # + # The actual allowed tag names (at least as tested with the metadata API), + # are way more restrictive: a letter followed by ".-0-9A-Za-z_", possibly followed + # by an index in square brackets e. g. [0]. + # On the other hand the Archive allows tags starting with the string "xml". + return bool(re.match(r'[A-Za-z][.\-0-9A-Za-z_]+(?:\[[0-9]+\])?\Z', name)) diff --git a/pex-requirements.txt b/pex-requirements.txt index 9a6e9089..0679e4c3 100644 --- a/pex-requirements.txt +++ b/pex-requirements.txt @@ -1,8 +1,8 @@ -requests>=2.9.1,<3.0.0 -jsonpatch>=0.4 +backports.csv docopt>=0.6.0,<0.7.0 -tqdm>=4.0.0 -six>=1.13.0,<2.0.0 +jsonpatch>=0.4 +requests>=2.9.1,<3.0.0 schema>=0.4.0 -backports.csv setuptools +six>=1.13.0,<2.0.0 +tqdm>=4.0.0 diff --git a/setup.py b/setup.py index 92a4835c..ddf7f99d 100644 --- a/setup.py +++ b/setup.py @@ -36,13 +36,13 @@ ], }, install_requires=[ - 'requests>=2.9.1,<3.0.0', - 'jsonpatch>=0.4', + 'backports.csv < 1.07;python_version<"3.4"', 'docopt>=0.6.0,<0.7.0', - 'tqdm>=4.0.0', - 'six>=1.13.0,<2.0.0', + 'jsonpatch>=0.4', + 'requests>=2.9.1,<3.0.0', 'schema>=0.4.0', - 'backports.csv < 1.07;python_version<"3.4"', + 'six>=1.13.0,<2.0.0', + 'tqdm>=4.0.0', ], classifiers=[ 'Development Status :: 5 - Production/Stable',