From 7f70989af1fb18513c5c2723933d571f894d87c9 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Tue, 22 Feb 2022 19:21:42 +0000 Subject: [PATCH] Fix format selection for on-the-fly files --- HISTORY.rst | 1 + docs/source/cli.rst | 2 +- docs/source/quickstart.rst | 4 ++-- internetarchive/item.py | 12 ++++++------ tests/cli/test_ia_download.py | 10 ++++++++++ tests/test_item.py | 14 ++++++++++++++ 6 files changed, 34 insertions(+), 9 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index bf018299..e95a065b 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -33,6 +33,7 @@ Unreleased - Fixed uploading from a spreadsheet with ``--checksum`` crashing on skipped files. - Fixed minor bug in S3 overload check on upload error retries. - Fixed various messages being printed to stdout instead of stderr. +- Fixed format selection for on-the-fly files. 2.3.0 (2022-01-20) ++++++++++++++++++ diff --git a/docs/source/cli.rst b/docs/source/cli.rst index 9b6808d3..247ac724 100644 --- a/docs/source/cli.rst +++ b/docs/source/cli.rst @@ -258,7 +258,7 @@ See ``ia help download`` for more details. Downloading On-The-Fly Files ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Some files on archive.org are generated on-the-fly as requested. This currently includes non-original files of the formats EPUB, MOBI, DAISY, and archive.org's own MARC XML. These files can be downloaded using the ``--on-the-fly`` parameter: +Some files on archive.org are generated on-the-fly as requested. This currently includes non-original files of the formats EPUB, MOBI, DAISY, and archive.org's own MARCXML. These files can be downloaded using the ``--on-the-fly`` parameter: .. code:: bash diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index b1eb4911..67d2baf9 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -204,9 +204,9 @@ Or, a list of formats:: Downloading On-The-Fly Files ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Some files on archive.org are generated on-the-fly as requested. This currently includes non-original files of the formats EPUB, MOBI, DAISY, and archive.org's own MARC XML. These files can be downloaded using the ``on_the_fly`` parameter:: +Some files on archive.org are generated on-the-fly as requested. This currently includes non-original files of the formats EPUB, MOBI, DAISY, and archive.org's own MARCXML. These files can be downloaded using the ``on_the_fly`` parameter:: - >>> download('wonderfulwizardo00baumiala', verbose=True, glob_pattern='*_daisy.zip', on_the_fly=True) + >>> download('wonderfulwizardo00baumiala', verbose=True, formats='DAISY', on_the_fly=True) wonderfulwizardo00baumiala: downloading wonderfulwizardo00baumiala_daisy.zip: 100%|████| 153k/153k [00:00<00:00, 563kiB/s] diff --git a/internetarchive/item.py b/internetarchive/item.py index b1eebd76..80e2255d 100644 --- a/internetarchive/item.py +++ b/internetarchive/item.py @@ -532,13 +532,13 @@ def get_files(self, files=None, formats=None, glob_pattern=None, on_the_fly=None # Add support for on-the-fly files (e.g. EPUB). if on_the_fly: otf_files = [ - f'{self.identifier}.epub', - f'{self.identifier}.mobi', - f'{self.identifier}_daisy.zip', - f'{self.identifier}_archive_marc.xml', + ('EPUB', f'{self.identifier}.epub'), + ('MOBI', f'{self.identifier}.mobi'), + ('DAISY', f'{self.identifier}_daisy.zip'), + ('MARCXML', f'{self.identifier}_archive_marc.xml'), ] - for f in otf_files: - item_files.append({'name': f, 'otf': True}) + for format, f in otf_files: + item_files.append({'name': f, 'format': format, 'otf': True}) if not any(k for k in [files, formats, glob_pattern]): for f in item_files: diff --git a/tests/cli/test_ia_download.py b/tests/cli/test_ia_download.py index cec8c16a..4bdaea57 100644 --- a/tests/cli/test_ia_download.py +++ b/tests/cli/test_ia_download.py @@ -43,6 +43,16 @@ def test_format(tmpdir_ch): assert files_downloaded(path='nasa') == {'nasa_archive.torrent'} +def test_on_the_fly_format(): + i = 'wonderfulwizardo00baumiala' + + stdout, stderr = call_cmd(f'ia --insecure download --dry-run --format="DAISY" {i}') + assert stdout == '' + + stdout, stderr = call_cmd(f'ia --insecure download --dry-run --format="DAISY" --on-the-fly {i}') + assert stdout == f'http://archive.org/download/{i}/{i}_daisy.zip' + + def test_clobber(tmpdir_ch): cmd = 'ia --insecure download nasa nasa_meta.xml' call_cmd(cmd) diff --git a/tests/test_item.py b/tests/test_item.py index 844b703a..a4cf49c7 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -237,6 +237,20 @@ def test_download_dry_run(tmpdir, capsys, nasa_item): assert {x.split('/')[-1] for x in out.split('\n') if x} == expected +def test_download_dry_run_on_the_fly_formats(tmpdir, capsys, nasa_item): + tmpdir.chdir() + with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: + rsps.add(responses.GET, DOWNLOAD_URL_RE, + body='no dest dir', + adding_headers={'content-length': '100'}) + nasa_item.download(formats='MARCXML', on_the_fly=True, dry_run=True) + + expected = {'nasa_archive_marc.xml'} + out, err = capsys.readouterr() + + assert {x.split('/')[-1] for x in out.split('\n') if x} == expected + + def test_download_verbose(tmpdir, capsys, nasa_item): tmpdir.chdir() with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: