diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py
index 23451220ca58..1cf74989976a 100644
--- a/lib/galaxy/datatypes/data.py
+++ b/lib/galaxy/datatypes/data.py
@@ -50,6 +50,7 @@
     FILENAME_VALID_CHARS,
     inflector,
     iter_start_of_line,
+    to_content_disposition,
     unicodify,
     UNKNOWN,
 )
@@ -437,7 +438,7 @@ def _serve_raw(
             element_identifier=kwd.get("element_identifier"),
             filename_pattern=kwd.get("filename_pattern"),
         )
-        headers["Content-Disposition"] = f'attachment; filename="{filename}"'
+        headers["Content-Disposition"] = to_content_disposition(filename)
         return open(dataset.get_file_name(), mode="rb"), headers
 
     def to_archive(self, dataset: DatasetProtocol, name: str = "") -> Iterable:
@@ -483,7 +484,7 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
         headers["content-type"] = (
             "application/octet-stream"  # force octet-stream so Safari doesn't append mime extensions to filename
         )
-        headers["Content-Disposition"] = f'attachment; filename="{filename}"'
+        headers["Content-Disposition"] = to_content_disposition(filename)
         return open(data.get_file_name(), "rb"), headers
 
     def _serve_binary_file_contents_as_text(self, trans, data, headers, file_size, max_peek_size):
@@ -660,16 +661,13 @@ def _download_filename(
         element_identifier: Optional[str] = None,
         filename_pattern: Optional[str] = None,
     ) -> str:
-        def escape(raw_identifier):
-            return "".join(c in FILENAME_VALID_CHARS and c or "_" for c in raw_identifier)[0:150]
-
         if not to_ext or to_ext == "data":
             # If a client requests to_ext with the extension 'data', they are
             # deferring to the server, set it based on datatype.
             to_ext = dataset.extension
 
         template_values = {
-            "name": escape(dataset.name),
+            "name": dataset.name,
             "ext": to_ext,
             "hid": dataset.hid,
         }
@@ -682,8 +680,9 @@ def escape(raw_identifier):
 
         if hdca is not None:
             # Use collection context to build up filename.
-            template_values["element_identifier"] = element_identifier
-            template_values["hdca_name"] = escape(hdca.name)
+            if element_identifier is not None:
+                template_values["element_identifier"] = element_identifier
+            template_values["hdca_name"] = hdca.name
             template_values["hdca_hid"] = hdca.hid
 
         return string.Template(filename_pattern).substitute(**template_values)
diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py
index 1b0ec302fb71..d2c8617c08bd 100644
--- a/lib/galaxy/util/__init__.py
+++ b/lib/galaxy/util/__init__.py
@@ -49,6 +49,7 @@
     Union,
 )
 from urllib.parse import (
+    quote,
     urlencode,
     urlparse,
     urlsplit,
@@ -2006,3 +2007,39 @@ def lowercase_alphanum_to_hex(lowercase_alphanum: str) -> str:
     import numpy as np
 
     return np.base_repr(int(lowercase_alphanum, 36), 16).lower()
+
+
+def to_content_disposition(target: str) -> str:
+    """
+    Build an RFC 6266 ``Content-Disposition`` header value offering ``target`` as an attachment.
+
+    The value carries an ASCII-only ``filename`` fallback and a percent-encoded UTF-8
+    ``filename*`` parameter (RFC 5987). The extension is sanitized like the rest of the
+    name, so quotes or control characters in it cannot break out of the header, and
+    truncation never cuts a percent-escape or a multi-byte character in half.
+
+    >>> print(to_content_disposition("report.txt"))
+    attachment; filename="report.txt"; filename*=UTF-8''report.txt
+    >>> print(to_content_disposition("café.txt"))
+    attachment; filename="caf_.txt"; filename*=UTF-8''caf%C3%A9.txt
+    """
+    stem, ext = os.path.splitext(target)
+    unsafe_chars = r'[/\\?%*:|"<>]'
+
+    ascii_ext = "".join(c if c in FILENAME_VALID_CHARS else "_" for c in ext)
+    ascii_limit = max(255 - len(ascii_ext), 0)
+    ascii_stem = "".join(c if c in FILENAME_VALID_CHARS else "_" for c in stem)[:ascii_limit]
+
+    encoded_ext = quote(re.sub(unsafe_chars, "_", ext), safe="")
+    remaining = max(255 - len(encoded_ext), 0)
+    encoded_chars = []
+    # Quote one character at a time so the length limit only ever drops whole characters.
+    for char in re.sub(unsafe_chars, "_", stem):
+        encoded_char = quote(char, safe="")
+        if len(encoded_char) > remaining:
+            break
+        encoded_chars.append(encoded_char)
+        remaining -= len(encoded_char)
+    encoded_stem = "".join(encoded_chars)
+
+    return f"attachment; filename=\"{ascii_stem}{ascii_ext}\"; filename*=UTF-8''{encoded_stem}{encoded_ext}"
diff --git a/lib/galaxy/util/zipstream.py b/lib/galaxy/util/zipstream.py
index 1cd1c77649e7..7b81d1c33f4b 100644
--- a/lib/galaxy/util/zipstream.py
+++ b/lib/galaxy/util/zipstream.py
@@ -11,6 +11,7 @@
 
 import zipstream
 
+from galaxy.util import to_content_disposition
 from .path import safe_walk
 
 CRC32_MIN = 1444
@@ -41,8 +42,7 @@ def response(self) -> Iterator[bytes]:
     def get_headers(self) -> Dict[str, str]:
         headers = {}
         if self.archive_name:
-            archive_name = self.archive_name.encode("latin-1", "replace").decode("latin-1")
-            headers["Content-Disposition"] = f'attachment; filename="{archive_name}.zip"'
+            headers["Content-Disposition"] = to_content_disposition(f"{self.archive_name}.zip")
         if self.upstream_mod_zip:
             headers["X-Archive-Files"] = "zip"
         else:
diff --git a/lib/galaxy_test/api/test_dataset_collections.py b/lib/galaxy_test/api/test_dataset_collections.py
index c9428ad84ffe..d7710c57b2fa 100644
--- a/lib/galaxy_test/api/test_dataset_collections.py
+++ b/lib/galaxy_test/api/test_dataset_collections.py
@@ -1,6 +1,7 @@
 import zipfile
 from io import BytesIO
 from typing import List
+from urllib.parse import quote
 
 from galaxy.util.unittest_utils import skip_if_github_down
 from galaxy_test.base.api_asserts import assert_object_id_error
@@ -189,6 +190,7 @@ def test_download_non_english_characters(self):
         hdca_id = self.dataset_populator.fetch(payload, wait=True).json()["outputs"][0]["id"]
         create_response = self._download_dataset_collection(history_id=history_id, hdca_id=hdca_id)
         self._assert_status_code_is(create_response, 200)
+        assert quote(name, safe="") in create_response.headers["Content-Disposition"]
 
     @requires_new_user
     def test_hda_security(self):
diff --git a/lib/galaxy_test/api/test_datasets.py b/lib/galaxy_test/api/test_datasets.py
index dd9084c8c4f4..87ec35daf517 100644
--- a/lib/galaxy_test/api/test_datasets.py
+++ b/lib/galaxy_test/api/test_datasets.py
@@ -6,6 +6,7 @@
     Dict,
     List,
 )
+from urllib.parse import quote
 
 from galaxy.model.unittest_utils.store_fixtures import (
     deferred_hda_model_store_dict,
@@ -897,3 +898,10 @@ def test_cannot_update_datatype_on_immutable_history(self, history_id):
             response = self._put(f"histories/{history_id}/contents/{hda_id}", data={"datatype": "tabular"}, json=True)
             self._assert_status_code_is(response, 403)
             assert response.json()["err_msg"] == "History is immutable"
+
+    def test_download_non_english_characters(self, history_id):
+        name = "دیتاست"
+        hda = self.dataset_populator.new_dataset(history_id=history_id, name=name, content="data", wait=True)
+        response = self._get(f"histories/{history_id}/contents/{hda['id']}/display?to_ext=json")
+        self._assert_status_code_is(response, 200)
+        assert quote(name, safe="") in response.headers["Content-Disposition"]