From e2cb51db7c0ce2fe917e6dd82ed88a0ff5854fae Mon Sep 17 00:00:00 2001 From: "LAPTOP-8CGF3UCA\\Alain Khalil" Date: Fri, 8 Mar 2024 18:46:27 +0100 Subject: [PATCH 1/2] get_encoding_from_headers fails if charset name not specified #6646 --- src/requests/utils.py | 11 +++++++++-- tests/test_utils.py | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/requests/utils.py b/src/requests/utils.py index a603a8638c..d3c06f20b7 100644 --- a/src/requests/utils.py +++ b/src/requests/utils.py @@ -549,7 +549,13 @@ def get_encoding_from_headers(headers): content_type, params = _parse_content_type_header(content_type) if "charset" in params: - return params["charset"].strip("'\"") + charset = params["charset"] + if charset is True: # Check if charset is a boolean value + return "ISO-8859-1" + elif charset is False: # Check if charset is explicitly False + return None + else: + return charset.strip("'\"") if "text" in content_type: return "ISO-8859-1" @@ -557,7 +563,8 @@ def get_encoding_from_headers(headers): if "application/json" in content_type: # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset return "utf-8" - + + return None def stream_decode_response_unicode(iterator, r): """Stream decodes an iterator.""" diff --git a/tests/test_utils.py b/tests/test_utils.py index 8988eaf69c..ad105dc507 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -617,6 +617,8 @@ def test__parse_content_type_header(value, expected): "utf-8", ), (CaseInsensitiveDict({"content-type": "text/plain"}), "ISO-8859-1"), + (CaseInsensitiveDict({"content-type": "text/html; charset"}), "ISO-8859-1"), + (CaseInsensitiveDict({"content-type": "application/json; charset"}), "ISO-8859-1"), ), ) def test_get_encoding_from_headers(value, expected): From c6905d2abc64fbda95653919270dc2a418ae4dd3 Mon Sep 17 00:00:00 2001 From: Alain KHALIL Date: Fri, 8 Mar 2024 21:30:59 +0100 Subject: [PATCH 2/2] I trimmed the trailing whitespaces. --- src/requests/utils.py | 9 ++++++--- tests/test_utils.py | 5 ++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/requests/utils.py b/src/requests/utils.py index d3c06f20b7..44fc70d251 100644 --- a/src/requests/utils.py +++ b/src/requests/utils.py @@ -550,9 +550,11 @@ def get_encoding_from_headers(headers): if "charset" in params: charset = params["charset"] - if charset is True: # Check if charset is a boolean value + # Check if charset is a boolean value + if charset is True: return "ISO-8859-1" - elif charset is False: # Check if charset is explicitly False + # Check if charset is explicitly False + elif charset is False: return None else: return charset.strip("'\"") @@ -563,9 +565,10 @@ def get_encoding_from_headers(headers): if "application/json" in content_type: # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset return "utf-8" - + return None + def stream_decode_response_unicode(iterator, r): """Stream decodes an iterator.""" diff --git a/tests/test_utils.py b/tests/test_utils.py index ad105dc507..4043a1928f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -618,7 +618,10 @@ def test__parse_content_type_header(value, expected): ), (CaseInsensitiveDict({"content-type": "text/plain"}), "ISO-8859-1"), (CaseInsensitiveDict({"content-type": "text/html; charset"}), "ISO-8859-1"), - (CaseInsensitiveDict({"content-type": "application/json; charset"}), "ISO-8859-1"), + ( + CaseInsensitiveDict({"content-type": "application/json; charset"}), + "ISO-8859-1", + ), ), ) def test_get_encoding_from_headers(value, expected):