From f8aa36b92d95e6d2f133489f39f9d5c2412f1bd9 Mon Sep 17 00:00:00 2001
From: Nate Prewitt
Date: Mon, 1 Jul 2024 18:08:04 -0700
Subject: [PATCH 1/3] Test on urllib3 1.26.x

---
 .github/workflows/run-tests.yml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index c35af968c4..3a0053327b 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -57,3 +57,23 @@ jobs:
       - name: Run tests
         run: |
           make ci
+
+  urllib3:
+    name: 'urllib3 1.x'
+    runs-on: 'ubuntu-latest'
+    strategy:
+      fail-fast: true
+
+    steps:
+      - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
+      - name: 'Set up Python 3.8'
+        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d
+        with:
+          python-version: '3.8'
+      - name: Install dependencies
+        run: |
+          make
+          python -m pip install "urllib3<2"
+      - name: Run tests
+        run: |
+          make ci

From 4e383642a9ebb82a67eaeed199d0fbbb31991e3c Mon Sep 17 00:00:00 2001
From: Nate Prewitt
Date: Thu, 18 Jul 2024 09:43:49 -0700
Subject: [PATCH 2/3] Add conditional string encoding based on urllib3 major version

---
Review note (not part of the commit message): the version probe also
catches ValueError, because int() raises ValueError -- not TypeError --
when the leading version component is not numeric. Without it the
"prefer old functionality" fallback is skipped and the import fails.

 src/requests/compat.py | 12 ++++++++++++
 src/requests/utils.py  |  5 ++++-
 tests/test_requests.py | 41 ++++++++++++++++++++++++-----------------
 3 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/src/requests/compat.py b/src/requests/compat.py
index 095de1b6ca..4e843c6cf1 100644
--- a/src/requests/compat.py
+++ b/src/requests/compat.py
@@ -10,6 +10,18 @@
 import importlib
 import sys
 
+# -------
+# urllib3
+# -------
+from urllib3 import __version__ as urllib3_version
+
+# Detect which major version of urllib3 is being used.
+try:
+    is_urllib3_2 = int(urllib3_version.split(".")[0]) == 2
+except (TypeError, AttributeError, ValueError):
+    # If we can't discern a version, prefer old functionality.
+    is_urllib3_2 = False
+
 # -------------------
 # Character Detection
 # -------------------
diff --git a/src/requests/utils.py b/src/requests/utils.py
index ae6c42f6cb..be7fc1d2f6 100644
--- a/src/requests/utils.py
+++ b/src/requests/utils.py
@@ -38,6 +38,7 @@
     getproxies,
     getproxies_environment,
     integer_types,
+    is_urllib3_2,
 )
 from .compat import parse_http_list as _parse_list_header
 from .compat import (
@@ -136,7 +137,9 @@ def super_len(o):
     total_length = None
     current_position = 0
 
-    if isinstance(o, str):
+    if is_urllib3_2 and isinstance(o, str):
+        # urllib3 2.x treats all strings as utf-8 instead
+        # of latin-1 (iso-8859-1) like http.client.
         o = o.encode("utf-8")
 
     if hasattr(o, "__len__"):
diff --git a/tests/test_requests.py b/tests/test_requests.py
index b4e9fe92ae..df0d329eaf 100644
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -25,6 +25,7 @@
     builtin_str,
     cookielib,
     getproxies,
+    is_urllib3_2,
     urlparse,
 )
 from requests.cookies import cookiejar_from_dict, morsel_to_cookie
@@ -1810,23 +1811,6 @@ def test_autoset_header_values_are_native(self, httpbin):
 
         assert p.headers["Content-Length"] == length
 
-    def test_content_length_for_bytes_data(self, httpbin):
-        data = "This is a string containing multi-byte UTF-8 ☃️"
-        encoded_data = data.encode("utf-8")
-        length = str(len(encoded_data))
-        req = requests.Request("POST", httpbin("post"), data=encoded_data)
-        p = req.prepare()
-
-        assert p.headers["Content-Length"] == length
-
-    def test_content_length_for_string_data_counts_bytes(self, httpbin):
-        data = "This is a string containing multi-byte UTF-8 ☃️"
-        length = str(len(data.encode("utf-8")))
-        req = requests.Request("POST", httpbin("post"), data=data)
-        p = req.prepare()
-
-        assert p.headers["Content-Length"] == length
-
     def test_nonhttp_schemes_dont_check_URLs(self):
         test_urls = (
             "data:image/gif;base64,R0lGODlhAQABAHAAACH5BAUAAAAALAAAAAABAAEAAAICRAEAOw==",
@@ -2966,6 +2950,29 @@ def response_handler(sock):
         assert client_cert is not None
 
 
+def test_content_length_for_bytes_data(httpbin):
+    data = "This is a string containing multi-byte UTF-8 ☃️"
+    encoded_data = data.encode("utf-8")
+    length = str(len(encoded_data))
+    req = requests.Request("POST", httpbin("post"), data=encoded_data)
+    p = req.prepare()
+
+    assert p.headers["Content-Length"] == length
+
+
+@pytest.mark.skipif(
+    not is_urllib3_2,
+    reason="urllib3 2.x encodes all strings to utf-8, urllib3 1.x uses latin-1",
+)
+def test_content_length_for_string_data_counts_bytes(httpbin):
+    data = "This is a string containing multi-byte UTF-8 ☃️"
+    length = str(len(data.encode("utf-8")))
+    req = requests.Request("POST", httpbin("post"), data=data)
+    p = req.prepare()
+
+    assert p.headers["Content-Length"] == length
+
+
 def test_json_decode_errors_are_serializable_deserializable():
     json_decode_error = requests.exceptions.JSONDecodeError(
         "Extra data",

From 01353d3b4afe3afa838f8f5c08e5f30a5b50cb05 Mon Sep 17 00:00:00 2001
From: Nate Prewitt
Date: Tue, 23 Jul 2024 04:28:03 -0700
Subject: [PATCH 3/3] Invert major version check

---
Review note (not part of the commit message): the `except` context line
below carries the ValueError addition from patch 2/3 so that this hunk
still applies on top of it.

 src/requests/compat.py | 4 ++--
 src/requests/utils.py  | 6 +++---
 tests/test_requests.py | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/requests/compat.py b/src/requests/compat.py
index 4e843c6cf1..7f9d754350 100644
--- a/src/requests/compat.py
+++ b/src/requests/compat.py
@@ -17,10 +17,10 @@
 
 # Detect which major version of urllib3 is being used.
 try:
-    is_urllib3_2 = int(urllib3_version.split(".")[0]) == 2
+    is_urllib3_1 = int(urllib3_version.split(".")[0]) == 1
 except (TypeError, AttributeError, ValueError):
     # If we can't discern a version, prefer old functionality.
-    is_urllib3_2 = False
+    is_urllib3_1 = True
 
 # -------------------
 # Character Detection
diff --git a/src/requests/utils.py b/src/requests/utils.py
index be7fc1d2f6..699683e5d9 100644
--- a/src/requests/utils.py
+++ b/src/requests/utils.py
@@ -38,7 +38,7 @@
     getproxies,
     getproxies_environment,
     integer_types,
-    is_urllib3_2,
+    is_urllib3_1,
 )
 from .compat import parse_http_list as _parse_list_header
 from .compat import (
@@ -137,8 +137,8 @@ def super_len(o):
     total_length = None
     current_position = 0
 
-    if is_urllib3_2 and isinstance(o, str):
-        # urllib3 2.x treats all strings as utf-8 instead
+    if not is_urllib3_1 and isinstance(o, str):
+        # urllib3 2.x+ treats all strings as utf-8 instead
         # of latin-1 (iso-8859-1) like http.client.
         o = o.encode("utf-8")
 
diff --git a/tests/test_requests.py b/tests/test_requests.py
index df0d329eaf..d8fbb23688 100644
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -25,7 +25,7 @@
     builtin_str,
     cookielib,
     getproxies,
-    is_urllib3_2,
+    is_urllib3_1,
     urlparse,
 )
 from requests.cookies import cookiejar_from_dict, morsel_to_cookie
@@ -2961,7 +2961,7 @@ def test_content_length_for_bytes_data(httpbin):
 
 
 @pytest.mark.skipif(
-    not is_urllib3_2,
+    is_urllib3_1,
     reason="urllib3 2.x encodes all strings to utf-8, urllib3 1.x uses latin-1",
 )
 def test_content_length_for_string_data_counts_bytes(httpbin):