From c2dc4252e117b1bfaa51f317b5b6a15a77c8ab24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sat, 15 Jul 2023 10:55:38 -0400 Subject: [PATCH 01/46] Add rapid testing "watch" invocation. --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index d90dd5b..b125496 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,9 @@ test: develop @clear @pytest +watch: develop + find . -iname \*.py | entr -c pytest --no-header --ff --maxfail=1 + release: ./setup.py sdist bdist_wheel upload ${RELEASE_OPTIONS} @echo -e "\nView online at: https://pypi.python.org/pypi/${PROJECT} or https://pypi.org/project/${PROJECT}/" From 0158960a21bbe3d8785dd23d9e0497ab32fb5f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Wed, 2 Aug 2023 17:56:58 -0400 Subject: [PATCH 02/46] Ignore mypy caches. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2c80500..95ee2c8 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ third-party # Unit Test / Coverage Reports .pytest_cache .cache +.mypy_cache .cagoule.db .coverage .tox From dba872d390666b4c889ee393ea2cb68964cfad2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Wed, 2 Aug 2023 21:03:46 -0400 Subject: [PATCH 03/46] Migrate to modern TOML packaging metadata. 
--- pyproject.toml | 77 ++++++++++++++++++++++++++++++++ setup.py | 116 ------------------------------------------------- 2 files changed, 77 insertions(+), 116 deletions(-) create mode 100644 pyproject.toml delete mode 100755 setup.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3639f10 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,77 @@ +[build-system] +requires = ["setuptools>=61.2", "setuptools_scm[toml]>=6.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "uri" +authors = [ + {name="Alice Bevan-McGregor", email="alice@gothcandy.com"}, + ] +description = "A type to represent, query, and manipulate a Uniform Resource Identifier." +readme = "README.rst" +requires-python = ">=3.8" +keywords = ['type', 'URI', 'URL', 'rfc', 'rfc'] +license = {text='MIT'} +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Utilities" + ] +dependencies = [] # URI has no direct runtime dependencies. +dynamic = ["version"] + +[project.optional-dependencies] +http = ['requests'] # Support for the http:// and https:// protocols. +test = [ + 'pytest', # test collector and extensible runner + 'pytest-cov', # coverage reporting + 'pytest-flakes', # syntax validation + 'pytest-isort', # import ordering + 'webob', # Request WSGI environment mocking. + ] +development = [ # Development-time dependencies. 
+ 'pytest', # test collector and extensible runner + 'pytest-cov', # coverage reporting + 'pytest-flakes', # syntax validation + 'pytest-isort', # import ordering + 'webob', # Request WSGI environment mocking. + 'pre-commit', # Commit hooks for code quality. + 'mypy', # Type hinting analysis. + 'rope', # Project symbols collection. + 'bandit', # Automated security analysis. + 'ptipython', # Enhanced interactive REPL shell. + 'e', # python -me + ] + +[project.entry-points."uri.scheme"] +# https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml +# https://www.w3.org/wiki/UriSchemes +file = "uri.scheme:URLScheme" +ftp = "uri.scheme:URLScheme" +http = "uri.scheme:URLScheme" +https = "uri.scheme:URLScheme" +irc = "uri.scheme:URLScheme" +ldap = "uri.scheme:URLScheme" +telnet = "uri.scheme:URLScheme" +sftp = "uri.scheme:URLScheme" +# Care of https://github.com/APSL/uri/commit/709b4b73daae7b8651b92fd4fa63af41c4db2986 +mysql = "uri.scheme:URLScheme" +redis = "uri.scheme:URLScheme" +# https://docs.mongodb.com/manual/reference/connection-string +mongodb = "uri.scheme:URLScheme" + + +[tool.setuptools_scm] + + diff --git a/setup.py b/setup.py deleted file mode 100755 index 73fb849..0000000 --- a/setup.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - -import os -import sys -import codecs - -try: - from setuptools.core import setup, find_packages -except ImportError: - from setuptools import setup, find_packages - - -if sys.version_info < (3, 6): - raise SystemExit("Python 3.6 or later is required.") - -version = description = url = author = version_info = '' # Actually loaded on the next line; be quiet, linter. -exec(open(os.path.join("uri", "release.py")).read()) - -here = os.path.abspath(os.path.dirname(__file__)) - -tests_require = [ - 'pytest', # test collector and extensible runner - 'pytest-cov', # coverage reporting - 'pytest-flakes', # syntax validation - 'pytest-isort', # import ordering - 'webob', # Request WSGI environment mocking. 
- ] - -trove_map = { - 'plan': "Development Status :: 1 - Planning", - 'alpha': "Development Status :: 3 - Alpha", - 'beta': "Development Status :: 4 - Beta", - 'final': "Development Status :: 5 - Production/Stable", - } - - -# # Entry Point - -setup( - name = "uri", - version = version, - description = description, - long_description = codecs.open(os.path.join(here, 'README.rst'), 'r', 'utf8').read(), - url = url, - author = author.name, - author_email = author.email, - license = 'MIT', - keywords = ['type', 'URI', 'URL', 'rfc', 'rfc'], - classifiers = [ - trove_map[version_info.releaselevel], - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", - "Topic :: Software Development :: Libraries :: Python Modules", - "Topic :: Utilities" - ], - - packages = find_packages(exclude=['test', 'htmlcov']), - include_package_data = True, - package_data = {'': ['README.rst', 'LICENSE.txt']}, - zip_safe = False, - - # ## Dependency Declaration - python_requires = ">=3.6", - - setup_requires = [ - 'pytest-runner', - ] if {'pytest', 'test', 'ptr'}.intersection(sys.argv) else [], - - install_requires = [], # URI has no runtime dependencies. - - extras_require = dict( - http = ['requests'], # Support for the http:// and https:// protocols. - development = tests_require + [ # Development-time dependencies. - 'pre-commit', # Commit hooks for code quality. - 'mypy', # Type hinting analysis. - 'rope', # Project symbols collection. - 'bandit', # Automated security analysis. - 'ptipython', # Enhanced interactive REPL shell. 
- 'e', # python -me - ], - ), - - tests_require = tests_require, - - # ## Plugin Registration - - entry_points = { - 'uri.scheme': [ - # https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml - # https://www.w3.org/wiki/UriSchemes - 'file = uri.scheme:URLScheme', - 'ftp = uri.scheme:URLScheme', - 'http = uri.scheme:URLScheme', - 'https = uri.scheme:URLScheme', - 'irc = uri.scheme:URLScheme', - 'ldap = uri.scheme:URLScheme', - 'telnet = uri.scheme:URLScheme', - 'sftp = uri.scheme:URLScheme', - # Care of https://github.com/APSL/uri/commit/709b4b73daae7b8651b92fd4fa63af41c4db2986 - 'mysql = uri.scheme:URLScheme', - 'redis = uri.scheme:URLScheme', - # https://docs.mongodb.com/manual/reference/connection-string - 'mongodb = uri.scheme:URLScheme', - ], - }, -) From fbbad29116faed2f7f97f56fbc1cd0502609c5fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Fri, 11 Aug 2023 08:46:08 -0400 Subject: [PATCH 04/46] Migrate to TOML project metadata. --- .travis.yml | 52 ----------------------------- pyproject.toml | 89 ++++++++++++++++++++++++++++++++++++++++---------- setup.cfg | 78 ------------------------------------------- 3 files changed, 72 insertions(+), 147 deletions(-) delete mode 100644 .travis.yml delete mode 100644 setup.cfg diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9f96ec6..0000000 --- a/.travis.yml +++ /dev/null @@ -1,52 +0,0 @@ -language: python -sudo: false -cache: pip - -addons: - sonarcloud: - organization: marrow - token: - secure: 
"a4sgEt0pqOFHTysCYOivgMORNrx80tbovuVXFMXfal8fXdJZWynSmw0Ulte+maWOQJ50f9Owo/UAPY0Em36wMeIi96j5ZTtLEkPYcnU12NW823t8Dkd1l5BImqpeE8B9A+hlJLnHx8RCbQ3WA1hxemrS8rrJHzy5Kr1S5c4GbEGKOOHBQ7jYU1dfnB7Gdu/BzY1bmitDkAKEysa/8mQ60JzVxv6/bsiNAr15iv+pwmUwOPni26j3+wSZKY1j5b2mlTqmRH7/aPBVRTDRYEBBGKgGHb0nprho74fKLmT5LCYoDovoSni6P8Pvf6APM0FonomaXTLin8Muhj0DRMBaFmBp0B1azHkXJB6e7K91aHRW1HG9lr8iN2dh+IxyBQY6jT0ajvnmU3eghHtYJtWulVoKaUbvMmoAs9Vl0n4orc4cq/3uma7VURF8UKb+4xy8dB+O9EtbugQ/dINfiNXhrToKK8WzzMHrze4Jvq+PaWjTO99fYeXpku8CHa0h8JG+RaP8HhXNpZS4RLSvAAHjZOSN1jtIvIWGuDeqfXuhkrWtZVDZOAT2eMqU/ziPqOReuhWcwCB0z89QNVIdaakBZ9BFJWOw+9j545FS+GYLJYdyQa61/h+YZblu7uYMjDsaShe0pjM60qOpRIvXrYJb8/HJBZ9IhmaTUY6gcBcTM5s=" - -git: - depth: false - -branches: - except: - - /^[^/]+/.+$/ - -python: - - "pypy3" - - "3.6" - - "3.7" - - "3.8" - - "3.9" - -env: - - '' - - 'OPTIONAL="markupsafe"' - -install: - - 'travis_retry pip install --upgrade setuptools pip' - - 'test -z "${OPTIONAL}" || pip install ${OPTIONAL}' - -script: - python setup.py test - -after_script: - bash <(curl -s https://codecov.io/bash) - -after_success: - - sonar-scanner - -notifications: - irc: - channels: - - 'irc.freenode.org#webcore' - use_notice: true - skip_join: true - on_success: change - on_failure: always - template: - - "%{repository_slug}:%{branch}@%{commit} %{message}" - - "Duration: %{duration} - Details: %{build_url}" diff --git a/pyproject.toml b/pyproject.toml index 3639f10..3c3c91e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,10 +19,10 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: 
Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", @@ -35,43 +35,98 @@ dynamic = ["version"] http = ['requests'] # Support for the http:// and https:// protocols. test = [ 'pytest', # test collector and extensible runner - 'pytest-cov', # coverage reporting + 'pytest-cov[toml]', # coverage reporting 'pytest-flakes', # syntax validation 'pytest-isort', # import ordering 'webob', # Request WSGI environment mocking. + 'requests', # Support for the http:// and https:// protocols. ] development = [ # Development-time dependencies. 'pytest', # test collector and extensible runner - 'pytest-cov', # coverage reporting + 'pytest-cov[toml]', # coverage reporting 'pytest-flakes', # syntax validation 'pytest-isort', # import ordering 'webob', # Request WSGI environment mocking. + 'requests', # Support for the http:// and https:// protocols. 'pre-commit', # Commit hooks for code quality. 'mypy', # Type hinting analysis. 'rope', # Project symbols collection. 'bandit', # Automated security analysis. 'ptipython', # Enhanced interactive REPL shell. 
- 'e', # python -me + 'e', # python3 -me ] [project.entry-points."uri.scheme"] # https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml # https://www.w3.org/wiki/UriSchemes -file = "uri.scheme:URLScheme" -ftp = "uri.scheme:URLScheme" -http = "uri.scheme:URLScheme" -https = "uri.scheme:URLScheme" -irc = "uri.scheme:URLScheme" -ldap = "uri.scheme:URLScheme" -telnet = "uri.scheme:URLScheme" -sftp = "uri.scheme:URLScheme" +file = 'uri.scheme:URLScheme' +ftp = 'uri.scheme:URLScheme' +http = 'uri.scheme:URLScheme' +https = 'uri.scheme:URLScheme' +irc = 'uri.scheme:URLScheme' +ldap = 'uri.scheme:URLScheme' +telnet = 'uri.scheme:URLScheme' +sftp = 'uri.scheme:URLScheme' # Care of https://github.com/APSL/uri/commit/709b4b73daae7b8651b92fd4fa63af41c4db2986 -mysql = "uri.scheme:URLScheme" -redis = "uri.scheme:URLScheme" -# https://docs.mongodb.com/manual/reference/connection-string -mongodb = "uri.scheme:URLScheme" +mysql = 'uri.scheme:URLScheme' +redis = 'uri.scheme:URLScheme' +postgres = 'uri.scheme:URLScheme' +postgresql = 'uri.scheme:URLScheme' +mongodb = 'uri.scheme:URLScheme' # https://docs.mongodb.com/manual/reference/connection-string [tool.setuptools_scm] +[tool.pytest.ini_options] +addopts = '-l -r fEsxw --cov uri --no-cov-on-fail --cov-report term-missing --cov-report xml --flakes --isort --durations=5 --color=yes test' +#addopts = [ +# '-l', '-r', 'fEsxw', +# '--flakes', # PyFlakes formatting. +# '--isort', # Ensure we have properly sorted imports. +# '--cov-report term-missing', # Print report to terminal. +# '--cov-report xml', # But also write to X-Unit XML file. +# '--no-cov-on-fail', # Don't bother reporting if we failed. +# '--cov uri', # Specifically examine coverage for our package. +# '--durations=5', # Also track the slowest tests to execute. +# '--color=yes', # Pretty is nice. 
+# 'test' +# ] + +flakes-ignore = [ + 'test/*.py UnusedImport', + 'test/*/*.py UnusedImport ImportStarUsed', + ] + +filterwarnings = [ + 'default', + 'ignore::DeprecationWarning:isort.*', + ] + + +[tool.coverage.report] +exclude_lines = [ + # Re-enable the standard pragma, since we override it. + 'pragma: no( |-)?cov(er)?', + + # Don't complain if non-runnable code isn't run: + 'if 0:', + 'if False:', + 'if __name__ == .__main__.:', + ] + + +[tool.isort] +line_length = 120 +multi_line_output = 3 +balanced_wrapping = false +include_trailing_comma = true +indent = " " +sections = "FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER" +verbose = true +known_first_party = ["uri"] + + +[tool.black] +line_length = 120 + diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 7c31202..0000000 --- a/setup.cfg +++ /dev/null @@ -1,78 +0,0 @@ -[aliases] -test = pytest - -[check] -metadata = 1 -restructuredtext = 1 - -[clean] -build-base = .packaging/build -bdist-base = .packaging/dist - -[build] -build-base = .packaging/build - -[install] -optimize = 1 - -[bdist] -bdist-base = .packaging/dist -dist-dir = .packaging/release - -[bdist_wheel] -bdist-dir = .packaging/dist -dist-dir = .packaging/release - -[register] -;repository = https://pypi.python.org/pypi -strict = 1 - -[upload] -;repository = https://pypi.python.org/pypi -sign = 1 -;identity = ... 
- -[isort] -line_length = 120 -multi_line_output = 4 -balanced_wrapping = True -indent=' ' -sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER -skip = setup.py -verbose = True -known_first_party = - uri - -[tool:pytest] -addopts = - -l -r fEsxw - --flakes - --cov-report term-missing - --cov-report xml - --no-cov-on-fail - --cov uri - --durations=5 - --color=yes - --isort - test - -flakes-ignore = - test/*.py UnusedImport - test/*/*.py UnusedImport ImportStarUsed - -filterwarnings = - default - ignore::DeprecationWarning:isort.* - -[wheel] -universal = 0 - -[coverage:report] -exclude_lines = - # Re-enable the standard pragma, since we override it. - pragma: no( |-)?cov(er)? - - # Don't complain if non-runnable code isn't run: - if 0: - if False: - if __name__ == .__main__.: From 4dd6b08aea512560609097254a4bdd65dd3fac1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Fri, 11 Aug 2023 08:46:32 -0400 Subject: [PATCH 05/46] Update Python version dependencies for testing. 
--- .github/workflows/pytest.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index ac58ef0..6f24b8c 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: [3.8, 3.9, 3.10, 3.11] steps: - uses: actions/checkout@v2 @@ -25,8 +25,8 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip - python -m pip install -e '.[development]' + python -m pip install -U setuptools pip wheel + python -m pip install -e '.[test]' - name: Test with pytest run: | pytest From 87c5851f7c0858ef2034c1a06d9787c1581950af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Fri, 11 Aug 2023 08:47:01 -0400 Subject: [PATCH 06/46] IRC notification workflow. --- .github/workflows/notify-irc.yml | 38 ++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/notify-irc.yml diff --git a/.github/workflows/notify-irc.yml b/.github/workflows/notify-irc.yml new file mode 100644 index 0000000..42657d1 --- /dev/null +++ b/.github/workflows/notify-irc.yml @@ -0,0 +1,38 @@ +name: "Push Notification" +on: [push, pull_request, create] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: irc push + uses: rectalogic/notify-irc@v1 + if: github.event_name == 'push' + with: + server: "irc.libera.chat" + channel: "#marrow" + nickname: marrow + notice: true + message: | + ${{ github.actor }} pushed ${{ github.event.ref }} ${{ github.event.compare }} + ${{ join(github.event.commits.*.message) }} + - name: irc pull request + uses: rectalogic/notify-irc@v1 + if: github.event_name == 'pull_request' + with: + server: "irc.libera.chat" + channel: "#marrow" + nickname: marrow + notice: true + message: | + ${{ 
github.actor }} opened PR ${{ github.event.html_url }} + - name: irc tag created + uses: rectalogic/notify-irc@v1 + if: github.event_name == 'create' && github.event.ref_type == 'tag' + with: + server: "irc.libera.chat" + channel: "#marrow" + nickname: marrow + notice: true + message: | + ${{ github.actor }} tagged ${{ github.repository }} ${{ github.event.ref }} From e35c3ccd793a4ed082592c747f823636c5ebfdb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Mon, 14 Aug 2023 23:17:49 -0400 Subject: [PATCH 07/46] Simplify. --- .gitignore | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 95ee2c8..1c1f0a2 100644 --- a/.gitignore +++ b/.gitignore @@ -21,9 +21,7 @@ var third-party # Unit Test / Coverage Reports -.pytest_cache -.cache -.mypy_cache +.*cache .cagoule.db .coverage .tox From 9ef336eb7d7df8e5d6bedc40da466b1ee199c4b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Mon, 14 Aug 2023 23:19:22 -0400 Subject: [PATCH 08/46] Remove release module; get metadata from packaging. 
--- uri/release.py | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 uri/release.py diff --git a/uri/release.py b/uri/release.py deleted file mode 100644 index c98a9a7..0000000 --- a/uri/release.py +++ /dev/null @@ -1,19 +0,0 @@ -# encoding: utf-8 - -"""Release information for the uri package.""" - -from __future__ import unicode_literals - -from collections import namedtuple - -level_map = {'plan': '.dev'} - -version_info = namedtuple('version_info', ('major', 'minor', 'micro', 'releaselevel', 'serial'))(3, 0, 0, 'final', 0) -version = ".".join([str(i) for i in version_info[:3]]) + \ - ((level_map.get(version_info.releaselevel, version_info.releaselevel[0]) + \ - str(version_info.serial)) if version_info.releaselevel != 'final' else '') - -author = namedtuple('Author', ['name', 'email'])("Alice Bevan-McGregor", 'alice@gothcandy.com') - -description = "A type to represent, query, and manipulate a Uniform Resource Identifier." -url = 'https://github.com/marrow/uri/' From 7f6e5985899de00673ff00d03ca1d33c319c0e04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Mon, 14 Aug 2023 23:48:35 -0400 Subject: [PATCH 09/46] Additional metadata; bandit and mypy configurations. --- .bandit | 2 +- pyproject.toml | 11 +++++++++++ sonar-project.properties | 4 ---- 3 files changed, 12 insertions(+), 5 deletions(-) delete mode 100644 sonar-project.properties diff --git a/.bandit b/.bandit index 4493ef2..6f6704c 100644 --- a/.bandit +++ b/.bandit @@ -1,2 +1,2 @@ [bandit] -exclude: /env,.eggs,.packaging,.cache +exclude: .*cache diff --git a/pyproject.toml b/pyproject.toml index 3c3c91e..6a59110 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,10 @@ development = [ # Development-time dependencies. 'bandit', # Automated security analysis. 'ptipython', # Enhanced interactive REPL shell. 'e', # python3 -me + 'build', # Python packaging build tool. + 'wheel', # "Wheel" package format support. 
+ 'cibuildwheel', # Build automation. + 'twine', # Python package release tool. ] [project.entry-points."uri.scheme"] @@ -130,3 +134,10 @@ known_first_party = ["uri"] [tool.black] line_length = 120 + +[tool.mypy] +follow_imports = silent +strict_optional = True +warn_no_return = False +check_untyped_defs = True + diff --git a/sonar-project.properties b/sonar-project.properties deleted file mode 100644 index c8949ed..0000000 --- a/sonar-project.properties +++ /dev/null @@ -1,4 +0,0 @@ -sonar.projectKey=uri -sonar.sources=. -sonar.host.url=https://sonarcloud.io -sonar.login=travisci From 700afc7d89b7a20ad30416eb05e98f7e4c30d6ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 15 Aug 2023 00:06:37 -0400 Subject: [PATCH 10/46] Dependencies and mypy configuration disabled. --- pyproject.toml | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6a59110..3c50f30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,30 +31,35 @@ classifiers = [ dependencies = [] # URI has no direct runtime dependencies. dynamic = ["version"] +[project.urls] +Repository = "https://github.com/marrow/uri.git" + [project.optional-dependencies] http = ['requests'] # Support for the http:// and https:// protocols. test = [ - 'pytest', # test collector and extensible runner - 'pytest-cov[toml]', # coverage reporting - 'pytest-flakes', # syntax validation - 'pytest-isort', # import ordering + 'pytest', # Test collector and extensible runner. + 'pytest-cov[toml]', # Coverage reporting. + 'pytest-flakes', # Syntax validation. + 'pytest-isort', # Import ordering. + 'pytest-mypy', # Static type validation. 'webob', # Request WSGI environment mocking. 'requests', # Support for the http:// and https:// protocols. ] development = [ # Development-time dependencies. 
- 'pytest', # test collector and extensible runner - 'pytest-cov[toml]', # coverage reporting - 'pytest-flakes', # syntax validation - 'pytest-isort', # import ordering + 'pytest', # Test collector and extensible runner. + 'pytest-cov[toml]', # Coverage reporting. + 'pytest-flakes', # Syntax validation. + 'pytest-isort', # Import ordering. 'webob', # Request WSGI environment mocking. 'requests', # Support for the http:// and https:// protocols. 'pre-commit', # Commit hooks for code quality. 'mypy', # Type hinting analysis. 'rope', # Project symbols collection. 'bandit', # Automated security analysis. + 'black', # Syntax linting. 'ptipython', # Enhanced interactive REPL shell. - 'e', # python3 -me - 'build', # Python packaging build tool. + 'e', # Try: python3 -me + 'build[virtualenv]', # Python packaging build tool. 'wheel', # "Wheel" package format support. 'cibuildwheel', # Build automation. 'twine', # Python package release tool. @@ -83,7 +88,7 @@ mongodb = 'uri.scheme:URLScheme' # https://docs.mongodb.com/manual/reference/co [tool.pytest.ini_options] -addopts = '-l -r fEsxw --cov uri --no-cov-on-fail --cov-report term-missing --cov-report xml --flakes --isort --durations=5 --color=yes test' +addopts = '-l -r fEsxw --cov uri --mypy --no-cov-on-fail --cov-report term-missing --cov-report xml --flakes --isort --durations=5 --color=yes test' #addopts = [ # '-l', '-r', 'fEsxw', # '--flakes', # PyFlakes formatting. @@ -136,8 +141,8 @@ line_length = 120 [tool.mypy] -follow_imports = silent -strict_optional = True -warn_no_return = False -check_untyped_defs = True +#follow_imports = silent +#strict_optional = True +#warn_no_return = False +#check_untyped_defs = True From 3dee431bb6a4659df95919ff58ac5a01448b41d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 15 Aug 2023 00:06:54 -0400 Subject: [PATCH 11/46] Release module removed, version to be hard-coded. 
--- uri/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uri/__init__.py b/uri/__init__.py index d873484..0ed4e92 100644 --- a/uri/__init__.py +++ b/uri/__init__.py @@ -2,12 +2,12 @@ from pathlib import PurePosixPath as Path -from .release import version as __version__ - from .bucket import Bucket from .qso import QSO from .uri import URI +__version__ = '' + __all__ = [ 'Path', 'Bucket', From 4fe87644358e38455c08524596b419227a21cb23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Wed, 16 Aug 2023 11:59:58 -0400 Subject: [PATCH 12/46] Dynamic package metadata discovery. --- uri/__init__.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/uri/__init__.py b/uri/__init__.py index 0ed4e92..42a7c29 100644 --- a/uri/__init__.py +++ b/uri/__init__.py @@ -1,16 +1,23 @@ """A type to represent, query, and manipulate a Uniform Resource Identifier.""" +from importlib.metadata import metadata as _metadata, PackageNotFoundError as _NotFound from pathlib import PurePosixPath as Path +__all__ = set(locals()) + from .bucket import Bucket from .qso import QSO from .uri import URI -__version__ = '' +try: + _package = _metadata('uri') + __version__ = _package.get('version') + __author__ = _package.get('author-email') + del _package +except _NotFound: + __version__ = 'dev' + __author__ = "Local Development" + +__all__ = set(i for i in locals() if not i.startswith('_')) - __all__ +__license__ = 'MIT' -__all__ = [ - 'Path', - 'Bucket', - 'QSO', - 'URI', - ] # TODO: Re-watch that Python import internals presentation for the semi-auto way to do this. From a0f37a1e24ad1e2aae4b990e33aa6d7378ebf12a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Wed, 16 Aug 2023 12:00:17 -0400 Subject: [PATCH 13/46] Cleanup, add pytest hooks for mypy and black. 
--- pyproject.toml | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3c50f30..83a108e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ test = [ 'pytest-flakes', # Syntax validation. 'pytest-isort', # Import ordering. 'pytest-mypy', # Static type validation. + 'pytest-black', # Syntax linting. 'webob', # Request WSGI environment mocking. 'requests', # Support for the http:// and https:// protocols. ] @@ -50,6 +51,8 @@ development = [ # Development-time dependencies. 'pytest-cov[toml]', # Coverage reporting. 'pytest-flakes', # Syntax validation. 'pytest-isort', # Import ordering. + 'pytest-mypy', # Static type validation. + 'pytest-black', # Syntax linting. 'webob', # Request WSGI environment mocking. 'requests', # Support for the http:// and https:// protocols. 'pre-commit', # Commit hooks for code quality. @@ -89,18 +92,6 @@ mongodb = 'uri.scheme:URLScheme' # https://docs.mongodb.com/manual/reference/co [tool.pytest.ini_options] addopts = '-l -r fEsxw --cov uri --mypy --no-cov-on-fail --cov-report term-missing --cov-report xml --flakes --isort --durations=5 --color=yes test' -#addopts = [ -# '-l', '-r', 'fEsxw', -# '--flakes', # PyFlakes formatting. -# '--isort', # Ensure we have properly sorted imports. -# '--cov-report term-missing', # Print report to terminal. -# '--cov-report xml', # But also write to X-Unit XML file. -# '--no-cov-on-fail', # Don't bother reporting if we failed. -# '--cov uri', # Specifically examine coverage for our package. -# '--durations=5', # Also track the slowest tests to execute. -# '--color=yes', # Pretty is nice. -# 'test' -# ] flakes-ignore = [ 'test/*.py UnusedImport', From 537600dd5d3c71555c8f6738f4467aea5018a146 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Thu, 24 Aug 2023 16:13:26 -0400 Subject: [PATCH 14/46] Binary/Unicode confusion. 
--- uri/bucket.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/uri/bucket.py b/uri/bucket.py index c1c9af1..5f065d6 100644 --- a/uri/bucket.py +++ b/uri/bucket.py @@ -58,5 +58,6 @@ def __len__(self): def __str__(self): # Certain symbols are explicitly allowed, ref: http://pretty-rfc.herokuapp.com/RFC3986#query - iterator = (quote_plus(i.encode('utf8')).replace('%3F', '?').replace('%2F', '/') for i in self) if self.valid else self + iterator = (quote_plus(i.encode('utf8')).replace(b'%3F', b'?').replace(b'%2F', b'/') for i in self) if self.valid else self return self.sep.join(iterator) + From fe1245dd389e6f5a989e647d6c903ab43a100cb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Thu, 24 Aug 2023 16:32:16 -0400 Subject: [PATCH 15/46] Minor packaging updates, and a set_trace for stringification of buckets. --- Makefile | 3 ++- pyproject.toml | 4 +++- uri/bucket.py | 1 + uri/part/base.py | 2 +- uri/uri.py | 2 +- 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index b125496..2cca26a 100644 --- a/Makefile +++ b/Makefile @@ -28,6 +28,7 @@ release: @echo -e "\nView online at: https://pypi.python.org/pypi/${PROJECT} or https://pypi.org/project/${PROJECT}/" @echo -e "Remember to make a release announcement and upload contents of .packaging/release/ folder as a Release on GitHub.\n" -${PROJECT}.egg-info/PKG-INFO: setup.py setup.cfg uri/release.py +${PROJECT}.egg-info/PKG-INFO: pyproject.toml @mkdir -p ${VIRTUAL_ENV}/lib/pip-cache pip install --cache-dir "${VIRTUAL_ENV}/lib/pip-cache" -Ue ".[${USE}]" + diff --git a/pyproject.toml b/pyproject.toml index 3c50f30..8d7efde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ development = [ # Development-time dependencies. 'pytest-cov[toml]', # Coverage reporting. 'pytest-flakes', # Syntax validation. 'pytest-isort', # Import ordering. + 'pytest-mypy', # Static type validation. 
'webob', # Request WSGI environment mocking. 'requests', # Support for the http:// and https:// protocols. 'pre-commit', # Commit hooks for code quality. @@ -88,7 +89,8 @@ mongodb = 'uri.scheme:URLScheme' # https://docs.mongodb.com/manual/reference/co [tool.pytest.ini_options] -addopts = '-l -r fEsxw --cov uri --mypy --no-cov-on-fail --cov-report term-missing --cov-report xml --flakes --isort --durations=5 --color=yes test' +addopts = '-l -r fEsxw --cov uri --no-cov-on-fail --cov-report term-missing --cov-report xml --flakes --isort --durations=5 --color=yes test' +#addopts = '-l -r fEsxw --cov uri --mypy --no-cov-on-fail --cov-report term-missing --cov-report xml --flakes --isort --durations=5 --color=yes test' #addopts = [ # '-l', '-r', 'fEsxw', # '--flakes', # PyFlakes formatting. diff --git a/uri/bucket.py b/uri/bucket.py index 5f065d6..7322020 100644 --- a/uri/bucket.py +++ b/uri/bucket.py @@ -58,6 +58,7 @@ def __len__(self): def __str__(self): # Certain symbols are explicitly allowed, ref: http://pretty-rfc.herokuapp.com/RFC3986#query + # XXX: __import__('wdb').set_trace() iterator = (quote_plus(i.encode('utf8')).replace(b'%3F', b'?').replace(b'%2F', b'/') for i in self) if self.valid else self return self.sep.join(iterator) diff --git a/uri/part/base.py b/uri/part/base.py index 2c1df31..1db2a07 100644 --- a/uri/part/base.py +++ b/uri/part/base.py @@ -3,7 +3,7 @@ class Part: - """Descriptor protocol objects for combantorial string parts with validation.""" + """Descriptor protocol objects for combinatorial string parts with validation.""" __slots__ = () diff --git a/uri/uri.py b/uri/uri.py index 59afcf6..2eed9ab 100644 --- a/uri/uri.py +++ b/uri/uri.py @@ -54,7 +54,7 @@ class URI: # Additional Compound Interfaces uri = URIPart(__parts__) # Whole-URI retrieval or storage as string. - safe_uri = URIPart(__safe_parts__, False) # URI retrieval without password component, useful for logging. 
+ safe = safe_uri = URIPart(__safe_parts__, False) # URI retrieval without password component, useful for logging. base = BasePart() summary = URIPart(('host', 'path'), False) resource = URIPart(('path', 'query', 'fragment'), False) From bd88cec3e50dd26c4e6eb8ed23b4423888ee8538 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sun, 10 Dec 2023 10:43:47 -0500 Subject: [PATCH 16/46] Updated metadata dependencies and watch command. --- Makefile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index b125496..48e4cc6 100644 --- a/Makefile +++ b/Makefile @@ -21,13 +21,14 @@ test: develop @pytest watch: develop - find . -iname \*.py | entr -c pytest --no-header --ff --maxfail=1 + @clear + @find . -iname \*.py | entr -c pytest --no-header --ff --maxfail=1 -release: - ./setup.py sdist bdist_wheel upload ${RELEASE_OPTIONS} - @echo -e "\nView online at: https://pypi.python.org/pypi/${PROJECT} or https://pypi.org/project/${PROJECT}/" - @echo -e "Remember to make a release announcement and upload contents of .packaging/release/ folder as a Release on GitHub.\n" +#release: +# ./setup.py sdist bdist_wheel upload ${RELEASE_OPTIONS} +# @echo -e "\nView online at: https://pypi.python.org/pypi/${PROJECT} or https://pypi.org/project/${PROJECT}/" +# @echo -e "Remember to make a release announcement and upload contents of .packaging/release/ folder as a Release on GitHub.\n" -${PROJECT}.egg-info/PKG-INFO: setup.py setup.cfg uri/release.py +${PROJECT}.egg-info/PKG-INFO: pyproject.toml @mkdir -p ${VIRTUAL_ENV}/lib/pip-cache pip install --cache-dir "${VIRTUAL_ENV}/lib/pip-cache" -Ue ".[${USE}]" From a58ef171f77ef2f36ee330bf16576f4b9d49adbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sun, 10 Dec 2023 10:44:24 -0500 Subject: [PATCH 17/46] Updated metadata. 
--- pyproject.toml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 83a108e..c089a25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ test = [ 'pytest-isort', # Import ordering. 'pytest-mypy', # Static type validation. 'pytest-black', # Syntax linting. + 'types-setuptools', # Typing stubs. 'webob', # Request WSGI environment mocking. 'requests', # Support for the http:// and https:// protocols. ] @@ -53,6 +54,7 @@ development = [ # Development-time dependencies. 'pytest-isort', # Import ordering. 'pytest-mypy', # Static type validation. 'pytest-black', # Syntax linting. + 'types-setuptools', # Typing stubs. 'webob', # Request WSGI environment mocking. 'requests', # Support for the http:// and https:// protocols. 'pre-commit', # Commit hooks for code quality. @@ -132,8 +134,13 @@ line_length = 120 [tool.mypy] -#follow_imports = silent -#strict_optional = True -#warn_no_return = False -#check_untyped_defs = True +follow_imports = "silent" +strict_optional = true +warn_no_return = false +#check_untyped_defs = true +#ignore_missing_imports = true + +# required to support namespace packages +# https://github.com/python/mypy/issues/14057 +explicit_package_bases = true From 8117a37c171ae4ab4286e3015ffc6e8f5cfe134d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sun, 10 Dec 2023 10:45:10 -0500 Subject: [PATCH 18/46] Added useful programmer's representation. 
--- uri/scheme.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/uri/scheme.py b/uri/scheme.py index a336fee..b3d7841 100644 --- a/uri/scheme.py +++ b/uri/scheme.py @@ -25,6 +25,9 @@ def __bytes__(self): def __str__(self): return self.name + def __repr__(self): + return f"{self.__class__.__name__}('{self.name}')" + def is_relative(self, uri): return False From 1c4ea8cae740344303ba68050c57282f351e8e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sun, 10 Dec 2023 10:45:32 -0500 Subject: [PATCH 19/46] Annotations and validation. --- uri/part/scheme.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/uri/part/scheme.py b/uri/part/scheme.py index e084051..f7943bf 100644 --- a/uri/part/scheme.py +++ b/uri/part/scheme.py @@ -1,33 +1,32 @@ -from pkg_resources import iter_entry_points -from re import compile as r +from importlib.metadata import entry_points +from typing import ClassVar, Dict, Optional +from re import compile as r, Pattern from .base import Part from ..scheme import Scheme class SchemePart(Part): - __slots__ = () + __slots__: tuple = () # Do not populate a __dict__ dictionary attribute; only allocate space for these. - valid = r(r'[a-z][a-z0-9+.+-]*') - suffix = ':' - registry = {'': None} - empty = '' + registry: ClassVar[Dict[str, Optional[Scheme]]] = {'': None} + suffix: str = ':' # Protocol suffix when utilized as part of a complete URI; e.g. ':' or '://'. + valid: Pattern = r(r'[a-z][a-z0-9+.+-]*') # Protocol/scheme name validated when run unoptimized. - def load(self, plugin): - if plugin in self.registry: - return self.registry[plugin] + def load(self, plugin:str) -> Scheme: + assert self.valid.match(plugin), f"Invalid plugin name: {plugin!r}" + if plugin in self.registry: return self.registry[plugin] # Short circuit if we've seen this before. 
- try: - result, = iter_entry_points('uri.scheme', plugin) - result = result.load()(plugin) - except: - result = Scheme(plugin) + # If we haven't, attempt to load the explicit Scheme subclass to utilize for this named scheme. + try: result = entry_points(group='uri.scheme')[plugin].load() + except KeyError: result = Scheme(plugin) # Can't look up by registered name? It's generic. + else: result = result(plugin) # Otherwise, instantiate the subclass, informing it of its name. - self.registry[plugin] = result + self.registry[plugin] = result # Record the instance in a local registry / cache. return result - def render(self, obj, value): + def render(self, obj, value) -> str: result = super(SchemePart, self).render(obj, value) if obj._scheme and obj.scheme.slashed: From e4a7af812546d4ddec6156a3b48aee6792c1a76a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sun, 10 Dec 2023 10:56:07 -0500 Subject: [PATCH 20/46] Add virtual environment to Bandit exclusions. --- .bandit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bandit b/.bandit index 6f6704c..78a63b9 100644 --- a/.bandit +++ b/.bandit @@ -1,2 +1,2 @@ [bandit] -exclude: .*cache +exclude: .*cache,.venv From 0e463d36ac8ce35832eae91c900d8d361bff079d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sun, 10 Dec 2023 10:56:22 -0500 Subject: [PATCH 21/46] Exclude hidden local virtual environment. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1c1f0a2..1d4c98c 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ dist # Virtual Environment Pseudo-Chroot +.venv bin include lib From 0a5698f7a77a35315f06a163e5cadc3c1c9b848c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sun, 10 Dec 2023 10:56:32 -0500 Subject: [PATCH 22/46] Copyright year bump. 
--- LICENSE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE.txt b/LICENSE.txt index 81e4e23..3010b45 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright © 2017-2018 Alice Bevan-McGregor and contributors. +Copyright © 2017-2023 Alice Bevan-McGregor and contributors. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: From d6386822a319b04414094d87726ff314a7f4f251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sun, 10 Dec 2023 10:56:42 -0500 Subject: [PATCH 23/46] Simplify test output. --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index bb4b715..76a5b6c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ PROJECT = uri USE = development -.PHONY: all develop clean veryclean test release +.PHONY: all develop clean veryclean test watch release all: clean develop test @@ -18,7 +18,7 @@ veryclean: clean test: develop @clear - @pytest + @pytest --no-header --no-summary watch: develop @clear From 78c2fb681e5584c855c99781a561ee433e6b858b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sun, 10 Dec 2023 10:57:08 -0500 Subject: [PATCH 24/46] Copyright year bump, significant additions including migration instructions. --- README.rst | 271 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 245 insertions(+), 26 deletions(-) diff --git a/README.rst b/README.rst index cf82d92..da9e511 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ uri === - © 2017-2021 Alice Bevan-McGregor and contributors. 
+ © 2017-2023 Alice Bevan-McGregor and contributors. .. @@ -28,12 +28,13 @@ robust as `Vagrant `__. If you add ``uri`` to the ``install_requires`` argument of the call to ``setup()`` in your application's ``setup.py`` file, ``uri`` will be automatically installed and made available when your own application or library is installed. We recommend using "less than" version numbers to ensure there are no unintentional -side-effects when updating. Use ``uri<2.1`` to get all bugfixes for the current release, and -``uri<3.0`` to get bugfixes and feature updates while ensuring that large breaking changes are not installed. +side-effects when updating. Use ``uri<2.1`` to get all bug fixes for the current release, and +``uri<3.0`` to get bug fixes and feature updates while ensuring that large breaking changes are not installed. -While uri does not have any hard dependencies on any other package, it is **strongly** recommended that applications -using uri in web-based applications also install the ``markupsafe`` package to provide more efficient string escaping and -some additional functionality. +While ``uri`` does not have any hard dependencies on any other package, it is **strongly** recommended that +applications using ``uri`` in web-based applications also install the +`MarkupSafe `__ package to provide more efficient string escaping and some +additional functionality. Development Version @@ -41,13 +42,13 @@ Development Version |developstatus| |developcover| |ghsince| |issuecount| |ghfork| -Development takes place on `GitHub `__ in the -`uri `__ project. Issue tracking, documentation, and downloads +Development takes place on `GitHub `__ in the `uri +`__ project. Issue tracking, documentation, downloads, and test automation are provided there. -Installing the current development version requires `Git `__, a distributed source code management -system. 
If you have Git you can run the following to download and *link* the development version into your Python -runtime:: +Installing the current development version requires `Git `__, a distributed source code +management system. If you have Git you can run the following to download and *link* the development version into your +Python runtime:: git clone https://github.com/marrow/uri.git (cd uri; python setup.py develop) @@ -75,7 +76,7 @@ defined by `RFC 3986 `_:: For details on these components, `please refer to Wikipedia `__. Each of these components is represented by an -appropraite rich datatype: +appropriate rich datatype: * The ``scheme`` of a URI represents an extensible API of string-like plugins. * Any IPv6 ``host`` is automatically wrapped and unwrapped in square braces. @@ -87,7 +88,7 @@ exposing a ``__link__`` method or attribute:: home = URI("https://github.com/marrow/") -The *scalar* attributes are combined into several *compound* groups for convienence: +The *scalar* attributes are combined into several *compound* groups for convenience: * The ``credentials`` are a colon (``:``) separated combination of: ``user`` + ``password`` — also accessible via the shorter ``auth`` or the longer ``authentication`` attributes. May be assigned using array/mapping notation. @@ -102,11 +103,11 @@ interoperability with ``pathlib.Path`` or ``urlsplit`` objects: * ``hostname`` is the long form of ``host``. * ``authentication`` is the long form of ``auth``. -In addition, several string views are provided for convienence, but ultimately all just call `str()` against the +In addition, several string views are provided for convenience, but ultimately all just call `str()` against the instance or one of the compound groups described above: * ``uri`` represents the entire URI as a string. -* ``safe_uri`` represents the enture URI, sans any password that may be present. +* ``safe_uri`` represents the entire URI, sans any password that may be present. 
* ``base`` is the combination of ``scheme`` and the ``heirarchical`` part. * ``summary`` is a useful shortcut for web presentation containing only the ``host`` and ``port`` of the URI. * ``qs`` is just the query string, as a plain string instead of QSO instance. @@ -157,8 +158,17 @@ renders URI-like when your application requires URL-like, you can `utilize packa `_ to register additional mappings. -For an example, and to see the core set handled this way, examine the ``setup.py`` and ``setup.cfg`` files within the -project. +For an example, and to see the core set handled this way, examine the ``setup.py`` and ``setup.cfg`` files within this +project. If you wish to imperatively define schemes, you can do so with code such as the following. It is **strongly +recommended** to not implement this as an *import time side effect*. To mutate the plugin registry directly:: + + from uri.scheme import URLScheme + from uri.part.scheme import SchemePart + + SchemePart.registry['amqp'] = URLScheme('amqp') + SchemePart.registry['amqps'] = URLScheme('amqps') + +Subsequent attempts to resolve ``entry_points`` by these names will now resolve to the objects you have specified. WSGI @@ -166,8 +176,8 @@ WSGI A WSGI request environment contains all of the details required to reconstruct the requested URI. The simplest example of why one might do this is to form a "base URI" for relative resolution. WSGI environment-wrapping objects such as -WebOb's ``Request`` class instances may be used as long as the object passed in exposes the original WSGI environment -using an attribute named ``environ``. +`WebOb's `_ ``Request`` class instances may be used as long as the object passed in exposes the +original WSGI environment using an attribute named ``environ``. 
To perform this task, use the ``URI.from_wsgi`` factory method:: @@ -178,20 +188,228 @@ To perform this task, use the ``URI.from_wsgi`` factory method:: assert str(uri) == 'https://example.com/foo/bar?baz=27' +Migrating +========= + +A vast majority of other URI parsers emit plain dictionaries or provide ``as_dict`` methods. URI objects can be +transformed into such using a fairly basic "dictionary comprehension":: + + uri = URI('http://www.example.com/3.0/dd/ff/') + {i: getattr(uri, i) for i in dir(uri) if i[0] != '_' and not callable(getattr(uri, i))} + +The above will produce a dictionary of all URI attributes that are not "private" (prefixed by an underscore) or +executable methods. + + +From ``furl`` +------------- + + https://github.com/gruns/furl + +* A majority of the object attributes have parity: ``scheme``, ``username``, ``password``, ``host``, even ``origin``. +* ``furl.args`` -> ``URI.query`` +* ``furl.add()``, ``furl.set()``, ``furl.remove()`` -> inline, chained manipulation is not supported. +* ``furl.url`` -> ``str(uri)`` or ``URI.uri`` +* ``furl.netloc`` -> ``URI.authority`` +* Fragments do not have ``path`` and ``query`` attributes; under ``URI`` the fragment is a pure string. +* ``furl.path`` -> ``URI.path`` where ``furl`` implements its own, ``URI.path`` are PurePosixPath instances. +* ``furl.join`` is accomplished via division operators under ``URI``, or for more complete relative resolution, use + the ``URI.resolve`` method. +* The ``URI`` class does not currently infer protocol-specific default port numbers. +* Manipulation via division operators preserves query string parameters under ``furl``, however the ``URI`` package + assumes relative URL resolution, which updates the path and clears parameters and fragment. 
To extend the path while + preserving these:: + + uri = URI('http://www.google.com/base?one=1&two=2') + uri.path /= 'path' + assert str(uri) == 'http://www.google.com/base/path?one=1&two=2' + + +From ``dj-mongohq-url`` +----------------------- + + https://github.com/ferrix/dj-mongohq-url + +Where your ``settings.py`` file's ``DATABASES`` declaration used ``dj_mongohq_url.config``, instead use:: + + from uri.parse.db import parse_dburi + + DATABASES = {'default': parse_dburi('mongodb://...')} + + +From ``django-url-tools`` +------------------------- + + https://bitbucket.org/monwara/django-url-tools + +The majority of the ``UrlHelper`` attributes are directly applicable to ``URI`` instances, occasionally with minor +differences, typically of naming. The differences are documented here, and "template tags" and "filters" are not +provided for. + +* Where ``UrlHelper.path`` are plain strings, ``URI.path`` attributes are `PurePosixPath + _` instances which support typecasting to a string if + needed. + +* ``UrlHelper.query_dict`` and ``UrlHelper.query`` are replaced with the dict-like ``URI.query`` attribute. + +* ``UrlHelper.query_string`` is shortened to ``URI.qs``, additionally, the object retrieved when accessing ``query`` + may be cast to a string as per the rich path representation. + +* ``UrlHelper.get_full_path`` -- equivalent to the ``URI.resource`` compound, combining path, query string, and + fragment identifier. + +* ``UrlHelper.get_full_quoted_path`` -- alternative currently not provided. + +* There are no direct equivalents provided for: + + * ``UrlHelper.hash`` -- **not** provided due to FIPS-unsafe dependence on MD5. + * ``UrlHelper.get_query_string`` -- encoding is handled automatically. + * ``UrlHelper.get_query_data`` -- this helper for subclass inheritance is not provided. + * ``UrlHelper.update_query_data`` -- manipulate the query directly using ``URI.query.update``. 
+ * ``UrlHelper.overload_params`` -- can be accomplished using modern dictionary merge literal syntax. + * ``UrlHelper.toggle_params`` -- this seems an unusual use case, and can be resolved similarly to the last. + * ``UrlHelper.get_path`` -- unnecessary, access ``URI.path`` directly. + * ``UrlHelper.del_param`` and ``UrlHelper.del_params`` -- just utilize the ``del`` keyword (or ``pop`` method) on/of + the ``URI.query`` attribute. + + +From ``url2vapi`` +----------------- + + https://github.com/Drachenfels/url2vapi + +Where ``url2vapi`` provides a dictionary of parsed URL components, with some pattern-based extraction of API metadata, +``URI`` provides a rich object with descriptor attributes. Version parsing can be accomplished by extracting the +relevant path element and parsing it:: + + from pkg_resources import parse_version + from uri import URI + + url = 'http://www.example.com/3.0/dd/ff/' + uri = URI(url) + version = parse_version(uri.path.parts[1]) + +The ``ApiUrl`` class otherwise offers no functionality. The minimal "data model" provided only accounts for: + +* ``protocol`` -> ``scheme`` +* ``port`` is common, though URI port numbers are stored as integers, not strings. +* ``domain`` -> ``host`` +* ``remainder`` does not have an equivalent; there are several compound getters which may provide similar results. +* ``kwargs`` also has no particular equivalent. URI instances are not "arbitrarily extensible". +* Parsing of URL "parameters" incorrectly assume these are exclusive to the referenced resource, as per query string + arguments, when each path element may have its own distinct parameters. 
The difference between:: + + https://example.com/foo/bar/baz?prop=27 + https://example.com/foo/bar/baz;prop=27 + + And:: + + https://example.com/foo;prop=27/bar/baz;prop=27 + https://example.com/foo/bar;prop=27/baz + https://example.com/foo/bar/baz;prop=27 + + +From ``url-parser`` +------------------- + + https://github.com/AdaptedAS/url_parser + +* ``protocol`` -> ``scheme`` +* ``www`` has no equivalent; check for ``URI.host.startswith('www.')`` instead. +* ``sub_domain`` has no equivalent; parse/split ``URI.host`` instead. +* ``domain`` -> ``host`` +* ``top_domain`` has no equivalent; as per ``sub_domain``. +* ``dir`` -> ``path`` +* ``file`` -> ``path`` +* ``fragment`` is unchanged. +* ``query`` -> ``qs`` for the string form, ``query`` for a rich ``QSO`` instance interface. + + +From ``p.url`` +-------------- + + https://github.com/ultrabluewolf/p.url/ + +There may be a noticeable trend arising from several sections of "migrating from". Many seem to have accessor or +manipulation **methods** to mutate the object, rather than utilizing native data type interactions, this one does not +buck the trend. Additionally, many of the "attributes" of ``Purl`` are provided as invokable getter/setter methods, +not as static attributes nor automatic properties. In this comparison, attributes trailed by parenthesis are actually +methods, if ``[value]`` may be passed, the method is also the setter. Lastly, it provides its own ``InvalidUrlError`` +which does not subclass ``ValueError``. + +The result is a bit of a hodgepodge API that feels more at home in Java. + +* ``Purl.query`` is a plain dictionary attribute, not a getter method. Now a rich dict-like ``QSO`` object. +* ``Purl.querystring()`` -> ``URI.qs`` -- pure getter method in ``Purl``. +* ``Purl.add_query()`` and ``Purl.delete_query()`` -- just manipulate ``URI.query`` as a dictionary. +* An alternative to ``param`` for manipulation of path parameters is not provided, as these are protocol-defined. 
+* ``Purl.protocol([value])`` -> ``URI.scheme`` +* ``Purl.hostname([value])`` -> ``URI.host`` +* ``Purl.port([value])`` -> ``URI.port`` +* ``Purl.path([value])`` -> ``URI.path`` +* "Parameter expansion" (which is unrelated to actual URI path element parameters) is not currently supported; + recommended to simply use f-strings or ``str.format`` as appropriate. As curly braces have no special meaning to + ``URI``, you may populate these within one for later ``str(uri).format(...)`` interpolation. + + +From ``url`` +------------ + + https://github.com/seomoz/url-py + +The ``url`` package bundles Cython auto-generated C++ extensions. I do not understand why. + +It's nearly 16,000 lines of code. + +Sixteen thousand. + +A number of attributes are common such as ``scheme``, ``host``, ``hostname``, ``port``, etc. + +* ``URL.pld`` and ``URL.tld`` are left as an exercise for the reader. +* ``URL.params`` is not currently implemented. +* ``URL.query`` -> ``URI.qs`` with ``URI.query`` providing a rich dict-like interface. +* ``URL.unicode`` and ``URL.utf8`` are unimplemented. Native ``URI`` storage is Unicode, it's up to you to encode. +* ``URL.strip()`` is unnecessary under ``URI``; empty query strings, fragments, etc., naturally will not have + dividers. What many might consider to be an "invalid" query string often are not; an encoding for HTTP key-value + pairs is suggested for the HTTP scheme, however everything after the ``?`` is just a single string, up to server- + side interpretation. ``?????a=1`` is "perfectly fine". +* Re-ordering of query string parameters is not implemented; the need is dubious at this level. +* ``URL.deparam()`` may be implemented by using `del` to remove known query string arguments, or using the ``pop()`` + method to safely remove arguments that may only be conditionally present, while avoiding exceptions. +* ``URL.abspath()`` is not currently implemented; to be implemented within ``URI.resolve()``. 
+* ``URL.unescape()`` is not currently implemented. +* ``URL.relative()`` may be implemented more succinctly using division operators, e.g. ``base / target``. This also + supports HTTP reference protocol-relative resolution using the floor division operator, e.g. ``base // target``. +* ``URL.punycode()`` and ``URL.unpunycode()`` are not implemented, as the goal is for Unicode to be natively/naturally + supported with Punycode encoding automatic at instantiation and serialization to string time, reference `#18 + `_. + + + Version History =============== Version 3.0.0 ------------- +* Improved documentation, notably, incorporated the imperative registration of schemes example from `#14 + `_. +* Inclusion of adaption utilities and tests obviating the need for other utility packages, and documented migration + from several other URI or URL implementations. * Removed legacy Python 2 support adaptions. -* Removed Python 3 support less than Python 3.6 due to type annotation syntax changes. +* Removed Python 3 support less than Python 3.8 due to type annotation functionality and syntax changes. +* Broad adoption of type hinting annotations across virtually all methods and instance attributes. * Updated ABC import path references to correct Python 3.9 warnings. -* Added syntax sugar for assignment of URI authentication credentials by returning a mutated instance when sliced. #10 +* Added syntax sugar for assignment of URI authentication credentials by returning a mutated instance when sliced. `#10 + `_ * Additional ``__slots__`` declarations to improve memory efficiency. -* Added RFC example relative resolutions as tests. -* Added the ability to construct a URI from a populated WSGI request environment to reconstruct the requested URI. #13 +* Added RFC example relative resolutions as tests; we are a compatible resolver, not a strict one. +* Added ability to construct a URI from a populated WSGI request environment to reconstruct the requested URI. 
WebOb + added as a testing dependency to cover this feature. `#13 `_ * Migrated from Travis-CI to GitHub Actions for test runner automation. +* Added a significant number of additional pre-registered URL-like (``://``) schemes, based on Wikipedia references. +* Automatically utilize Punycode / IDNA encoding of internationalized domain names, ones containing non-ASCII. `#18 + `_ Version 2.0.1 @@ -199,10 +417,11 @@ Version 2.0.1 * Added non-standard `resource` compound view. * Removed Python 3.3 support, added 3.7, removed deprecated testing dependency. -* Scheme objects hash as per their string representation. #5 +* Scheme objects hash as per their string representation. `#5 `_ * Dead code clean-up. * Additional tests covering previously uncovered edge cases, such as assignment to a compound view property. -* Restrict assignment of rootless paths (no leading `/`) if an authority part is already present. #8 +* Restrict assignment of rootless paths (no leading `/`) if an authority part is already present. `#8 + `_ * Enable handling of the following schemes as per URL (colon + double slash): * sftp * mysql @@ -230,7 +449,7 @@ The URI package has been released under the MIT Open Source license. The MIT License --------------- -Copyright © 2017-2021 Alice Bevan-McGregor and contributors. +Copyright © 2017-2023 Alice Bevan-McGregor and contributors. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the From 4cbe3a0c8e4e9e2e2205086be76d4d62262fd8e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Sun, 10 Dec 2023 11:25:01 -0500 Subject: [PATCH 25/46] Dynamic package metadata lookup both installed, and not yet installed. 
--- uri/__init__.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/uri/__init__.py b/uri/__init__.py index 42a7c29..a98f2a4 100644 --- a/uri/__init__.py +++ b/uri/__init__.py @@ -1,23 +1,24 @@ """A type to represent, query, and manipulate a Uniform Resource Identifier.""" from importlib.metadata import metadata as _metadata, PackageNotFoundError as _NotFound -from pathlib import PurePosixPath as Path +from os import getlogin +from socket import gethostname -__all__ = set(locals()) +__all__ = set(locals()) # Initial set of symbols to exclude from our module exports. -from .bucket import Bucket -from .qso import QSO -from .uri import URI +from .bucket import Bucket # Query string fragment. +from .qso import QSO # An object representing a whole query string. +from .uri import URI # The primary class exposed by this package to represent a URL or URI. -try: +try: # Discover installed package metadata... _package = _metadata('uri') __version__ = _package.get('version') - __author__ = _package.get('author-email') + __author__ = f"{_package.get('author')} <{_package.get('author-email')}>" del _package -except _NotFound: +except _NotFound: # ...or generate "local development" version and author information. __version__ = 'dev' - __author__ = "Local Development" + __author__ = f"Local Development <{getlogin()}@{gethostname()}>" -__all__ = set(i for i in locals() if not i.startswith('_')) - __all__ -__license__ = 'MIT' +__license__ = 'MIT' # We could also get this from the package metadata, but it's not likely to change. +__all__ = set(i for i in locals() if not i.startswith('_')) - __all__ # Declare module exports for `import *` use. From eed7e6348c31c43a9d4e0b6f817f002d0ef50b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 21:55:58 -0500 Subject: [PATCH 26/46] Clean up testing commands, add mypy watch. 
--- Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 76a5b6c..54a8e9d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ PROJECT = uri USE = development -.PHONY: all develop clean veryclean test watch release +.PHONY: all develop clean veryclean test release all: clean develop test @@ -18,12 +18,16 @@ veryclean: clean test: develop @clear - @pytest --no-header --no-summary + @pytest watch: develop @clear @find . -iname \*.py | entr -c pytest --no-header --ff --maxfail=1 +mpy: develop + @clear + @find uri -iname \*.py | entr -c mypy -p uri + #release: # ./setup.py sdist bdist_wheel upload ${RELEASE_OPTIONS} # @echo -e "\nView online at: https://pypi.python.org/pypi/${PROJECT} or https://pypi.org/project/${PROJECT}/" From d2f7ce6d1310325eac20dd5f7606afd233f742ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 21:56:31 -0500 Subject: [PATCH 27/46] Tweak plurality, include our standard Path object. --- uri/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/uri/__init__.py b/uri/__init__.py index a98f2a4..2081185 100644 --- a/uri/__init__.py +++ b/uri/__init__.py @@ -1,4 +1,4 @@ -"""A type to represent, query, and manipulate a Uniform Resource Identifier.""" +"""A type to represent, query, and manipulate Uniform Resource Identifiers.""" from importlib.metadata import metadata as _metadata, PackageNotFoundError as _NotFound from os import getlogin @@ -6,6 +6,8 @@ __all__ = set(locals()) # Initial set of symbols to exclude from our module exports. +from pathlib import PurePosixPath as Path + from .bucket import Bucket # Query string fragment. from .qso import QSO # An object representing a whole query string. from .uri import URI # The primary class exposed by this package to represent a URL or URI. 
From 4ecec7339fa93bdf281a9b1cf535a619ef9fa9d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 21:57:55 -0500 Subject: [PATCH 28/46] F-string use. --- uri/bucket.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uri/bucket.py b/uri/bucket.py index 7322020..0b620ad 100644 --- a/uri/bucket.py +++ b/uri/bucket.py @@ -13,7 +13,7 @@ def __init__(self, name, value='', sep="=", strict=False): if not value: if isinstance(name, str): if name.count(sep) > 1: - if strict: raise ValueError("Multiple occurrences of separator {!r} in: '{!s}'".format(sep, name)) + if strict: raise ValueError(f"Multiple occurrences of separator {sep!r} in: '{name}'") self.valid = False name, value = self.split(name) From cf1650536617b0069f8182cc49997ebee9861252 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 21:58:48 -0500 Subject: [PATCH 29/46] Unicode adjustments. --- uri/bucket.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/uri/bucket.py b/uri/bucket.py index 0b620ad..6614248 100644 --- a/uri/bucket.py +++ b/uri/bucket.py @@ -58,7 +58,6 @@ def __len__(self): def __str__(self): # Certain symbols are explicitly allowed, ref: http://pretty-rfc.herokuapp.com/RFC3986#query - # XXX: __import__('wdb').set_trace() - iterator = (quote_plus(i.encode('utf8')).replace(b'%3F', b'?').replace(b'%2F', b'/') for i in self) if self.valid else self + iterator = (quote_plus(i.encode('utf8')).replace('%3F', '?').replace('%2F', '/') for i in self) if self.valid else self return self.sep.join(iterator) From da85fe21b1146e354841735de087c2a201144f38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 21:59:30 -0500 Subject: [PATCH 30/46] Restore dropped tests. 
--- test/test_parser_dburi.py | 57 ++++++ test/test_url_normalize.py | 109 ++++++++++++ test/test_whatwg.py | 354 +++++++++++++++++++++++++++++++++++++ 3 files changed, 520 insertions(+) create mode 100644 test/test_parser_dburi.py create mode 100644 test/test_url_normalize.py create mode 100644 test/test_whatwg.py diff --git a/test/test_parser_dburi.py b/test/test_parser_dburi.py new file mode 100644 index 0000000..9e88c93 --- /dev/null +++ b/test/test_parser_dburi.py @@ -0,0 +1,57 @@ +import pytest + +from uri import URI +from uri.parse.db import parse_dburi +from uri.qso import SENTINEL + +EXAMPLES = { + # Examples from: https://github.com/ferrix/dj-mongohq-url/blob/master/test_dj_mongohq_url.py + '': { + 'name': '', + 'host': None, + 'user': None, + 'password': None, + 'port': None + }, + 'mongodb://heroku:wegauwhgeuioweg@linus.mongohq.com:10031/app4523234': { + 'engine': 'mongodb', + 'name': 'app4523234', + 'host': 'linus.mongohq.com', + 'user': 'heroku', + 'password': 'wegauwhgeuioweg', + 'port': 10031 + }, + 'postgis://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5431/d8r82722r2kuvn': { + 'engine': 'postgis', + 'name': 'd8r82722r2kuvn', + 'host': 'ec2-107-21-253-135.compute-1.amazonaws.com', + 'user': 'uf07k1i6d8ia0v', + 'password': 'wegauwhgeuioweg', + 'port': 5431 + }, + + # '': { + # 'engine': '', + # 'name': '' + # 'host': '' + # 'user': '' + # 'password': '' + # 'port': + # }, + } + + +@pytest.mark.parametrize('string,attributes', EXAMPLES.items()) +class TestDBURIParsing: + @pytest.mark.parametrize('component', URI.__all_parts__ | {'base', 'qs', 'summary', 'relative'}) + def test_component(self, string, attributes, component): + pytest.skip("Disabled: these assertions do not exercise parse_dburi yet.") + + instance = URI(string) + value = getattr(instance, component, SENTINEL) + + if component not in attributes: + assert value in (None, SENTINEL, '') + return + + assert value == attributes[component] diff --git a/test/test_url_normalize.py b/test/test_url_normalize.py new file mode 100644
index 0000000..ebbf9e4 --- /dev/null +++ b/test/test_url_normalize.py @@ -0,0 +1,109 @@ +"""Functional and representative tests for the URI datatype/representation.""" + +from urllib.parse import urljoin, urlparse + +import pytest + +from uri import Path +from uri.qso import SENTINEL +from uri.uri import URI + + +URI_COMPONENTS = [ + # From test_deconstruct_url.py EXPECTED_DATA + ('http://site.com/', dict( # ** for identity test to pass, must have path + scheme = 'http', + authority = 'site.com', + heirarchical = 'site.com/', + password = None, + host = 'site.com', + port = None, + path = Path('/'), # ** + relative = False, + summary = 'site.com/', + base = 'http://site.com/', + )), + ('http://user@www.example.com:8080/path/index.html?param=val#fragment', dict( + scheme = 'http', + auth = 'user', + authentication = 'user', + authority = 'user@www.example.com:8080', + heirarchical = 'user@www.example.com:8080/path/index.html', + host = 'www.example.com', + port = 8080, + path = Path('/path/index.html'), # ** + user = 'user', + username = 'user', + relative = False, + summary = 'www.example.com/path/index.html', + base = 'http://user@www.example.com:8080/path/index.html', + query = 'param=val', + qs = 'param=val', + fragment = 'fragment', + )), + # From test_normalize_host.py + ('http://xn--e1afmkfd.xn--80akhbyknj4f/', dict( # ** for identity test to pass, must provide encoded form + scheme = 'http', + authority = 'xn--e1afmkfd.xn--80akhbyknj4f', + heirarchical = 'xn--e1afmkfd.xn--80akhbyknj4f/', + password = None, + host = 'пример.испытание', + port = None, + path = Path('/'), + relative = False, + summary = 'пример.испытание/', + base = 'http://xn--e1afmkfd.xn--80akhbyknj4f/', + )), + ] + +for _uri, _parts in URI_COMPONENTS: + _parts['uri'] = _uri + if 'query' in _parts: _parts['qs'] = _parts['query'] + if 'host' in _parts: _parts['hostname'] = _parts['host'] + + +def test_normalize_scheme(): + instance = URI('http://site.com/') + assert instance.scheme == 'http' + 
+ instance = URI('HTTP://site.com/') + assert instance.scheme == 'http' + + +def test_normalize_host(): + instance = URI('http://SITE.COM/') + assert instance.host == 'site.com' + + instance = URI('http://site.com./') + assert instance.host == 'site.com' + + +@pytest.mark.parametrize('string,attributes', URI_COMPONENTS) +class TestURLNormalize: + def test_truthiness(self, string, attributes): + instance = URI(string) + assert instance + + def test_identity(self, string, attributes): + instance = URI(string) + assert str(instance) == attributes['uri'] + + def test_identity_bytes(self, string, attributes): + instance = URI(string) + assert bytes(instance) == attributes['uri'].encode('utf-8') + + def test_identity_comparison(self, string, attributes): + instance = URI(string) + assert instance == attributes['uri'] + + @pytest.mark.parametrize('component', URI.__all_parts__ | {'base', 'qs', 'summary', 'relative'}) + def test_component(self, string, attributes, component): + instance = URI(string) + value = getattr(instance, component, SENTINEL) + + if component not in attributes: + assert value in (None, SENTINEL, '') + return + + assert value == attributes[component] + diff --git a/test/test_whatwg.py b/test/test_whatwg.py new file mode 100644 index 0000000..f2e0e82 --- /dev/null +++ b/test/test_whatwg.py @@ -0,0 +1,354 @@ +"""Functional and representative tests for the URI datatype/representation.""" + +from urllib.parse import urljoin, urlparse + +import pytest + +from uri import Path +from uri.qso import SENTINEL +from uri.uri import URI + +URI_COMPONENTS = [ + # From test_url.py + ('http://www.google.com:443/', dict( # test_url_scheme ** Changing scheme does not alter port. + scheme = 'http', # ** We do not "correct" this, either. 
+ authority = 'www.google.com:443', + heirarchical = 'www.google.com:443/', + password = None, + host = 'www.google.com', + hostname = 'www.google.com', + port = 443, + path = Path('/'), # ** + relative = False, + summary = 'www.google.com/', + base = 'http://www.google.com:443/', + )), + ('https://www.google.com/', dict( # test_url_host ** + scheme = 'https', + authority = 'www.google.com', + heirarchical = 'www.google.com/', + host = 'www.google.com', + hostname = 'www.google.com', + path = Path('/'), # ** + relative = False, + summary = 'www.google.com/', + base = 'https://www.google.com/', + )), + + # From test_special_cases.py + ('http://1.1.1.1 &@2.2.2.2/# @3.3.3.3', dict( # test_spaces_with_multiple_ipv4_addresses ** + scheme = 'http', + authority = '1.1.1.1 &@2.2.2.2', # ** + heirarchical = '1.1.1.1 &@2.2.2.2/', + auth = '1.1.1.1 &', # ** + authentication = '1.1.1.1 &', # ** + user = '1.1.1.1 &', # ** + username = '1.1.1.1 &', # ** + host = '2.2.2.2', + fragment = ' @3.3.3.3', + path = Path('/'), + relative = False, + summary = '2.2.2.2/', + base = 'http://1.1.1.1 &@2.2.2.2/', + )), + ('http://google.com/#@evil.com/', dict( # test_fragment_with_hostname ** + scheme = 'http', + authority = 'google.com', + heirarchical = 'google.com/', + host = 'google.com', + path = Path('/'), + fragment = '@evil.com/', + relative = False, + base = 'http://google.com/', + summary = 'google.com/', + )), + ('http://foo@evil.com:80@google.com/', dict( # test_multiple_ats_within_authority + scheme = 'http', + authority = 'foo@evil.com:80@google.com', + auth = 'foo@evil.com:80', + heirarchical = 'foo@evil.com:80@google.com/', + host = 'google.com', + user = 'foo@evil.com', # ** + password = '80', + path = Path('/'), + summary = 'google.com/', + authentication = 'foo@evil.com:80', + relative = False, + base = 'http://foo@evil.com:80@google.com/', + username = 'foo@evil.com', + )), + ('http://foo@evil.com:80 @google.com/', dict( # test_multiple_ats_and_space_within_authority ** + 
scheme = 'http', + authority = 'foo@evil.com:80 @google.com', + authentication = 'foo@evil.com:80 ', + heirarchical = 'foo@evil.com:80 @google.com/', + host = 'google.com', + user = 'foo@evil.com', # ** + username = 'foo@evil.com', # ** + password = '80 ', # ** + path = Path('/'), + auth = 'foo@evil.com:80 ', + relative = False, + summary = 'google.com/', + base = 'http://foo@evil.com:80 @google.com/', + )), + ('http://orange.tw/sandbox/NN/passwd', dict( # test_unicode_double_dot_if_stripped_bom + scheme = 'http', + authority = 'orange.tw', + heirarchical = 'orange.tw/sandbox/NN/passwd', + host = 'orange.tw', + path = Path('/sandbox/NN/passwd'), # ** + relative = False, + summary = 'orange.tw/sandbox/NN/passwd', + base = 'http://orange.tw/sandbox/NN/passwd', + )), + # Python's urlsplit no longer preserves internal whitespace. + ('http://127.0.0.1\tfoo.google.com/', dict( # test_host_contains_tab_in_authority ** + scheme = 'http', + authority = '127.0.0.1foo.google.com', + heirarchical = '127.0.0.1foo.google.com/', + host = '127.0.0.1foo.google.com', # ** + path = Path('/'), + relative = False, + base = 'http://127.0.0.1foo.google.com/', + summary = '127.0.0.1foo.google.com/', + )), + # Omitted: test_host_contains_tab_in_authority_single_or_double_encoded, test_injection_within_authority + ('http://localhost\\@google.com:12345/', dict( # test_backslash_within_authority ** + scheme = 'http', + authority = 'localhost\\@google.com:12345', + auth = 'localhost\\', + authentication = 'localhost\\', + heirarchical = 'localhost\\@google.com:12345/', + user = 'localhost\\', + username = 'localhost\\', + host = 'google.com', # ** + port = 12345, + path = Path('/'), # ** + relative = False, + base = 'http://localhost\\@google.com:12345/', + summary = 'google.com/', + )), + ] + +for _uri, _parts in URI_COMPONENTS: + _parts['uri'] = _uri.replace("\t", "") + if 'query' in _parts: _parts['qs'] = _parts['query'] + if 'host' in _parts: _parts['hostname'] = _parts['host'] + + 
+@pytest.fixture +def instance(): + return URI('http://user:pass@example.com/over/there?name=ferret#anchor') + + +@pytest.fixture +def empty(): + return URI('http://example.com/over/there') + + +@pytest.mark.parametrize('string,attributes', URI_COMPONENTS) +class TestWhatwgURI: + def test_truthiness(self, string, attributes): + instance = URI(string) + assert instance + + def test_identity(self, string, attributes): + if "\t" in string: pytest.skip("Internal whitespace is cleaned.") + instance = URI(string) + assert str(instance) == attributes['uri'] + + def test_identity_bytes(self, string, attributes): + if "\t" in string: pytest.skip("Internal whitespace is cleaned.") + instance = URI(string) + assert bytes(instance) == attributes['uri'].encode('utf-8') + + def test_identity_comparison(self, string, attributes): + instance = URI(string) + assert instance == attributes['uri'] + + def test_inverse_bad_comparison(self, string, attributes): + instance = URI(string) + assert instance != "fnord" + + def test_length(self, string, attributes): + instance = URI(string) + if "\t" in string: assert len(instance) == len(string) - 1 + else: assert len(instance) == len(string) + + + @pytest.mark.parametrize('component', URI.__all_parts__ | {'base', 'qs', 'summary', 'relative'}) + def test_component(self, string, attributes, component): + instance = URI(string) + value = getattr(instance, component, SENTINEL) + + if component not in attributes: + assert value in (None, SENTINEL, '') + return + + assert value == attributes[component] + + +class TestWhatwgURL: + def test_url_scheme(self): + url = URI('http://www.google.com:443/') # ** + url.scheme = 'https' + + assert url.scheme == 'https' + assert url.port == 443 # ** Whatwg-URL clears port on scheme changes. Unsure why. Only if default? + assert str(url) == 'https://www.google.com:443/' # ** We do not elide default port numbers. 
+ + def test_url_host(self): + url = URI("https://www.google.com") + url.hostname = "example.com" + + assert url.hostname == "example.com" + assert str(url) == "https://example.com/" + + def test_url_port(self): + url = URI("https://www.example.com") + url.port = 123 + + assert url.port == 123 + assert url.host == "www.example.com" # ** We do not include port number in host name. + assert url.authority == "www.example.com:123" # It is included in the authority, however. + assert str(url) == "https://www.example.com:123/" + + url.port = 443 + + assert url.port == 443 # ** Similarly, we don't treat default assignments as None assignments. + assert url.host == "www.example.com" + assert str(url) == "https://www.example.com:443/" # ** + + def test_relative_url_with_url_contained(self, instance): + url = URI('https://www.google.com').resolve('/redirect?target=http://localhost:61020/') + + assert url.scheme == 'https' + assert url.host == 'www.google.com' + assert url.path == Path('/redirect') + assert str(url.query) == "target=http%3A//localhost%3A61020/" # ** We automatically encode and correct. + + def test_url_user_info(self): + url = URI("https://github.com") + url.user = "username" + + assert url.username == "username" + assert url.password is None + assert str(url) == "https://username@github.com/" + + url.password = "password" + + assert url.username == "username" + assert url.password == "password" + assert str(url) == "https://username:password@github.com/" + + url.username = None + + assert url.username is None + assert url.password == "password" + assert str(url) == "https://:password@github.com/" + + url.password = None + + assert url.username is None + assert url.password is None + assert str(url) == "https://github.com/" + + def test_url_query(self): + url = URI("https://www.google.com") + url.query = "a=1" # ** Don't include the prefix yourself.
+ + assert url.qs == "a=1" + assert str(url) == "https://www.google.com/?a=1" + + url.query = "" + + assert url.query == "" + assert str(url) == "https://www.google.com/" # ** If empty or None, we do not emit the separator. + + url.query = None + + assert not url.query # ** It isn't literally None, but it is falsy if omitted or empty. + assert str(url) == "https://www.google.com/" + # The above is due to the fact that `.query` returns a rich, dict-like object which permits mutation. + # Assigning None just clears this mutable structure. + + def test_url_fragment(self): + url = URI("https://www.google.com") + url.fragment = "abc" + + assert url.fragment == "abc" + assert str(url) == "https://www.google.com/#abc" + + url.fragment = "" + + assert url.fragment == "" + assert str(url) == "https://www.google.com/" # ** None and an empty string are both interpreted as "none". + + url.fragment = None + + assert url.fragment is None + assert str(url) == "https://www.google.com/" + + def test_url_origin(self): # ** Not _entirely_ the same, as the components come back recombined, not as a tuple. 
+ url = URI("https://www.google.com") + assert url.origin == "https://www.google.com" + + @pytest.mark.xfail(reason="Need to look into definition of 'origin' for URI generally.") + def test_url_blob_origin(self): + url = URI("blob:https://www.google.com") + + assert url.origin == URI("https://www.google.com").origin + + +@pytest.mark.parametrize('url', [ + "https://www.google.com/", + "http://user:pass@www.example.com/", + "http://:pass@www.example.com/", + "http://user@www.example.com/", + "http://www.example.com:432/", + "http://www.example.com/?a=1;B=c", + "http://www.example.com/#Fragment", + "http://username:password@www.example.com:1234/?query=string#fragment", + ]) +@pytest.mark.parametrize('attr', ['netloc', 'hostname', 'port', 'path', 'query', 'fragment', 'username', 'password']) +def test_assert_same_urlparse_result(url, attr): + urllib = urlparse(url) + uri = URI(url) + + urllib_value = getattr(urllib, attr) + uri_value = getattr(uri, attr) + + if urllib_value == "" and uri_value is None: + pytest.xfail("URI uses None where urllib uses empty strings") + + elif isinstance(uri_value, Path): + assert urllib_value == str(uri_value) # First, ensure the string versions are equal... 
+ pytest.xfail("URI uses rich Path objects where urllib uses strings, which compared OK") + + assert urllib_value == uri_value + + +@pytest.mark.parametrize(('base', 'href', 'expected'), [ + ("http://www.google.com/", "", "http://www.google.com/"), + ("http://www.google.com/", "/", "http://www.google.com/"), + ("http://www.google.com/", "maps/", "http://www.google.com/maps/"), + ("http://www.google.com/", "one/two/", "http://www.google.com/one/two/"), + ("http://www.google.com/mail", "/maps/", "http://www.google.com/maps/"), + ("http://www.google.com/", "./", "http://www.google.com/"), + ("http://www.google.com/maps", "..", "http://www.google.com/"), + ("http://www.google.com/", "https://www.google.com/", "https://www.google.com/"), + ("http://www.google.com/", "https://maps.google.com/", "https://maps.google.com/"), + ("https://www.google.com/", "https://www.google.com:1234/", "https://www.google.com:1234/"), + ("https://www.google.com/", "?query=string", "https://www.google.com/?query=string"), + ("https://www.google.com/", "#fragment", "https://www.google.com/#fragment"), + ("http://www.google.com/", "http://user:pass@www.google.com/", "http://user:pass@www.google.com/"), + ("http://www.google.com/", "http://user@www.google.com/", "http://user@www.google.com/"), + ("http://www.google.com/", "http://:pass@www.google.com/", "http://:pass@www.google.com/"), + ]) +def test_assert_same_urljoin_result(base, href, expected): + urllib = urljoin(base, href) + uri_resolve = URI(base).resolve(href) + uri_division = str(URI(base) / href) + + assert urllib == uri_resolve == uri_division == expected From 633e143c944904939f67820d5f7318cae0816541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 21:59:53 -0500 Subject: [PATCH 31/46] Add back dropped `dburi` parser function. 
--- uri/parse/db.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 uri/parse/db.py diff --git a/uri/parse/db.py b/uri/parse/db.py new file mode 100644 index 0000000..029935c --- /dev/null +++ b/uri/parse/db.py @@ -0,0 +1,36 @@ +"""Attempt to parse a database "connection string", retrieving the relevant component parts.""" + +from typing import Any, Dict + +from .. import URI + + +def parse_dburi(url:str, uppercase:bool=False) -> Dict[str, Any]: + """Parse a given URL or URI string and return the component parts relevant for database connectivity. + + These come in the general UNIX form: + + engine://[user:pass@]host[:port]/database[?options] + """ + + uri = URI(url) + + parts = { + 'engine': str(uri.scheme), + 'name': next((i for i in (uri.path.parts if uri.path else ()) if i.strip('/')), ''), # First non-root path segment. + 'host': uri.host, + 'user': uri.user, + 'password': uri.password, + 'port': uri.port, + 'options': uri.query, + } + + if not uri.scheme: del parts['engine'] # Parity with dj-mongohq-url + + if parts['host'] and ',' in parts['host']: + parts['hosts'] = [i.strip() for i in parts.pop('host').split(',')] + + if uppercase: + for k in list(parts): parts[k.upper()] = parts.pop(k) + + return parts From 8ea71a2a78a003e0acf4b92c72ff65480f7b939a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 22:00:25 -0500 Subject: [PATCH 32/46] Annotations satisfying Mypy. --- uri/part/base.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/uri/part/base.py b/uri/part/base.py index 1db2a07..be6111e 100644 --- a/uri/part/base.py +++ b/uri/part/base.py @@ -1,33 +1,34 @@ from operator import attrgetter -from re import compile as r +from re import compile as r, Pattern +from typing import Any, Callable, Optional, Tuple, TypeVar, Union class Part: """Descriptor protocol objects for combinatorial string parts with validation.""" - __slots__ = () + __slots__: Tuple[str, ...]
= () - valid = r(r'.*') - prefix = '' - suffix = '' - empty = '' + valid: Pattern = r(r'.*') + prefix: str = '' + suffix: str = '' + empty: str = '' - def render(self, obj, value): + def render(self, obj, value) -> str: if not value: return self.empty return self.prefix + str(value) + self.suffix class ProxyPart(Part): - __slots__ = () + __slots__: Tuple[str, ...] = () - attribute = None - cast = str + attribute: str + cast: Callable[[Any], str] = str - def __get__(self, obj, cls=None): + def __get__(self, obj, cls=None) -> Union[str, 'ProxyPart']: if obj is None: return self return getattr(obj, self.attribute) - def __set__(self, obj, value): + def __set__(self, obj, value:Optional[Union[bytes,str]]) -> None: if value == b'': value = None @@ -38,12 +39,12 @@ def __set__(self, obj, value): class GroupPart(Part): - __slots__ = ('_getters', '_join') + __slots__: Tuple[str, ...] = () - attributes = () - sep = '' + attributes: Tuple[str, ...] = () + sep: str = '' - def __get__(self, obj, cls=None): + def __get__(self, obj, cls:Optional[type]=None) -> Union[str, 'GroupPart']: if obj is None: return self cls = obj.__class__ @@ -58,6 +59,6 @@ def __set__(self, obj, value): class BasePart(GroupPart): - __slots__ = () + __slots__: Tuple[str, ...] = () - attributes = ('scheme', 'heirarchical') + attributes: Tuple[str, ...] = ('scheme', 'heirarchical') From 3c9b5298c0d52a2b0b3ddefbea8a75b71220c3fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 22:18:25 -0500 Subject: [PATCH 33/46] Utilize `get_all` for all metadata extraction. Give in and extract the license, too. --- uri/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/uri/__init__.py b/uri/__init__.py index 2081185..64eefe3 100644 --- a/uri/__init__.py +++ b/uri/__init__.py @@ -14,13 +14,13 @@ try: # Discover installed package metadata... 
_package = _metadata('uri') - __version__ = _package.get('version') - __author__ = f"{_package.get('author')} <{_package.get('author-email')}>" - del _package + __version__ = ", ".join(_package.get_all('version')) + __author__ = "\n".join(_package.get_all('author-email')) + except _NotFound: # ...or generate "local development" version and author information. __version__ = 'dev' __author__ = f"Local Development <{getlogin()}@{gethostname()}>" -__license__ = 'MIT' # We could also get this from the package metadata, but it's not likely to change. +__license__ = "".join(_package.get_all('license')) __all__ = set(i for i in locals() if not i.startswith('_')) - __all__ # Declare module exports for `import *` use. From 037229337599d52dd104a575d034d49966ddd875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 22:18:53 -0500 Subject: [PATCH 34/46] Add IDNA encoding/decoding test case. --- test/test_uri.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/test_uri.py b/test/test_uri.py index e359721..da1295c 100644 --- a/test/test_uri.py +++ b/test/test_uri.py @@ -146,6 +146,19 @@ relative = False, resource = 'oasis:names:specification:docbook:dtd:xml:4.1.2', )), + + # IDNA (Internationalized Domain Name) Encoding + ('https://xn--ls8h.la/', dict( + scheme = 'https', + path = Path('/'), + host = '💩.la', + authority = 'xn--ls8h.la', + heirarchical = 'xn--ls8h.la/', + summary = '💩.la/', + base = 'https://xn--ls8h.la/', + relative = False, + resource = '/', + )) ] for _uri, _parts in URI_COMPONENTS: From 1e2b959066a139b7b48f83abef171cd6507cdb63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 22:21:40 -0500 Subject: [PATCH 35/46] Scratch that, hardcode. 
--- uri/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uri/__init__.py b/uri/__init__.py index 64eefe3..f70ccf0 100644 --- a/uri/__init__.py +++ b/uri/__init__.py @@ -21,6 +21,6 @@ __version__ = 'dev' __author__ = f"Local Development <{getlogin()}@{gethostname()}>" -__license__ = "".join(_package.get_all('license')) +__license__ = "MIT" __all__ = set(i for i in locals() if not i.startswith('_')) - __all__ # Declare module exports for `import *` use. From 9385637f1556c0abc5e120b25a110e3c0a2a2b15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 23:32:21 -0500 Subject: [PATCH 36/46] Additional aliases. --- uri/uri.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/uri/uri.py b/uri/uri.py index 2eed9ab..3a45252 100644 --- a/uri/uri.py +++ b/uri/uri.py @@ -32,6 +32,7 @@ class URI: __slots__ = ('_scheme', '_user', '_password', '_host', '_port', '_path', '_trailing', '_query', '_fragment') __parts__ = ('scheme', 'authority', 'path', 'query', 'fragment') + __origin_parts__ = ('scheme', 'authority') __safe_parts__ = ('scheme', '_safe_auth', 'host', 'port', 'path', 'query', 'fragment') __all_parts__ = {'scheme', 'user', 'password', 'host', 'port', 'path', 'query', 'fragment', 'auth', 'authority', 'heirarchical', 'uri', 'username', 'hostname', 'authentication'} @@ -49,11 +50,12 @@ class URI: # Compound Parts auth = AuthenticationPart() _safe_auth = SafeAuthenticationPart() - authority = AuthorityPart() + authority = netloc = AuthorityPart() heirarchical = HeirarchicalPart() # Additional Compound Interfaces uri = URIPart(__parts__) # Whole-URI retrieval or storage as string. + origin = URIPart(__origin_parts__) # The top-level "origin" for this URL. safe = safe_uri = URIPart(__safe_parts__, False) # URI retrieval without password component, useful for logging. 
base = BasePart() summary = URIPart(('host', 'path'), False) @@ -209,7 +211,7 @@ def __iter__(self): def __len__(self): """The length of the URI as a string.""" - return len(self.uri) + return len(str(self.uri)) # Path-like behaviours. From e9aa947d9d72a3d77e06cda23f210ce8c0b7890a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 23:34:16 -0500 Subject: [PATCH 37/46] Annotations, documentation, tweaked plugin loading. --- uri/part/scheme.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/uri/part/scheme.py b/uri/part/scheme.py index f7943bf..622280e 100644 --- a/uri/part/scheme.py +++ b/uri/part/scheme.py @@ -1,5 +1,5 @@ from importlib.metadata import entry_points -from typing import ClassVar, Dict, Optional +from typing import Any, ClassVar, Dict, Optional, Union #, Self from re import compile as r, Pattern from .base import Part @@ -9,11 +9,16 @@ class SchemePart(Part): __slots__: tuple = () # Do not populate a __dict__ dictionary attribute; only allocate space for these. - registry: ClassVar[Dict[str, Optional[Scheme]]] = {'': None} + registry: ClassVar[Dict[str, Optional[Scheme]]] = {'': None} # Singleton cache of Scheme instances, by name. suffix: str = ':' # Protocol suffix when utilized as part of a complete URI; e.g. ':' or '://'. - valid: Pattern = r(r'[a-z][a-z0-9+.+-]*') # Protocol/scheme name validated when run unoptimized. + valid: Pattern = r(r'[a-z][a-z0-9+.+-]*') # Protocol/scheme name validated when run without optimization. def load(self, plugin:str) -> Scheme: + """Attempt to retrieve a Scheme for the given named protocol. + + Utilizes a cache, which results in URI utilizing singletons of each named protocol. + """ + assert self.valid.match(plugin), f"Invalid plugin name: {plugin!r}" if plugin in self.registry: return self.registry[plugin] # Short circuit if we've seen this before. 
@@ -22,11 +27,13 @@ def load(self, plugin:str) -> Scheme: except KeyError: result = Scheme(plugin) # Can't look up by registered name? It's generic. else: result = result(plugin) # Otherwise, instantiate the subclass, informing it of its name. - self.registry[plugin] = result # Record the instance in a local registry / cache. + self.registry[plugin] = result # Record the instance in a local registry / cache and return it. return result - def render(self, obj, value) -> str: + def render(self, obj:Any, value:str) -> str: + """Render the scheme component of a whole URI.""" + result = super(SchemePart, self).render(obj, value) if obj._scheme and obj.scheme.slashed: @@ -37,16 +44,15 @@ def render(self, obj, value) -> str: return result - def __get__(self, obj, cls=None): - if obj is None: return self - scheme = obj._scheme + def __get__(self, obj:Any, cls:Optional[Any]=None) -> Optional[Union['SchemePart', Scheme]]: + """Accessed as a class attribute, return this instance, otherwise decant a Scheme from the containing object.""" - if scheme is not None: - scheme = self.load(scheme) - - return scheme + if obj is None: return self + return None if obj._scheme is None else self.load(obj._scheme) - def __set__(self, obj, value): + def __set__(self, obj:Any, value:Optional[Union[str,bytes]]) -> None: + """Assign a new named scheme to this URI.""" + if isinstance(value, bytes): value = value.decode('ascii') @@ -54,4 +60,4 @@ def __set__(self, obj, value): obj._scheme = None return - obj._scheme = Scheme(value).name + obj._scheme = self.load(value).name # This gives the plugin registry a chance to normalize the recorded name. From a7d257143827a48a1bd729de95900ec1ba1a4b29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 23:34:30 -0500 Subject: [PATCH 38/46] Implement normalization explicitly. 
--- uri/part/host.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/uri/part/host.py b/uri/part/host.py index 3f65e7e..0fd99a2 100644 --- a/uri/part/host.py +++ b/uri/part/host.py @@ -1,4 +1,6 @@ +from functools import partial from socket import inet_pton, AF_INET6, error as SocketError +from typing import Any, Optional, Union from .base import ProxyPart @@ -8,10 +10,23 @@ class HostPart(ProxyPart): attribute = '_host' + def cast(self, value:str) -> str: + value = value.rstrip('.') # Remove extraneous "DNS root authority" notation. + + if 'xn--' in value: # Process IDNA - internationalized domain names, one label at a time. + value = '.'.join(i.encode('ascii').decode('idna') if i.startswith('xn--') else i for i in value.split('.')) + + return value + def render(self, obj, value): result = super(HostPart, self).render(obj, value) if result: + try: + result.encode('ascii') + except UnicodeEncodeError: + result = result.encode('idna').decode('ascii') + try: # Identify and armour IPv6 address literals. inet_pton(AF_INET6, value) except SocketError: From 575ab3f97c43d0cc34d41b452e03694a7be8aff8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 23:35:05 -0500 Subject: [PATCH 39/46] Adjust test setup.
--- test/test_uri.py | 3 ++- test/test_url_normalize.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_uri.py b/test/test_uri.py index da1295c..2057fc1 100644 --- a/test/test_uri.py +++ b/test/test_uri.py @@ -162,9 +162,10 @@ ] for _uri, _parts in URI_COMPONENTS: - _parts['uri'] = _uri + _parts.setdefault('uri', _uri) if 'query' in _parts: _parts['qs'] = _parts['query'] if 'host' in _parts: _parts['hostname'] = _parts['host'] + _parts.setdefault('length', len(_parts.get('uri', _uri))) @pytest.fixture diff --git a/test/test_url_normalize.py b/test/test_url_normalize.py index ebbf9e4..c119f5d 100644 --- a/test/test_url_normalize.py +++ b/test/test_url_normalize.py @@ -57,9 +57,10 @@ ] for _uri, _parts in URI_COMPONENTS: - _parts['uri'] = _uri + _parts.setdefault('uri', _uri) if 'query' in _parts: _parts['qs'] = _parts['query'] if 'host' in _parts: _parts['hostname'] = _parts['host'] + _parts.setdefault('length', len(_parts.get('uri', _uri))) def test_normalize_scheme(): From 148b21a2b5868cbd9b8e4e5c3de294b526e3327d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 23:35:32 -0500 Subject: [PATCH 40/46] Tweak IDN testing. Only host utilizes decoded form. 
--- test/test_uri.py | 7 ++++--- test/test_url_normalize.py | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/test/test_uri.py b/test/test_uri.py index 2057fc1..5fb853d 100644 --- a/test/test_uri.py +++ b/test/test_uri.py @@ -148,16 +148,17 @@ )), # IDNA (Internationalized Domain Name) Encoding - ('https://xn--ls8h.la/', dict( + ('https://💩.la/', dict( scheme = 'https', path = Path('/'), host = '💩.la', authority = 'xn--ls8h.la', heirarchical = 'xn--ls8h.la/', - summary = '💩.la/', + summary = 'xn--ls8h.la/', base = 'https://xn--ls8h.la/', relative = False, resource = '/', + uri = 'https://xn--ls8h.la/', )) ] @@ -213,7 +214,7 @@ def test_inverse_bad_comparison(self, string, attributes): def test_length(self, string, attributes): instance = URI(string) - assert len(instance) == len(string) + assert len(instance) == attributes['length'] @pytest.mark.parametrize('component', URI.__all_parts__ | {'base', 'qs', 'summary', 'relative'}) def test_component(self, string, attributes, component): diff --git a/test/test_url_normalize.py b/test/test_url_normalize.py index c119f5d..c4cdbf9 100644 --- a/test/test_url_normalize.py +++ b/test/test_url_normalize.py @@ -8,7 +8,6 @@ from uri.qso import SENTINEL from uri.uri import URI - URI_COMPONENTS = [ # From test_deconstruct_url.py EXPECTED_DATA ('http://site.com/', dict( # ** for identity test to pass, must have path @@ -42,7 +41,7 @@ fragment = 'fragment', )), # From test_normalize_host.py - ('http://xn--e1afmkfd.xn--80akhbyknj4f/', dict( # ** for identity test to pass, must provide encoded form + ('http://пример.испытание/', dict( # ** for identity test to pass, must provide encoded form scheme = 'http', authority = 'xn--e1afmkfd.xn--80akhbyknj4f', heirarchical = 'xn--e1afmkfd.xn--80akhbyknj4f/', @@ -51,8 +50,9 @@ port = None, path = Path('/'), relative = False, - summary = 'пример.испытание/', + summary = 'xn--e1afmkfd.xn--80akhbyknj4f/', base = 'http://xn--e1afmkfd.xn--80akhbyknj4f/', + uri = 
'http://xn--e1afmkfd.xn--80akhbyknj4f/', )), ] From cce2fc211f682ceed19957be89415f6a714c62ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 23:50:11 -0500 Subject: [PATCH 41/46] Clarify we want a class, not Any. --- uri/part/scheme.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uri/part/scheme.py b/uri/part/scheme.py index 622280e..62d62f9 100644 --- a/uri/part/scheme.py +++ b/uri/part/scheme.py @@ -44,7 +44,7 @@ def render(self, obj:Any, value:str) -> str: return result - def __get__(self, obj:Any, cls:Optional[Any]=None) -> Optional[Union['SchemePart', Scheme]]: + def __get__(self, obj:Any, cls:Optional[type]=None) -> Optional[Union['SchemePart', Scheme]]: """Accessed as a class attribute, return this instance, otherwise decant a Scheme from the containing object.""" if obj is None: return self From c4fbcd49cbb8a3e3f4459fd8d30b322f9c313a68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 23:50:50 -0500 Subject: [PATCH 42/46] Validate specific construction case. You can construct a URI entirely from a protocol (including colon), then build from there using pathlike notation. --- test/test_003_path_like_division.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_003_path_like_division.py b/test/test_003_path_like_division.py index 19f0b07..16ea14d 100644 --- a/test/test_003_path_like_division.py +++ b/test/test_003_path_like_division.py @@ -28,3 +28,9 @@ def test_issue_003_path_on_path_division(): # And it's resolved. 
assert str(base / target) == "http://ats.example.com/job/detail/sample-job" + + +def test_pathlike_construction(): + target = URI("http:") // "example.com" + assert str(target) == "http://example.com/" + assert str(target / "foo") == "http://example.com/foo" From 9c91c9bbede0bbabed71dd1d7f1d2c65d3cf3305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Tue, 12 Dec 2023 23:52:45 -0500 Subject: [PATCH 43/46] No longer need to skip these tests. --- test/test_whatwg.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/test_whatwg.py b/test/test_whatwg.py index f2e0e82..25dc787 100644 --- a/test/test_whatwg.py +++ b/test/test_whatwg.py @@ -154,12 +154,10 @@ def test_truthiness(self, string, attributes): assert instance def test_identity(self, string, attributes): - if "\t" in string: pytest.skip("Internal whitespace is cleaned.") instance = URI(string) assert str(instance) == attributes['uri'] def test_identity_bytes(self, string, attributes): - if "\t" in string: pytest.skip("Internal whitespace is cleaned.") instance = URI(string) assert bytes(instance) == attributes['uri'].encode('utf-8') From 34cdeed4ef85cfe3feccd0b2ec02ca4d2888b3fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Wed, 13 Dec 2023 00:23:12 -0500 Subject: [PATCH 44/46] Typing; __all__ is a list, not a tuple. --- uri/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uri/__init__.py b/uri/__init__.py index f70ccf0..24d73b1 100644 --- a/uri/__init__.py +++ b/uri/__init__.py @@ -4,7 +4,7 @@ from os import getlogin from socket import gethostname -__all__ = set(locals()) # Initial set of symbols to exclude from our module exports. +__all__ = list(locals()) # Initial set of symbols to exclude from our module exports. 
from pathlib import PurePosixPath as Path @@ -22,5 +22,5 @@ __author__ = f"Local Development <{getlogin()}@{gethostname()}>" __license__ = "MIT" -__all__ = set(i for i in locals() if not i.startswith('_')) - __all__ # Declare module exports for `import *` use. +__all__ = list(set(i for i in locals() if not i.startswith('_')) - set(__all__)) # Declare module exports for `import *` use. From 8f090ea79de57bf5eaf409872383b9d0a5a38f71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Wed, 13 Dec 2023 00:23:21 -0500 Subject: [PATCH 45/46] Whitespace. --- test/test_whatwg.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_whatwg.py b/test/test_whatwg.py index 25dc787..8114803 100644 --- a/test/test_whatwg.py +++ b/test/test_whatwg.py @@ -173,7 +173,6 @@ def test_length(self, string, attributes): instance = URI(string) if "\t" in string: assert len(instance) == len(string) - 1 else: assert len(instance) == len(string) - @pytest.mark.parametrize('component', URI.__all_parts__ | {'base', 'qs', 'summary', 'relative'}) def test_component(self, string, attributes, component): From 8a2bd793aab96edf3241ce1cef1ea200ade5099d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Zo=C3=AB=20Bevan=E2=80=93McGregor?= Date: Wed, 13 Dec 2023 00:23:51 -0500 Subject: [PATCH 46/46] Disable mypy inclusion in Pytest execution, for now. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1ec0376..d59a6d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,7 +94,7 @@ mongodb = 'uri.scheme:URLScheme' [tool.pytest.ini_options] -addopts = '-l -r fEsxw --cov uri --mypy --no-cov-on-fail --cov-report term-missing --cov-report xml --flakes --isort --durations=5 --color=yes test' +addopts = '-l -r fEsxw --cov uri --no-cov-on-fail --cov-report term-missing --cov-report xml --flakes --isort --durations=5 --color=yes test' flakes-ignore = [ 'test/*.py UnusedImport',