Skip to content

Commit

Permalink
add PYTHON_3_7_QUOTING
Browse files Browse the repository at this point in the history
  • Loading branch information
Markus Klein committed Aug 12, 2019
1 parent a4eda13 commit d25f55f
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 17 deletions.
19 changes: 10 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,20 @@ Encoding

The following encodings are offered. `DEFAULT_QUOTING` is used in case the quoting parameter is not specified.

| Name | Additional encoded characters |
|----------------------|------------------------------------------------------------------------------------|
| SIMPLE_QUOTING | |
| DEFAULT_QUOTING | space, `<`,`>`,`` ` ``,`?`,`{`,`}` |
| QUERY_QUOTING | space, `"`,`#`,`<`,`>` |
| PATH_SEGMENT_QUOTING | space, `"`,`#`,`<`,`>`,`` ` ``,`?`,`%`,`/` |
| USERINFO_QUOTING | space, `"`,`#`,`<`,`>`,`` ` ``,`?`,`{`,`}`,`/`,`:`,`;`,`=`,`@`,`\`,`[`,`]`,`^`,`|` |
| Name | Additional encoded characters |
|----------------------|------------------------------------------------------------------------------------------------------------------------|
| SIMPLE_QUOTING | |
| DEFAULT_QUOTING | space, `<`,`>`,`` ` ``,`?`,`{`,`}` |
| QUERY_QUOTING | space, `"`,`#`,`<`,`>` |
| PATH_SEGMENT_QUOTING | space, `"`,`#`,`<`,`>`,`` ` ``,`?`,`%`,`/` |
| USERINFO_QUOTING | space, `"`,`#`,`<`,`>`,`` ` ``,`?`,`{`,`}`,`/`,`:`,`;`,`=`,`@`,`\`,`[`,`]`,`^`,`|` |
| PYTHON_3_7_QUOTING | space, `"`,`#`,`<`,`>`,`` ` ``,`?`,`{`,`}`,`$`,`%`,`&`,`'`,`(`,`)`,`,`,`=`,`;`,`:`,`!`,`\`,`@`,`[`,`]`,`^`,`|`,`+`,`*` |

Non printable and non standard ASCII characters are always quoted.
Non-printable and non-standard ASCII characters are always quoted. `PYTHON_3_7_QUOTING` works the same way in every Python version; the name only refers to the `urllib` default encoding used in Python 3.7.

Development
-----------

a
This library is a thin wrapper around the Rust crate [`percent-encoding`](https://crates.io/crates/percent-encoding). It exposes part of its functionality to python via a C interface using [`milksnake`](https://github.com/getsentry/milksnake).

To build it you need to [install Rust and Cargo](https://www.rust-lang.org/en-US/install.html). Then you can proceed to build the wheel with:
Expand Down
2 changes: 1 addition & 1 deletion bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import timeit

def benchmark_quote_urlquote():
urlquote.quote(LOREMIPSUM)
urlquote.quote(LOREMIPSUM, urlquote.quoting.PYTHON_3_7_QUOTING)

def benchmark_quote_urllib():
urllib_quote(LOREMIPSUM)
Expand Down
2 changes: 2 additions & 0 deletions rust/native.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ extern const Quoting* DEFAULT_QUOTING;

extern const Quoting* PATH_SEGMENT_QUOTING;

extern const Quoting* PYTHON_3_7_QUOTING;

extern const Quoting* QUERY_QUOTING;

extern const Quoting* SIMPLE_QUOTING;
Expand Down
12 changes: 10 additions & 2 deletions rust/src/quoting.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use percent_encoding::{
percent_encode, EncodeSet, DEFAULT_ENCODE_SET, PATH_SEGMENT_ENCODE_SET, QUERY_ENCODE_SET,
SIMPLE_ENCODE_SET, USERINFO_ENCODE_SET,
define_encode_set, percent_encode, EncodeSet, DEFAULT_ENCODE_SET, PATH_SEGMENT_ENCODE_SET,
QUERY_ENCODE_SET, SIMPLE_ENCODE_SET, USERINFO_ENCODE_SET,
};

/// All ASCII characters less than hexadecimal 20 and greater than 7E are encoded. This includes
Expand Down Expand Up @@ -40,6 +40,14 @@ pub static PATH_SEGMENT_QUOTING: &Quoting = &Quoting(&PATH_SEGMENT_ENCODE_SET);
#[no_mangle]
pub static USERINFO_QUOTING: &Quoting = &Quoting(&USERINFO_ENCODE_SET);

define_encode_set! {
/// This emulates the urllib default encoding used by Python 3.7.
pub PYTHON_3_7_ENCODE_SET = [DEFAULT_ENCODE_SET] | {'$', '%', '&', '\'', '(', ')', ',', '=', ';',':','!','\\','@','[',']','^','|','+','*'}
}

#[no_mangle]
pub static PYTHON_3_7_QUOTING: &Quoting = &Quoting(&PYTHON_3_7_ENCODE_SET);

/// A `Quoting` decides which characters are going to be quoted.
pub struct Quoting(
// This is an opaque public strict type alias in order to avoid talking about
Expand Down
11 changes: 7 additions & 4 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,16 @@ def test_unquote_string():
assert expected == actual

@pytest.mark.skipif(
sys.version_info < (3,3),
sys.version_info < (3,7),
reason="requires python3")
def test_userinfo_quoting():
def test_python_3_7_quoting():
'''
This test verifies that the userinfo encoding is identical with the default urllib encoding
'''

quot = quoting.USERINFO_QUOTING
from urllib.parse import quote as urllib_quote

quot = quoting.PYTHON_3_7_QUOTING

# Control characters
ascii_bytes = bytes(range(0, 32))
Expand All @@ -49,6 +51,7 @@ def test_userinfo_quoting():
utf8_bytes = ascii_str.encode('utf-8')


expected = "%20!%22%23$%&'()*+,-.%2F0123456789%3A%3B%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~"
# expected = "%20!%22%23$%&'()*+,-.%2F0123456789%3A%3B%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~"
expected = urllib_quote(utf8_bytes)
actual = quote(utf8_bytes, quot).decode('utf-8')
assert expected == actual
8 changes: 7 additions & 1 deletion urlquote/quoting.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,10 @@
# inequality qualifiers (<), (>), backtick (`), question mark (?), and curly brackets ({), (}),
# forward slash (/), colon (:), semi-colon (;), equality (=), at (@), backslash (\\), square
# brackets ([), (]), caret (\^), and pipe (|) are encoded.
USERINFO_QUOTING = lib.USERINFO_QUOTING
USERINFO_QUOTING = lib.USERINFO_QUOTING

# This is the default quoting used by urllib.parse.quote
#
# Quotes everything but letters, digits, dash (-), underscore (_), slash (/),
# point (.) and tilde (~).
PYTHON_3_7_QUOTING = lib.PYTHON_3_7_QUOTING

0 comments on commit d25f55f

Please sign in to comment.