Skip to content

Commit

Permalink
Add uri_normalize function
Browse files Browse the repository at this point in the history
  • Loading branch information
petere committed Apr 12, 2015
1 parent 527cc23 commit 8a02974
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 2 deletions.
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,19 @@ A number of functions are provided to extract parts of a URI:

Extracts the fragment part of a URI (roughly speaking, everything
after the `#`). If there is no fragment part, returns null.

Other functions:

- `uri_normalize(uri) returns uri`

Performs syntax-based normalization of the URI. This includes
case normalization, percent-encoding normalization, and removing
redundant `.` and `..` path segments. See
[RFC 3986 section 6.2.2](http://tools.ietf.org/html/rfc3986#section-6.2.2)
for the full details.

Note that this module (and similar modules in other programming
languages) compares URIs for equality in their original form,
without normalization. If you want to compare URIs without regard
for mostly irrelevant syntax differences, pass them through this
function first.
66 changes: 64 additions & 2 deletions test/expected/test.out
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ VALUES ('http://www.postgresql.org/'),
('/'),
('foobar'),
('/foobar');
-- normalization test values from <https://tools.ietf.org/html/rfc3986#section-6.2.2>
INSERT INTO test (b)
VALUES ('HTTP://www.EXAMPLE.com/'),
('http://www.ex%41mple.com/'),
('eXAMPLE://a/./b/../b/%63/%7bfoo%7d');
SELECT * FROM test;
a | b
----+-----------------------------------------------------------------------------------------
Expand All @@ -33,7 +38,10 @@ SELECT * FROM test;
12 | /
13 | foobar
14 | /foobar
(14 rows)
15 | HTTP://www.EXAMPLE.com/
16 | http://www.ex%41mple.com/
17 | eXAMPLE://a/./b/../b/%63/%7bfoo%7d
(17 rows)

-- error cases
SELECT uri 'http://host:port/';
Expand All @@ -42,6 +50,7 @@ LINE 1: SELECT uri 'http://host:port/';
^
\x on
SELECT b AS uri,
uri_normalize(b),
uri_scheme(b),
uri_userinfo(b),
uri_host(b),
Expand All @@ -54,6 +63,7 @@ SELECT b AS uri,
FROM test;
-[ RECORD 1 ]--+----------------------------------------------------------------------------------------
uri | http://www.postgresql.org/
uri_normalize | http://www.postgresql.org/
uri_scheme | http
uri_userinfo | _null_
uri_host | www.postgresql.org
Expand All @@ -65,6 +75,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 2 ]--+----------------------------------------------------------------------------------------
uri | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
uri_normalize | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
uri_scheme | http
uri_userinfo | _null_
uri_host | www.postgresql.org
Expand All @@ -76,6 +87,7 @@ uri_query | _null_
uri_fragment | XFUNC-SQL-FUNCTION-ARGUMENTS
-[ RECORD 3 ]--+----------------------------------------------------------------------------------------
uri | https://duckduckgo.com/?q=postgresql&ia=about
uri_normalize | https://duckduckgo.com/?q=postgresql&ia=about
uri_scheme | https
uri_userinfo | _null_
uri_host | duckduckgo.com
Expand All @@ -87,6 +99,7 @@ uri_query | q=postgresql&ia=about
uri_fragment | _null_
-[ RECORD 4 ]--+----------------------------------------------------------------------------------------
uri | ftp://ftp.gnu.org/gnu/bison
uri_normalize | ftp://ftp.gnu.org/gnu/bison
uri_scheme | ftp
uri_userinfo | _null_
uri_host | ftp.gnu.org
Expand All @@ -98,6 +111,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 5 ]--+----------------------------------------------------------------------------------------
uri | mailto:[email protected]
uri_normalize | mailto:[email protected]
uri_scheme | mailto
uri_userinfo | _null_
uri_host | _null_
Expand All @@ -109,6 +123,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 6 ]--+----------------------------------------------------------------------------------------
uri | ssh://[email protected]:29418/openstack/nova.git
uri_normalize | ssh://[email protected]:29418/openstack/nova.git
uri_scheme | ssh
uri_userinfo | username
uri_host | review.openstack.org
Expand All @@ -120,6 +135,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 7 ]--+----------------------------------------------------------------------------------------
uri | http://admin:[email protected]
uri_normalize | http://admin:[email protected]
uri_scheme | http
uri_userinfo | admin:password
uri_host | 192.168.0.1
Expand All @@ -131,6 +147,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 8 ]--+----------------------------------------------------------------------------------------
uri | http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
uri_normalize | http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:80/index.html
uri_scheme | http
uri_userinfo | _null_
uri_host | FEDC:BA98:7654:3210:FEDC:BA98:7654:3210
Expand All @@ -142,6 +159,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 9 ]--+----------------------------------------------------------------------------------------
uri | http://[1080::8:800:200C:417A]/foo
uri_normalize | http://[1080:0000:0000:0000:0008:0800:200c:417a]/foo
uri_scheme | http
uri_userinfo | _null_
uri_host | 1080::8:800:200C:417A
Expand All @@ -153,6 +171,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 10 ]-+----------------------------------------------------------------------------------------
uri | http://host:
uri_normalize | http://host:
uri_scheme | http
uri_userinfo | _null_
uri_host | host
Expand All @@ -164,6 +183,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 11 ]-+----------------------------------------------------------------------------------------
uri |
uri_normalize |
uri_scheme | _null_
uri_userinfo | _null_
uri_host | _null_
Expand All @@ -175,6 +195,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 12 ]-+----------------------------------------------------------------------------------------
uri | /
uri_normalize | /
uri_scheme | _null_
uri_userinfo | _null_
uri_host | _null_
Expand All @@ -186,6 +207,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 13 ]-+----------------------------------------------------------------------------------------
uri | foobar
uri_normalize | foobar
uri_scheme | _null_
uri_userinfo | _null_
uri_host | _null_
Expand All @@ -197,6 +219,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 14 ]-+----------------------------------------------------------------------------------------
uri | /foobar
uri_normalize | /foobar
uri_scheme | _null_
uri_userinfo | _null_
uri_host | _null_
Expand All @@ -206,6 +229,42 @@ uri_path | /foobar
uri_path_array | {foobar}
uri_query | _null_
uri_fragment | _null_
-[ RECORD 15 ]-+----------------------------------------------------------------------------------------
uri | HTTP://www.EXAMPLE.com/
uri_normalize | http://www.example.com/
uri_scheme | HTTP
uri_userinfo | _null_
uri_host | www.EXAMPLE.com
uri_host_inet | _null_
uri_port | _null_
uri_path | /
uri_path_array | {""}
uri_query | _null_
uri_fragment | _null_
-[ RECORD 16 ]-+----------------------------------------------------------------------------------------
uri | http://www.ex%41mple.com/
uri_normalize | http://www.example.com/
uri_scheme | http
uri_userinfo | _null_
uri_host | www.ex%41mple.com
uri_host_inet | _null_
uri_port | _null_
uri_path | /
uri_path_array | {""}
uri_query | _null_
uri_fragment | _null_
-[ RECORD 17 ]-+----------------------------------------------------------------------------------------
uri | eXAMPLE://a/./b/../b/%63/%7bfoo%7d
uri_normalize | example://a/b/c/%7Bfoo%7D
uri_scheme | eXAMPLE
uri_userinfo | _null_
uri_host | a
uri_host_inet | _null_
uri_port | _null_
uri_path | /./b/../b/%63/%7bfoo%7d
uri_path_array | {.,b,..,b,%63,%7bfoo%7d}
uri_query | _null_
uri_fragment | _null_

\x off
SELECT DISTINCT b FROM test ORDER BY b;
Expand All @@ -214,16 +273,19 @@ SELECT DISTINCT b FROM test ORDER BY b;

/
/foobar
HTTP://www.EXAMPLE.com/
eXAMPLE://a/./b/../b/%63/%7bfoo%7d
foobar
ftp://ftp.gnu.org/gnu/bison
http://[1080::8:800:200C:417A]/foo
http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
http://admin:[email protected]
http://host:
http://www.ex%41mple.com/
http://www.postgresql.org/
http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
https://duckduckgo.com/?q=postgresql&ia=about
mailto:[email protected]
ssh://[email protected]:29418/openstack/nova.git
(14 rows)
(17 rows)

7 changes: 7 additions & 0 deletions test/sql/test.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ VALUES ('http://www.postgresql.org/'),
('foobar'),
('/foobar');

-- normalization test values from <https://tools.ietf.org/html/rfc3986#section-6.2.2>
INSERT INTO test (b)
VALUES ('HTTP://www.EXAMPLE.com/'),
('http://www.ex%41mple.com/'),
('eXAMPLE://a/./b/../b/%63/%7bfoo%7d');

SELECT * FROM test;

-- error cases
Expand All @@ -28,6 +34,7 @@ SELECT uri 'http://host:port/';

\x on
SELECT b AS uri,
uri_normalize(b),
uri_scheme(b),
uri_userinfo(b),
uri_host(b),
Expand Down
29 changes: 29 additions & 0 deletions uri.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,35 @@ uri_path_array(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
}

/*
 * uri_normalize(uri) returns uri
 *
 * SQL-callable function: parses the argument, applies uriparser's
 * syntax-based normalization (uriNormalizeSyntaxA) and returns the
 * re-serialized result as a new uri value.
 *
 * Raises an ERROR if normalization or serialization reports a failure.
 * elog(ERROR) does not return control to this function, and the members
 * of "uri" are managed by uriparser (released via uriFreeUriMembersA),
 * so they are freed explicitly before every error is raised as well as
 * on the success path.
 */
PG_FUNCTION_INFO_V1(uri_normalize);
Datum
uri_normalize(PG_FUNCTION_ARGS)
{
	Datum		arg = PG_GETARG_DATUM(0);
	char	   *s = TextDatumGetCString(arg);
	UriUriA		uri;
	int			rc;
	int			charsRequired;
	char	   *ret;

	/*
	 * NOTE(review): parse_uri() is defined elsewhere in this file; it is
	 * presumed to either fill "uri" or raise an error itself -- confirm.
	 */
	parse_uri(s, &uri);

	if ((rc = uriNormalizeSyntaxA(&uri)) != URI_SUCCESS)
	{
		uriFreeUriMembersA(&uri);
		elog(ERROR, "uriNormalizeSyntaxA() failed: error code %d", rc);
	}

	if ((rc = uriToStringCharsRequiredA(&uri, &charsRequired)) != URI_SUCCESS)
	{
		uriFreeUriMembersA(&uri);
		elog(ERROR, "uriToStringCharsRequiredA() failed: error code %d", rc);
	}
	/* the reported length excludes the terminating zero byte */
	charsRequired++;

	ret = palloc(charsRequired);
	if ((rc = uriToStringA(ret, &uri, charsRequired, NULL)) != URI_SUCCESS)
	{
		uriFreeUriMembersA(&uri);
		elog(ERROR, "uriToStringA() failed: error code %d", rc);
	}

	/* "ret" now holds its own copy of the text; the parsed form can go */
	uriFreeUriMembersA(&uri);

	PG_RETURN_URI_P((uritype *) cstring_to_text(ret));
}

static int
cmp_text_range(UriTextRangeA a, UriTextRangeA b)
{
Expand Down
7 changes: 7 additions & 0 deletions uri.sql
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ CREATE FUNCTION uri_path_array(uri) RETURNS text[]
AS '$libdir/uri';


-- uri_normalize(uri): returns the syntax-normalized form of a uri value.
-- Implemented in C in the uri shared library.
CREATE FUNCTION uri_normalize(uri)
    RETURNS uri
    LANGUAGE C
    STRICT
    IMMUTABLE
    AS '$libdir/uri';


CREATE FUNCTION uri_lt(uri, uri) RETURNS boolean
IMMUTABLE
STRICT
Expand Down

0 comments on commit 8a02974

Please sign in to comment.