From 7370ec7ae861fc1e090a0971180f12dfec0b1175 Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Tue, 12 Sep 2023 08:50:15 -0700 Subject: [PATCH 1/3] AK: Implement `URL::serialize_path()` to spec This commit also reverts db5ad0c since code outside of the web spec expects serialized paths to be percent decoded. Also, there are issues trying to implement the concept "opaque path". For now, we still use the old cannot_be_a_base_url(), but its usage needs to be removed in favor of a has_opaque_path() as the spec has changed since then. --- AK/URL.cpp | 21 +++++++++++++++------ AK/URL.h | 6 +++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/AK/URL.cpp b/AK/URL.cpp index 6bc43c4af3fc02..de14fa6110393f 100644 --- a/AK/URL.cpp +++ b/AK/URL.cpp @@ -260,16 +260,25 @@ bool URL::is_special_scheme(StringView scheme) return scheme.is_one_of("ftp", "file", "http", "https", "ws", "wss"); } -DeprecatedString URL::serialize_path() const +// https://url.spec.whatwg.org/#url-path-serializer +DeprecatedString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) const { + // If url has an opaque path, then return url’s path. + // FIXME: Reimplement this step once we modernize the URL implementation to meet the spec. if (cannot_be_a_base_url()) return m_paths[0]; - StringBuilder builder; - for (auto& path : m_paths) { - builder.append('/'); - builder.append(percent_decode(path)); + + // Let output be the empty string. + StringBuilder output; + + // For each segment of url’s path: append U+002F (/) followed by segment to output. + for (auto const& segment : m_paths) { + output.append('/'); + output.append(apply_percent_decoding == ApplyPercentDecoding::Yes ? percent_decode(segment) : segment); } - return builder.to_deprecated_string(); + + // Return output. + return output.to_deprecated_string(); } // https://url.spec.whatwg.org/#concept-url-serializer diff --git a/AK/URL.h b/AK/URL.h index 01221b300483df..8d77a676611756 100644 --- a/AK/URL.h +++ b/AK/URL.h @@ -107,7 +107,11 @@ class URL { m_paths.append(""); } - DeprecatedString serialize_path() const; + enum class ApplyPercentDecoding { + Yes, + No + }; + DeprecatedString serialize_path(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const; DeprecatedString serialize(ExcludeFragment = ExcludeFragment::No) const; DeprecatedString serialize_for_display() const; DeprecatedString to_deprecated_string() const { return serialize(); } From c18967bae3f81fa29a24a1011339140ccac1867c Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Tue, 12 Sep 2023 10:38:14 -0700 Subject: [PATCH 2/3] LibWeb/URL: Don't have `URL::pathname()` perform percent decode Since the spec expects us to use AK::URL::serialize_path() without doing any percent decoding. --- Userland/Libraries/LibWeb/URL/URL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Userland/Libraries/LibWeb/URL/URL.cpp b/Userland/Libraries/LibWeb/URL/URL.cpp index 28dde4d2eb6a93..f4961ae02ddf56 100644 --- a/Userland/Libraries/LibWeb/URL/URL.cpp +++ b/Userland/Libraries/LibWeb/URL/URL.cpp @@ -353,7 +353,7 @@ WebIDL::ExceptionOr URL::pathname() const auto& vm = realm().vm(); // The pathname getter steps are to return the result of URL path serializing this’s URL. - return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize_path())); + return TRY_OR_THROW_OOM(vm, String::from_deprecated_string(m_url.serialize_path(AK::URL::ApplyPercentDecoding::No))); } // https://url.spec.whatwg.org/#ref-for-dom-url-pathname%E2%91%A0 From c30ee788dd5c1b81a52b23dfa11df9230ea40196 Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Sun, 10 Sep 2023 16:50:29 -0700 Subject: [PATCH 3/3] LibWeb/URL: Add `strip_trailing_spaces_from_an_opaque_path()` Also remove 2 FIXMEs by including this function. --- ...strip_trailing_spaces_from_opaque_path.txt | 4 +++ ...trip_trailing_spaces_from_opaque_path.html | 14 +++++++++ Userland/Libraries/LibWeb/URL/URL.cpp | 29 +++++++++++++++++-- Userland/Libraries/LibWeb/URL/URL.h | 13 +++++++++ 4 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 Tests/LibWeb/Text/expected/URL/strip_trailing_spaces_from_opaque_path.txt create mode 100644 Tests/LibWeb/Text/input/URL/strip_trailing_spaces_from_opaque_path.html diff --git a/Tests/LibWeb/Text/expected/URL/strip_trailing_spaces_from_opaque_path.txt b/Tests/LibWeb/Text/expected/URL/strip_trailing_spaces_from_opaque_path.txt new file mode 100644 index 00000000000000..817503998b0637 --- /dev/null +++ b/Tests/LibWeb/Text/expected/URL/strip_trailing_spaces_from_opaque_path.txt @@ -0,0 +1,4 @@ +pathname => 'foobar ' +pathname => 'foobar' +pathname => 'baz ' +pathname => 'baz' diff --git a/Tests/LibWeb/Text/input/URL/strip_trailing_spaces_from_opaque_path.html b/Tests/LibWeb/Text/input/URL/strip_trailing_spaces_from_opaque_path.html new file mode 100644 index 00000000000000..c8c4dce7423d12 --- /dev/null +++ b/Tests/LibWeb/Text/input/URL/strip_trailing_spaces_from_opaque_path.html @@ -0,0 +1,14 @@ + + diff --git a/Userland/Libraries/LibWeb/URL/URL.cpp b/Userland/Libraries/LibWeb/URL/URL.cpp index f4961ae02ddf56..c03ae086f61c8e 100644 --- a/Userland/Libraries/LibWeb/URL/URL.cpp +++ b/Userland/Libraries/LibWeb/URL/URL.cpp @@ -403,7 +403,8 @@ WebIDL::ExceptionOr URL::set_search(String const& search) // 2. Empty this’s query object’s list. m_query->m_list.clear(); - // FIXME: 3. Potentially strip trailing spaces from an opaque path with this. + // 3. Potentially strip trailing spaces from an opaque path with this. + strip_trailing_spaces_from_an_opaque_path(*this); // 4. Return. return {}; @@ -457,7 +458,8 @@ void URL::set_hash(String const& hash) // 1. Set this’s URL’s fragment to null. m_url.set_fragment({}); - // FIXME: 2. Potentially strip trailing spaces from an opaque path with this. + // 2. Potentially strip trailing spaces from an opaque path with this. + strip_trailing_spaces_from_an_opaque_path(*this); // 3. Return. return; @@ -535,6 +537,29 @@ bool host_is_domain(AK::URL::Host const& host) return host.has() && host.get() != String {}; } +// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path +void strip_trailing_spaces_from_an_opaque_path(URL& url) +{ + // 1. If url’s URL does not have an opaque path, then return. + // FIXME: Reimplement this step once we modernize the URL implementation to meet the spec. + if (!url.cannot_be_a_base_url()) + return; + + // 2. If url’s URL’s fragment is non-null, then return. + if (url.fragment().has_value()) + return; + + // 3. If url’s URL’s query is non-null, then return. + if (url.query().has_value()) + return; + + // 4. Remove all trailing U+0020 SPACE code points from url’s URL’s path. + // NOTE: At index 0 since the first step tells us that the URL only has one path segment. + auto opaque_path = url.path_segment_at_index(0); + auto trimmed_path = opaque_path.trim(" "sv, TrimMode::Right); + url.set_paths({ trimmed_path }); +} + // https://url.spec.whatwg.org/#concept-url-parser AK::URL parse(StringView input, Optional const& base_url) { diff --git a/Userland/Libraries/LibWeb/URL/URL.h b/Userland/Libraries/LibWeb/URL/URL.h index ca67ebc0d5edf0..e3fd2f2e522016 100644 --- a/Userland/Libraries/LibWeb/URL/URL.h +++ b/Userland/Libraries/LibWeb/URL/URL.h @@ -55,6 +55,15 @@ class URL : public Bindings::PlatformObject { WebIDL::ExceptionOr pathname() const; void set_pathname(String const&); + Optional const& fragment() const { return m_url.fragment(); } + + DeprecatedString path_segment_at_index(size_t index) const { return m_url.path_segment_at_index(index); } + + void set_paths(Vector const& paths) { return m_url.set_paths(paths); } + + // FIXME: Reimplement this to meet the definition in https://url.spec.whatwg.org/#url-opaque-path once we modernize URL to meet the spec. + bool cannot_be_a_base_url() const { return m_url.cannot_be_a_base_url(); } + WebIDL::ExceptionOr search() const; WebIDL::ExceptionOr set_search(String const&); @@ -65,6 +74,7 @@ class URL : public Bindings::PlatformObject { WebIDL::ExceptionOr to_json() const; + Optional const& query() const { return m_url.query(); } void set_query(Badge, Optional query) { m_url.set_query(move(query)); } private: @@ -80,6 +90,9 @@ class URL : public Bindings::PlatformObject { HTML::Origin url_origin(AK::URL const&); bool host_is_domain(AK::URL::Host const&); +// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path +void strip_trailing_spaces_from_an_opaque_path(URL& url); + // https://url.spec.whatwg.org/#concept-url-parser AK::URL parse(StringView input, Optional const& base_url = {});