From 72bcabf9e138f1e90dc80507a991c2c68270145d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rimas=20Misevi=C4=8Dius?= Date: Wed, 11 Oct 2023 00:13:06 +0300 Subject: [PATCH] Add tests for the IDNToUnicode function --- src/url_idna.cpp | 24 ++++++++++++++++++++---- test/test-url_host.cpp | 42 +++++++++++++++++++++++++++++++++++------- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/src/url_idna.cpp b/src/url_idna.cpp index cc16ec8..5d711f6 100644 --- a/src/url_idna.cpp +++ b/src/url_idna.cpp @@ -35,7 +35,7 @@ const UIDNA* getUIDNA() { UErrorCode err = U_ZERO_ERROR; // https://url.spec.whatwg.org/#idna // UseSTD3ASCIIRules = false - // Nontransitional_Processing + // Transitional_Processing = false // CheckBidi = true // CheckJoiners = true uidna_ptr = uidna_openUTS46( @@ -93,7 +93,7 @@ namespace icu { validation_errc IDNToASCII(const char16_t* src, std::size_t src_len, simple_buffer<char16_t>& output) { // https://url.spec.whatwg.org/#concept-domain-to-ascii // https://www.unicode.org/reports/tr46/#ToASCII - static const uint32_t UIDNA_ERR_MASK = ~static_cast<uint32_t>( + static constexpr uint32_t UIDNA_ERR_MASK = ~static_cast<uint32_t>( // VerifyDnsLength = false UIDNA_ERROR_EMPTY_LABEL | UIDNA_ERROR_LABEL_TOO_LONG @@ -152,6 +152,21 @@ validation_errc IDNToASCII(const char16_t* src, std::size_t src_len, simple_buff // TODO: common function template for IDNToASCII and IDNToUnicode validation_errc IDNToUnicode(const char* src, std::size_t src_len, simple_buffer<char>& output) { +#if 0 + // https://url.spec.whatwg.org/#concept-domain-to-unicode + // https://www.unicode.org/reports/tr46/#ToUnicode + static constexpr uint32_t UIDNA_ERR_MASK = ~static_cast<uint32_t>( + // VerifyDnsLength = false + UIDNA_ERROR_EMPTY_LABEL + | UIDNA_ERROR_LABEL_TOO_LONG + | UIDNA_ERROR_DOMAIN_NAME_TOO_LONG + // CheckHyphens = false + | UIDNA_ERROR_LEADING_HYPHEN + | UIDNA_ERROR_TRAILING_HYPHEN + | UIDNA_ERROR_HYPHEN_3_4 + ); +#endif + // uidna_nameToUnicodeUTF8 uses int32_t length //
https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/uidna_8h.html#afd9ae1e0ae5318e20c87bcb0149c3ada if (src_len > util::unsigned_limit<int32_t>::max()) @@ -171,11 +186,12 @@ validation_errc IDNToUnicode(const char* src, std::size_t src_len, simple_buffer &info, &err); if (U_SUCCESS(err)) { output.resize(output_length); + // https://url.spec.whatwg.org/#concept-domain-to-unicode + // TODO: Signify domain-to-Unicode validation errors for any returned errors (i.e. + // if (info.errors & UIDNA_ERR_MASK) != 0), and then, return result. return validation_errc::ok; } - // https://url.spec.whatwg.org/#concept-domain-to-unicode - // TODO: Signify domain-to-Unicode validation errors for any returned errors, and then, return result. if (err != U_BUFFER_OVERFLOW_ERROR) return validation_errc::domain_to_unicode; diff --git a/test/test-url_host.cpp b/test/test-url_host.cpp index 2bd809d..b0bb120 100644 --- a/test/test-url_host.cpp +++ b/test/test-url_host.cpp @@ -24,7 +24,16 @@ class host_out : public upa::host_output { } }; -TEST_SUITE("host_parser::parse_host (isNotSpecial = true)") { +static std::string long_host() { + // Host length = 10 + 102 * 10 = 1030 > 1024 (simple_buffer's fixed buffer length) + // Use "xn--" label to avoid ASCII fast path + std::string strHost = "xn--2da.90"; + for (int i = 0; i < 102; ++i) + strHost.append(".bcde12345"); + return strHost; +} + +TEST_SUITE("host_parser::parse_host (is_opaque = true)") { TEST_CASE("HostType::Empty") { const std::string strHost = ""; host_out out; @@ -53,7 +62,7 @@ TEST_SUITE("host_parser::parse_host (isNotSpecial = true)") { } } -TEST_SUITE("host_parser::parse_host (isNotSpecial = false)") { +TEST_SUITE("host_parser::parse_host (is_opaque = false)") { TEST_CASE("HostType::Empty") { const std::string strHost = ""; host_out out; @@ -73,11 +82,7 @@ TEST_SUITE("host_parser::parse_host (isNotSpecial = false)") { } TEST_CASE("HostType::Domain with long host") { - // Host length = 10 + 102 * 10 = 1030 > 1024 (simple_buffer's fixed
buffer length) - // Use "xn--" label to avoid ASCII fast path - std::string strHost = "xn--2da.90"; - for (int i = 0; i < 102; ++i) - strHost.append(".bcde12345"); + std::string strHost = long_host(); host_out out; REQUIRE(upa::host_parser::parse_host(strHost.data(), strHost.data() + strHost.length(), false, out) == upa::validation_errc::ok); @@ -241,3 +246,26 @@ TEST_SUITE("url_host") { CHECK(hm.type() == upa::HostType::IPv4); } } + + +// Test upa::IDNToUnicode function + +TEST_SUITE("IDNToUnicode") { + TEST_CASE("Valid input") { + upa::simple_buffer<char> output; + CHECK(upa::IDNToUnicode("abc", 3, output) == upa::validation_errc::ok); + CHECK(upa::string_view(output.data(), output.size()) == "abc"); + } + + TEST_CASE("Valid long input") { + const std::string input = long_host(); + upa::simple_buffer<char> output; + CHECK(upa::IDNToUnicode(input.data(), input.length(), output) == upa::validation_errc::ok); + } + + TEST_CASE("Invalid input") { + upa::simple_buffer<char> output; + // IDNA errors are not failures for this function, so it returns `ok` + CHECK(upa::IDNToUnicode("xn--a.op", 8, output) == upa::validation_errc::ok); + } +}