Skip to content

Commit

Permalink
Add tests for the IDNToUnicode function
Browse files Browse the repository at this point in the history
  • Loading branch information
rmisev committed Oct 10, 2023
1 parent 45293e1 commit 72bcabf
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 11 deletions.
24 changes: 20 additions & 4 deletions src/url_idna.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ const UIDNA* getUIDNA() {
UErrorCode err = U_ZERO_ERROR;
// https://url.spec.whatwg.org/#idna
// UseSTD3ASCIIRules = false
// Nontransitional_Processing
// Transitional_Processing = false
// CheckBidi = true
// CheckJoiners = true
uidna_ptr = uidna_openUTS46(
Expand Down Expand Up @@ -93,7 +93,7 @@ namespace icu {
validation_errc IDNToASCII(const char16_t* src, std::size_t src_len, simple_buffer<char16_t>& output) {
// https://url.spec.whatwg.org/#concept-domain-to-ascii
// https://www.unicode.org/reports/tr46/#ToASCII
static const uint32_t UIDNA_ERR_MASK = ~static_cast<uint32_t>(
static constexpr uint32_t UIDNA_ERR_MASK = ~static_cast<uint32_t>(
// VerifyDnsLength = false
UIDNA_ERROR_EMPTY_LABEL
| UIDNA_ERROR_LABEL_TOO_LONG
Expand Down Expand Up @@ -152,6 +152,21 @@ validation_errc IDNToASCII(const char16_t* src, std::size_t src_len, simple_buff
// TODO: common function template for IDNToASCII and IDNToUnicode

validation_errc IDNToUnicode(const char* src, std::size_t src_len, simple_buffer<char>& output) {
#if 0
// https://url.spec.whatwg.org/#concept-domain-to-unicode
// https://www.unicode.org/reports/tr46/#ToUnicode
static constexpr uint32_t UIDNA_ERR_MASK = ~static_cast<uint32_t>(
// VerifyDnsLength = false
UIDNA_ERROR_EMPTY_LABEL
| UIDNA_ERROR_LABEL_TOO_LONG
| UIDNA_ERROR_DOMAIN_NAME_TOO_LONG
// CheckHyphens = false
| UIDNA_ERROR_LEADING_HYPHEN
| UIDNA_ERROR_TRAILING_HYPHEN
| UIDNA_ERROR_HYPHEN_3_4
);
#endif

// uidna_nameToUnicodeUTF8 uses int32_t length
// https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/uidna_8h.html#afd9ae1e0ae5318e20c87bcb0149c3ada
if (src_len > util::unsigned_limit<int32_t>::max())
Expand All @@ -171,11 +186,12 @@ validation_errc IDNToUnicode(const char* src, std::size_t src_len, simple_buffer
&info, &err);
if (U_SUCCESS(err)) {
output.resize(output_length);
// https://url.spec.whatwg.org/#concept-domain-to-unicode
// TODO: Signify domain-to-Unicode validation errors for any returned errors (i.e.
// if (info.errors & UIDNA_ERR_MASK) != 0), and then, return result.
return validation_errc::ok;
}

// https://url.spec.whatwg.org/#concept-domain-to-unicode
// TODO: Signify domain-to-Unicode validation errors for any returned errors, and then, return result.
if (err != U_BUFFER_OVERFLOW_ERROR)
return validation_errc::domain_to_unicode;

Check warning on line 196 in src/url_idna.cpp

View check run for this annotation

Codecov / codecov/patch

src/url_idna.cpp#L196

Added line #L196 was not covered by tests

Expand Down
42 changes: 35 additions & 7 deletions test/test-url_host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,16 @@ class host_out : public upa::host_output {
}
};

TEST_SUITE("host_parser::parse_host (isNotSpecial = true)") {
static std::string long_host() {
    // Builds a host name of 10 + 102 * 10 = 1030 characters, which is longer
    // than 1024 (simple_buffer's fixed buffer length).
    // The leading "xn--" label keeps the host off the ASCII fast path.
    static constexpr int extra_label_count = 102;
    static constexpr char extra_label[] = ".bcde12345";

    std::string host("xn--2da.90");
    for (int remaining = extra_label_count; remaining > 0; --remaining)
        host += extra_label;
    return host;
}

TEST_SUITE("host_parser::parse_host (is_opaque = true)") {
TEST_CASE("HostType::Empty") {
const std::string strHost = "";
host_out out;
Expand Down Expand Up @@ -53,7 +62,7 @@ TEST_SUITE("host_parser::parse_host (isNotSpecial = true)") {
}
}

TEST_SUITE("host_parser::parse_host (isNotSpecial = false)") {
TEST_SUITE("host_parser::parse_host (is_opaque = false)") {
TEST_CASE("HostType::Empty") {
const std::string strHost = "";
host_out out;
Expand All @@ -73,11 +82,7 @@ TEST_SUITE("host_parser::parse_host (isNotSpecial = false)") {
}

TEST_CASE("HostType::Domain with long host") {
// Host length = 10 + 102 * 10 = 1030 > 1024 (simple_buffer's fixed buffer length)
// Use "xn--" label to avoid ASCII fast path
std::string strHost = "xn--2da.90";
for (int i = 0; i < 102; ++i)
strHost.append(".bcde12345");
std::string strHost = long_host();
host_out out;

REQUIRE(upa::host_parser::parse_host(strHost.data(), strHost.data() + strHost.length(), false, out) == upa::validation_errc::ok);
Expand Down Expand Up @@ -241,3 +246,26 @@ TEST_SUITE("url_host") {
CHECK(hm.type() == upa::HostType::IPv4);
}
}


// Test upa::IDNToUnicode function

TEST_SUITE("IDNToUnicode") {
    // A short ASCII domain must convert successfully and come back unchanged.
    TEST_CASE("Valid input") {
        const char domain[] = "abc";
        upa::simple_buffer<char> result;
        CHECK(upa::IDNToUnicode(domain, sizeof(domain) - 1, result) == upa::validation_errc::ok);
        CHECK(upa::string_view(result.data(), result.size()) == "abc");
    }

    // Input produced by long_host() (longer than simple_buffer's fixed
    // buffer, per that helper's comment) must still convert successfully.
    TEST_CASE("Valid long input") {
        const std::string host = long_host();
        upa::simple_buffer<char> result;
        CHECK(upa::IDNToUnicode(host.data(), host.size(), result) == upa::validation_errc::ok);
    }

    // IDNA errors are not failures for this function, so it returns `ok`
    // even for a label that is not valid Punycode.
    TEST_CASE("Invalid input") {
        const char domain[] = "xn--a.op";
        upa::simple_buffer<char> result;
        CHECK(upa::IDNToUnicode(domain, sizeof(domain) - 1, result) == upa::validation_errc::ok);
    }
}

0 comments on commit 72bcabf

Please sign in to comment.