From 4bc971158b22f6b0d99420a46991b4a5d9f92fdd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rimas=20Misevi=C4=8Dius?=
Date: Tue, 7 May 2024 20:22:08 +0300
Subject: [PATCH] Use ICU's C API only

Replace the C++ ICU function icu::toUCharPtr with the locally implemented to_UChar_ptr function.
---
 LICENSE              |  4 ++--
 include/upa/config.h | 14 +++++++++++++-
 src/url_idna.cpp     | 32 +++++++++++++++++++-------------
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/LICENSE b/LICENSE
index b7abc1f..f2a3b9d 100644
--- a/LICENSE
+++ b/LICENSE
@@ -59,8 +59,8 @@ project licensed as follows:
 
 -------------------------------------------------------------------------------
 
-Files url_utf.cpp, url_utf.h contains portions of modified code from the ICU
-project licensed as follows:
+Files config.h, url_utf.cpp, url_utf.h contains portions of modified code from
+the ICU project licensed as follows:
 
 UNICODE LICENSE V3
 
diff --git a/include/upa/config.h b/include/upa/config.h
index f225287..f314acd 100644
--- a/include/upa/config.h
+++ b/include/upa/config.h
@@ -1,4 +1,4 @@
-// Copyright 2016-2023 Rimas Misevičius
+// Copyright 2016-2024 Rimas Misevičius
 // Distributed under the BSD-style license that can be
 // found in the LICENSE file.
 //
@@ -39,4 +39,16 @@
 # define UPA_CONSTEXPR_14 inline
 #endif
 
+// Barrier for pointer anti-aliasing optimizations even across function boundaries.
+// This is a slightly modified U_ALIASING_BARRIER macro from the char16ptr.h file
+// of the ICU 75.1 library.
+// Discussion: https://github.com/sg16-unicode/sg16/issues/67
+#ifndef UPA_ALIASING_BARRIER
+# if defined(__clang__) || defined(__GNUC__)
+# define UPA_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory"); // NOLINT(*-macro-*)
+# else
+# define UPA_ALIASING_BARRIER(ptr)
+# endif
+#endif
+
 #endif // UPA_CONFIG_H
diff --git a/src/url_idna.cpp b/src/url_idna.cpp
index dac17d9..07b5362 100644
--- a/src/url_idna.cpp
+++ b/src/url_idna.cpp
@@ -1,4 +1,4 @@
-// Copyright 2016-2023 Rimas Misevičius
+// Copyright 2016-2024 Rimas Misevičius
 // Distributed under the BSD-style license that can be
 // found in the LICENSE file.
 //
@@ -7,15 +7,15 @@
 // Copyright 2013 The Chromium Authors. All rights reserved.
 //
 
+#include "upa/config.h"
 #include "upa/url_idna.h"
 #include "upa/util.h"
-// ICU
+// ICU: only C API is used (U_SHOW_CPLUSPLUS_API 0)
+// https://unicode-org.github.io/icu/userguide/icu4c/build.html#icu-as-a-system-level-library
+#define U_SHOW_CPLUSPLUS_API 0 // NOLINT(*-macro-*)
 #include "unicode/uchar.h" // u_getUnicodeVersion
 #include "unicode/uclean.h"
 #include "unicode/uidna.h"
-#if (U_ICU_VERSION_MAJOR_NUM) >= 59
-# include "unicode/char16ptr.h"
-#endif
 #if (U_ICU_VERSION_MAJOR_NUM) < 68
 # include
 #endif
@@ -75,15 +75,21 @@ unsigned idna_unicode_version() {
 
 // Conversion to ICU UChar
 
+namespace {
+
 static_assert(sizeof(UChar) == sizeof(char16_t), "UChar must be the same size as char16_t");
 
-#if (U_ICU_VERSION_MAJOR_NUM) < 59
-// toUCharPtr functions are defined in ICU 59
-namespace icu {
-    inline const UChar* toUCharPtr(const char16_t* p) { return reinterpret_cast<const UChar*>(p); }
-    inline UChar* toUCharPtr(char16_t* p) { return reinterpret_cast<UChar*>(p); }
+inline const UChar* to_UChar_ptr(const char16_t* p) noexcept {
+    UPA_ALIASING_BARRIER(p)
+    return reinterpret_cast<const UChar*>(p);
 }
-#endif
+
+inline UChar* to_UChar_ptr(char16_t* p) noexcept {
+    UPA_ALIASING_BARRIER(p)
+    return reinterpret_cast<UChar*>(p);
+}
+
+} // namespace
 
 // Implements the domain to ASCII algorithm
 // https://url.spec.whatwg.org/#concept-domain-to-ascii
@@ -117,8 +123,8 @@ validation_errc domain_to_ascii(const char16_t* src, std::size_t src_len, simple
         UErrorCode err = U_ZERO_ERROR;
         UIDNAInfo info = UIDNA_INFO_INITIALIZER;
         const int32_t output_length = uidna_nameToASCII(uidna,
-            icu::toUCharPtr(src), static_cast<int32_t>(src_len),
-            icu::toUCharPtr(output.data()), static_cast<int32_t>(output.capacity()),
+            to_UChar_ptr(src), static_cast<int32_t>(src_len),
+            to_UChar_ptr(output.data()), static_cast<int32_t>(output.capacity()),
             &info, &err);
         if (U_SUCCESS(err) && (info.errors & UIDNA_ERR_MASK) == 0) {
             output.resize(output_length);