Skip to content

Commit

Permalink
Merge pull request #8559 from xokdvium/dev/fix-char-traits-utf32
Browse files: browse the repository at this point in the history
util: Replace std::basic_string<unsigned> with std::basic_string<char32_t>
  • Loading branch information
tautschnig authored Jan 7, 2025
2 parents 5ae1452 + 684bf42 commit 36b2335
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 23 deletions.
4 changes: 2 additions & 2 deletions src/ansi-c/literals/convert_character_literal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ exprt convert_character_literal(
PRECONDITION(src[1] == '\'');
PRECONDITION(src[src.size() - 1] == '\'');

std::basic_string<unsigned int> value=
unescape_wide_string(std::string(src, 2, src.size()-3));
std::basic_string<char32_t> value =
unescape_wide_string(std::string(src, 2, src.size() - 3));
// the parser rejects empty character constants
CHECK_RETURN(!value.empty());

Expand Down
14 changes: 6 additions & 8 deletions src/ansi-c/literals/convert_string_literal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ Author: Daniel Kroening, [email protected]

#include "unescape_string.h"

std::basic_string<unsigned int> convert_one_string_literal(
const std::string &src)
std::basic_string<char32_t> convert_one_string_literal(const std::string &src)
{
PRECONDITION(src.size() >= 2);

Expand All @@ -28,8 +27,8 @@ std::basic_string<unsigned int> convert_one_string_literal(
PRECONDITION(src[src.size() - 1] == '"');
PRECONDITION(src[2] == '"');

std::basic_string<unsigned int> value=
unescape_wide_string(std::string(src, 3, src.size()-4));
std::basic_string<char32_t> value =
unescape_wide_string(std::string(src, 3, src.size() - 4));

// turn into utf-8
const std::string utf8_value = utf32_native_endian_to_utf8(value);
Expand Down Expand Up @@ -57,7 +56,7 @@ std::basic_string<unsigned int> convert_one_string_literal(
unescape_string(std::string(src, 1, src.size()-2));

// pad into wide string
std::basic_string<unsigned int> value;
std::basic_string<char32_t> value;
value.resize(char_value.size());
for(std::size_t i=0; i<char_value.size(); i++)
value[i]=char_value[i];
Expand All @@ -72,7 +71,7 @@ exprt convert_string_literal(const std::string &src)
// e.g., something like "asd" "xyz".
// GCC allows "asd" L"xyz"!

std::basic_string<unsigned int> value;
std::basic_string<char32_t> value;

char wide=0;

Expand Down Expand Up @@ -101,8 +100,7 @@ exprt convert_string_literal(const std::string &src)
INVARIANT(j < src.size(), "non-terminated string constant '" + src + "'");

std::string tmp_src=std::string(src, i, j-i+1);
std::basic_string<unsigned int> tmp_value=
convert_one_string_literal(tmp_src);
std::basic_string<char32_t> tmp_value = convert_one_string_literal(tmp_src);
value.append(tmp_value);
i=j;
}
Expand Down
12 changes: 5 additions & 7 deletions src/ansi-c/literals/unescape_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,16 @@ static void append_universal_char(
unsigned int value,
std::string &dest)
{
std::basic_string<unsigned int> value_str(1, value);
std::basic_string<char32_t> value_str(1, value);

// turn into utf-8
const std::string utf8_value = utf32_native_endian_to_utf8(value_str);

dest.append(utf8_value);
}

static void append_universal_char(
unsigned int value,
std::basic_string<unsigned int> &dest)
static void
append_universal_char(unsigned int value, std::basic_string<char32_t> &dest)
{
dest.push_back(value);
}
Expand Down Expand Up @@ -153,10 +152,9 @@ std::string unescape_string(const std::string &src)
return unescape_string_templ<char>(src);
}

std::basic_string<unsigned int> unescape_wide_string(
const std::string &src)
std::basic_string<char32_t> unescape_wide_string(const std::string &src)
{
return unescape_string_templ<unsigned int>(src);
return unescape_string_templ<char32_t>(src);
}

unsigned hex_to_unsigned(const char *hex, std::size_t digits)
Expand Down
2 changes: 1 addition & 1 deletion src/ansi-c/literals/unescape_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Author: Daniel Kroening, [email protected]
#include <string>

std::string unescape_string(const std::string &);
std::basic_string<unsigned int> unescape_wide_string(const std::string &);
std::basic_string<char32_t> unescape_wide_string(const std::string &);

unsigned hex_to_unsigned(const char *, std::size_t digits);
unsigned octal_to_unsigned(const char *, std::size_t digits);
Expand Down
2 changes: 1 addition & 1 deletion src/ansi-c/scanner.l
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ int make_identifier()
for(; *p!=0 && digits>0; digits--, p++);
p--; // go back for p++ later

std::basic_string<unsigned> utf32;
std::basic_string<char32_t> utf32;
utf32+=letter;

// turn into utf-8
Expand Down
3 changes: 1 addition & 2 deletions src/util/unicode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,7 @@ static void utf8_append_code(unsigned int c, std::string &result)

/// \param s: UTF-32 encoded wide string
/// \return utf8-encoded string with the same unicode characters as the input.
std::string
utf32_native_endian_to_utf8(const std::basic_string<unsigned int> &s)
std::string utf32_native_endian_to_utf8(const std::basic_string<char32_t> &s)
{
std::string result;

Expand Down
3 changes: 1 addition & 2 deletions src/util/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ std::wstring widen(const std::string &s);
# define widen_if_needed(s) (s)
#endif

std::string
utf32_native_endian_to_utf8(const std::basic_string<unsigned int> &s);
std::string utf32_native_endian_to_utf8(const std::basic_string<char32_t> &s);

/// \param utf8_str: UTF-8 string
/// \return UTF-32 encoding of the string
Expand Down

0 comments on commit 36b2335

Please sign in to comment.