Skip to content

Commit

Permalink
So JSON is unicode, nothing to do with UTF8
Browse files Browse the repository at this point in the history
  • Loading branch information
mikee47 committed Aug 31, 2024
1 parent 1905dda commit e8c2dee
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 71 deletions.
35 changes: 20 additions & 15 deletions Sming/Core/Data/Format/Formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,15 @@ unsigned escapeControls(String& value, Options options)
for(auto& c : value) {
if(escapeChar(c, options)) {
extra += 1; // "\"
} else if(options[Option::unicode]) {
if(uint8_t(c) < 0x20 || (c & 0x80)) {
extra += 5; // "\uNNNN"
}
} else if(uint8_t(c) < 0x20) {
extra += options[Option::unicode] ? 5 : 3; // "\uNNNN" or "\xnn"
} else if(options[Option::utf8] && (c & 0x80)) {
extra += 3; // "\xnn"
} else if((c & 0x80) && options[Option::utf8]) {
// Characters such as £ (0xa3) are escaped to 0xc2 0xa3 in UTF-8
extra += 1; // '\xc2' prefix
extra += 1; // 0xc2
}
}
if(extra == 0) {
Expand All @@ -80,24 +84,25 @@ unsigned escapeControls(String& value, Options options)
auto esc = escapeChar(c, options);
if(esc) {
*out++ = '\\';
*out++ = esc;
} else if(c < 0x20) {
*out++ = '\\';
if(options[Option::unicode]) {
c = esc;
} else if(options[Option::unicode]) {
if(uint8_t(c) < 0x20 || (c & 0x80)) {
*out++ = '\\';
*out++ = 'u';
*out++ = '0';
*out++ = '0';
} else {
*out++ = 'x';
*out++ = hexchar(uint8_t(c) >> 4);
c = hexchar(uint8_t(c) & 0x0f);
}
} else if(uint8_t(c) < 0x20) {
*out++ = '\\';
*out++ = 'x';
*out++ = hexchar(uint8_t(c) >> 4);
*out++ = hexchar(uint8_t(c) & 0x0f);
} else if(options[Option::utf8] && (c & 0x80)) {
*out++ = '\xc2';
*out++ = c;
} else {
*out++ = c;
c = hexchar(uint8_t(c) & 0x0f);
} else if((c & 0x80) && options[Option::utf8]) {
*out++ = 0xc2;
}
*out++ = c;
}
return extra;
}
Expand Down
57 changes: 2 additions & 55 deletions Sming/Core/Data/Format/Json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,59 +17,6 @@ namespace Format
{
Json json;

namespace
{
/**
 * @brief Check whether a byte buffer holds well-formed UTF-8 (RFC 3629).
 * @param str Buffer to check; a null pointer is reported as valid
 * @param length Number of bytes in `str` to examine
 * @retval bool true if every byte sequence in the buffer is well-formed
 *
 * Rejected input:
 *  - continuation bytes (0x80..0xBF) appearing where a lead byte is expected
 *  - lead bytes which cannot start a sequence (0xF8..0xFF)
 *  - sequences truncated by the end of the buffer
 *  - trail bytes not of the form 10xxxxxx
 *  - overlong encodings, UTF-16 surrogates (U+D800..U+DFFF) and values above U+10FFFF
 */
bool IsValidUtf8(const char* str, unsigned length)
{
	if(str == nullptr) {
		return true;
	}

	unsigned i = 0;
	while(i < length) {
		const uint8_t lead = uint8_t(str[i++]);
		if(lead < 0x80) {
			continue; // plain ASCII
		}

		unsigned trailCount;
		uint32_t codepoint;
		uint32_t minValue; // smallest code point which legitimately needs this sequence length
		if((lead & 0xE0) == 0xC0) {
			trailCount = 1;
			codepoint = lead & 0x1F;
			minValue = 0x80;
		} else if((lead & 0xF0) == 0xE0) {
			trailCount = 2;
			codepoint = lead & 0x0F;
			minValue = 0x800;
		} else if((lead & 0xF8) == 0xF0) {
			trailCount = 3;
			codepoint = lead & 0x07;
			minValue = 0x10000;
		} else {
			return false; // stray continuation byte or invalid lead byte
		}

		if(length - i < trailCount) {
			return false; // incomplete multibyte char
		}

		for(; trailCount != 0; --trailCount) {
			const uint8_t trail = uint8_t(str[i++]);
			if((trail & 0xC0) != 0x80) {
				return false; // malformed trail byte
			}
			codepoint = (codepoint << 6) | (trail & 0x3F);
		}

		if(codepoint < minValue) {
			return false; // overlong encoding
		}
		if(codepoint > 0x10FFFF) {
			return false; // beyond the Unicode range
		}
		if(codepoint >= 0xD800 && codepoint <= 0xDFFF) {
			return false; // UTF-16 surrogate, not a valid scalar value
		}
	}

	return true;
}

/**
 * @brief Escape a string in place, then sanitise it if it is not valid UTF-8.
 * @param value String to modify
 *
 * Runs escapeControls() with the unicode, utf8, doublequote and backslash options.
 * If the result fails IsValidUtf8(), a warning is logged and every character
 * below 0x20 or above 127 is overwritten with '_'.
 */
void escapeText(String& value)
{
	escapeControls(value, Option::unicode | Option::utf8 | Option::doublequote | Option::backslash);
	if(IsValidUtf8(value.c_str(), value.length())) {
		return;
	}

	debug_w("Invalid UTF8: %s", value.c_str());
	for(auto& ch : value) {
		const bool isControl = ch < 0x20;
		const bool isHighBit = uint8_t(ch) > 127;
		if(isControl || isHighBit) {
			ch = '_';
		}
	}
}

} // namespace

/*
* Check for invalid characters and replace them - can break browser
* operation otherwise.
Expand All @@ -79,12 +26,12 @@ void escapeText(String& value)
*/
void Json::escape(String& value) const
{
escapeText(value);
escapeControls(value, Option::unicode | Option::doublequote | Option::backslash);
}

void Json::quote(String& value) const
{
escapeText(value);
escape(value);
auto len = value.length();
if(value.setLength(len + 2)) {
auto s = value.begin();
Expand Down
2 changes: 1 addition & 1 deletion tests/HostTests/modules/Formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class FormatterTest : public TestGroup
TEST_CASE("JSON")
{
DEFINE_FSTR_LOCAL(text1b, "A JSON\\ntest string\\twith escapes\\u0012\\u0000\\n"
"Worth \\\"maybe\\\" \xc2\xa3 0.53. Yen \xc2\xa5 5bn.")
"Worth \\\"maybe\\\" \\u00a3 0.53. Yen \\u00a5 5bn.")

Serial << text1 << endl;
String s(text1);
Expand Down

0 comments on commit e8c2dee

Please sign in to comment.