From e8c2dee176a5dceaff49cb741821daa9df2968e3 Mon Sep 17 00:00:00 2001 From: mikee47 Date: Sat, 31 Aug 2024 11:19:47 +0100 Subject: [PATCH] So JSON is unicode, nothing to do with UTF8 --- Sming/Core/Data/Format/Formatter.cpp | 35 +++++++++------- Sming/Core/Data/Format/Json.cpp | 57 +-------------------------- tests/HostTests/modules/Formatter.cpp | 2 +- 3 files changed, 23 insertions(+), 71 deletions(-) diff --git a/Sming/Core/Data/Format/Formatter.cpp b/Sming/Core/Data/Format/Formatter.cpp index 1ef30a932c..546900a25a 100644 --- a/Sming/Core/Data/Format/Formatter.cpp +++ b/Sming/Core/Data/Format/Formatter.cpp @@ -57,11 +57,15 @@ unsigned escapeControls(String& value, Options options) for(auto& c : value) { if(escapeChar(c, options)) { extra += 1; // "\" + } else if(options[Option::unicode]) { + if(uint8_t(c) < 0x20 || (c & 0x80)) { + extra += 5; // "\uNNNN" + } } else if(uint8_t(c) < 0x20) { - extra += options[Option::unicode] ? 5 : 3; // "\uNNNN" or "\xnn" - } else if(options[Option::utf8] && (c & 0x80)) { + extra += 3; // "\xnn" + } else if((c & 0x80) && options[Option::utf8]) { // Characters such as £ (0xa3) are escaped to 0xc2 0xa3 in UTF-8 - extra += 1; // '\xc2' prefix + extra += 1; // 0xc2 } } if(extra == 0) { @@ -80,24 +84,25 @@ unsigned escapeControls(String& value, Options options) auto esc = escapeChar(c, options); if(esc) { *out++ = '\\'; - *out++ = esc; - } else if(c < 0x20) { - *out++ = '\\'; - if(options[Option::unicode]) { + c = esc; + } else if(options[Option::unicode]) { + if(uint8_t(c) < 0x20 || (c & 0x80)) { + *out++ = '\\'; *out++ = 'u'; *out++ = '0'; *out++ = '0'; - } else { - *out++ = 'x'; + *out++ = hexchar(uint8_t(c) >> 4); + c = hexchar(uint8_t(c) & 0x0f); } + } else if(uint8_t(c) < 0x20) { + *out++ = '\\'; + *out++ = 'x'; *out++ = hexchar(uint8_t(c) >> 4); - *out++ = hexchar(uint8_t(c) & 0x0f); - } else if(options[Option::utf8] && (c & 0x80)) { - *out++ = '\xc2'; - *out++ = c; - } else { - *out++ = c; + c = hexchar(uint8_t(c) & 0x0f); + } else if((c & 0x80) && options[Option::utf8]) { + *out++ = 0xc2; } + *out++ = c; } return extra; } diff --git a/Sming/Core/Data/Format/Json.cpp b/Sming/Core/Data/Format/Json.cpp index fd4f1210a2..522e96757a 100644 --- a/Sming/Core/Data/Format/Json.cpp +++ b/Sming/Core/Data/Format/Json.cpp @@ -17,59 +17,6 @@ namespace Format { Json json; -namespace -{ -bool IsValidUtf8(const char* str, unsigned length) -{ - if(str == nullptr) { - return true; - } - - unsigned i = 0; - while(i < length) { - char c = str[i++]; - if((c & 0x80) == 0) { - continue; - } - - if(i >= length) { - return false; // incomplete multibyte char - } - - if(c & 0x20) { - c = str[i++]; - if((c & 0xC0) != 0x80) { - return false; // malformed trail byte or out of range char - } - if(i >= length) { - return false; // incomplete multibyte char - } - } - - c = str[i++]; - if((c & 0xC0) != 0x80) { - return false; // malformed trail byte - } - } - - return true; -} - -void escapeText(String& value) -{ - escapeControls(value, Option::unicode | Option::utf8 | Option::doublequote | Option::backslash); - if(!IsValidUtf8(value.c_str(), value.length())) { - debug_w("Invalid UTF8: %s", value.c_str()); - for(unsigned i = 0; i < value.length(); ++i) { - char& c = value[i]; - if(c < 0x20 || uint8_t(c) > 127) - c = '_'; - } - } -} - -} // namespace - /* * Check for invalid characters and replace them - can break browser * operation otherwise. @@ -79,12 +26,12 @@ void escapeText(String& value) */ void Json::escape(String& value) const { - escapeText(value); + escapeControls(value, Option::unicode | Option::doublequote | Option::backslash); } void Json::quote(String& value) const { - escapeText(value); + escape(value); auto len = value.length(); if(value.setLength(len + 2)) { auto s = value.begin(); diff --git a/tests/HostTests/modules/Formatter.cpp b/tests/HostTests/modules/Formatter.cpp index 52b08ef751..0e0593f4e7 100644 --- a/tests/HostTests/modules/Formatter.cpp +++ b/tests/HostTests/modules/Formatter.cpp @@ -17,7 +17,7 @@ class FormatterTest : public TestGroup TEST_CASE("JSON") { DEFINE_FSTR_LOCAL(text1b, "A JSON\\ntest string\\twith escapes\\u0012\\u0000\\n" - "Worth \\\"maybe\\\" \xc2\xa3 0.53. Yen \xc2\xa5 5bn.") + "Worth \\\"maybe\\\" \\u00a3 0.53. Yen \\u00a5 5bn.") Serial << text1 << endl; String s(text1);