diff --git a/Sming/Core/Data/Format/Formatter.cpp b/Sming/Core/Data/Format/Formatter.cpp index f519306f6f..546900a25a 100644 --- a/Sming/Core/Data/Format/Formatter.cpp +++ b/Sming/Core/Data/Format/Formatter.cpp @@ -12,26 +12,25 @@ #include "Formatter.h" -namespace +namespace Format { /** * @brief Get character used for standard escapes * @param c Code to be escaped + * @param options * @retval char Corresponding character, NUL if there isn't a standard escape */ -char escapeChar(char c) +char escapeChar(char c, Options options) { switch(c) { case '\0': - return '0'; - case '\'': - return '\''; + return options[Option::unicode] ? '\0' : '0'; case '\"': - return '"'; - case '\?': - return '?'; + return options[Option::doublequote] ? c : '\0'; + case '\'': + return options[Option::singlequote] ? c : '\0'; case '\\': - return '\\'; + return options[Option::backslash] ? c : '\0'; case '\a': return 'a'; case '\b': @@ -51,19 +50,22 @@ char escapeChar(char c) } } -} // namespace - -namespace Format -{ -unsigned escapeControls(String& value) +unsigned escapeControls(String& value, Options options) { // Count number of extra characters we'll need to insert unsigned extra{0}; for(auto& c : value) { - if(escapeChar(c)) { + if(escapeChar(c, options)) { extra += 1; // "\" + } else if(options[Option::unicode]) { + if(uint8_t(c) < 0x20 || (c & 0x80)) { + extra += 5; // "\uNNNN" + } } else if(uint8_t(c) < 0x20) { extra += 3; // "\xnn" + } else if((c & 0x80) && options[Option::utf8]) { + // Characters such as £ (0xa3) are escaped to 0xc2 0xa3 in UTF-8 + extra += 1; // 0xc2 } } if(extra == 0) { @@ -79,18 +81,28 @@ unsigned escapeControls(String& value) in += extra; while(len--) { uint8_t c = *in++; - auto esc = escapeChar(c); + auto esc = escapeChar(c, options); if(esc) { *out++ = '\\'; - *out++ = esc; - } else if(c < 0x20) { + c = esc; + } else if(options[Option::unicode]) { + if(uint8_t(c) < 0x20 || (c & 0x80)) { + *out++ = '\\'; + *out++ = 'u'; + *out++ = '0'; + *out++ = '0'; + *out++ = hexchar(uint8_t(c) >> 4); + c = hexchar(uint8_t(c) & 0x0f); + } + } else if(uint8_t(c) < 0x20) { *out++ = '\\'; *out++ = 'x'; *out++ = hexchar(uint8_t(c) >> 4); - *out++ = hexchar(uint8_t(c) & 0x0f); - } else { - *out++ = c; + c = hexchar(uint8_t(c) & 0x0f); + } else if((c & 0x80) && options[Option::utf8]) { + *out++ = 0xc2; } + *out++ = c; } return extra; } diff --git a/Sming/Core/Data/Format/Formatter.h b/Sming/Core/Data/Format/Formatter.h index 9b45322165..45e59aeb84 100644 --- a/Sming/Core/Data/Format/Formatter.h +++ b/Sming/Core/Data/Format/Formatter.h @@ -14,15 +14,26 @@ #include #include +#include namespace Format { +enum class Option { + unicode, //< Use unicode escapes \uNNNN, otherwise hex \xNN + utf8, ///< Convert extended ASCII to UTF8 + doublequote, + singlequote, + backslash, +}; +using Options = BitSet; + /** * @brief Escape standard control codes such as `\n` (below ASCII 0x20) * @param value String to be modified + * @param options * @retval unsigned Number of control characters found and replaced */ -unsigned escapeControls(String& value); +unsigned escapeControls(String& value, Options options); /** * @brief Virtual class to perform format-specific String adjustments diff --git a/Sming/Core/Data/Format/Json.cpp b/Sming/Core/Data/Format/Json.cpp index 536bb017df..522e96757a 100644 --- a/Sming/Core/Data/Format/Json.cpp +++ b/Sming/Core/Data/Format/Json.cpp @@ -17,46 +17,6 @@ namespace Format { Json json; -namespace -{ -bool IsValidUtf8(const char* str, unsigned length) -{ - if(str == nullptr) { - return true; - } - - unsigned i = 0; - while(i < length) { - char c = str[i++]; - if((c & 0x80) == 0) { - continue; - } - - if(i >= length) { - return false; // incomplete multibyte char - } - - if(c & 0x20) { - c = str[i++]; - if((c & 0xC0) != 0x80) { - return false; // malformed trail byte or out of range char - } - if(i >= length) { - return false; // incomplete multibyte char - } - } - - c = str[i++]; - if((c & 0xC0) != 0x80) { - return false; // malformed trail byte - } - } - - return true; -} - -} // namespace - /* * Check for invalid characters and replace them - can break browser * operation otherwise. @@ -66,17 +26,19 @@ bool IsValidUtf8(const char* str, unsigned length) */ void Json::escape(String& value) const { - escapeControls(value); - if(!IsValidUtf8(value.c_str(), value.length())) { - debug_w("Invalid UTF8: %s", value.c_str()); - for(unsigned i = 0; i < value.length(); ++i) { - char& c = value[i]; - if(c < 0x20 || uint8_t(c) > 127) - c = '_'; - } - } + escapeControls(value, Option::unicode | Option::doublequote | Option::backslash); +} - value.replace("\"", "\\\""); +void Json::quote(String& value) const +{ + escape(value); + auto len = value.length(); + if(value.setLength(len + 2)) { + auto s = value.begin(); + memmove(s + 1, s, len); + s[0] = '"'; + s[len + 1] = '"'; + } } } // namespace Format diff --git a/Sming/Core/Data/Format/Json.h b/Sming/Core/Data/Format/Json.h index 2c7f35f983..4abd2a7ee4 100644 --- a/Sming/Core/Data/Format/Json.h +++ b/Sming/Core/Data/Format/Json.h @@ -20,6 +20,7 @@ class Json : public Standard { public: void escape(String& value) const override; + void quote(String& value) const override; MimeType mimeType() const override { diff --git a/tests/HostTests/modules/Formatter.cpp b/tests/HostTests/modules/Formatter.cpp index 38fbd0bbca..0e0593f4e7 100644 --- a/tests/HostTests/modules/Formatter.cpp +++ b/tests/HostTests/modules/Formatter.cpp @@ -10,17 +10,41 @@ class FormatterTest : public TestGroup void execute() override { + // Note: \xa3 is unicode for £ DEFINE_FSTR_LOCAL(text1, "A JSON\ntest string\twith escapes\x12\0\n" - "Worth maybe \xc2\xa3" - "0.53.") - DEFINE_FSTR_LOCAL(text1b, "A JSON\\ntest string\\twith escapes\\x12\\0\\n" - "Worth maybe \xc2\xa3" - "0.53.") - - Serial << text1 << endl; - String s(text1); - Format::json.escape(s); - REQUIRE_EQ(s, text1b); + "Worth \"maybe\" \xa3 0.53. Yen \xa5 5bn.") + + TEST_CASE("JSON") + { + DEFINE_FSTR_LOCAL(text1b, "A JSON\\ntest string\\twith escapes\\u0012\\u0000\\n" + "Worth \\\"maybe\\\" \\u00a3 0.53. Yen \\u00a5 5bn.") + + Serial << text1 << endl; + String s(text1); + Format::json.escape(s); + REQUIRE_EQ(s, text1b); + + s = text1; + Format::json.quote(s); + String quoted = String('"') + text1b + '"'; + REQUIRE_EQ(s, quoted); + } + + TEST_CASE("C++") + { + DEFINE_FSTR_LOCAL( + text1a, "A JSON\\ntest string\\twith escapes\\x12\\0\\nWorth \\\"maybe\\\" \xa3 0.53. Yen \xa5 5bn.") + String s(text1); + Format::escapeControls(s, Format::Option::doublequote | Format::Option::backslash); + REQUIRE_EQ(s, text1a); + + DEFINE_FSTR_LOCAL( + text1b, + "A JSON\\ntest string\\twith escapes\\x12\\0\\nWorth \\\"maybe\\\" \xc2\xa3 0.53. Yen \xc2\xa5 5bn.") + s = text1; + Format::escapeControls(s, Format::Option::utf8 | Format::Option::doublequote | Format::Option::backslash); + REQUIRE_EQ(s, text1b); + } } };