Skip to content

Commit

Permalink
Fix Json quoting and UTF-8 handling
Browse files Browse the repository at this point in the history
  • Loading branch information
mikee47 committed Aug 31, 2024
1 parent 23cd5d9 commit 1905dda
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 14 deletions.
6 changes: 6 additions & 0 deletions Sming/Core/Data/Format/Formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ unsigned escapeControls(String& value, Options options)
extra += 1; // "\"
} else if(uint8_t(c) < 0x20) {
extra += options[Option::unicode] ? 5 : 3; // "\uNNNN" or "\xnn"
} else if(options[Option::utf8] && (c & 0x80)) {
// Extended ASCII characters such as £ (0xa3) are encoded as the two bytes 0xc2 0xa3 in UTF-8
extra += 1; // '\xc2' prefix
}
}
if(extra == 0) {
Expand Down Expand Up @@ -89,6 +92,9 @@ unsigned escapeControls(String& value, Options options)
}
*out++ = hexchar(uint8_t(c) >> 4);
*out++ = hexchar(uint8_t(c) & 0x0f);
} else if(options[Option::utf8] && (c & 0x80)) {
*out++ = '\xc2';
*out++ = c;
} else {
*out++ = c;
}
Expand Down
1 change: 1 addition & 0 deletions Sming/Core/Data/Format/Formatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace Format
{
enum class Option {
unicode, //< Use unicode escapes \uNNNN, otherwise hex \xNN
utf8, ///< Convert extended ASCII to UTF8
doublequote,
singlequote,
backslash,
Expand Down
33 changes: 25 additions & 8 deletions Sming/Core/Data/Format/Json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,19 @@ bool IsValidUtf8(const char* str, unsigned length)
return true;
}

/**
 * @brief Escape a string in-place so it can be emitted as JSON text
 * @param value Text to escape; modified in-place
 *
 * Runs `escapeControls` with the unicode, utf8, doublequote and backslash options,
 * then checks the result with `IsValidUtf8`. If that check fails a warning is logged
 * and every byte outside the range 0x20..0x7f is overwritten with '_'.
 */
void escapeText(String& value)
{
	escapeControls(value, Option::unicode | Option::utf8 | Option::doublequote | Option::backslash);

	// Nothing more to do when the escaped text passes the UTF-8 check
	if(IsValidUtf8(value.c_str(), value.length())) {
		return;
	}

	debug_w("Invalid UTF8: %s", value.c_str());

	// Fallback: keep only bytes in 0x20..0x7f, substitute everything else
	for(unsigned pos = 0; pos < value.length(); ++pos) {
		char& ch = value[pos];
		// Compare as unsigned so the result does not depend on the signedness of char
		const uint8_t byte = uint8_t(ch);
		const bool keep = (byte >= 0x20) && (byte <= 0x7f);
		if(!keep) {
			ch = '_';
		}
	}
}

} // namespace

/*
Expand All @@ -66,14 +79,18 @@ bool IsValidUtf8(const char* str, unsigned length)
*/
void Json::escape(String& value) const
{
escapeControls(value, Option::unicode | Option::doublequote | Option::backslash);
if(!IsValidUtf8(value.c_str(), value.length())) {
debug_w("Invalid UTF8: %s", value.c_str());
for(unsigned i = 0; i < value.length(); ++i) {
char& c = value[i];
if(c < 0x20 || uint8_t(c) > 127)
c = '_';
}
escapeText(value);
}

/**
 * @brief Escape a string for JSON and wrap it in double quotes, in-place
 * @param value Text to quote; modified in-place
 *
 * The text is first passed through `escapeText`. The string is then grown by two
 * characters; if `setLength` reports failure the (escaped) text is left without quotes.
 */
void Json::quote(String& value) const
{
	escapeText(value);

	const auto textLength = value.length();
	if(!value.setLength(textLength + 2)) {
		return;
	}

	// Shift the existing text up by one to make room for the opening quote
	auto buffer = value.begin();
	memmove(buffer + 1, buffer, textLength);
	buffer[textLength + 1] = '"';
	buffer[0] = '"';
}

Expand Down
1 change: 1 addition & 0 deletions Sming/Core/Data/Format/Json.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Json : public Standard
{
public:
void escape(String& value) const override;
void quote(String& value) const override;

MimeType mimeType() const override
{
Expand Down
23 changes: 17 additions & 6 deletions tests/HostTests/modules/Formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,39 @@ class FormatterTest : public TestGroup

void execute() override
{
// Note: \xa3 is the single-byte extended ASCII (Latin-1) code for £ (Unicode U+00A3), not its UTF-8 encoding
DEFINE_FSTR_LOCAL(text1, "A JSON\ntest string\twith escapes\x12\0\n"
"Worth \"maybe\" \xc2\xa3"
"0.53.")
"Worth \"maybe\" \xa3 0.53. Yen \xa5 5bn.")

TEST_CASE("JSON")
{
DEFINE_FSTR_LOCAL(text1b, "A JSON\\ntest string\\twith escapes\\u0012\\u0000\\n"
"Worth \\\"maybe\\\" \xc2\xa3"
"0.53.")
"Worth \\\"maybe\\\" \xc2\xa3 0.53. Yen \xc2\xa5 5bn.")

Serial << text1 << endl;
String s(text1);
Format::json.escape(s);
REQUIRE_EQ(s, text1b);

s = text1;
Format::json.quote(s);
String quoted = String('"') + text1b + '"';
REQUIRE_EQ(s, quoted);
}

TEST_CASE("C++")
{
DEFINE_FSTR_LOCAL(text1b, "A JSON\\ntest string\\twith escapes\\x12\\0\\nWorth \\\"maybe\\\" £0.53.")

DEFINE_FSTR_LOCAL(
text1a, "A JSON\\ntest string\\twith escapes\\x12\\0\\nWorth \\\"maybe\\\" \xa3 0.53. Yen \xa5 5bn.")
String s(text1);
Format::escapeControls(s, Format::Option::doublequote | Format::Option::backslash);
REQUIRE_EQ(s, text1a);

DEFINE_FSTR_LOCAL(
text1b,
"A JSON\\ntest string\\twith escapes\\x12\\0\\nWorth \\\"maybe\\\" \xc2\xa3 0.53. Yen \xc2\xa5 5bn.")
s = text1;
Format::escapeControls(s, Format::Option::utf8 | Format::Option::doublequote | Format::Option::backslash);
REQUIRE_EQ(s, text1b);
}
}
Expand Down

0 comments on commit 1905dda

Please sign in to comment.