Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include unicode formatting support for JSON escapes #2880

Merged
merged 5 commits into from
Aug 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 33 additions & 21 deletions Sming/Core/Data/Format/Formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,25 @@

#include "Formatter.h"

namespace
namespace Format
{
/**
* @brief Get character used for standard escapes
* @param c Code to be escaped
* @param options
* @retval char Corresponding character, NUL if there isn't a standard escape
*/
char escapeChar(char c)
char escapeChar(char c, Options options)
{
switch(c) {
case '\0':
return '0';
case '\'':
return '\'';
return options[Option::unicode] ? '\0' : '0';
case '\"':
return '"';
case '\?':
return '?';
return options[Option::doublequote] ? c : '\0';
case '\'':
return options[Option::singlequote] ? c : '\0';
case '\\':
return '\\';
return options[Option::backslash] ? c : '\0';
case '\a':
return 'a';
case '\b':
Expand All @@ -51,19 +50,22 @@ char escapeChar(char c)
}
}

} // namespace

namespace Format
{
unsigned escapeControls(String& value)
unsigned escapeControls(String& value, Options options)
{
// Count number of extra characters we'll need to insert
unsigned extra{0};
for(auto& c : value) {
if(escapeChar(c)) {
if(escapeChar(c, options)) {
extra += 1; // "\"
} else if(options[Option::unicode]) {
if(uint8_t(c) < 0x20 || (c & 0x80)) {
extra += 5; // "\uNNNN"
}
} else if(uint8_t(c) < 0x20) {
extra += 3; // "\xnn"
} else if((c & 0x80) && options[Option::utf8]) {
// Characters such as £ (0xa3) are escaped to 0xc2 0xa3 in UTF-8
extra += 1; // 0xc2
}
}
if(extra == 0) {
Expand All @@ -79,18 +81,28 @@ unsigned escapeControls(String& value)
in += extra;
while(len--) {
uint8_t c = *in++;
auto esc = escapeChar(c);
auto esc = escapeChar(c, options);
if(esc) {
*out++ = '\\';
*out++ = esc;
} else if(c < 0x20) {
c = esc;
} else if(options[Option::unicode]) {
if(uint8_t(c) < 0x20 || (c & 0x80)) {
*out++ = '\\';
*out++ = 'u';
*out++ = '0';
*out++ = '0';
*out++ = hexchar(uint8_t(c) >> 4);
c = hexchar(uint8_t(c) & 0x0f);
}
} else if(uint8_t(c) < 0x20) {
*out++ = '\\';
*out++ = 'x';
*out++ = hexchar(uint8_t(c) >> 4);
*out++ = hexchar(uint8_t(c) & 0x0f);
} else {
*out++ = c;
c = hexchar(uint8_t(c) & 0x0f);
} else if((c & 0x80) && options[Option::utf8]) {
*out++ = 0xc2;
}
*out++ = c;
}
return extra;
}
Expand Down
13 changes: 12 additions & 1 deletion Sming/Core/Data/Format/Formatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,26 @@

#include <WString.h>
#include <Data/WebConstants.h>
#include <Data/BitSet.h>

namespace Format
{
enum class Option {
unicode, //< Use unicode escapes \uNNNN, otherwise hex \xNN
utf8, ///< Convert extended ASCII to UTF8
doublequote,
singlequote,
backslash,
};
using Options = BitSet<uint8_t, Option, 5>;

/**
* @brief Escape standard control codes such as `\n` (below ASCII 0x20)
* @param value String to be modified
* @param options
* @retval unsigned Number of control characters found and replaced
*/
unsigned escapeControls(String& value);
unsigned escapeControls(String& value, Options options);

/**
* @brief Virtual class to perform format-specific String adjustments
Expand Down
62 changes: 12 additions & 50 deletions Sming/Core/Data/Format/Json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,46 +17,6 @@ namespace Format
{
Json json;

namespace
{
bool IsValidUtf8(const char* str, unsigned length)
{
if(str == nullptr) {
return true;
}

unsigned i = 0;
while(i < length) {
char c = str[i++];
if((c & 0x80) == 0) {
continue;
}

if(i >= length) {
return false; // incomplete multibyte char
}

if(c & 0x20) {
c = str[i++];
if((c & 0xC0) != 0x80) {
return false; // malformed trail byte or out of range char
}
if(i >= length) {
return false; // incomplete multibyte char
}
}

c = str[i++];
if((c & 0xC0) != 0x80) {
return false; // malformed trail byte
}
}

return true;
}

} // namespace

/*
* Check for invalid characters and replace them - can break browser
* operation otherwise.
Expand All @@ -66,17 +26,19 @@ bool IsValidUtf8(const char* str, unsigned length)
*/
void Json::escape(String& value) const
{
escapeControls(value);
if(!IsValidUtf8(value.c_str(), value.length())) {
debug_w("Invalid UTF8: %s", value.c_str());
for(unsigned i = 0; i < value.length(); ++i) {
char& c = value[i];
if(c < 0x20 || uint8_t(c) > 127)
c = '_';
}
}
escapeControls(value, Option::unicode | Option::doublequote | Option::backslash);
}

value.replace("\"", "\\\"");
void Json::quote(String& value) const
{
escape(value);
auto len = value.length();
if(value.setLength(len + 2)) {
auto s = value.begin();
memmove(s + 1, s, len);
s[0] = '"';
s[len + 1] = '"';
}
}

} // namespace Format
1 change: 1 addition & 0 deletions Sming/Core/Data/Format/Json.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Json : public Standard
{
public:
void escape(String& value) const override;
void quote(String& value) const override;

MimeType mimeType() const override
{
Expand Down
44 changes: 34 additions & 10 deletions tests/HostTests/modules/Formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,41 @@ class FormatterTest : public TestGroup

void execute() override
{
// Note: \xa3 is unicode for £
DEFINE_FSTR_LOCAL(text1, "A JSON\ntest string\twith escapes\x12\0\n"
"Worth maybe \xc2\xa3"
"0.53.")
DEFINE_FSTR_LOCAL(text1b, "A JSON\\ntest string\\twith escapes\\x12\\0\\n"
"Worth maybe \xc2\xa3"
"0.53.")

Serial << text1 << endl;
String s(text1);
Format::json.escape(s);
REQUIRE_EQ(s, text1b);
"Worth \"maybe\" \xa3 0.53. Yen \xa5 5bn.")

TEST_CASE("JSON")
{
DEFINE_FSTR_LOCAL(text1b, "A JSON\\ntest string\\twith escapes\\u0012\\u0000\\n"
"Worth \\\"maybe\\\" \\u00a3 0.53. Yen \\u00a5 5bn.")

Serial << text1 << endl;
String s(text1);
Format::json.escape(s);
REQUIRE_EQ(s, text1b);

s = text1;
Format::json.quote(s);
String quoted = String('"') + text1b + '"';
REQUIRE_EQ(s, quoted);
}

TEST_CASE("C++")
{
DEFINE_FSTR_LOCAL(
text1a, "A JSON\\ntest string\\twith escapes\\x12\\0\\nWorth \\\"maybe\\\" \xa3 0.53. Yen \xa5 5bn.")
String s(text1);
Format::escapeControls(s, Format::Option::doublequote | Format::Option::backslash);
REQUIRE_EQ(s, text1a);

DEFINE_FSTR_LOCAL(
text1b,
"A JSON\\ntest string\\twith escapes\\x12\\0\\nWorth \\\"maybe\\\" \xc2\xa3 0.53. Yen \xc2\xa5 5bn.")
s = text1;
Format::escapeControls(s, Format::Option::utf8 | Format::Option::doublequote | Format::Option::backslash);
REQUIRE_EQ(s, text1b);
}
}
};

Expand Down
Loading