Skip to content

Commit

Permalink
📐 Reformatted code
Browse files Browse the repository at this point in the history
  • Loading branch information
AnotherFoxGuy committed May 10, 2018
1 parent 6a57318 commit 5eb614d
Show file tree
Hide file tree
Showing 43 changed files with 4,272 additions and 3,000 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ endif (COMMAND cmake_policy)

# Declare the project; this also defines rorserver_SOURCE_DIR, which is used below.
project(rorserver)

# Turn on the global USE_FOLDERS property so IDE generators can group targets
# into folders (driven by each target's FOLDER property).
set_property(GLOBAL PROPERTY USE_FOLDERS ON)

# setup paths
# Output locations inside the source tree.
# NOTE(review): these are plain variables, not the CMAKE_*_OUTPUT_DIRECTORY
# variables CMake consumes automatically -- presumably they are applied to the
# targets further down in this file; confirm.
SET(RUNTIME_OUTPUT_DIRECTORY "${rorserver_SOURCE_DIR}/bin/")
SET(LIBRARY_OUTPUT_DIRECTORY "${rorserver_SOURCE_DIR}/lib/")
Expand Down
180 changes: 107 additions & 73 deletions source/common/UnicodeStrings.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
/// char16_t, std::u16string = UTF-16LE (not UCS-2 -> 2 or 4 bytes!)
/// char32_t, std::u32string = UTF-32LE
/// wchar_t, std::wstring = Forbidden, except for using wchar APIs.
///
///
/// Inspired by http://www.utf8everywhere.org
///
/// CRASH COURSE OF TEXT PROCESSING
Expand Down Expand Up @@ -56,111 +56,145 @@
#include <string>
#include <string.h>

namespace Str {

// UTF-8 byte signatures
// Spec: 0/1 are bit values, 'x' are codepoint bits
//
// type  | spec     | mask     | value
// -----------------------------------
// 1byte | 0xxxxxxx | 10000000 | 00000000
// 2byte | 110xxxxx | 11100000 | 11000000
// 3byte | 1110xxxx | 11110000 | 11100000
// 4byte | 11110xxx | 11111000 | 11110000
// cont. | 10xxxxxx | 11000000 | 10000000

static const char UTF8_SIG_LEAD_1b = '\x00';
static const char UTF8_SIG_LEAD_2b = '\xC0';
static const char UTF8_SIG_LEAD_3b = '\xE0';
static const char UTF8_SIG_LEAD_4b = '\xF0';
static const char UTF8_SIG_CONT = '\x80';
static const char UTF8_MASK_LEAD_1b = '\x80';
static const char UTF8_MASK_LEAD_2b = '\xE0';
static const char UTF8_MASK_LEAD_3b = '\xF0';
static const char UTF8_MASK_LEAD_4b = '\xF8';
static const char UTF8_MASK_CONT = '\xC0';

/// UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER.
/// Spelled as explicit bytes (instead of a `u8""` literal) so the initializer stays a
/// `const char*` under every language standard, including C++20 where `u8""` yields `char8_t`.
static const char *UTF8_REPLACEMENT_CHAR = "\xEF\xBF\xBD";

/// @return true if `c` is a complete single-octet (ASCII) character: 0xxxxxxx.
inline bool IsUtf8Lead1b(const char c)
{
    return (c & UTF8_MASK_LEAD_1b) == UTF8_SIG_LEAD_1b;
}

/// @return true if `c` opens a 2-octet sequence: 110xxxxx.
inline bool IsUtf8Lead2b(const char c)
{
    return (c & UTF8_MASK_LEAD_2b) == UTF8_SIG_LEAD_2b;
}

/// @return true if `c` opens a 3-octet sequence: 1110xxxx.
inline bool IsUtf8Lead3b(const char c)
{
    return (c & UTF8_MASK_LEAD_3b) == UTF8_SIG_LEAD_3b;
}

/// @return true if `c` opens a 4-octet sequence: 11110xxx.
inline bool IsUtf8Lead4b(const char c)
{
    return (c & UTF8_MASK_LEAD_4b) == UTF8_SIG_LEAD_4b;
}

/// @return true if `c` is a continuation octet: 10xxxxxx.
inline bool IsUtf8Cont(const char c)
{
    return (c & UTF8_MASK_CONT) == UTF8_SIG_CONT;
}

/// Mercilessly replaces all invalid code units with supplied sequence.
/// OctetIterator_T can be either STL iterator or plain old `const char*`.
///
/// Only the lead/continuation byte structure is checked; overlong forms, surrogates and
/// code points above U+10FFFF are passed through unchanged.
///
/// Replacement rules:
///  - a stray continuation octet yields one `sub`;
///  - an octet that is no valid lead (11111xxx) yields one `sub`;
///  - a sequence cut short (by end of input or by a non-continuation octet) yields one
///    `sub`, and the interrupting octet is then examined as the start of a new character.
///
/// NOTE: calling `SanitizeUtf8(p, q)` with two `const char*` arguments selects the
/// C-string overload below (non-template wins the tie), i.e. `q` is taken as `sub`.
/// Pass `sub` explicitly to use the iterator form with `const char*`.
///
/// @param OctetIterator_T start  Start of source string. Required.
/// @param OctetIterator_T end    End of source. Required.
/// @param const char*     sub    Substitute sequence; default: U+FFFD REPLACEMENT CHARACTER.
///                               A null pointer is treated as the default.
/// @return Copy of the input with every malformed part replaced by `sub`.
template<typename OctetIterator_T>
std::string SanitizeUtf8(OctetIterator_T start, OctetIterator_T end, const char *sub = UTF8_REPLACEMENT_CHAR)
{
    if (sub == nullptr)
    {
        sub = UTF8_REPLACEMENT_CHAR; // Appending a null C-string would be undefined behaviour.
    }

    std::string res;
    char buffer[4] = { '\0', '\0', '\0', '\0' }; // Octets of the character being assembled.
    int buffer_next = 0; // Next available octet slot; 0 means "expecting a lead octet".
    int buffer_max = 0;  // Current character's declared number of octets.

    for (OctetIterator_T pos = start; pos != end; ++pos)
    {
        const char octet = *pos;

        if (buffer_next != 0) // In the middle of a multi-octet character.
        {
            if (IsUtf8Cont(octet))
            {
                buffer[buffer_next++] = octet;
                if (buffer_next == buffer_max)
                {
                    res.append(buffer, static_cast<std::string::size_type>(buffer_max));
                    buffer_next = 0;
                }
                continue;
            }

            // Sequence was cut short. Replace what we collected and fall through so the
            // interrupting octet is not lost - it may be a perfectly valid lead octet.
            res += sub;
            buffer_next = 0;
        }

        // We're at lead byte and must detect.
        if (IsUtf8Lead1b(octet))
        {
            res += octet;
        }
        else if (IsUtf8Lead2b(octet) || IsUtf8Lead3b(octet) || IsUtf8Lead4b(octet))
        {
            buffer_max = IsUtf8Lead2b(octet) ? 2 : (IsUtf8Lead3b(octet) ? 3 : 4);
            buffer[0] = octet;
            buffer_next = 1;
        }
        else
        {
            // Stray continuation octet or invalid lead (11111xxx). Never buffer these:
            // there is no valid length for them, so they could not be flushed safely.
            res += sub;
        }
    }

    if (buffer_next != 0) // Input ended inside a multi-octet character.
    {
        res += sub;
    }

    return res; // We rely on C++11's move semantics -> no copy here.
}

/// Convenience overload for plain old NULL-terminated C-strings
/// @param str  NUL-terminated source string; a null pointer yields an empty result.
/// @param sub  Substitute sequence; default: U+FFFD REPLACEMENT CHARACTER.
inline std::string SanitizeUtf8(const char *str, const char *sub = UTF8_REPLACEMENT_CHAR)
{
    if (str == nullptr)
    {
        return std::string();
    }
    return SanitizeUtf8(str, str + strlen(str), sub);
}

/// Replicates behavior of `isspace()` under "C" locale - to be independent and faster.
inline bool IsWhitespaceAscii(char c)
{
    return (c == ' ')   // (0x20) space (SPC)
        || (c == '\t')  // (0x09) horizontal tab (TAB)
        || (c == '\n')  // (0x0a) newline (LF)
        || (c == '\v')  // (0x0b) vertical tab (VT)
        || (c == '\f')  // (0x0c) feed (FF)
        || (c == '\r'); // (0x0d) carriage return (CR)
}

// TODO: implement `TrimUtf8()`!
/// Narrows the range [start, end) so it no longer begins or ends with ASCII whitespace.
/// Only the two pointers are moved; the characters themselves are not modified and no
/// NUL terminator is written at the new `end`.
/// @param start Pointer to first character (or NUL-terminating character if the string is empty)
/// @param end Pointer to after-the-last character (the NUL-terminating character)
inline void TrimAscii(char *&start, char *&end)
{
    while ((start != end) && IsWhitespaceAscii(*start))
    {
        ++start;
    }

    while ((start != end) && IsWhitespaceAscii(*(end - 1)))
    {
        --end;
    }
}

} // namespace Str
12 changes: 6 additions & 6 deletions source/gui/resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
// Microsoft Developer Studio generated include file.
// Used by icon.rc
//
// Numeric identifier of the IDI_ICON1 resource (presumably the icon declared in
// icon.rc -- confirm there).
// NOTE(review): each #define in this file appears twice in a row with the same
// value, so the second occurrence is a benign identical redefinition.
#define IDI_ICON1 101
#define IDI_ICON1 101

// Next default values for new objects
//
//
// The counters below are only defined when APSTUDIO_INVOKED is defined and
// APSTUDIO_READONLY_SYMBOLS is not.
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 102
#define _APS_NEXT_COMMAND_VALUE 40001
#define _APS_NEXT_CONTROL_VALUE 1000
#define _APS_NEXT_SYMED_VALUE 101
#define _APS_NEXT_RESOURCE_VALUE 102
#define _APS_NEXT_COMMAND_VALUE 40001
#define _APS_NEXT_CONTROL_VALUE 1000
#define _APS_NEXT_SYMED_VALUE 101
#endif
#endif
Loading

0 comments on commit 5eb614d

Please sign in to comment.