From 7020094accbf752deee1cb930676ec7765aeab9f Mon Sep 17 00:00:00 2001 From: Mike Date: Tue, 18 Jun 2024 09:28:11 +0100 Subject: [PATCH] Extend `CsvReader` capabilities, move into new library (#2805) This PR extends the capabilities `CsvReader` class, introduced in #2403. Goals: - Extend testing, verify efficient use of memory - Add Parser capability to allow filtering and processing of (very) large files streamed via network, in files, etc. - Add seeking support to allow indexing, bookmarking, etc. - Add iterator support The code has been moved into a separate library. Changes: **Fix String move to avoid de-allocating buffers** Use longer of two buffers for result. Example: ``` String s1 = "Greater than SSO buffer length"; String s2; s1 = ""; // Buffer remains allocated s2 = std::move(s1); // The move we need to fix ``` At present this move will result in s1's buffer being de-allocated. This can lead to performance degratation due to subsequent memory reallocations where a String is being passed around as a general buffer. This change uses the larger of the two buffers when deciding how to behave. Checks added to HostTests to enforce predictable behaviour. **Add CStringArray::release method** Allows efficient conversion to a regular `String` object for manipulation. **Fix CStringArray operator+=** Must take reference, not copy - inefficient and fails when used with FlashString. **Move CsvReader into separate library** The `CsvReader` class has been moved out of `Core/Data` and into `CsvReader` which has additional capabilities. Changes to existing code: - Add ``CsvReader`` to your project's :cpp:envvar:`COMPONENT_DEPENDS` - Change ``#include `` to ``#include `` - Change ``CsvReader`` class to :cpp:class:`CSV::Reader` --- .gitmodules | 4 + Sming/Core/Data/CStringArray.h | 10 +- Sming/Core/Data/CsvReader.cpp | 124 ------------------- Sming/Core/Data/CsvReader.h | 130 +------------------- Sming/Libraries/CsvReader | 1 + Sming/Wiring/WString.cpp | 6 +- docs/source/upgrading/5.1-5.2.rst | 10 ++ samples/Basic_Templates/app/CsvTemplate.h | 4 +- samples/Basic_Templates/app/application.cpp | 3 +- samples/Basic_Templates/component.mk | 1 + tests/HostTests/modules/CStringArray.cpp | 26 ++++ tests/HostTests/modules/String.cpp | 64 ++++++++-- tests/HostTests/modules/TemplateStream.cpp | 27 ---- 13 files changed, 112 insertions(+), 298 deletions(-) delete mode 100644 Sming/Core/Data/CsvReader.cpp create mode 160000 Sming/Libraries/CsvReader diff --git a/.gitmodules b/.gitmodules index 9d62de59ac..a1404fb3cd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -217,6 +217,10 @@ path = Sming/Libraries/CS5460/CS5460 url = https://github.com/xxzl0130/CS5460.git ignore = dirty +[submodule "Libraries.CsvReader"] + path = Sming/Libraries/CsvReader + url = https://github.com/mikee47/CsvReader + ignore = dirty [submodule "Libraries.DFRobotDFPlayerMini"] path = Sming/Libraries/DFRobotDFPlayerMini url = https://github.com/DFRobot/DFRobotDFPlayerMini.git diff --git a/Sming/Core/Data/CStringArray.h b/Sming/Core/Data/CStringArray.h index 27afb9a66c..3b192c6b54 100644 --- a/Sming/Core/Data/CStringArray.h +++ b/Sming/Core/Data/CStringArray.h @@ -89,6 +89,14 @@ class CStringArray : private String return *this; } + /** + * @brief Give up underlying String object to caller so it can be manipulated + */ + String release() + { + return std::move(*this); + } + /** @brief Append a new string (or array of strings) to the end of the array * @param str * @param length Length of new string in array (default is length of str) @@ -127,7 +135,7 @@ class CStringArray : private String * @brief Append numbers, etc. to the array * @param value char, int, float, etc. as supported by String */ - template CStringArray& operator+=(T value) + template CStringArray& operator+=(const T& value) { add(String(value)); return *this; diff --git a/Sming/Core/Data/CsvReader.cpp b/Sming/Core/Data/CsvReader.cpp deleted file mode 100644 index 569ce02a0b..0000000000 --- a/Sming/Core/Data/CsvReader.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/**** - * Sming Framework Project - Open Source framework for high efficiency native ESP8266 development. - * Created 2015 by Skurydin Alexey - * http://github.com/SmingHub/Sming - * All files of the Sming Core are provided under the LGPL v3 license. - * - * CsvReader.cpp - * - * @author: 2021 - Mikee47 - * - ****/ - -#include "CsvReader.h" -#include - -void CsvReader::reset() -{ - source->seekFrom(0, SeekOrigin::Start); - if(!userHeadingsProvided) { - readRow(); - headings = row; - } - row = nullptr; -} - -bool CsvReader::readRow() -{ - constexpr size_t blockSize{512}; - - String buffer(std::move(reinterpret_cast(row))); - constexpr char quoteChar{'"'}; - enum class FieldKind { - unknown, - quoted, - unquoted, - }; - FieldKind fieldKind{}; - bool escape{false}; - bool quote{false}; - char lc{'\0'}; - unsigned writepos{0}; - - while(true) { - if(buffer.length() == maxLineLength) { - debug_w("[CSV] Line buffer limit reached %u", maxLineLength); - return false; - } - size_t buflen = std::min(writepos + blockSize, maxLineLength); - if(!buffer.setLength(buflen)) { - debug_e("[CSV] Out of memory %u", buflen); - return false; - } - auto len = source->readBytes(buffer.begin() + writepos, buflen - writepos); - if(len == 0) { - if(writepos == 0) { - return false; - } - buffer.setLength(writepos); - row = std::move(buffer); - return true; - } - buflen = writepos + len; - unsigned readpos = writepos; - - for(; readpos < buflen; ++readpos) { - char c = buffer[readpos]; - if(escape) { - switch(c) { - case 'n': - c = '\n'; - break; - case 'r': - c = '\r'; - break; - case 't': - c = '\t'; - break; - default:; - // Just accept character - } - escape = false; - } else { - if(fieldKind == FieldKind::unknown) { - if(c == quoteChar) { - fieldKind = FieldKind::quoted; - quote = true; - lc = '\0'; - continue; - } - fieldKind = FieldKind::unquoted; - } - if(c == quoteChar) { - quote = !quote; - if(fieldKind == FieldKind::quoted) { - if(lc == quoteChar) { - buffer[writepos++] = c; - lc = '\0'; - } else { - lc = c; - } - continue; - } - } else if(c == '\\') { - escape = true; - continue; - } else if(!quote) { - if(c == fieldSeparator) { - c = '\0'; - fieldKind = FieldKind::unknown; - } else if(c == '\r') { - continue; - } else if(c == '\n') { - source->seekFrom(readpos + 1 - buflen, SeekOrigin::Current); - buffer.setLength(writepos); - row = std::move(buffer); - return true; - } - } - } - buffer[writepos++] = c; - lc = c; - } - } -} diff --git a/Sming/Core/Data/CsvReader.h b/Sming/Core/Data/CsvReader.h index 596a62a27e..2fcd76036a 100644 --- a/Sming/Core/Data/CsvReader.h +++ b/Sming/Core/Data/CsvReader.h @@ -12,132 +12,4 @@ #pragma once -#include "Stream/DataSourceStream.h" -#include "CStringArray.h" -#include - -/** - * @brief Class to parse a CSV file - * - * Spec: https://www.ietf.org/rfc/rfc4180.txt - * - * 1. Each record is located on a separate line - * 2. Line ending for last record in the file is optional - * 3. Field headings are provided either in the source data or in constructor (but not both) - * 4. Fields separated with ',' and whitespace considered part of field content - * 5. Fields may or may not be quoted - if present, will be removed during parsing - * 6. Fields may contain line breaks, quotes or commas - * 7. Quotes may be escaped thus "" if field itself is quoted - * - * Additional features: - * - * - Line breaks can be \n or \r\n - * - Escapes codes within fields will be converted: \n \r \t \", \\ - * - Field separator can be changed in constructor - */ -class CsvReader -{ -public: - /** - * @brief Construct a CSV reader - * @param source Stream to read CSV text from - * @param fieldSeparator - * @param headings Required if source data does not contain field headings as first row - * @param maxLineLength Limit size of buffer to guard against malformed data - */ - CsvReader(IDataSourceStream* source, char fieldSeparator = ',', const CStringArray& headings = nullptr, - size_t maxLineLength = 2048) - : source(source), fieldSeparator(fieldSeparator), userHeadingsProvided(headings), maxLineLength(maxLineLength), - headings(headings) - { - reset(); - } - - /** - * @brief Reset reader to start of CSV file - * - * Cursor is set to 'before start'. - * Call 'next()' to fetch first record. - */ - void reset(); - - /** - * @brief Seek to next record - */ - bool next() - { - return readRow(); - } - - /** - * @brief Get number of columns - */ - unsigned count() const - { - return headings.count(); - } - - /** - * @brief Get a value from the current row - * @param index Column index, starts at 0 - * @retval const char* nullptr if index is not valid - */ - const char* getValue(unsigned index) - { - return row[index]; - } - - /** - * @brief Get a value from the current row - * @param index Column name - * @retval const char* nullptr if name is not found - */ - const char* getValue(const char* name) - { - return getValue(getColumn(name)); - } - - /** - * @brief Get index of column given its name - * @param name Column name to find - * @retval int -1 if name is not found - */ - int getColumn(const char* name) - { - return headings.indexOf(name); - } - - /** - * @brief Determine if row is valid - */ - explicit operator bool() const - { - return bool(row); - } - - /** - * @brief Get headings - */ - const CStringArray& getHeadings() const - { - return headings; - } - - /** - * @brief Get current row - */ - const CStringArray& getRow() const - { - return row; - } - -private: - bool readRow(); - - std::unique_ptr source; - char fieldSeparator; - bool userHeadingsProvided; - size_t maxLineLength; - CStringArray headings; - CStringArray row; -}; +static_assert(false, "CsvReader class has been moved to the CsvReader library."); diff --git a/Sming/Libraries/CsvReader b/Sming/Libraries/CsvReader new file mode 160000 index 0000000000..8f4d416442 --- /dev/null +++ b/Sming/Libraries/CsvReader @@ -0,0 +1 @@ +Subproject commit 8f4d416442292927d15fe00d80130fb2fc7d8bb6 diff --git a/Sming/Wiring/WString.cpp b/Sming/Wiring/WString.cpp index b2d3b83aac..401851540b 100644 --- a/Sming/Wiring/WString.cpp +++ b/Sming/Wiring/WString.cpp @@ -276,16 +276,14 @@ void String::move(String& rhs) (void)reserve(rhs_len); } - // If we already have capacity, copy the data and free rhs buffers - if(capacity() >= rhs_len) { + // If we have more capacity than the target, copy the data and free rhs buffers + if(rhs.sso.set || capacity() > rhs.capacity()) { memmove(buffer(), rhs.buffer(), rhs_len); setlen(rhs_len); rhs.invalidate(); return; } - assert(!rhs.sso.set); - // We don't have enough space so perform a pointer swap if(!sso.set) { free(ptr.buffer); diff --git a/docs/source/upgrading/5.1-5.2.rst b/docs/source/upgrading/5.1-5.2.rst index 96f7fc41a4..cc8815a84f 100644 --- a/docs/source/upgrading/5.1-5.2.rst +++ b/docs/source/upgrading/5.1-5.2.rst @@ -91,3 +91,13 @@ These will now fail to compile. Copy construction and assignment has been explicitly deleted so avoid unintentional side-effects. Objects should always be passed by reference. + + +**CsvReader library** + +The :cpp:type:`CsvReader` class has been moved out of ``Core/Data`` and into :library:`CsvReader` +which has additional capabilities. Changes to existing code: + +- Add ``CsvReader`` to your project's :cpp:envvar:`COMPONENT_DEPENDS` +- Change ``#include `` to ``#include `` +- Change ``CsvReader`` class to :cpp:class:`CSV::Reader` diff --git a/samples/Basic_Templates/app/CsvTemplate.h b/samples/Basic_Templates/app/CsvTemplate.h index b29b4ae349..cdf56fdd8a 100644 --- a/samples/Basic_Templates/app/CsvTemplate.h +++ b/samples/Basic_Templates/app/CsvTemplate.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include /** @@ -35,5 +35,5 @@ class CsvTemplate : public SectionTemplate } private: - CsvReader csv; + CSV::Reader csv; }; diff --git a/samples/Basic_Templates/app/application.cpp b/samples/Basic_Templates/app/application.cpp index efca95a384..a2bbae2d40 100644 --- a/samples/Basic_Templates/app/application.cpp +++ b/samples/Basic_Templates/app/application.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include "CsvTemplate.h" namespace @@ -51,7 +50,7 @@ void printCars() void printClassics(const FlashString& templateSource, Format::Formatter& formatter) { // The CSV data source - CsvReader csv(new FileStream(Filename::classics_csv)); + CSV::Reader csv(new FileStream(Filename::classics_csv)); // Use a regular SectionTemplate class to process the template SectionTemplate tmpl(new FSTR::Stream(templateSource)); diff --git a/samples/Basic_Templates/component.mk b/samples/Basic_Templates/component.mk index 092ca88129..bcb5ca8f81 100644 --- a/samples/Basic_Templates/component.mk +++ b/samples/Basic_Templates/component.mk @@ -1,3 +1,4 @@ +COMPONENT_DEPENDS := CsvReader HWCONFIG := basic_templates DISABLE_NETWORK := 1 diff --git a/tests/HostTests/modules/CStringArray.cpp b/tests/HostTests/modules/CStringArray.cpp index 61c8394099..bb640bf83f 100644 --- a/tests/HostTests/modules/CStringArray.cpp +++ b/tests/HostTests/modules/CStringArray.cpp @@ -24,6 +24,7 @@ class CStringArrayTest : public TestGroup "b\0" "c\0" "d\0"); + DEFINE_FSTR_LOCAL(FS_BasicJoined, "a,b,c,d") TEST_CASE("Empty construction") { @@ -231,6 +232,31 @@ class CStringArrayTest : public TestGroup csa.add(F("test\0again")); REQUIRE_EQ(csa.join("}+{"), "a}+{}+{test}+{again"); REQUIRE_EQ(csa.join(nullptr), "atestagain"); + + csa = FS_Basic; + REQUIRE_EQ(csa.join(), FS_BasicJoined); + } + + TEST_CASE("release") + { + CStringArray csa(FS_Basic); + csa += FS_Basic; // Allocate > SSO + Serial << csa.join() << endl; + auto cstrWant = csa.c_str(); + String s = csa.release(); + REQUIRE(!csa); + REQUIRE(s.c_str() == cstrWant); + + REQUIRE(s == String(FS_Basic) + FS_Basic); + + csa = std::move(s); + REQUIRE(csa == String(FS_Basic) + FS_Basic); + + String js; + js += FS_BasicJoined; + js += ','; + js += FS_BasicJoined; + REQUIRE(csa.join() == js); } } }; diff --git a/tests/HostTests/modules/String.cpp b/tests/HostTests/modules/String.cpp index 8c48e8e5e5..b453142c27 100644 --- a/tests/HostTests/modules/String.cpp +++ b/tests/HostTests/modules/String.cpp @@ -15,6 +15,7 @@ class StringTest : public TestGroup nonTemplateTest(); testString(); + testMove(); testMakeHexString(); } @@ -154,17 +155,62 @@ class StringTest : public TestGroup } } + void testMove() + { + DEFINE_FSTR_LOCAL(shortText, "Not long") + DEFINE_FSTR_LOCAL(longText, "Greater than SSO buffer length") + + TEST_CASE("Move into unassigned string") + { + // Normal move + String s1 = longText; + String s2 = std::move(s1); + REQUIRE(!s1); + REQUIRE(s2.length() == longText.length()); + } + + TEST_CASE("Move between allocated strings of same length") + { + String s1 = longText; + auto cstrWant = s1.c_str(); + String s2 = std::move(s1); + REQUIRE(s2.c_str() == cstrWant); + } + + TEST_CASE("Move to allocated string of shorter length") + { + String s1 = longText; + String s2 = shortText; + auto cstrWant = s1.c_str(); + s2 = std::move(s1); + REQUIRE(s2.c_str() == cstrWant); + } + + TEST_CASE("Move to allocated string of longer length") + { + String s1 = longText; + String s2; + auto cstrWant = s1.c_str(); + s1 = ""; // Buffer remains allocated + s2 = std::move(s1); + REQUIRE(s2.c_str() == cstrWant); + } + } + void testMakeHexString() { - uint8_t hwaddr[] = {0xaa, 0xbb, 0xcc, 0xdd, 0x12, 0x55, 0x00}; - REQUIRE(makeHexString(nullptr, 6) == String::empty); - REQUIRE(makeHexString(hwaddr, 0) == String::empty); - REQUIRE(makeHexString(hwaddr, 6) == F("aabbccdd1255")); - REQUIRE(makeHexString(hwaddr, 6, ':') == F("aa:bb:cc:dd:12:55")); - REQUIRE(makeHexString(hwaddr, 7) == F("aabbccdd125500")); - REQUIRE(makeHexString(hwaddr, 7, ':') == F("aa:bb:cc:dd:12:55:00")); - REQUIRE(makeHexString(hwaddr, 1, ':') == F("aa")); - REQUIRE(makeHexString(hwaddr, 0, ':') == String::empty); + TEST_CASE("makeHexString") + { + uint8_t hwaddr[] = {0xaa, 0xbb, 0xcc, 0xdd, 0x12, 0x55, 0x00}; + REQUIRE(makeHexString(nullptr, 6) == String::empty); + REQUIRE(makeHexString(hwaddr, 0) == String::empty); + REQUIRE(makeHexString(hwaddr, 6) == F("aabbccdd1255")); + REQUIRE(makeHexString(hwaddr, 6, ':') == F("aa:bb:cc:dd:12:55")); + REQUIRE(makeHexString(hwaddr, 7) == F("aabbccdd125500")); + REQUIRE(makeHexString(hwaddr, 7, ':') == F("aa:bb:cc:dd:12:55:00")); + REQUIRE(makeHexString(hwaddr, 1, ':') == F("aa")); + REQUIRE(makeHexString(hwaddr, 0, ':') == String::empty); + } } }; diff --git a/tests/HostTests/modules/TemplateStream.cpp b/tests/HostTests/modules/TemplateStream.cpp index 66f1a6c9bc..36c725dd5a 100644 --- a/tests/HostTests/modules/TemplateStream.cpp +++ b/tests/HostTests/modules/TemplateStream.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #ifdef ARCH_HOST #include @@ -23,12 +22,6 @@ DEFINE_FSTR_LOCAL(template3_1, "Document Title