diff --git a/far/RegExp.cpp b/far/RegExp.cpp index 1fa8e861b6..5e2ddc8196 100644 --- a/far/RegExp.cpp +++ b/far/RegExp.cpp @@ -1720,7 +1720,20 @@ struct RegExp::StateStackItem int forward{}; }; -static const RegExp::StateStackItem& FindStateByPos(const std::vector& stack, RegExp::REOpCode* pos, int op) +class RegExp::state_stack +{ +public: + using state = std::vector>; + +private: + state::allocator_type::arena_type m_Arena; + +public: + state State{ m_Arena }; +}; + + +static const RegExp::StateStackItem& FindStateByPos(span const stack, RegExp::REOpCode* pos, int op) { return *std::find_if(ALL_CONST_REVERSE_RANGE(stack), [&](const auto& i){ return i.pos == pos && i.op == op; }); } @@ -1753,7 +1766,9 @@ int RegExp::StrCmp(const wchar_t*& str, const wchar_t* start, const wchar_t* end return 1; } -bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wchar_t* strend, std::vector& match, named_regex_match& NamedMatch, std::vector& stack) const +static constexpr RegExpMatch DefaultMatch{ -1, -1 }; + +bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wchar_t* strend, regex_match& RegexMatch, named_regex_match& NamedMatch, state_stack& StateStack) const { int i,j; int minimizing; @@ -1762,11 +1777,13 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc UniSet *cl; int inrangebracket=0; + auto& match = RegexMatch.Matches; + auto& stack = StateStack.State; + stack.clear(); match.clear(); NamedMatch.Matches.clear(); - - match.resize(bracketscount, { -1, -1 }); + match.resize(bracketscount, DefaultMatch); for(const auto* op = code.data(), *end = op + code.size(); op != end; ++op) { @@ -1777,7 +1794,7 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc if (str<=strend) { - const auto MinSkip = [&](StateStackItem& st, function_ref const cmp) + const auto MinSkip = [&](StateStackItem& st, auto const& cmp) { int jj; switch (std::next(op)->op) @@ -2822,19 +2839,18 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc if (stack.empty()) return false; - const auto ps = std::prev(stack.end()); + auto& ps = stack.back(); - //dpf(("ps->op:%s\n",ops[ps->op])); - switch (ps->op) + switch (ps.op) { case opAlternative: { - str=ps->savestr; - op=ps->pos; + str = ps.savestr; + op = ps.pos; if (op->alternative.nextalt) { - ps->pos=op->alternative.nextalt; + ps.pos=op->alternative.nextalt; } else { @@ -2850,56 +2866,57 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc case opTypeRange: case opNotTypeRange: { - str=ps->savestr-1; - op=ps->pos; + str = ps.savestr-1; + op = ps.pos; - if (strstartstr) + if (str < ps.startstr) { continue; } - ps->savestr=str; + ps.savestr = str; break; } case opNamedRefRange: case opBackRefRange: { - if (ps->op == opBackRefRange) + if (ps.op == opBackRefRange) { - m = &match[ps->pos->range.refindex]; + m = &match[ps.pos->range.refindex]; } else { - const auto Iterator = NamedMatch.Matches.find(ps->pos->range.refname); + const auto Iterator = NamedMatch.Matches.find(ps.pos->range.refname); if (Iterator == NamedMatch.Matches.cend()) break; m = &match[Iterator->second]; } - str=ps->savestr-(m->end-m->start); - op=ps->pos; + str = ps.savestr-(m->end-m->start); + op = ps.pos; - if (strstartstr) + if (str < ps.startstr) { continue; } - ps->savestr=str; + ps.savestr = str; break; } case opAnyMinRange: { - if (!(ps->max--))continue; + if (!(ps.max--)) + continue; - str=ps->savestr; - op=ps->pos; + str = ps.savestr; + op = ps.pos; - if (ps->pos->range.op==opCharAny) + if (ps.pos->range.op == opCharAny) { - if (strsavestr=str; + ps.savestr = str; } else { @@ -2911,7 +2928,7 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc if (strsavestr=str; + ps.savestr = str; } else { @@ -2923,17 +2940,18 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc } case opSymbolMinRange: { - if (!(ps->max--))continue; + if (!(ps.max--)) + continue; - str=ps->savestr; - op=ps->pos; + str = ps.savestr; + op = ps.pos; if (ignorecase) { if (strsymbol) { str++; - ps->savestr=str; + ps.savestr = str; } else { @@ -2945,7 +2963,7 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc if (strsymbol) { str++; - ps->savestr=str; + ps.savestr = str; } else { @@ -2957,17 +2975,18 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc } case opNotSymbolMinRange: { - if (!(ps->max--))continue; + if (!(ps.max--)) + continue; - str=ps->savestr; - op=ps->pos; + str = ps.savestr; + op = ps.pos; if (ignorecase) { if (strsymbol) { str++; - ps->savestr=str; + ps.savestr = str; } else { @@ -2979,7 +2998,7 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc if (strsymbol) { str++; - ps->savestr=str; + ps.savestr = str; } else { @@ -2991,15 +3010,16 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc } case opClassMinRange: { - if (!(ps->max--))continue; + if (!(ps.max--)) + continue; - str=ps->savestr; - op=ps->pos; + str = ps.savestr; + op = ps.pos; if (strrange.symbolclass->GetBit(*str)) { str++; - ps->savestr=str; + ps.savestr = str; } else { @@ -3010,15 +3030,16 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc } case opTypeMinRange: { - if (!(ps->max--))continue; + if (!(ps.max--)) + continue; - str=ps->savestr; - op=ps->pos; + str = ps.savestr; + op = ps.pos; if (strrange.type)) { str++; - ps->savestr=str; + ps.savestr = str; } else { @@ -3029,15 +3050,16 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc } case opNotTypeMinRange: { - if (!(ps->max--))continue; + if (!(ps.max--)) + continue; - str=ps->savestr; - op=ps->pos; + str = ps.savestr; + op = ps.pos; if (strrange.type)) { str++; - ps->savestr=str; + ps.savestr=str; } else { @@ -3049,11 +3071,12 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc case opNamedRefMinRange: case opBackRefMinRange: { - if (!(ps->max--))continue; + if (!(ps.max--)) + continue; - str=ps->savestr; - op=ps->pos; - if (ps->op == opBackRefMinRange) + str = ps.savestr; + op = ps.pos; + if (ps.op == opBackRefMinRange) { m = &match[op->range.refindex]; } @@ -3068,7 +3091,7 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc if (str+m->end-m->startstart,start+m->end)) { - ps->savestr=str; + ps.savestr = str; } else { @@ -3079,18 +3102,18 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc } case opBracketRange: { - if (ps->min) + if (ps.min) { inrangebracket--; continue; } - if (ps->forward) + if (ps.forward) { - ps->forward=0; - op=ps->pos->range.bracket.pairindex; + ps.forward = 0; + op = ps.pos->range.bracket.pairindex; inrangebracket--; - str=ps->savestr; + str = ps.savestr; if (op->range.nextalt) { @@ -3114,17 +3137,17 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc } case opBracketMinRange: { - if (!(ps->max--)) + if (!(ps.max--)) { inrangebracket--; continue; } - if (ps->forward) + if (ps.forward) { - ps->forward=0; - op=ps->pos; - str=ps->savestr; + ps.forward = 0; + op = ps.pos; + str = ps.savestr; if (op->range.bracket.index >= 0) { @@ -3155,24 +3178,24 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc } case opOpenBracket: { - j=ps->pos->bracket.index; + j = ps.pos->bracket.index; if (j >= 0) { - match[j].start=ps->min; - match[j].end=ps->max; + match[j].start = ps.min; + match[j].end = ps.max; } continue; } case opNamedBracket: { - j = ps->pos->nbracket.index; + j = ps.pos->nbracket.index; if (j >= 0) { - match[j].start = ps->min; - match[j].end = ps->max; + match[j].start = ps.min; + match[j].end = ps.max; } continue; @@ -3185,10 +3208,10 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc case opNotLookBehind: case opNotLookAhead: { - op=ps->pos->assert.pairindex; - str=ps->savestr; + op = ps.pos->assert.pairindex; + str = ps.savestr; - if (ps->forward) + if (ps.forward) { stack.pop_back(); break; @@ -3211,12 +3234,12 @@ bool RegExp::InnerMatch(const wchar_t* const start, const wchar_t* str, const wc return true; } -bool RegExp::Match(string_view const text, std::vector& match, named_regex_match* NamedMatch) const +bool RegExp::Match(string_view const text, regex_match& match, named_regex_match* NamedMatch) const { return MatchEx(text, 0, match, NamedMatch); } -bool RegExp::MatchEx(string_view const text, size_t const From, std::vector& match, named_regex_match* NamedMatch) const +bool RegExp::MatchEx(string_view const text, size_t const From, regex_match& match, named_regex_match* NamedMatch) const { // Logic errors, no need to catch them if (code.empty()) @@ -3248,12 +3271,12 @@ bool RegExp::MatchEx(string_view const text, size_t const From, std::vector stack; + state_stack stack; if (!InnerMatch(start, textstart, tempend, match, *NamedMatch, stack)) return false; - for (auto& i: match) + for (auto& i: match.Matches) { if (i.start == -1 || i.end == -1 || i.start > i.end) i.start = i.end = -1; @@ -3523,12 +3546,12 @@ bool RegExp::Optimize() return true; } -bool RegExp::Search(string_view const text, std::vector& match, named_regex_match* NamedMatch) const +bool RegExp::Search(string_view const text, regex_match& match, named_regex_match* NamedMatch) const { return SearchEx(text, 0, match, NamedMatch); } -bool RegExp::SearchEx(string_view const text, size_t const From, std::vector& match, named_regex_match* NamedMatch) const +bool RegExp::SearchEx(string_view const text, size_t const From, regex_match& match, named_regex_match* NamedMatch) const { // Logic errors, no need to catch them if (code.empty()) @@ -3551,7 +3574,7 @@ bool RegExp::SearchEx(string_view const text, size_t const From, std::vector stack; + state_stack stack; auto str = textstart; @@ -3564,9 +3587,9 @@ bool RegExp::SearchEx(string_view const text, size_t const From, std::vector i.end) i.start = i.end = -1; @@ -3617,9 +3640,11 @@ bool RegExp::SearchEx(string_view const text, size_t const From, std::vector Match(1); + regex_match Match; + Match.Matches.resize(1); return Search(Str, Match); } @@ -3788,7 +3813,7 @@ TEST_CASE("regex.corner.empty_needle") }; RegExp re; - std::vector Match; + regex_match Match; for (const auto Flag: { OP_NONE, OP_OPTIMIZE }) { @@ -3800,7 +3825,7 @@ TEST_CASE("regex.corner.empty_needle") REQUIRE(re.Match(i.Haystack, Match) == MatchExpected); if (MatchExpected) - REQUIRE(Match == i.Match); + REQUIRE(Match.Matches == i.Match); } } } @@ -3826,7 +3851,7 @@ TEST_CASE("regex.corner.empty_haystack") }; RegExp re; - std::vector Match; + regex_match Match; for (const auto& i: Tests) { @@ -3838,7 +3863,7 @@ TEST_CASE("regex.corner.empty_haystack") REQUIRE(re.Match({}, Match) == MatchExpected); if (MatchExpected) - REQUIRE(Match == i.Match); + REQUIRE(Match.Matches == i.Match); } } } @@ -3857,14 +3882,14 @@ TEST_CASE("regex.list.special") }; RegExp re; - std::vector Match; + regex_match Match; for (const auto& i: Tests) { re.Compile(i.Regex); REQUIRE(!re.Match(i.BadMatch, Match)); REQUIRE(re.Match(i.GoodMatch, Match)); - REQUIRE(Match == i.Match); + REQUIRE(Match.Matches == i.Match); } } @@ -3934,7 +3959,7 @@ TEST_CASE("regex.regression") }; RegExp re; - std::vector Match; + regex_match Match; for (const auto& i: Tests) { @@ -3946,7 +3971,7 @@ TEST_CASE("regex.regression") REQUIRE(re.Search(i.Input, Match) == MatchExpected); if (MatchExpected) - REQUIRE(Match == i.Match); + REQUIRE(Match.Matches == i.Match); } } } @@ -3968,7 +3993,7 @@ TEST_CASE("regex.named_groups") }; RegExp re; - std::vector Match; + regex_match Match; named_regex_match NamedMatch; for (const auto& i: Tests) @@ -3983,7 +4008,7 @@ TEST_CASE("regex.named_groups") if (!MatchExpected) continue; - REQUIRE(Match == i.Match); + REQUIRE(Match.Matches == i.Match); REQUIRE(i.NamedMatch.size() == NamedMatch.Matches.size()); diff --git a/far/RegExp.hpp b/far/RegExp.hpp index 904b431f59..af596a6416 100644 --- a/far/RegExp.hpp +++ b/far/RegExp.hpp @@ -46,6 +46,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common/string_utils.hpp" // External: +#include "stack_allocator.hpp" //---------------------------------------------------------------------------- @@ -101,9 +102,34 @@ enum OP_STRICT = 0x0040, }; -struct named_regex_match +class regex_match { - unordered_string_map Matches; +public: + using matches = std::vector>; + +private: + matches::allocator_type::arena_type m_Arena; + +public: + matches Matches{ m_Arena }; +}; + +class named_regex_match +{ +public: + using matches = std::unordered_map< + string, + size_t, + string_comparer, + string_comparer, + stack_allocator, 4096> + >; + +private: + matches::allocator_type::arena_type m_Arena; + +public: + matches Matches{ m_Arena }; }; class regex_exception: public far_exception @@ -145,8 +171,11 @@ class RegExp struct REOpCode; struct UniSet; struct StateStackItem; + class state_stack; private: + + // code std::vector code; @@ -170,7 +199,7 @@ class RegExp int CalcLength(string_view src); void InnerCompile(const wchar_t* start, const wchar_t* src, int srclength, int options); - bool InnerMatch(const wchar_t* start, const wchar_t* str, const wchar_t* strend, std::vector& match, named_regex_match& NamedMatch, std::vector& stack) const; + bool InnerMatch(const wchar_t* start, const wchar_t* str, const wchar_t* strend, regex_match& RegexMatch, named_regex_match& NamedMatch, state_stack& Statetack) const; void TrimTail(const wchar_t* start, const wchar_t*& strend) const; @@ -210,20 +239,20 @@ class RegExp \param NamedMatch - storage of named brackets. \sa SMatch */ - bool Match(string_view text, std::vector& match, named_regex_match* NamedMatch = {}) const; + bool Match(string_view text, regex_match& match, named_regex_match* NamedMatch = {}) const; /*! Advanced version of match. Can be used for multiple matches on one string (to imitate /g modifier of perl regexp */ - bool MatchEx(string_view text, size_t From, std::vector& match, named_regex_match* NamedMatch = {}) const; + bool MatchEx(string_view text, size_t From, regex_match& match, named_regex_match* NamedMatch = {}) const; /*! Try to find substring that will match regexp. Parameters and return value are the same as for Match. It is highly recommended to call Optimize before Search. */ - bool Search(string_view text, std::vector& match, named_regex_match* NamedMatch = {}) const; + bool Search(string_view text, regex_match& match, named_regex_match* NamedMatch = {}) const; /*! Advanced version of search. Can be used for multiple searches on one string (to imitate /g modifier of perl regexp */ - bool SearchEx(string_view text, size_t From, std::vector& match, named_regex_match* NamedMatch = {}) const; + bool SearchEx(string_view text, size_t From, regex_match& match, named_regex_match* NamedMatch = {}) const; bool Search(string_view Str) const; diff --git a/far/changelog b/far/changelog index 1f82154970..c6e4379fe1 100644 --- a/far/changelog +++ b/far/changelog @@ -1,3 +1,8 @@ +-------------------------------------------------------------------------------- +drkns 2023-10-29 01:30:01+01:00 - build 6206 + +1. gh-741: Far regular expressions - performance. + -------------------------------------------------------------------------------- drkns 2023-10-26 18:32:38+01:00 - build 6205 diff --git a/far/editor.cpp b/far/editor.cpp index 2c02b9b339..ee57089c26 100644 --- a/far/editor.cpp +++ b/far/editor.cpp @@ -3405,7 +3405,7 @@ void Editor::DoSearchReplace(const SearchReplaceDisposition Disposition) auto CurPtr = FindAll ? FirstLine() : m_it_CurLine, TmpPtr = CurPtr; - std::vector Match; + regex_match Match; named_regex_match NamedMatch; RegExp re; diff --git a/far/far.vcxproj b/far/far.vcxproj index 3f92938e9a..3f56f3cacf 100644 --- a/far/far.vcxproj +++ b/far/far.vcxproj @@ -438,6 +438,7 @@ cl /nologo /c /Fo"$(IntDir)%(Filename)_c++.testobj" /TP api_test.c + diff --git a/far/far.vcxproj.filters b/far/far.vcxproj.filters index bc5f851125..7a1df14ff9 100644 --- a/far/far.vcxproj.filters +++ b/far/far.vcxproj.filters @@ -1111,6 +1111,9 @@ Header Files + + Header Files + Header Files diff --git a/far/filemasks.cpp b/far/filemasks.cpp index 8975feb15a..c0d9a3ef1c 100644 --- a/far/filemasks.cpp +++ b/far/filemasks.cpp @@ -105,7 +105,7 @@ class filemasks::masks bool operator==(string_view FileName) const; bool empty() const; - using last_regex_matches = std::pair const*, named_regex_match const*>; + using last_regex_matches = std::pair const*, unordered_string_map const*>; last_regex_matches last_matches() const; private: @@ -113,7 +113,7 @@ class filemasks::masks { RegExp Regex; mutable std::vector Match; - mutable named_regex_match NamedMatch; + mutable unordered_string_map NamedMatch; }; std::variant, regex_data> m_Masks; @@ -413,7 +413,15 @@ bool filemasks::masks::operator==(const string_view FileName) const }, [&](const regex_data& Data) { - return Data.Regex.Search(FileName, Data.Match, &Data.NamedMatch); + regex_match Match; + named_regex_match NamedMatch; + if (!Data.Regex.Search(FileName, Match, &NamedMatch)) + return false; + + Data.Match.assign(ALL_CONST_RANGE(Match.Matches)); + for (const auto& [k, v]: NamedMatch.Matches) + Data.NamedMatch.emplace(k, v); + return true; } }, m_Masks); } @@ -507,7 +515,7 @@ TEST_CASE("masks_with_matches") Masks.assign(L"/(.+)\\.(?:.+)\\.(?{scratch}.+)/"sv); std::vector Matches; - named_regex_match NamedMatches; + unordered_string_map NamedMatches; filemasks::regex_matches const RegexMatches{ Matches, NamedMatches }; const auto Test = L"none.shall.pass"sv; @@ -524,7 +532,7 @@ TEST_CASE("masks_with_matches") REQUIRE(Matches[2].start == 11); REQUIRE(Matches[2].end == 15); - REQUIRE(NamedMatches.Matches.size() == 1u); - REQUIRE(NamedMatches.Matches.at(L"scratch"s) == 2u); + REQUIRE(NamedMatches.size() == 1u); + REQUIRE(NamedMatches.at(L"scratch"s) == 2u); } #endif diff --git a/far/filemasks.hpp b/far/filemasks.hpp index 0d7800cdaa..fa5cbb59fc 100644 --- a/far/filemasks.hpp +++ b/far/filemasks.hpp @@ -41,13 +41,13 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Common: #include "common/preprocessor.hpp" +#include "common/string_utils.hpp" // External: //---------------------------------------------------------------------------- struct RegExpMatch; -struct named_regex_match; enum FM_FLAGS { @@ -66,7 +66,7 @@ class filemasks filemasks& operator=(filemasks&&) noexcept; bool assign(string_view Str, DWORD Flags = 0); - using regex_matches = std::pair&, named_regex_match&>; + using regex_matches = std::pair&, unordered_string_map&>; bool check(string_view Name, regex_matches const* Matches = {}) const; bool empty() const; diff --git a/far/filetype.cpp b/far/filetype.cpp index a5cfd6dd98..c0809edd45 100644 --- a/far/filetype.cpp +++ b/far/filetype.cpp @@ -98,7 +98,7 @@ bool ProcessLocalFileTypes(string_view const Name, string_view const ShortName, { string Command; std::vector Matches; - named_regex_match NamedMatches; + unordered_string_map NamedMatches; }; const auto AddMatches = [&](menu_data const& Data) @@ -111,7 +111,7 @@ bool ProcessLocalFileTypes(string_view const Name, string_view const ShortName, ); } - for (const auto& [GroupName, GroupNumber]: Data.NamedMatches.Matches) + for (const auto& [GroupName, GroupNumber]: Data.NamedMatches) { const auto& Match = Data.Matches[GroupNumber]; Context.Variables.emplace( diff --git a/far/help.cpp b/far/help.cpp index 58a60bcb63..bad10accc3 100644 --- a/far/help.cpp +++ b/far/help.cpp @@ -1958,7 +1958,7 @@ void Help::Search(const os::fs::file& HelpFile,uintptr_t nCodePage) bool TopicFound=false; string strCurTopic, strEntryName; - std::vector Match; + regex_match Match; named_regex_match NamedMatch; RegExp re; diff --git a/far/map_file.cpp b/far/map_file.cpp index 5a6e23a3bc..62f16c3ed6 100644 --- a/far/map_file.cpp +++ b/far/map_file.cpp @@ -180,7 +180,8 @@ static void read_vc(std::istream& Stream, unordered_string_set& Files, std::map< ReBase.Compile(L"^ +Preferred load address is ([0-9A-Fa-f]+)$"sv, OP_OPTIMIZE); ReSymbol.Compile(L"^ +[0-9A-Fa-f]+:[0-9A-Fa-f]+ +([^ ]+) +([0-9A-Fa-f]+) .+ ([^ ]+)$"sv, OP_OPTIMIZE); - std::vector m; + regex_match Match; + auto& m = Match.Matches; m.reserve(3); uintptr_t BaseAddress{}; @@ -190,13 +191,13 @@ static void read_vc(std::istream& Stream, unordered_string_set& Files, std::map< if (i.Str.empty()) continue; - if (!BaseAddress && ReBase.Search(i.Str, m)) + if (!BaseAddress && ReBase.Search(i.Str, Match)) { BaseAddress = from_string(get_match(i.Str, m[1]), {}, 16); continue; } - if (ReSymbol.Search(i.Str, m)) + if (ReSymbol.Search(i.Str, Match)) { auto Address = from_string(get_match(i.Str, m[2]), {}, 16); if (!Address) @@ -222,7 +223,8 @@ static void read_clang(std::istream& Stream, unordered_string_set& Files, std::m ReObject.Compile(L"^[0-9A-Fa-f]+ [0-9A-Fa-f]+ +[0-9]+ (.+)$"sv); ReSymbol.Compile(L"^([0-9A-Fa-f]+) [0-9A-Fa-f]+ 0 (.+)$"sv); - std::vector m; + regex_match Match; + auto& m = Match.Matches; m.reserve(2); string ObjName; @@ -232,7 +234,7 @@ static void read_clang(std::istream& Stream, unordered_string_set& Files, std::m if (i.Str.empty()) continue; - if (ReSymbol.Search(i.Str, m)) + if (ReSymbol.Search(i.Str, Match)) { map_file::line Line; Line.Name = get_match(i.Str, m[2]); @@ -242,7 +244,7 @@ static void read_clang(std::istream& Stream, unordered_string_set& Files, std::m continue; } - if (ReObject.Search(i.Str, m)) + if (ReObject.Search(i.Str, Match)) { ObjName = get_match(i.Str, m[1]); continue; @@ -257,7 +259,8 @@ static void read_gcc(std::istream& Stream, unordered_string_set& Files, std::map ReFileName.Compile(L"^\\[ *[0-9]+\\]\\(.+\\)\\(.+\\)\\(.+\\)\\(.+\\) \\(nx 1\\) 0x[0-9A-Fa-f]+ (.+)$"sv); ReSymbol.Compile(L"^\\[ *[0-9]+\\]\\(.+\\)\\(.+\\)\\(.+\\)\\(.+\\) \\(nx 0\\) 0x([0-9A-Fa-f]+) (.+)$"sv); - std::vector m; + regex_match Match; + auto& m = Match.Matches; m.reserve(2); const auto BaseAddress = 0x1000; @@ -269,14 +272,14 @@ static void read_gcc(std::istream& Stream, unordered_string_set& Files, std::map if (i.Str.empty()) continue; - if (ReFile.Search(i.Str, m) && ReFileName.Search(LastLine, m)) + if (ReFile.Search(i.Str, Match) && ReFileName.Search(LastLine, Match)) { FileName = get_match(LastLine, m[1]); LastLine.clear(); continue; } - if (ReSymbol.Search(i.Str, m)) + if (ReSymbol.Search(i.Str, Match)) { map_file::line Line; Line.Name = get_match(i.Str, m[2]); diff --git a/far/plugapi.cpp b/far/plugapi.cpp index 9a350216c4..5b95e94922 100644 --- a/far/plugapi.cpp +++ b/far/plugapi.cpp @@ -2822,13 +2822,13 @@ intptr_t WINAPI apiRegExpControl(HANDLE hHandle, FAR_REGEXP_CONTROL_COMMANDS Com { auto& Handle = *static_cast(hHandle); const auto data = static_cast(Param2); - std::vector Match; + regex_match Match; if (!Handle.Regex.MatchEx({ data->Text, static_cast(data->Length) }, data->Position, Match, &Handle.NamedMatch)) return false; - const auto MaxSize = std::min(static_cast(data->Count), Match.size()); - std::copy_n(Match.cbegin(), MaxSize, data->Match); + const auto MaxSize = std::min(static_cast(data->Count), Match.Matches.size()); + std::copy_n(Match.Matches.cbegin(), MaxSize, data->Match); data->Count = MaxSize; return true; } @@ -2837,13 +2837,13 @@ intptr_t WINAPI apiRegExpControl(HANDLE hHandle, FAR_REGEXP_CONTROL_COMMANDS Com { auto& Handle = *static_cast(hHandle); const auto data = static_cast(Param2); - std::vector Match; + regex_match Match; if (!Handle.Regex.SearchEx({ data->Text, static_cast(data->Length) }, data->Position, Match, &Handle.NamedMatch)) return false; - const auto MaxSize = std::min(static_cast(data->Count), Match.size()); - std::copy_n(Match.cbegin(), MaxSize, data->Match); + const auto MaxSize = std::min(static_cast(data->Count), Match.Matches.size()); + std::copy_n(Match.Matches.cbegin(), MaxSize, data->Match); data->Count = MaxSize; return true; } diff --git a/far/stack_allocator.hpp b/far/stack_allocator.hpp new file mode 100644 index 0000000000..00bf88b5c2 --- /dev/null +++ b/far/stack_allocator.hpp @@ -0,0 +1,52 @@ +#ifndef STACK_ALLOCATOR_HPP_7214ED21_CB3F_4E83_9723_F7707D14C876 +#define STACK_ALLOCATOR_HPP_7214ED21_CB3F_4E83_9723_F7707D14C876 +#pragma once + +/* +stack_allocator.hpp + + +*/ +/* +Copyright © 2023 Far Group +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the authors may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// Internal: + +// Platform: + +// Common: + +// External: +#include "thirdparty/short_alloc/short_alloc.h" + +//---------------------------------------------------------------------------- + +template +using stack_allocator = short_alloc; + + +#endif // STACK_ALLOCATOR_HPP_7214ED21_CB3F_4E83_9723_F7707D14C876 diff --git a/far/stddlg.cpp b/far/stddlg.cpp index fc27d42346..27f17ab9fb 100644 --- a/far/stddlg.cpp +++ b/far/stddlg.cpp @@ -1198,7 +1198,7 @@ void regex_playground() RegexDlgItems[rp_edit_substitution].strHistory = L"RegexTestSubstitution"sv; RegExp Regex; - std::vector Match; + regex_match Match; named_regex_match NamedMatch; std::vector ListStrings; @@ -1231,7 +1231,7 @@ void regex_playground() const auto TestStr = view_as(Dlg->SendMessage(DM_GETCONSTTEXTPTR, rp_edit_test, {})); const auto ReplaceStr = view_as(Dlg->SendMessage(DM_GETCONSTTEXTPTR, rp_edit_substitution, {})); - const auto Str = ReplaceBrackets(TestStr, ReplaceStr, Match, &NamedMatch); + const auto Str = ReplaceBrackets(TestStr, ReplaceStr, Match.Matches, &NamedMatch); Status = status::normal; Dlg->SendMessage(DM_SETTEXTPTR, rp_edit_result, UNSAFE_CSTR(Str)); }; @@ -1244,7 +1244,7 @@ void regex_playground() const auto clear_matches = [&] { - Match.clear(); + Match.Matches.clear(); NamedMatch.Matches.clear(); ListItems.clear(); @@ -1292,15 +1292,15 @@ void regex_playground() ListItems.clear(); ListStrings.clear(); - reserve_exp_noshrink(ListItems, Match.size()); - reserve_exp_noshrink(ListStrings, Match.size()); + reserve_exp_noshrink(ListItems, Match.Matches.size()); + reserve_exp_noshrink(ListStrings, Match.Matches.size()); const auto match_str = [&](RegExpMatch const& m) { return m.start < 0? L""s : far::format(L"{}-{} {}"sv, m.start, m.end, get_match(TestStr, m)); }; - for (const auto& [i, Index] : enumerate(Match)) + for (const auto& [i, Index] : enumerate(Match.Matches)) { ListStrings.emplace_back(far::format(L"${}: {}"sv, Index, match_str(i))); ListItems.push_back({ i.start < 0? LIF_GRAYED : LIF_NONE, ListStrings.back().c_str(), 0, 0 }); @@ -1308,7 +1308,7 @@ void regex_playground() for (const auto& [k, v] : NamedMatch.Matches) { - const auto& m = Match[v]; + const auto& m = Match.Matches[v]; ListStrings[v] = far::format(L"${{{}}}: {}"sv, k, match_str(m)); ListItems[v].Text = ListStrings[v].c_str(); } diff --git a/far/strmix.cpp b/far/strmix.cpp index ac7b072488..5415f0d706 100644 --- a/far/strmix.cpp +++ b/far/strmix.cpp @@ -800,7 +800,7 @@ namespace bool SearchStringRegex( string_view const Source, const RegExp& re, - std::vector& Match, + regex_match& Match, named_regex_match* const NamedMatch, intptr_t Position, search_replace_string_options const options, @@ -818,15 +818,15 @@ namespace if (!re.SearchEx(Source, CurrentPosition, Match, NamedMatch)) return false; - if (options.WholeWords && !CanContainWholeWord(Source, Match[0].start, Match[0].end - Match[0].start, WordDiv)) + if (options.WholeWords && !CanContainWholeWord(Source, Match.Matches[0].start, Match.Matches[0].end - Match.Matches[0].start, WordDiv)) { ++CurrentPosition; continue; } - ReplaceStr = ReplaceBrackets(Source, ReplaceStr, Match, NamedMatch); - CurPos = Match[0].start; - SearchLength = Match[0].end - Match[0].start; + ReplaceStr = ReplaceBrackets(Source, ReplaceStr, Match.Matches, NamedMatch); + CurPos = Match.Matches[0].start; + SearchLength = Match.Matches[0].end - Match.Matches[0].start; return true; } while (static_cast(CurrentPosition) != Source.size()); @@ -835,23 +835,23 @@ namespace bool found = false; intptr_t pos = 0; - std::vector FoundMatch; + regex_match FoundMatch; named_regex_match FoundNamedMatch; while (re.SearchEx(Source, pos, Match, NamedMatch)) { - pos = Match[0].start; + pos = Match.Matches[0].start; if (pos > Position) break; - if (options.WholeWords && !CanContainWholeWord(Source, Match[0].start, Match[0].end - Match[0].start, WordDiv)) + if (options.WholeWords && !CanContainWholeWord(Source, Match.Matches[0].start, Match.Matches[0].end - Match.Matches[0].start, WordDiv)) { ++pos; continue; } found = true; - FoundMatch = std::move(Match); + FoundMatch.Matches = std::move(Match.Matches); if (NamedMatch) FoundNamedMatch.Matches = std::move(NamedMatch->Matches); ++pos; @@ -859,9 +859,9 @@ namespace if (found) { - ReplaceStr = ReplaceBrackets(Source, ReplaceStr, FoundMatch, NamedMatch? &FoundNamedMatch : nullptr); - CurPos = FoundMatch[0].start; - SearchLength = FoundMatch[0].end - FoundMatch[0].start; + ReplaceStr = ReplaceBrackets(Source, ReplaceStr, FoundMatch.Matches, NamedMatch? &FoundNamedMatch : nullptr); + CurPos = FoundMatch.Matches[0].start; + SearchLength = FoundMatch.Matches[0].end - FoundMatch.Matches[0].start; } @@ -874,7 +874,7 @@ bool SearchString( string_view const Needle, i_searcher const& NeedleSearcher, const RegExp& re, - std::vector& Match, + regex_match& Match, named_regex_match* const NamedMatch, int& CurPos, search_replace_string_options const options, @@ -902,7 +902,7 @@ bool SearchAndReplaceString( string_view const Needle, i_searcher const& NeedleSearcher, const RegExp& re, - std::vector& Match, + regex_match& Match, named_regex_match* const NamedMatch, string& ReplaceStr, int& CurPos, diff --git a/far/strmix.hpp b/far/strmix.hpp index 3c59e87dd2..44c8fc48ba 100644 --- a/far/strmix.hpp +++ b/far/strmix.hpp @@ -51,7 +51,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. class RegExp; struct RegExpMatch; -struct named_regex_match; +class regex_match; +class named_regex_match; namespace legacy { @@ -192,7 +193,7 @@ bool SearchString( string_view Needle, i_searcher const& NeedleSearcher, const RegExp& re, - std::vector& Match, + regex_match& Match, named_regex_match* NamedMatch, int& CurPos, search_replace_string_options options, @@ -206,7 +207,7 @@ bool SearchAndReplaceString( string_view Needle, i_searcher const& NeedleSearcher, const RegExp& re, - std::vector& Match, + regex_match& Match, named_regex_match* NamedMatch, string& ReplaceStr, int& CurPos, diff --git a/far/thirdparty/short_alloc/short_alloc.h b/far/thirdparty/short_alloc/short_alloc.h new file mode 100644 index 0000000000..01c2d9680e --- /dev/null +++ b/far/thirdparty/short_alloc/short_alloc.h @@ -0,0 +1,160 @@ +#ifndef SHORT_ALLOC_H +#define SHORT_ALLOC_H + +// The MIT License (MIT) +// +// Copyright (c) 2015 Howard Hinnant +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include +#include + +template +class arena +{ + alignas(alignment) char buf_[N]; + char* ptr_; + +public: + ~arena() {ptr_ = nullptr;} + arena() noexcept : ptr_(buf_) {} + arena(const arena&) = delete; + arena& operator=(const arena&) = delete; + + template char* allocate(std::size_t n); + void deallocate(char* p, std::size_t n) noexcept; + + static constexpr std::size_t size() noexcept {return N;} + std::size_t used() const noexcept {return static_cast(ptr_ - buf_);} + void reset() noexcept {ptr_ = buf_;} + +private: + static + std::size_t + align_up(std::size_t n) noexcept + {return (n + (alignment-1)) & ~(alignment-1);} + + bool + pointer_in_buffer(char* p) noexcept + { + return std::uintptr_t(buf_) <= std::uintptr_t(p) && + std::uintptr_t(p) <= std::uintptr_t(buf_) + N; + } +}; + +template +template +char* +arena::allocate(std::size_t n) +{ + static_assert(ReqAlign <= alignment, "alignment is too small for this arena"); + assert(pointer_in_buffer(ptr_) && "short_alloc has outlived arena"); + auto const aligned_n = align_up(n); + if (static_cast(buf_ + N - ptr_) >= aligned_n) + { + char* r = ptr_; + ptr_ += aligned_n; + return r; + } + + static_assert(alignment <= alignof(std::max_align_t), "you've chosen an " + "alignment that is larger than alignof(std::max_align_t), and " + "cannot be guaranteed by normal operator new"); + return static_cast(::operator new(n)); +} + +template +void +arena::deallocate(char* p, std::size_t n) noexcept +{ + assert(pointer_in_buffer(ptr_) && "short_alloc has outlived arena"); + if (pointer_in_buffer(p)) + { + n = align_up(n); + if (p + n == ptr_) + ptr_ = p; + } + else + ::operator delete(p); +} + +template +class short_alloc +{ +public: + using value_type = T; + static auto constexpr alignment = Align; + static auto constexpr size = N; + using arena_type = arena; + +private: + arena_type& a_; + +public: + short_alloc(const short_alloc&) = default; + short_alloc& operator=(const short_alloc&) = delete; + + short_alloc(arena_type& a) noexcept : a_(a) + { + static_assert(size % alignment == 0, + "size N needs to be a multiple of alignment Align"); + } + template + short_alloc(const short_alloc& a) noexcept + : a_(a.a_) {} + + template struct rebind {using other = short_alloc<_Up, N, alignment>;}; + + T* allocate(std::size_t n) + { + return reinterpret_cast(a_.template allocate(n*sizeof(T))); + } + void deallocate(T* p, std::size_t n) noexcept + { + a_.deallocate(reinterpret_cast(p), n*sizeof(T)); + } + + template + friend + bool + operator==(const short_alloc& x, const short_alloc& y) noexcept; + + template friend class short_alloc; +}; + +template +inline +bool +operator==(const short_alloc& x, const short_alloc& y) noexcept +{ + return N == M && A1 == A2 && &x.a_ == &y.a_; +} + +template +inline +bool +operator!=(const short_alloc& x, const short_alloc& y) noexcept +{ + return !(x == y); +} + +#endif // SHORT_ALLOC_H diff --git a/far/vbuild.m4 b/far/vbuild.m4 index 255d828b96..53fee887e2 100644 --- a/far/vbuild.m4 +++ b/far/vbuild.m4 @@ -1 +1 @@ -6205 +6206 diff --git a/far/viewer.cpp b/far/viewer.cpp index 33a61f6c1f..299c60499b 100644 --- a/far/viewer.cpp +++ b/far/viewer.cpp @@ -2449,7 +2449,7 @@ struct Viewer::search_data string word_div; // IN: Word delimiter characters if Text; empty otherwise bool first_Rex{true}; RegExp Rex; // IN: Compiled regex if Regex - std::vector RexMatch; + regex_match RexMatch; }; enum SEARCHER_RESULT: int @@ -2875,10 +2875,10 @@ SEARCHER_RESULT Viewer::search_regex_forward(search_data* sd) if (!sd->Rex.SearchEx({ line, static_cast(nw) }, off, sd->RexMatch)) // doesn't match break; - const auto fpos = bpos + GetStrBytesNum(t_line, sd->RexMatch[0].start); + const auto fpos = bpos + GetStrBytesNum(t_line, sd->RexMatch.Matches[0].start); if ( fpos < cpos ) { - off = sd->RexMatch[0].start + 1; // skip + off = sd->RexMatch.Matches[0].start + 1; // skip continue; } else if (swrap == SearchWrap_CYCLE && !tail_part && fpos >= StartSearchPos) @@ -2888,7 +2888,7 @@ SEARCHER_RESULT Viewer::search_regex_forward(search_data* sd) else // found { sd->MatchPos = fpos; - sd->search_len = GetStrBytesNum(t_line + off + sd->RexMatch[0].start, sd->RexMatch[0].end - sd->RexMatch[0].start); + sd->search_len = GetStrBytesNum(t_line + off + sd->RexMatch.Matches[0].start, sd->RexMatch.Matches[0].end - sd->RexMatch.Matches[0].start); return Search_Found; } } @@ -2938,8 +2938,8 @@ SEARCHER_RESULT Viewer::search_regex_backward(search_data* sd) if (!sd->Rex.SearchEx({ line, static_cast(nw) }, off, sd->RexMatch)) break; - const auto fpos = bpos + GetStrBytesNum(t_line, sd->RexMatch[0].start); - const auto flen = GetStrBytesNum(t_line + sd->RexMatch[0].start, sd->RexMatch[0].end - sd->RexMatch[0].start); + const auto fpos = bpos + GetStrBytesNum(t_line, sd->RexMatch.Matches[0].start); + const auto flen = GetStrBytesNum(t_line + sd->RexMatch.Matches[0].start, sd->RexMatch.Matches[0].end - sd->RexMatch.Matches[0].start); if (fpos+flen > cpos) break; @@ -2949,7 +2949,7 @@ SEARCHER_RESULT Viewer::search_regex_backward(search_data* sd) prev_len = flen; } - off = sd->RexMatch[0].start + 1; // skip + off = sd->RexMatch.Matches[0].start + 1; // skip } if (prev_len >= 0)