Skip to content

Commit

Permalink
FarGroupgh-741: Far regular expressions - performance
Browse files Browse the repository at this point in the history
  • Loading branch information
alabuzhev committed Oct 29, 2023
1 parent 0303fa7 commit 97ac948
Show file tree
Hide file tree
Showing 19 changed files with 450 additions and 163 deletions.
219 changes: 122 additions & 97 deletions far/RegExp.cpp

Large diffs are not rendered by default.

43 changes: 36 additions & 7 deletions far/RegExp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common/string_utils.hpp"

// External:
#include "stack_allocator.hpp"

//----------------------------------------------------------------------------

Expand Down Expand Up @@ -101,9 +102,34 @@ enum
OP_STRICT = 0x0040,
};

struct named_regex_match
class regex_match
{
unordered_string_map<size_t> Matches;
public:
using matches = std::vector<RegExpMatch, stack_allocator<RegExpMatch, 4096>>;

private:
matches::allocator_type::arena_type m_Arena;

public:
matches Matches{ m_Arena };
};

// Storage of named brackets: associates a bracket name (string) with a size_t,
// which callers use as an index into the vector of matches.
// The container obtains its memory through stack_allocator, backed by the
// arena object owned by this class.
class named_regex_match
{
public:
// Container type exposed to callers. string_comparer is supplied both as the
// hash functor and as the key-equality functor.
// NOTE(review): 4096 is stack_allocator's N argument - presumably the arena
// capacity in bytes; confirm against short_alloc.
using matches = std::unordered_map<
string,
size_t,
string_comparer,
string_comparer,
stack_allocator<std::pair<string const, size_t>, 4096>
>;

private:
// Arena passed to the allocator of Matches. It must stay declared before
// Matches: non-static members are initialised in declaration order, and
// Matches is constructed from m_Arena.
matches::allocator_type::arena_type m_Arena;

public:
// The actual name -> number map, constructed with an allocator bound to m_Arena.
matches Matches{ m_Arena };
};

class regex_exception: public far_exception
Expand Down Expand Up @@ -145,8 +171,11 @@ class RegExp
struct REOpCode;
struct UniSet;
struct StateStackItem;
class state_stack;

private:


// code
std::vector<REOpCode> code;

Expand All @@ -170,7 +199,7 @@ class RegExp
int CalcLength(string_view src);
void InnerCompile(const wchar_t* start, const wchar_t* src, int srclength, int options);

bool InnerMatch(const wchar_t* start, const wchar_t* str, const wchar_t* strend, std::vector<RegExpMatch>& match, named_regex_match& NamedMatch, std::vector<StateStackItem>& stack) const;
// Internal matching routine operating on raw character pointers.
// NOTE(review): presumably [str, strend) is the range being matched and start is
// the beginning of the whole subject string - confirm against the definition.
// RegexMatch / NamedMatch are the caller-provided result storages and StateStack
// is the caller-provided state stack.
bool InnerMatch(const wchar_t* start, const wchar_t* str, const wchar_t* strend, regex_match& RegexMatch, named_regex_match& NamedMatch, state_stack& StateStack) const;

void TrimTail(const wchar_t* start, const wchar_t*& strend) const;

Expand Down Expand Up @@ -210,20 +239,20 @@ class RegExp
\param NamedMatch - storage of named brackets.
\sa SMatch
*/
bool Match(string_view text, std::vector<RegExpMatch>& match, named_regex_match* NamedMatch = {}) const;
bool Match(string_view text, regex_match& match, named_regex_match* NamedMatch = {}) const;
/*! Advanced version of match. Can be used for multiple matches
on one string (to imitate the /g modifier of Perl regexps).
*/
bool MatchEx(string_view text, size_t From, std::vector<RegExpMatch>& match, named_regex_match* NamedMatch = {}) const;
bool MatchEx(string_view text, size_t From, regex_match& match, named_regex_match* NamedMatch = {}) const;
/*! Try to find substring that will match regexp.
Parameters and return value are the same as for Match.
It is highly recommended to call Optimize before Search.
*/
bool Search(string_view text, std::vector<RegExpMatch>& match, named_regex_match* NamedMatch = {}) const;
bool Search(string_view text, regex_match& match, named_regex_match* NamedMatch = {}) const;
/*! Advanced version of search. Can be used for multiple searches
on one string (to imitate the /g modifier of Perl regexps).
*/
bool SearchEx(string_view text, size_t From, std::vector<RegExpMatch>& match, named_regex_match* NamedMatch = {}) const;
bool SearchEx(string_view text, size_t From, regex_match& match, named_regex_match* NamedMatch = {}) const;

bool Search(string_view Str) const;

Expand Down
5 changes: 5 additions & 0 deletions far/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
--------------------------------------------------------------------------------
drkns 2023-10-29 01:30:01+01:00 - build 6206

1. gh-741: Far regular expressions - performance.

--------------------------------------------------------------------------------
drkns 2023-10-26 18:32:38+01:00 - build 6205

Expand Down
2 changes: 1 addition & 1 deletion far/editor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3405,7 +3405,7 @@ void Editor::DoSearchReplace(const SearchReplaceDisposition Disposition)

auto CurPtr = FindAll ? FirstLine() : m_it_CurLine, TmpPtr = CurPtr;

std::vector<RegExpMatch> Match;
regex_match Match;
named_regex_match NamedMatch;
RegExp re;

Expand Down
1 change: 1 addition & 0 deletions far/far.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ cl /nologo /c /Fo"$(IntDir)%(Filename)_c++.testobj" /TP api_test.c
<ClInclude Include="stddlg.hpp" />
<ClInclude Include="string_sort.hpp" />
<ClInclude Include="string_utils.hpp" />
<ClInclude Include="stack_allocator.hpp" />
<ClInclude Include="strmix.hpp" />
<ClInclude Include="taskbar.hpp" />
<ClInclude Include="testing.hpp" />
Expand Down
3 changes: 3 additions & 0 deletions far/far.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -1111,6 +1111,9 @@
<ClInclude Include="sqlitedb.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="stack_allocator.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="stddlg.hpp">
<Filter>Header Files</Filter>
</ClInclude>
Expand Down
20 changes: 14 additions & 6 deletions far/filemasks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,15 +105,15 @@ class filemasks::masks
bool operator==(string_view FileName) const;
bool empty() const;

using last_regex_matches = std::pair<std::vector<RegExpMatch> const*, named_regex_match const*>;
using last_regex_matches = std::pair<std::vector<RegExpMatch> const*, unordered_string_map<size_t> const*>;
last_regex_matches last_matches() const;

private:
struct regex_data
{
RegExp Regex;
mutable std::vector<RegExpMatch> Match;
mutable named_regex_match NamedMatch;
mutable unordered_string_map<size_t> NamedMatch;
};

std::variant<std::vector<string>, regex_data> m_Masks;
Expand Down Expand Up @@ -413,7 +413,15 @@ bool filemasks::masks::operator==(const string_view FileName) const
},
[&](const regex_data& Data)
{
return Data.Regex.Search(FileName, Data.Match, &Data.NamedMatch);
regex_match Match;
named_regex_match NamedMatch;
if (!Data.Regex.Search(FileName, Match, &NamedMatch))
return false;

Data.Match.assign(ALL_CONST_RANGE(Match.Matches));
for (const auto& [k, v]: NamedMatch.Matches)
Data.NamedMatch.emplace(k, v);
return true;
}
}, m_Masks);
}
Expand Down Expand Up @@ -507,7 +515,7 @@ TEST_CASE("masks_with_matches")
Masks.assign(L"/(.+)\\.(?:.+)\\.(?{scratch}.+)/"sv);

std::vector<RegExpMatch> Matches;
named_regex_match NamedMatches;
unordered_string_map<size_t> NamedMatches;
filemasks::regex_matches const RegexMatches{ Matches, NamedMatches };
const auto Test = L"none.shall.pass"sv;

Expand All @@ -524,7 +532,7 @@ TEST_CASE("masks_with_matches")
REQUIRE(Matches[2].start == 11);
REQUIRE(Matches[2].end == 15);

REQUIRE(NamedMatches.Matches.size() == 1u);
REQUIRE(NamedMatches.Matches.at(L"scratch"s) == 2u);
REQUIRE(NamedMatches.size() == 1u);
REQUIRE(NamedMatches.at(L"scratch"s) == 2u);
}
#endif
4 changes: 2 additions & 2 deletions far/filemasks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Common:
#include "common/preprocessor.hpp"
#include "common/string_utils.hpp"

// External:

//----------------------------------------------------------------------------

struct RegExpMatch;
struct named_regex_match;

enum FM_FLAGS
{
Expand All @@ -66,7 +66,7 @@ class filemasks
filemasks& operator=(filemasks&&) noexcept;

bool assign(string_view Str, DWORD Flags = 0);
using regex_matches = std::pair<std::vector<RegExpMatch>&, named_regex_match&>;
using regex_matches = std::pair<std::vector<RegExpMatch>&, unordered_string_map<size_t>&>;
bool check(string_view Name, regex_matches const* Matches = {}) const;
bool empty() const;

Expand Down
4 changes: 2 additions & 2 deletions far/filetype.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ bool ProcessLocalFileTypes(string_view const Name, string_view const ShortName,
{
string Command;
std::vector<RegExpMatch> Matches;
named_regex_match NamedMatches;
unordered_string_map<size_t> NamedMatches;
};

const auto AddMatches = [&](menu_data const& Data)
Expand All @@ -111,7 +111,7 @@ bool ProcessLocalFileTypes(string_view const Name, string_view const ShortName,
);
}

for (const auto& [GroupName, GroupNumber]: Data.NamedMatches.Matches)
for (const auto& [GroupName, GroupNumber]: Data.NamedMatches)
{
const auto& Match = Data.Matches[GroupNumber];
Context.Variables.emplace(
Expand Down
2 changes: 1 addition & 1 deletion far/help.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1958,7 +1958,7 @@ void Help::Search(const os::fs::file& HelpFile,uintptr_t nCodePage)
bool TopicFound=false;
string strCurTopic, strEntryName;

std::vector<RegExpMatch> Match;
regex_match Match;
named_regex_match NamedMatch;
RegExp re;

Expand Down
21 changes: 12 additions & 9 deletions far/map_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ static void read_vc(std::istream& Stream, unordered_string_set& Files, std::map<
ReBase.Compile(L"^ +Preferred load address is ([0-9A-Fa-f]+)$"sv, OP_OPTIMIZE);
ReSymbol.Compile(L"^ +[0-9A-Fa-f]+:[0-9A-Fa-f]+ +([^ ]+) +([0-9A-Fa-f]+) .+ ([^ ]+)$"sv, OP_OPTIMIZE);

std::vector<RegExpMatch> m;
regex_match Match;
auto& m = Match.Matches;
m.reserve(3);

uintptr_t BaseAddress{};
Expand All @@ -190,13 +191,13 @@ static void read_vc(std::istream& Stream, unordered_string_set& Files, std::map<
if (i.Str.empty())
continue;

if (!BaseAddress && ReBase.Search(i.Str, m))
if (!BaseAddress && ReBase.Search(i.Str, Match))
{
BaseAddress = from_string<uintptr_t>(get_match(i.Str, m[1]), {}, 16);
continue;
}

if (ReSymbol.Search(i.Str, m))
if (ReSymbol.Search(i.Str, Match))
{
auto Address = from_string<uintptr_t>(get_match(i.Str, m[2]), {}, 16);
if (!Address)
Expand All @@ -222,7 +223,8 @@ static void read_clang(std::istream& Stream, unordered_string_set& Files, std::m
ReObject.Compile(L"^[0-9A-Fa-f]+ [0-9A-Fa-f]+ +[0-9]+ (.+)$"sv);
ReSymbol.Compile(L"^([0-9A-Fa-f]+) [0-9A-Fa-f]+ 0 (.+)$"sv);

std::vector<RegExpMatch> m;
regex_match Match;
auto& m = Match.Matches;
m.reserve(2);

string ObjName;
Expand All @@ -232,7 +234,7 @@ static void read_clang(std::istream& Stream, unordered_string_set& Files, std::m
if (i.Str.empty())
continue;

if (ReSymbol.Search(i.Str, m))
if (ReSymbol.Search(i.Str, Match))
{
map_file::line Line;
Line.Name = get_match(i.Str, m[2]);
Expand All @@ -242,7 +244,7 @@ static void read_clang(std::istream& Stream, unordered_string_set& Files, std::m
continue;
}

if (ReObject.Search(i.Str, m))
if (ReObject.Search(i.Str, Match))
{
ObjName = get_match(i.Str, m[1]);
continue;
Expand All @@ -257,7 +259,8 @@ static void read_gcc(std::istream& Stream, unordered_string_set& Files, std::map
ReFileName.Compile(L"^\\[ *[0-9]+\\]\\(.+\\)\\(.+\\)\\(.+\\)\\(.+\\) \\(nx 1\\) 0x[0-9A-Fa-f]+ (.+)$"sv);
ReSymbol.Compile(L"^\\[ *[0-9]+\\]\\(.+\\)\\(.+\\)\\(.+\\)\\(.+\\) \\(nx 0\\) 0x([0-9A-Fa-f]+) (.+)$"sv);

std::vector<RegExpMatch> m;
regex_match Match;
auto& m = Match.Matches;
m.reserve(2);

const auto BaseAddress = 0x1000;
Expand All @@ -269,14 +272,14 @@ static void read_gcc(std::istream& Stream, unordered_string_set& Files, std::map
if (i.Str.empty())
continue;

if (ReFile.Search(i.Str, m) && ReFileName.Search(LastLine, m))
if (ReFile.Search(i.Str, Match) && ReFileName.Search(LastLine, Match))
{
FileName = get_match(LastLine, m[1]);
LastLine.clear();
continue;
}

if (ReSymbol.Search(i.Str, m))
if (ReSymbol.Search(i.Str, Match))
{
map_file::line Line;
Line.Name = get_match(i.Str, m[2]);
Expand Down
12 changes: 6 additions & 6 deletions far/plugapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2822,13 +2822,13 @@ intptr_t WINAPI apiRegExpControl(HANDLE hHandle, FAR_REGEXP_CONTROL_COMMANDS Com
{
auto& Handle = *static_cast<regex_handle*>(hHandle);
const auto data = static_cast<RegExpSearch*>(Param2);
std::vector<RegExpMatch> Match;
regex_match Match;

if (!Handle.Regex.MatchEx({ data->Text, static_cast<size_t>(data->Length) }, data->Position, Match, &Handle.NamedMatch))
return false;

const auto MaxSize = std::min(static_cast<size_t>(data->Count), Match.size());
std::copy_n(Match.cbegin(), MaxSize, data->Match);
const auto MaxSize = std::min(static_cast<size_t>(data->Count), Match.Matches.size());
std::copy_n(Match.Matches.cbegin(), MaxSize, data->Match);
data->Count = MaxSize;
return true;
}
Expand All @@ -2837,13 +2837,13 @@ intptr_t WINAPI apiRegExpControl(HANDLE hHandle, FAR_REGEXP_CONTROL_COMMANDS Com
{
auto& Handle = *static_cast<regex_handle*>(hHandle);
const auto data = static_cast<RegExpSearch*>(Param2);
std::vector<RegExpMatch> Match;
regex_match Match;

if (!Handle.Regex.SearchEx({ data->Text, static_cast<size_t>(data->Length) }, data->Position, Match, &Handle.NamedMatch))
return false;

const auto MaxSize = std::min(static_cast<size_t>(data->Count), Match.size());
std::copy_n(Match.cbegin(), MaxSize, data->Match);
const auto MaxSize = std::min(static_cast<size_t>(data->Count), Match.Matches.size());
std::copy_n(Match.Matches.cbegin(), MaxSize, data->Match);
data->Count = MaxSize;
return true;
}
Expand Down
52 changes: 52 additions & 0 deletions far/stack_allocator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#ifndef STACK_ALLOCATOR_HPP_7214ED21_CB3F_4E83_9723_F7707D14C876
#define STACK_ALLOCATOR_HPP_7214ED21_CB3F_4E83_9723_F7707D14C876
#pragma once

/*
stack_allocator.hpp
*/
/*
Copyright © 2023 Far Group
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the authors may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Internal:

// Platform:

// Common:

// External:
#include "thirdparty/short_alloc/short_alloc.h"

//----------------------------------------------------------------------------

// Project-wide name for the short_alloc allocator pulled in from
// thirdparty/short_alloc/short_alloc.h.
// All three template arguments are forwarded to short_alloc unchanged; the
// alignment defaults to alignof(std::max_align_t) when not specified.
template<typename Type, std::size_t Size, std::size_t Alignment = alignof(std::max_align_t)>
using stack_allocator = short_alloc<Type, Size, Alignment>;


#endif // STACK_ALLOCATOR_HPP_7214ED21_CB3F_4E83_9723_F7707D14C876
Loading

0 comments on commit 97ac948

Please sign in to comment.