Skip to content

Commit

Permalink
Fuzzy unicode search, fix file reading in some cases
Browse files Browse the repository at this point in the history
  • Loading branch information
alabuzhev committed Sep 25, 2021
1 parent bc3d5e2 commit 31a8325
Show file tree
Hide file tree
Showing 17 changed files with 618 additions and 310 deletions.
8 changes: 8 additions & 0 deletions far/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
--------------------------------------------------------------------------------
drkns 25.09.2021 03:00:00 +0100 - build 5890

1. Experimental: fuzzy search in panels, editor and help: "a" can match "àáâãäåāăą", "еи" - "ёй", "き" - "ぎ", "Æ" - "ae", "ß" - "ss" and so on, and vice versa.
Beware of bugs.

2. Fixed incorrect reading of files in certain rare scenarios, e.g. when a multibyte character or EOL falls on the read buffer boundary.

--------------------------------------------------------------------------------
drkns 19.09.2021 14:47:00 +0100 - build 5889

Expand Down
19 changes: 16 additions & 3 deletions far/common/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,24 @@ class base: public T
using base_type = T;
};

// Computes the size/capacity to request when growing a container exponentially without ever shrinking it.
// NOTE(review): this span is a diff view - where two near-identical lines follow each other,
// the first is presumably the pre-commit line and the second its replacement.
//
// In the std::optional form:
// - if Desired is set and is smaller than Current, Current is returned unchanged;
// - otherwise the result is at least Current + (Current + 1) / 2 (about 1.5x, rounded up),
//   and at least *Desired when Desired is set.
// NOTE(review): with Current == 0 and no Desired the result is 0 (integer division),
// so a caller that loops until the container grows would not make progress from an empty one - confirm against callers.
inline size_t grow_exp_noshrink(size_t const Current, size_t const Desired)
inline size_t grow_exp_noshrink(size_t const Current, std::optional<size_t> const Desired)
{
// Unlike vector, string is allowed to shrink (another splendid design decision from the committee):
// "Calling reserve() with a res_arg argument less than capacity() is in effect a non-binding shrink request." (21.4.4 basic_string capacity)
// gcc decided to go mental and made that a _binding_ shrink request.
if (Desired < Current)
if (Desired && *Desired < Current)
return Current;

// For vector reserve typically allocates exactly the requested amount instead of exponential growth.
// This can be really bad if called in a loop.
return std::max(Current + Current / 2, Desired);
// Exponential step: the "+ 1" rounds the half up, so a Current of 1 grows to 2 rather than staying at 1.
const auto LowerBound = Current + (Current + 1) / 2;
return Desired? std::max(LowerBound, *Desired) : LowerBound;
}

// Reserves the next exponential capacity step for Container.
// No desired capacity is supplied, so the new capacity is whatever
// grow_exp_noshrink derives from the current capacity alone.
template<typename container>
void reserve_exp_noshrink(container& Container)
{
	const auto NewCapacity = grow_exp_noshrink(Container.capacity(), {});
	Container.reserve(NewCapacity);
}

template<typename container>
Expand All @@ -64,6 +71,12 @@ void reserve_exp_noshrink(container& Container, size_t const DesiredCapacity)
Container.reserve(grow_exp_noshrink(Container.capacity(), DesiredCapacity));
}

// Resizes Container to the next exponential size step.
// No desired size is supplied, so the new size is whatever grow_exp_noshrink
// derives from the current size alone; added elements are value-initialised.
template<typename container>
void resize_exp_noshrink(container& Container)
{
	const auto NewSize = grow_exp_noshrink(Container.size(), {});
	Container.resize(NewSize, {});
}

template<typename container>
void resize_exp_noshrink(container& Container, size_t const DesiredSize)
{
Expand Down
52 changes: 34 additions & 18 deletions far/editor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3462,8 +3462,8 @@ bool Editor::Search(bool Next)
QuotedStr = quote_unconditional(strSearchStr);
}

const auto strSearchStrUpper = Case? strSearchStr : upper(strSearchStr);
const auto strSearchStrLower = Case? strSearchStr : lower(strSearchStr);
searchers Searchers;
const auto& Searcher = init_searcher(Searchers, Case, strLastSearchStr);

const time_check TimeCheck;
std::optional<single_progress> Progress;
Expand Down Expand Up @@ -3495,7 +3495,23 @@ bool Editor::Search(bool Next)
auto strReplaceStrCurrent = ReplaceMode? strReplaceStr : L""s;

int SearchLength;
if (SearchAndReplaceString(CurPtr->GetString(), strSearchStr, strSearchStrUpper, strSearchStrLower, re, m.data(), &hm, strReplaceStrCurrent, CurPos, Case, WholeWords, ReverseSearch, Regexp, PreserveStyle, &SearchLength, GetWordDiv()))
if (SearchAndReplaceString(
CurPtr->GetString(),
strSearchStr,
Searcher,
re,
m.data(),
&hm,
strReplaceStrCurrent,
CurPos,
Case,
WholeWords,
ReverseSearch,
Regexp,
PreserveStyle,
&SearchLength,
GetWordDiv()
))
{
Match = true;

Expand Down Expand Up @@ -6630,11 +6646,11 @@ void Editor::GetCacheParams(EditorPosCache &pc) const
pc.bm=m_SavePos;
}

static std::string_view GetLineBytes(string_view const Str, std::vector<char>& Buffer, uintptr_t const Codepage, encoding::error_position* const ErrorPosition)
static std::string_view GetLineBytes(string_view const Str, std::vector<char>& Buffer, uintptr_t const Codepage, encoding::diagnostics* const Diagnostics)
{
for (;;)
{
auto const Length = encoding::get_bytes(Codepage, Str, Buffer, ErrorPosition);
auto const Length = encoding::get_bytes(Codepage, Str, Buffer, Diagnostics);

if (Length <= Buffer.size())
return { Buffer.data(), Length };
Expand All @@ -6648,11 +6664,11 @@ bool Editor::SetLineCodePage(iterator const& Iterator, uintptr_t const Codepage,
if (Codepage == m_codepage || Iterator->m_Str.empty())
return true;

encoding::error_position ErrorPosition;
const auto Bytes = GetLineBytes(Iterator->m_Str, decoded, m_codepage, Validate? &ErrorPosition : nullptr);
auto Result = !Bytes.empty() && !ErrorPosition;
encoding::get_chars(Codepage, Bytes, Iterator->m_Str, &ErrorPosition);
Result = Result && !Iterator->m_Str.empty() && !ErrorPosition;
encoding::diagnostics Diagnostics;
const auto Bytes = GetLineBytes(Iterator->m_Str, decoded, m_codepage, Validate? &Diagnostics : nullptr);
auto Result = !Bytes.empty() && !Diagnostics.ErrorPosition;
encoding::get_chars(Codepage, Bytes, Iterator->m_Str, &Diagnostics);
Result = Result && !Iterator->m_Str.empty() && !Diagnostics.ErrorPosition;
Iterator->Changed();

return Result;
Expand All @@ -6670,19 +6686,19 @@ bool Editor::TryCodePage(uintptr_t const Codepage, uintptr_t& ErrorCodepage, siz
if (i->m_Str.empty())
continue;

encoding::error_position ErrorPosition;
const auto Bytes = GetLineBytes(i->m_Str, decoded, m_codepage, &ErrorPosition);
encoding::diagnostics Diagnostics;
const auto Bytes = GetLineBytes(i->m_Str, decoded, m_codepage, &Diagnostics);

if (Bytes.empty() || ErrorPosition)
if (Bytes.empty() || Diagnostics.ErrorPosition)
{
ErrorCodepage = m_codepage;
ErrorLine = LineNumber;
ErrorPos = *ErrorPosition;
ErrorPos = *Diagnostics.ErrorPosition;
ErrorChar = i->m_Str[ErrorPos];
return false;
}

if (!encoding::get_chars_count(Codepage, Bytes, &ErrorPosition) || ErrorPosition)
if (!encoding::get_chars_count(Codepage, Bytes, &Diagnostics) || Diagnostics.ErrorPosition)
{
ErrorCodepage = Codepage;
ErrorLine = LineNumber;
Expand All @@ -6691,12 +6707,12 @@ bool Editor::TryCodePage(uintptr_t const Codepage, uintptr_t& ErrorCodepage, siz
const auto Info = GetCodePageInfo(m_codepage);
if (Info && Info->MaxCharSize == 1)
{
ErrorPos = *ErrorPosition;
ErrorPos = *Diagnostics.ErrorPosition;
}
else
{
const auto BytesCount = encoding::get_bytes(m_codepage, i->m_Str, decoded, &ErrorPosition);
ErrorPos = encoding::get_chars_count(m_codepage, { decoded.data(), std::min(*ErrorPosition, BytesCount) });
const auto BytesCount = encoding::get_bytes(m_codepage, i->m_Str, decoded, &Diagnostics);
ErrorPos = encoding::get_chars_count(m_codepage, { decoded.data(), std::min(*Diagnostics.ErrorPosition, BytesCount) });
}

ErrorChar = i->m_Str[ErrorPos];
Expand Down
Loading

0 comments on commit 31a8325

Please sign in to comment.