Skip to content

Commit

Permalink
feat: add batch remove invalid index feature
Browse files Browse the repository at this point in the history
- Add a RemoveIndexTask method to the DBus interface to support removing invalid index entries in batch
- Refactor the fulltext searcher to collect invalid index paths and remove them in one batch
- Add the Remove task type and its handler to the text index service
- Remove deprecated index operation functions from the fulltext searcher
- Add a hasRunningTask check to avoid concurrent operations

This change makes invalid index cleanup more efficient by removing the invalid entries in a single batch instead of one by one.

Log: add batch remove invalid index feature
  • Loading branch information
Johnson-zs committed Nov 30, 2024
1 parent 6f5d0c5 commit 0a6afc2
Show file tree
Hide file tree
Showing 11 changed files with 180 additions and 126 deletions.
4 changes: 4 additions & 0 deletions assets/dbus/org.deepin.Filemanager.TextIndex.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
<arg type="b" direction="out"/>
<arg name="path" type="s" direction="in"/>
</method>
<!-- RemoveIndexTask: takes a list of paths (D-Bus type "as", array of strings)
     and returns a boolean (D-Bus type "b"). -->
<method name="RemoveIndexTask">
<arg type="b" direction="out"/>
<arg name="paths" type="as" direction="in"/>
</method>
<method name="StopCurrentTask">
<arg type="b" direction="out"/>
</method>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,86 +68,6 @@ IndexReaderPtr FullTextSearcherPrivate::newIndexReader()
return IndexReader::open(FSDirectory::open(indexStorePath().toStdWString()), true);
}

void FullTextSearcherPrivate::indexDocs(const IndexWriterPtr &writer, const QString &file, IndexType type)
{
Q_ASSERT(writer);

try {
switch (type) {
case kAddIndex: {
fmDebug() << "Adding [" << file << "]";
// 添加
writer->addDocument(fileDocument(file));
break;
}
case kUpdateIndex: {
fmDebug() << "Update file: [" << file << "]";
// 定义一个更新条件
TermPtr term = newLucene<Term>(L"path", file.toStdWString());
// 更新
writer->updateDocument(term, fileDocument(file));
break;
}
case kDeleteIndex: {
fmDebug() << "Delete file: [" << file << "]";
// 定义一个删除条件
TermPtr term = newLucene<Term>(L"path", file.toStdWString());
// 删除
writer->deleteDocuments(term);
break;
}
}
} catch (const LuceneException &e) {
QMetaEnum enumType = QMetaEnum::fromType<FullTextSearcherPrivate::IndexType>();
fmWarning() << QString::fromStdWString(e.getError()) << " type: " << enumType.valueToKey(type);
} catch (const std::exception &e) {
QMetaEnum enumType = QMetaEnum::fromType<FullTextSearcherPrivate::IndexType>();
fmWarning() << QString(e.what()) << " type: " << enumType.valueToKey(type);
} catch (...) {
fmWarning() << "Index document failed! " << file;
}
}

bool FullTextSearcherPrivate::checkUpdate(const IndexReaderPtr &reader, const QString &file, IndexType &type)
{
Q_ASSERT(reader);

try {
SearcherPtr searcher = newLucene<IndexSearcher>(reader);
TermQueryPtr query = newLucene<TermQuery>(newLucene<Term>(L"path", file.toStdWString()));

// 文件路径为唯一值,所以搜索一个结果就行了
TopDocsPtr topDocs = searcher->search(query, 1);
int32_t numTotalHits = topDocs->totalHits;
if (numTotalHits == 0) {
type = kAddIndex;
return true;
} else {
DocumentPtr doc = searcher->doc(topDocs->scoreDocs[0]->doc);
auto info = InfoFactory::create<FileInfo>(QUrl::fromLocalFile(file),
Global::CreateFileInfoType::kCreateFileInfoSync);
if (!info)
return false;

const QDateTime &modifyTime { info->timeOf(TimeInfoType::kLastModified).toDateTime() };
const QString &modifyEpoch { QString::number(modifyTime.toSecsSinceEpoch()) };
const String &storeTime { doc->get(L"modified") };
if (modifyEpoch.toStdWString() != storeTime) {
type = kUpdateIndex;
return true;
}
}
} catch (const LuceneException &e) {
fmWarning() << QString::fromStdWString(e.getError()) << " file: " << file;
} catch (const std::exception &e) {
fmWarning() << QString(e.what()) << " file: " << file;
} catch (...) {
fmWarning() << "The file checked failed!" << file;
}

return false;
}

void FullTextSearcherPrivate::tryNotify()
{
int cur = notifyTimer.elapsed();
Expand All @@ -158,26 +78,6 @@ void FullTextSearcherPrivate::tryNotify()
}
}

/**
 * @brief Build the Lucene document that represents one file in the index.
 *
 * The document carries up to three fields:
 *  - "path":     the file path, stored and indexed without analysis;
 *  - "modified": last-modified time as seconds since the epoch, stored and
 *                indexed without analysis (omitted if no file info is available);
 *  - "contents": the text returned by DocParser::convertFile, stored and analyzed.
 *
 * @param file Local path of the file to describe.
 * @return The newly created document.
 */
DocumentPtr FullTextSearcherPrivate::fileDocument(const QString &file)
{
    DocumentPtr doc = newLucene<Document>();
    // file path
    doc->add(newLucene<Field>(L"path", file.toStdWString(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));

    // file last modified time
    auto info = InfoFactory::create<FileInfo>(QUrl::fromLocalFile(file),
                                              Global::CreateFileInfoType::kCreateFileInfoSync);
    if (info) {
        const QDateTime &modifyTime { info->timeOf(TimeInfoType::kLastModified).toDateTime() };
        const QString &modifyEpoch { QString::number(modifyTime.toSecsSinceEpoch()) };
        doc->add(newLucene<Field>(L"modified", modifyEpoch.toStdWString(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    } else {
        // InfoFactory::create can return null (other callers in this file check
        // for it); dereferencing it here would crash, so the timestamp field is
        // skipped and the rest of the document is still built.
        fmWarning() << "Failed to create file info, modified time not indexed: " << file;
    }

    // file contents
    QString contents = DocParser::convertFile(file.toStdString()).c_str();
    doc->add(newLucene<Field>(L"contents", contents.toStdWString(), Field::STORE_YES, Field::INDEX_ANALYZED));

    return doc;
}

bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keyword)
{
fmInfo() << "search path: " << path << " keyword: " << keyword;
Expand Down Expand Up @@ -207,6 +107,7 @@ bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keywo
Collection<ScoreDocPtr> scoreDocs = topDocs->scoreDocs;

QHash<QString, QSet<QString>> hiddenFileHash;
QSet<QString> invalidIndexPaths; // 存储无效的索引路径
for (auto scoreDoc : scoreDocs) {
//中断
if (status.loadAcquire() != AbstractSearcher::kRuning)
Expand All @@ -219,10 +120,9 @@ bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keywo
const QUrl &url = QUrl::fromLocalFile(StringUtils::toUTF8(resultPath).c_str());
auto info = InfoFactory::create<FileInfo>(url,
Global::CreateFileInfoType::kCreateFileInfoSync);
// delete invalid index
// 收集无效的索引路径
if (!info || !info->exists()) {
// TODO(zhangs):
// indexDocs(writer, url.path(), kDeleteIndex);
invalidIndexPaths.insert(url.path());
continue;
}

Expand All @@ -247,6 +147,15 @@ bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keywo

reader->close();
writer->close();

// 如果有无效的索引路径,一次性启动移除任务
if (!invalidIndexPaths.isEmpty()) {
auto client = TextIndexClient::instance();
client->startTask(TextIndexClient::TaskType::Remove,
QStringList(invalidIndexPaths.begin(), invalidIndexPaths.end()));
invalidIndexPaths.clear();
}

} catch (const LuceneException &e) {
fmWarning() << QString::fromStdWString(e.getError());
} catch (const std::exception &e) {
Expand Down Expand Up @@ -353,6 +262,15 @@ bool FullTextSearcher::search()
auto serviceStatus = client->checkService();
if (serviceStatus != TextIndexClient::ServiceStatus::Available) {
// 如果服务不可用,直接执行搜索
fmWarning() << "Service is not available, search directly";
d->doSearchAndEmit(path, key);
return true;
}

// 检查到有服务正在运行,也直接执行搜索
auto hasRunningTask = client->hasRunningTask();
if (hasRunningTask) {
fmWarning() << "Service is running, search directly";
d->doSearchAndEmit(path, key);
return true;
}
Expand All @@ -361,6 +279,7 @@ bool FullTextSearcher::search()
auto indexExistsResult = client->indexExists();
if (!indexExistsResult.has_value()) {
// 如果无法确定索引状态,直接执行搜索
fmWarning() << "Failed to check index status, search directly";
d->doSearchAndEmit(path, key);
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,7 @@ class FullTextSearcherPrivate : public QObject
return path;
}

Lucene::DocumentPtr fileDocument(const QString &file);
QString dealKeyword(const QString &keyword);
void indexDocs(const Lucene::IndexWriterPtr &writer, const QString &file, IndexType type);
bool checkUpdate(const Lucene::IndexReaderPtr &reader, const QString &file, IndexType &type);
void tryNotify();

bool isUpdated = false;
Expand All @@ -82,6 +79,8 @@ class FullTextSearcherPrivate : public QObject
FullTextSearcher *q = nullptr;

void doSearchAndEmit(const QString &path, const QString &key);

QSet<QString> invalidIndexPaths; // 存储无效的索引路径
};

DPSEARCH_END_NAMESPACE
Expand Down
72 changes: 57 additions & 15 deletions src/plugins/filemanager/dfmplugin-search/utils/textindexclient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,42 +115,64 @@ std::optional<bool> TextIndexClient::indexExists()
}

/**
 * @brief Single-path convenience overload.
 *
 * Wraps @p path in a one-element list and forwards to the QStringList overload.
 *
 * @param type Kind of task to start.
 * @param path Path the task applies to.
 */
void TextIndexClient::startTask(TaskType type, const QString &path)
{
    const QStringList singlePath { path };
    startTask(type, singlePath);
}

// Starts an index task of the given type for `paths` over the D-Bus interface.
// Emits taskFailed when the interface is unavailable, when another task is
// already running (or that check fails), or when the service call fails or
// returns false. On success emits taskStarted and records the paths, joined
// with "|", in runningTaskPath.
// NOTE(review): this span comes from a rendered diff. The statements that
// reference `path` (and the if/else on TaskType::Create) appear to be the
// pre-change lines, each followed by its `paths`-based replacement.
void TextIndexClient::startTask(TaskType type, const QStringList &paths)
{
if (!ensureInterface()) {
emit taskFailed(type, path, "Failed to connect to service");
emit taskFailed(type, paths.join("|"), "Failed to connect to service");
return;
}

// Check whether a task is already running
auto pendingHasTask = interface->HasRunningTask();
pendingHasTask.waitForFinished();
if (pendingHasTask.isError() || pendingHasTask.value()) {
emit taskFailed(type, path, "Another task is running");
emit taskFailed(type, paths.join("|"), "Another task is running");
return;
}

// Start the task
QDBusPendingReply<bool> pendingTask;
if (type == TaskType::Create) {
pendingTask = interface->CreateIndexTask(path);
} else {
pendingTask = interface->UpdateIndexTask(path);
// NOTE(review): paths.first() assumes a non-empty list; no emptiness check is
// visible in this function, so confirm callers never pass an empty list.
switch (type) {
case TaskType::Create:
pendingTask = interface->CreateIndexTask(paths.first()); // Create supports only a single path
break;
case TaskType::Update:
pendingTask = interface->UpdateIndexTask(paths.first()); // Update supports only a single path
break;
case TaskType::Remove:
pendingTask = interface->RemoveIndexTask(paths);
break;
}

// Block until the service replies; an error or a false reply counts as failure.
pendingTask.waitForFinished();
if (pendingTask.isError() || !pendingTask.value()) {
emit taskFailed(type, path,
pendingTask.isError() ? pendingTask.error().message() : "Failed to start task");
emit taskFailed(type, paths.join("|"),
pendingTask.isError() ? pendingTask.error().message() : "Failed to start task");
return;
}

emit taskStarted(type, path);
runningTaskPath = path;
emit taskStarted(type, paths.join("|"));
runningTaskPath = paths.join("|");
}

void TextIndexClient::onDBusTaskFinished(const QString &type, const QString &path, bool success)
{
TaskType taskType = type == "create" ? TaskType::Create : TaskType::Update;
TaskType taskType;
if (type == "create")
taskType = TaskType::Create;
else if (type == "update")
taskType = TaskType::Update;
else if (type == "remove")
taskType = TaskType::Remove;
else
return;

if (success) {
emit taskFinished(taskType, path, true);
} else {
Expand All @@ -161,20 +183,40 @@ void TextIndexClient::onDBusTaskFinished(const QString &type, const QString &pat

// Translates the task-type string ("create" / "update" / "remove") into a
// TaskType and re-emits the progress as taskProgressChanged. Any other type
// string is ignored.
// NOTE(review): this span comes from a rendered diff. The one-line ternary
// declaration of taskType appears to be the pre-change line, replaced by the
// if/else chain that follows it.
void TextIndexClient::onDBusTaskProgressChanged(const QString &type, const QString &path, qlonglong count)
{
TaskType taskType = type == "create" ? TaskType::Create : TaskType::Update;
TaskType taskType;
if (type == "create")
taskType = TaskType::Create;
else if (type == "update")
taskType = TaskType::Update;
else if (type == "remove")
taskType = TaskType::Remove;
else
return;

emit taskProgressChanged(taskType, path, count);
}

// hasRunningTask(): asks the service whether any task is running.
// Returns std::nullopt when the interface is unavailable or the call fails,
// otherwise the boolean reported by the service.
// NOTE(review): this span comes from a rendered diff. The hasRunningRootTask
// signature directly below and the later standalone
// `return pendingHasTask.value() && runningTaskPath == "/";` appear to be
// pre-change lines.
std::optional<bool> TextIndexClient::hasRunningRootTask()
std::optional<bool> TextIndexClient::hasRunningTask()
{
if (!ensureInterface())
return std::nullopt;

// Synchronous call: block until the reply arrives.
auto pendingHasTask = interface->HasRunningTask();
pendingHasTask.waitForFinished();

if (pendingHasTask.isError())
if (pendingHasTask.isError()) {
fmWarning() << "[TextIndex] Failed to check running task:" << pendingHasTask.error().message();
return std::nullopt;
}

return pendingHasTask.value();
}

return pendingHasTask.value() && runningTaskPath == "/";
// hasRunningRootTask(): true only when a task is running and the path recorded
// by this client for the running task is "/". Returns std::nullopt when
// hasRunningTask() could not determine the state.
std::optional<bool> TextIndexClient::hasRunningRootTask()
{
auto hasTask = hasRunningTask();
if (!hasTask)
return std::nullopt;

return *hasTask && runningTaskPath == "/";
}
11 changes: 10 additions & 1 deletion src/plugins/filemanager/dfmplugin-search/utils/textindexclient.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class TextIndexClient : public QObject
public:
// Kinds of index task that can be requested through startTask().
// NOTE(review): rendered diff; the `Update` line without a trailing comma
// appears to be the pre-change line.
enum class TaskType {
Create,
Update
Update,
Remove
};
Q_ENUM(TaskType)

Expand All @@ -36,6 +37,7 @@ class TextIndexClient : public QObject

// Asynchronous methods; results are reported through signals
void startTask(TaskType type, const QString &path);
void startTask(TaskType type, const QStringList &paths);

// 返回值:
// - std::nullopt: 服务不可用或出错
Expand All @@ -52,6 +54,13 @@ class TextIndexClient : public QObject
// - false: 根目录索引任务未运行
std::optional<bool> hasRunningRootTask();

// Check whether any task is running
// Return value:
// - std::nullopt: the service is unavailable or an error occurred
// - true: a task is running
// - false: no task is running
std::optional<bool> hasRunningTask();

Q_SIGNALS:
void taskStarted(TaskType type, const QString &path);
void taskFinished(TaskType type, const QString &path, bool success);
Expand Down
3 changes: 2 additions & 1 deletion src/services/textindex/task/indextask.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ class IndexTask : public QObject
public:
// Kinds of index task.
// NOTE(review): rendered diff; the `Update` line without a trailing comma
// appears to be the pre-change line.
enum class Type {
Create,
Update
Update,
Remove
};
Q_ENUM(Type)

Expand Down
Loading

0 comments on commit 0a6afc2

Please sign in to comment.