From 0a6afc233eebf39d1a5916cb2120b3003b83a74b Mon Sep 17 00:00:00 2001 From: Zhang Sheng Date: Sat, 30 Nov 2024 17:00:36 +0800 Subject: [PATCH] feat: add batch removal of invalid index entries - Add a RemoveIndexTask method to the DBus interface to support removing invalid index entries in batch - Refactor the fulltext searcher to collect invalid index paths and remove them in one batch - Add the Remove task type and its handler to the text index service - Remove deprecated index operation functions from the fulltext searcher - Add a hasRunningTask check to avoid concurrent operations This change makes invalid index cleanup more efficient by removing stale entries in a single batch instead of one by one. Log: add batch removal of invalid index entries --- .../dbus/org.deepin.Filemanager.TextIndex.xml | 4 + .../searcher/fulltext/fulltextsearcher.cpp | 125 +++--------------- .../searcher/fulltext/fulltextsearcher_p.h | 5 +- .../utils/textindexclient.cpp | 72 +++++++--- .../dfmplugin-search/utils/textindexclient.h | 11 +- src/services/textindex/task/indextask.h | 3 +- src/services/textindex/task/taskhandler.cpp | 58 ++++++++ src/services/textindex/task/taskhandler.h | 1 + src/services/textindex/task/taskmanager.cpp | 20 ++- src/services/textindex/textindexdbus.cpp | 6 + src/services/textindex/textindexdbus.h | 1 + 11 files changed, 180 insertions(+), 126 deletions(-) diff --git a/assets/dbus/org.deepin.Filemanager.TextIndex.xml b/assets/dbus/org.deepin.Filemanager.TextIndex.xml index 993fdd5c37..896d2d9b0c 100644 --- a/assets/dbus/org.deepin.Filemanager.TextIndex.xml +++ b/assets/dbus/org.deepin.Filemanager.TextIndex.xml @@ -19,6 +19,10 @@ + + + + diff --git a/src/plugins/filemanager/dfmplugin-search/searchmanager/searcher/fulltext/fulltextsearcher.cpp b/src/plugins/filemanager/dfmplugin-search/searchmanager/searcher/fulltext/fulltextsearcher.cpp index 39b3085e12..08953b4985 100644 --- a/src/plugins/filemanager/dfmplugin-search/searchmanager/searcher/fulltext/fulltextsearcher.cpp +++ 
b/src/plugins/filemanager/dfmplugin-search/searchmanager/searcher/fulltext/fulltextsearcher.cpp @@ -68,86 +68,6 @@ IndexReaderPtr FullTextSearcherPrivate::newIndexReader() return IndexReader::open(FSDirectory::open(indexStorePath().toStdWString()), true); } -void FullTextSearcherPrivate::indexDocs(const IndexWriterPtr &writer, const QString &file, IndexType type) -{ - Q_ASSERT(writer); - - try { - switch (type) { - case kAddIndex: { - fmDebug() << "Adding [" << file << "]"; - // 添加 - writer->addDocument(fileDocument(file)); - break; - } - case kUpdateIndex: { - fmDebug() << "Update file: [" << file << "]"; - // 定义一个更新条件 - TermPtr term = newLucene(L"path", file.toStdWString()); - // 更新 - writer->updateDocument(term, fileDocument(file)); - break; - } - case kDeleteIndex: { - fmDebug() << "Delete file: [" << file << "]"; - // 定义一个删除条件 - TermPtr term = newLucene(L"path", file.toStdWString()); - // 删除 - writer->deleteDocuments(term); - break; - } - } - } catch (const LuceneException &e) { - QMetaEnum enumType = QMetaEnum::fromType(); - fmWarning() << QString::fromStdWString(e.getError()) << " type: " << enumType.valueToKey(type); - } catch (const std::exception &e) { - QMetaEnum enumType = QMetaEnum::fromType(); - fmWarning() << QString(e.what()) << " type: " << enumType.valueToKey(type); - } catch (...) { - fmWarning() << "Index document failed! 
" << file; - } -} - -bool FullTextSearcherPrivate::checkUpdate(const IndexReaderPtr &reader, const QString &file, IndexType &type) -{ - Q_ASSERT(reader); - - try { - SearcherPtr searcher = newLucene(reader); - TermQueryPtr query = newLucene(newLucene(L"path", file.toStdWString())); - - // 文件路径为唯一值,所以搜索一个结果就行了 - TopDocsPtr topDocs = searcher->search(query, 1); - int32_t numTotalHits = topDocs->totalHits; - if (numTotalHits == 0) { - type = kAddIndex; - return true; - } else { - DocumentPtr doc = searcher->doc(topDocs->scoreDocs[0]->doc); - auto info = InfoFactory::create(QUrl::fromLocalFile(file), - Global::CreateFileInfoType::kCreateFileInfoSync); - if (!info) - return false; - - const QDateTime &modifyTime { info->timeOf(TimeInfoType::kLastModified).toDateTime() }; - const QString &modifyEpoch { QString::number(modifyTime.toSecsSinceEpoch()) }; - const String &storeTime { doc->get(L"modified") }; - if (modifyEpoch.toStdWString() != storeTime) { - type = kUpdateIndex; - return true; - } - } - } catch (const LuceneException &e) { - fmWarning() << QString::fromStdWString(e.getError()) << " file: " << file; - } catch (const std::exception &e) { - fmWarning() << QString(e.what()) << " file: " << file; - } catch (...) { - fmWarning() << "The file checked failed!" 
<< file; - } - - return false; -} - void FullTextSearcherPrivate::tryNotify() { int cur = notifyTimer.elapsed(); @@ -158,26 +78,6 @@ void FullTextSearcherPrivate::tryNotify() } } -DocumentPtr FullTextSearcherPrivate::fileDocument(const QString &file) -{ - DocumentPtr doc = newLucene(); - // file path - doc->add(newLucene(L"path", file.toStdWString(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); - - // file last modified time - auto info = InfoFactory::create(QUrl::fromLocalFile(file), - Global::CreateFileInfoType::kCreateFileInfoSync); - const QDateTime &modifyTime { info->timeOf(TimeInfoType::kLastModified).toDateTime() }; - const QString &modifyEpoch { QString::number(modifyTime.toSecsSinceEpoch()) }; - doc->add(newLucene(L"modified", modifyEpoch.toStdWString(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); - - // file contents - QString contents = DocParser::convertFile(file.toStdString()).c_str(); - doc->add(newLucene(L"contents", contents.toStdWString(), Field::STORE_YES, Field::INDEX_ANALYZED)); - - return doc; -} - bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keyword) { fmInfo() << "search path: " << path << " keyword: " << keyword; @@ -207,6 +107,7 @@ bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keywo Collection scoreDocs = topDocs->scoreDocs; QHash> hiddenFileHash; + QSet invalidIndexPaths; // 存储无效的索引路径 for (auto scoreDoc : scoreDocs) { //中断 if (status.loadAcquire() != AbstractSearcher::kRuning) @@ -219,10 +120,9 @@ bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keywo const QUrl &url = QUrl::fromLocalFile(StringUtils::toUTF8(resultPath).c_str()); auto info = InfoFactory::create(url, Global::CreateFileInfoType::kCreateFileInfoSync); - // delete invalid index + // 收集无效的索引路径 if (!info || !info->exists()) { - // TODO(zhangs): - // indexDocs(writer, url.path(), kDeleteIndex); + invalidIndexPaths.insert(url.path()); continue; } @@ -247,6 +147,15 @@ bool 
FullTextSearcherPrivate::doSearch(const QString &path, const QString &keywo reader->close(); writer->close(); + + // If any invalid index paths were collected, start a single batch remove task + if (!invalidIndexPaths.isEmpty()) { + auto client = TextIndexClient::instance(); + client->startTask(TextIndexClient::TaskType::Remove, + QStringList(invalidIndexPaths.begin(), invalidIndexPaths.end())); + invalidIndexPaths.clear(); + } + } catch (const LuceneException &e) { fmWarning() << QString::fromStdWString(e.getError()); } catch (const std::exception &e) { @@ -353,6 +262,15 @@ bool FullTextSearcher::search() auto serviceStatus = client->checkService(); if (serviceStatus != TextIndexClient::ServiceStatus::Available) { // 如果服务不可用,直接执行搜索 + fmWarning() << "Service is not available, search directly"; + d->doSearchAndEmit(path, key); + return true; + } + + // Search directly when an index task is already running; hasRunningTask() yields an optional, so test its value (value_or), not merely whether a reply arrived + auto hasRunningTask = client->hasRunningTask(); + if (hasRunningTask.value_or(false)) { + fmWarning() << "Service is running, search directly"; d->doSearchAndEmit(path, key); return true; } @@ -361,6 +279,7 @@ bool FullTextSearcher::search() auto indexExistsResult = client->indexExists(); if (!indexExistsResult.has_value()) { // 如果无法确定索引状态,直接执行搜索 + fmWarning() << "Failed to check index status, search directly"; d->doSearchAndEmit(path, key); return true; } diff --git a/src/plugins/filemanager/dfmplugin-search/searchmanager/searcher/fulltext/fulltextsearcher_p.h b/src/plugins/filemanager/dfmplugin-search/searchmanager/searcher/fulltext/fulltextsearcher_p.h index 6cf32d7cf5..d14aa1567d 100644 --- a/src/plugins/filemanager/dfmplugin-search/searchmanager/searcher/fulltext/fulltextsearcher_p.h +++ b/src/plugins/filemanager/dfmplugin-search/searchmanager/searcher/fulltext/fulltextsearcher_p.h @@ -59,10 +59,7 @@ class FullTextSearcherPrivate : public QObject return path; } - Lucene::DocumentPtr fileDocument(const QString &file); QString dealKeyword(const QString &keyword); - void indexDocs(const Lucene::IndexWriterPtr &writer, const QString &file, IndexType type); - bool 
checkUpdate(const Lucene::IndexReaderPtr &reader, const QString &file, IndexType &type); void tryNotify(); bool isUpdated = false; @@ -82,6 +79,8 @@ class FullTextSearcherPrivate : public QObject FullTextSearcher *q = nullptr; void doSearchAndEmit(const QString &path, const QString &key); + + QSet invalidIndexPaths; // 存储无效的索引路径 }; DPSEARCH_END_NAMESPACE diff --git a/src/plugins/filemanager/dfmplugin-search/utils/textindexclient.cpp b/src/plugins/filemanager/dfmplugin-search/utils/textindexclient.cpp index 40cb8eba29..8e671f7353 100644 --- a/src/plugins/filemanager/dfmplugin-search/utils/textindexclient.cpp +++ b/src/plugins/filemanager/dfmplugin-search/utils/textindexclient.cpp @@ -115,9 +115,16 @@ std::optional TextIndexClient::indexExists() } void TextIndexClient::startTask(TaskType type, const QString &path) +{ + QStringList paths; + paths << path; + startTask(type, paths); +} + +void TextIndexClient::startTask(TaskType type, const QStringList &paths) { if (!ensureInterface()) { - emit taskFailed(type, path, "Failed to connect to service"); + emit taskFailed(type, paths.join("|"), "Failed to connect to service"); return; } @@ -125,32 +132,47 @@ void TextIndexClient::startTask(TaskType type, const QString &path) auto pendingHasTask = interface->HasRunningTask(); pendingHasTask.waitForFinished(); if (pendingHasTask.isError() || pendingHasTask.value()) { - emit taskFailed(type, path, "Another task is running"); + emit taskFailed(type, paths.join("|"), "Another task is running"); return; } // 启动任务 QDBusPendingReply pendingTask; - if (type == TaskType::Create) { - pendingTask = interface->CreateIndexTask(path); - } else { - pendingTask = interface->UpdateIndexTask(path); + switch (type) { + case TaskType::Create: + pendingTask = interface->CreateIndexTask(paths.first()); // Create只支持单路径 + break; + case TaskType::Update: + pendingTask = interface->UpdateIndexTask(paths.first()); // Update只支持单路径 + break; + case TaskType::Remove: + pendingTask = 
interface->RemoveIndexTask(paths); + break; } pendingTask.waitForFinished(); if (pendingTask.isError() || !pendingTask.value()) { - emit taskFailed(type, path, - pendingTask.isError() ? pendingTask.error().message() : "Failed to start task"); + emit taskFailed(type, paths.join("|"), + pendingTask.isError() ? pendingTask.error().message() : "Failed to start task"); return; } - emit taskStarted(type, path); - runningTaskPath = path; + emit taskStarted(type, paths.join("|")); + runningTaskPath = paths.join("|"); } void TextIndexClient::onDBusTaskFinished(const QString &type, const QString &path, bool success) { - TaskType taskType = type == "create" ? TaskType::Create : TaskType::Update; + TaskType taskType; + if (type == "create") + taskType = TaskType::Create; + else if (type == "update") + taskType = TaskType::Update; + else if (type == "remove") + taskType = TaskType::Remove; + else + return; + if (success) { emit taskFinished(taskType, path, true); } else { @@ -161,11 +183,20 @@ void TextIndexClient::onDBusTaskFinished(const QString &type, const QString &pat void TextIndexClient::onDBusTaskProgressChanged(const QString &type, const QString &path, qlonglong count) { - TaskType taskType = type == "create" ? 
TaskType::Create : TaskType::Update; + TaskType taskType; + if (type == "create") + taskType = TaskType::Create; + else if (type == "update") + taskType = TaskType::Update; + else if (type == "remove") + taskType = TaskType::Remove; + else + return; + emit taskProgressChanged(taskType, path, count); } -std::optional TextIndexClient::hasRunningRootTask() +std::optional TextIndexClient::hasRunningTask() { if (!ensureInterface()) return std::nullopt; @@ -173,8 +204,19 @@ std::optional TextIndexClient::hasRunningRootTask() auto pendingHasTask = interface->HasRunningTask(); pendingHasTask.waitForFinished(); - if (pendingHasTask.isError()) + if (pendingHasTask.isError()) { + fmWarning() << "[TextIndex] Failed to check running task:" << pendingHasTask.error().message(); return std::nullopt; + } + + return pendingHasTask.value(); +} - return pendingHasTask.value() && runningTaskPath == "/"; +std::optional TextIndexClient::hasRunningRootTask() +{ + auto hasTask = hasRunningTask(); + if (!hasTask) + return std::nullopt; + + return *hasTask && runningTaskPath == "/"; } diff --git a/src/plugins/filemanager/dfmplugin-search/utils/textindexclient.h b/src/plugins/filemanager/dfmplugin-search/utils/textindexclient.h index 92cc1d1858..8fff93d0fa 100644 --- a/src/plugins/filemanager/dfmplugin-search/utils/textindexclient.h +++ b/src/plugins/filemanager/dfmplugin-search/utils/textindexclient.h @@ -21,7 +21,8 @@ class TextIndexClient : public QObject public: enum class TaskType { Create, - Update + Update, + Remove }; Q_ENUM(TaskType) @@ -36,6 +37,7 @@ class TextIndexClient : public QObject // 异步方法,通过信号返回结果 void startTask(TaskType type, const QString &path); + void startTask(TaskType type, const QStringList &paths); // 返回值: // - std::nullopt: 服务不可用或出错 @@ -52,6 +54,13 @@ class TextIndexClient : public QObject // - false: 根目录索引任务未运行 std::optional hasRunningRootTask(); + // 检查是否有任务在运行 + // 返回值: + // - std::nullopt: 服务不可用或出错 + // - true: 有任务在运行 + // - false: 没有任务在运行 + std::optional 
hasRunningTask(); + Q_SIGNALS: void taskStarted(TaskType type, const QString &path); void taskFinished(TaskType type, const QString &path, bool success); diff --git a/src/services/textindex/task/indextask.h b/src/services/textindex/task/indextask.h index 085c4e38fb..91e5f13fee 100644 --- a/src/services/textindex/task/indextask.h +++ b/src/services/textindex/task/indextask.h @@ -20,7 +20,8 @@ class IndexTask : public QObject public: enum class Type { Create, - Update + Update, + Remove }; Q_ENUM(Type) diff --git a/src/services/textindex/task/taskhandler.cpp b/src/services/textindex/task/taskhandler.cpp index debad36254..71ce810d17 100644 --- a/src/services/textindex/task/taskhandler.cpp +++ b/src/services/textindex/task/taskhandler.cpp @@ -378,3 +378,61 @@ TaskHandler TaskHandlers::UpdateIndexHandler() return false; }; } + +TaskHandler TaskHandlers::RemoveIndexHandler() +{ + return [](const QString &pathList, TaskState &running) -> bool { + fmInfo() << "Removing index for paths:" << pathList; + + try { + IndexWriterPtr writer = newLucene( + FSDirectory::open(indexStorePath().toStdWString()), + newLucene(), + false, + IndexWriter::MaxFieldLengthLIMITED); + + // 添加 writer 的 ScopeGuard + ScopeGuard writerCloser([&writer]() { + try { + if (writer) writer->close(); + } catch (...) 
{ + // 忽略关闭时的异常 + } + }); + + // 将路径列表字符串转换为QStringList + QStringList paths = pathList.split("|", Qt::SkipEmptyParts); + + ProgressReporter reporter; + for (const QString &path : paths) { + if (!running.isRunning()) + break; + + try { + fmDebug() << "Removing index for path:" << path; + TermPtr term = newLucene(L"path", path.toStdWString()); + writer->deleteDocuments(term); + reporter.increment(); + } catch (const std::exception &e) { + fmWarning() << "Failed to remove index for path:" << path << e.what(); + // 继续处理其他路径 + } + } + + if (!running.isRunning()) { + fmInfo() << "Remove index task was interrupted"; + return false; + } + + writer->optimize(); + return true; + } catch (const LuceneException &e) { + fmWarning() << "Remove index failed with Lucene exception:" + << QString::fromStdWString(e.getError()); + } catch (const std::exception &e) { + fmWarning() << "Remove index failed with exception:" << e.what(); + } + + return false; + }; +} diff --git a/src/services/textindex/task/taskhandler.h b/src/services/textindex/task/taskhandler.h index dd72159bb5..cfb9b81e16 100644 --- a/src/services/textindex/task/taskhandler.h +++ b/src/services/textindex/task/taskhandler.h @@ -20,6 +20,7 @@ using TaskHandler = std::function; namespace TaskHandlers { TaskHandler CreateIndexHandler(); TaskHandler UpdateIndexHandler(); +TaskHandler RemoveIndexHandler(); } SERVICETEXTINDEX_END_NAMESPACE diff --git a/src/services/textindex/task/taskmanager.cpp b/src/services/textindex/task/taskmanager.cpp index fc8edc4eae..d08a0bc649 100644 --- a/src/services/textindex/task/taskmanager.cpp +++ b/src/services/textindex/task/taskmanager.cpp @@ -101,9 +101,21 @@ bool TaskManager::startTask(IndexTask::Type type, const QString &path) } // 获取对应的任务处理器 - TaskHandler handler = (type == IndexTask::Type::Create) - ? 
TaskHandlers::CreateIndexHandler() - : TaskHandlers::UpdateIndexHandler(); + TaskHandler handler; + switch (type) { + case IndexTask::Type::Create: + handler = TaskHandlers::CreateIndexHandler(); + break; + case IndexTask::Type::Update: + handler = TaskHandlers::UpdateIndexHandler(); + break; + case IndexTask::Type::Remove: + handler = TaskHandlers::RemoveIndexHandler(); + break; + default: + fmWarning() << "Unknown task type:" << static_cast(type); + return false; + } currentTask = new IndexTask(type, path, handler); currentTask->moveToThread(&workerThread); @@ -125,6 +137,8 @@ QString TaskManager::typeToString(IndexTask::Type type) return "create"; case IndexTask::Type::Update: return "update"; + case IndexTask::Type::Remove: + return "remove"; default: return "unknown"; } diff --git a/src/services/textindex/textindexdbus.cpp b/src/services/textindex/textindexdbus.cpp index c4a7531e46..af30034d29 100644 --- a/src/services/textindex/textindexdbus.cpp +++ b/src/services/textindex/textindexdbus.cpp @@ -48,6 +48,12 @@ bool TextIndexDBus::UpdateIndexTask(const QString &path) return d->taskManager->startTask(IndexTask::Type::Update, path); } +bool TextIndexDBus::RemoveIndexTask(const QStringList &paths) +{ + QString pathString = paths.join("|"); + return d->taskManager->startTask(IndexTask::Type::Remove, pathString); +} + bool TextIndexDBus::StopCurrentTask() { if (!d->taskManager->hasRunningTask()) diff --git a/src/services/textindex/textindexdbus.h b/src/services/textindex/textindexdbus.h index 51e265874d..1f6e38803b 100644 --- a/src/services/textindex/textindexdbus.h +++ b/src/services/textindex/textindexdbus.h @@ -26,6 +26,7 @@ class TextIndexDBus : public QObject, public QDBusContext public Q_SLOTS: bool CreateIndexTask(const QString &path); bool UpdateIndexTask(const QString &path); + bool RemoveIndexTask(const QStringList &paths); bool StopCurrentTask(); bool HasRunningTask(); bool IndexDatabaseExists();