From d0e540bc3162e889ef685f687ec99bbfe520089f Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Mon, 19 Jun 2023 16:36:28 -0700 Subject: [PATCH 1/3] Eliminated the "local=0" option from the director index builder --- src/replica/DirectorIndexApp.cc | 15 ++++------- src/replica/DirectorIndexApp.h | 5 ---- src/replica/DirectorIndexJob.cc | 35 ++++++++++++-------------- src/replica/DirectorIndexJob.h | 25 ++++++++---------- src/replica/HttpDirectorIndexModule.cc | 2 +- src/replica/HttpIngestTransModule.cc | 10 ++++---- 6 files changed, 37 insertions(+), 55 deletions(-) diff --git a/src/replica/DirectorIndexApp.cc b/src/replica/DirectorIndexApp.cc index 42226cc609..26da23c053 100644 --- a/src/replica/DirectorIndexApp.cc +++ b/src/replica/DirectorIndexApp.cc @@ -73,11 +73,6 @@ DirectorIndexApp::DirectorIndexApp(int argc, char* argv[]) " the table will be scanned, and the scan won't include the super-transaction" " column 'qserv_trans_id'.", _transactionId) - .flag("local", - "This flag is used to load contributions using 'LOAD DATA LOCAL INFILE' protocol" - " instead of 'LOAD DATA INFILE'. See MySQL documentation for further details" - " on this subject.", - _localFile) .flag("all-workers", "The flag for selecting all workers regardless of their status (DISABLED or READ-ONLY).", _allWorkers) @@ -104,11 +99,11 @@ int DirectorIndexApp::runImpl() { auto const controller = Controller::create(serviceProvider()); string const noParentJobId; - auto const job = DirectorIndexJob::create( - _database, _table, _transactionId != numeric_limits::max(), _transactionId, - _allWorkers, _localFile, controller, noParentJobId, - nullptr, // no callback - PRIORITY_NORMAL); + auto const job = DirectorIndexJob::create(_database, _table, + _transactionId != numeric_limits::max(), + _transactionId, _allWorkers, controller, noParentJobId, + nullptr, // no callback + PRIORITY_NORMAL); job->start(); job->wait(); diff --git a/src/replica/DirectorIndexApp.h b/src/replica/DirectorIndexApp.h index c877341044..3dc478e2ef 100644 --- a/src/replica/DirectorIndexApp.h +++ b/src/replica/DirectorIndexApp.h @@ -72,11 +72,6 @@ class DirectorIndexApp : public Application { /// A unique identifier of a super-transaction (not used if its value stays default) TransactionId _transactionId = std::numeric_limits::max(); - /// This flag is used to load contributions using "LOAD DATA LOCAL INFILE" protocol - /// instead of just "LOAD DATA INFILE". See MySQL documentation for further details - /// on this subject. - bool _localFile = false; - /// A connection URL to the MySQL service of the Qserv master database. std::string _qservCzarDbUrl; diff --git a/src/replica/DirectorIndexJob.cc b/src/replica/DirectorIndexJob.cc index f84e199388..2dfd915026 100644 --- a/src/replica/DirectorIndexJob.cc +++ b/src/replica/DirectorIndexJob.cc @@ -75,23 +75,22 @@ string DirectorIndexJob::typeName() { return "DirectorIndexJob"; } DirectorIndexJob::Ptr DirectorIndexJob::create(string const& databaseName, string const& directorTableName, bool hasTransactions, TransactionId transactionId, - bool allWorkers, bool localFile, - Controller::Ptr const& controller, string const& parentJobId, - CallbackType const& onFinish, int priority) { + bool allWorkers, Controller::Ptr const& controller, + string const& parentJobId, CallbackType const& onFinish, + int priority) { return Ptr(new DirectorIndexJob(databaseName, directorTableName, hasTransactions, transactionId, - allWorkers, localFile, controller, parentJobId, onFinish, priority)); + allWorkers, controller, parentJobId, onFinish, priority)); } DirectorIndexJob::DirectorIndexJob(string const& databaseName, string const& directorTableName, bool hasTransactions, TransactionId transactionId, bool allWorkers, - bool localFile, Controller::Ptr const& controller, - string const& parentJobId, CallbackType const& onFinish, int priority) + Controller::Ptr const& controller, string const& parentJobId, + CallbackType const& onFinish, int priority) : Job(controller, parentJobId, "INDEX", priority), _directorTableName(directorTableName), _hasTransactions(hasTransactions), _transactionId(transactionId), _allWorkers(allWorkers), - _localFile(localFile), _onFinish(onFinish) { try { _database = controller->serviceProvider()->config()->databaseInfo(databaseName); @@ -127,7 +126,6 @@ list> DirectorIndexJob::extendedPersistentState() cons result.emplace_back("has_transactions", bool2str(hasTransactions())); result.emplace_back("transaction_id", to_string(transactionId())); result.emplace_back("all_workers", bool2str(allWorkers())); - result.emplace_back("local_file", bool2str(localFile())); return result; } @@ -388,27 +386,26 @@ void DirectorIndexJob::_loadDataIntoTable() { if (request == nullptr) break; // Load request's data into the destination table. + bool const localFile = true; try { string const query = g.loadDataInfile( request->responseData().fileName, directorIndexTableName(database(), directorTable()), controller()->serviceProvider()->config()->get("worker", "ingest-charset-name"), - localFile()); + localFile); h.conn->executeInOwnTransaction([&](auto conn) { conn->execute(query); // Loading operations based on this mechanism won't result in throwing exceptions in // case of certain types of problems encountered during the loading, such as // out-of-range data, duplicate keys, etc. These errors are reported as warnings // which need to be retrieved using a special call to the database API. - if (localFile()) { - auto const warnings = conn->warnings(); - if (!warnings.empty()) { - auto const& w = warnings.front(); - throw database::mysql::Error( - "query: " + query + - " failed with total number of problems: " + to_string(warnings.size()) + - ", first problem (Level,Code,Message) was: " + w.level + "," + - to_string(w.code) + "," + w.message); - } + auto const warnings = conn->warnings(); + if (!warnings.empty()) { + auto const& w = warnings.front(); + throw database::mysql::Error( + "query: " + query + + " failed with total number of problems: " + to_string(warnings.size()) + + ", first problem (Level,Code,Message) was: " + w.level + "," + to_string(w.code) + + "," + w.message); } }); diff --git a/src/replica/DirectorIndexJob.h b/src/replica/DirectorIndexJob.h index 4af3ace64a..c22ed5e301 100644 --- a/src/replica/DirectorIndexJob.h +++ b/src/replica/DirectorIndexJob.h @@ -54,6 +54,12 @@ namespace lsst::qserv::replica { * the "director" index retrieval requests for the relevant chunks to * the workers. Results are directly loaded into the "director" index of * the specified director table. + * + * Contributions are always loaded into the index table using the "LOCAL" attribute + * of the query: + * @code + * LOAD DATA LOCAL INFILE ... + * @endcode */ class DirectorIndexJob : public Job { public: @@ -82,21 +88,13 @@ class DirectorIndexJob : public Job { * @param allWorkers engage all known workers regardless of their status. * If the flag is set to 'false' then only 'ENABLED' workers which are not * in the 'READ-ONLY' state will be involved into the operation. - * @param localFile If the flag is set to 'true' then index contribution files - * retrieved from workers would be loaded into the "director" index" table using MySQL - * statement "LOAD DATA LOCAL INFILE". Otherwise, contributions will be loaded - * using "LOAD DATA INFILE", which will require the files be directly visible by - * the MySQL server where the table is residing. Note that the non-local - * option results in the better performance of the operation. On the other hand, - * the local option requires the server be properly configured to allow this - * mechanism. * @param controller is needed launching requests and accessing the Configuration * @param parentJobId an identifier of the parent job * @param onFinish a function to be called upon a completion of the job * @param priority the priority level of the job */ static Ptr create(std::string const& databaseName, std::string const& directorTableName, - bool hasTransactions, TransactionId transactionId, bool allWorkers, bool localFile, + bool hasTransactions, TransactionId transactionId, bool allWorkers, Controller::Ptr const& controller, std::string const& parentJobId, CallbackType const& onFinish, int priority); @@ -115,7 +113,6 @@ class DirectorIndexJob : public Job { bool hasTransactions() const { return _hasTransactions; } TransactionId transactionId() const { return _transactionId; } bool allWorkers() const { return _allWorkers; } - bool localFile() const { return _localFile; } /// @see Job::progress virtual Job::Progress progress() const override; @@ -156,7 +153,7 @@ class DirectorIndexJob : public Job { private: DirectorIndexJob(std::string const& databaseName, std::string const& directorTableName, - bool hasTransactions, TransactionId transactionId, bool allWorkers, bool localFile, + bool hasTransactions, TransactionId transactionId, bool allWorkers, Controller::Ptr const& controller, std::string const& parentJobId, CallbackType const& onFinish, int priority); @@ -216,11 +213,9 @@ class DirectorIndexJob : public Job { bool const _hasTransactions; TransactionId const _transactionId; bool const _allWorkers; - bool const _localFile; - - CallbackType _onFinish; /// @note is reset when the job finishes - DatabaseInfo _database; /// Initialized by the c-tor + CallbackType _onFinish; ///< Is reset when the job finishes + DatabaseInfo _database; ///< Is initialized by the c-tor /// A collection of chunks to be processed at specific workers std::map> _chunks; diff --git a/src/replica/HttpDirectorIndexModule.cc b/src/replica/HttpDirectorIndexModule.cc index 93aa0720cc..6e78bfe8b7 100644 --- a/src/replica/HttpDirectorIndexModule.cc +++ b/src/replica/HttpDirectorIndexModule.cc @@ -174,7 +174,7 @@ json HttpDirectorIndexModule::_buildDirectorIndex() { string const noParentJobId; auto const job = DirectorIndexJob::create(database.name, tableName, noTransactions, noTransactionId, - allWorkers, localFile, controller(), noParentJobId, + allWorkers, controller(), noParentJobId, nullptr, // no callback config->get("controller", "catalog-management-priority-level")); job->start(); diff --git a/src/replica/HttpIngestTransModule.cc b/src/replica/HttpIngestTransModule.cc index dad4f4fd72..3b128afa39 100644 --- a/src/replica/HttpIngestTransModule.cc +++ b/src/replica/HttpIngestTransModule.cc @@ -399,11 +399,11 @@ json HttpIngestTransModule::_endTransaction() { auto const table = database.findTable(tableName); if (table.isPublished) continue; bool const hasTransactions = true; - auto const job = DirectorIndexJob::create( - database.name, table.name, hasTransactions, transactionId, allWorkers, - localLoadDirectorIndex(database.name), controller(), noParentJobId, - nullptr, // no callback - config->get("controller", "ingest-priority-level")); + auto const job = + DirectorIndexJob::create(database.name, table.name, hasTransactions, + transactionId, allWorkers, controller(), noParentJobId, + nullptr, // no callback + config->get("controller", "ingest-priority-level")); json transEventData = {{"job", job->id()}, {"table", table.name}}; transaction = databaseServices->updateTransaction(transactionId, "begin " + transEvent, transEventData); From 6dfcd0250818a9f0a85bdc4a84a231f587bd361d Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Mon, 19 Jun 2023 17:22:02 -0700 Subject: [PATCH 2/3] Allowed the REST API of the Repl/Ingest sys to return multiple warnings --- src/replica/HttpModuleBase.cc | 24 +++++++++++++++++------- src/replica/HttpModuleBase.h | 3 ++- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/replica/HttpModuleBase.cc b/src/replica/HttpModuleBase.cc index 86fe11586f..0914d750a3 100644 --- a/src/replica/HttpModuleBase.cc +++ b/src/replica/HttpModuleBase.cc @@ -37,7 +37,16 @@ using json = nlohmann::json; namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.HttpModuleBase"); + +string packWarnings(list const& warnings) { + string packed; + for (auto const& msg : warnings) { + if (!packed.empty()) packed += "; "; + packed += msg; + } + return packed; } +} // namespace namespace lsst::qserv::replica { @@ -79,15 +88,13 @@ void HttpModuleBase::checkApiVersion(string const& func, unsigned int minVersion try { if (req()->method == "GET") { if (!query().has(versionAttrName)) { - _warningOnVersionMissing = "No version number was provided in the request's query."; - warn(_warningOnVersionMissing); + warn("No version number was provided in the request's query."); return; } version = query().requiredUInt(versionAttrName); } else { if (!body().has(versionAttrName)) { - _warningOnVersionMissing = "No version number was provided in the request's body."; - warn(_warningOnVersionMissing); + warn("No version number was provided in the request's body."); return; } version = body().required(versionAttrName); @@ -107,7 +114,10 @@ void HttpModuleBase::info(string const& msg) const { LOGS(_log, LOG_LVL_INFO, co void HttpModuleBase::debug(string const& msg) const { LOGS(_log, LOG_LVL_DEBUG, context() << msg); } -void HttpModuleBase::warn(string const& msg) const { LOGS(_log, LOG_LVL_WARN, context() << msg); } +void HttpModuleBase::warn(string const& msg) const { + LOGS(_log, LOG_LVL_WARN, context() << msg); + _warnings.push_back(msg); +} void HttpModuleBase::error(string const& msg) const { LOGS(_log, LOG_LVL_ERROR, context() << msg); } @@ -117,7 +127,7 @@ void HttpModuleBase::_sendError(string const& func, string const& errorMsg, json result["success"] = 0; result["error"] = errorMsg; result["error_ext"] = errorExt.is_null() ? json::object() : errorExt; - result["warning"] = _warningOnVersionMissing; + result["warning"] = ::packWarnings(_warnings); resp()->send(result.dump(), "application/json"); } @@ -125,7 +135,7 @@ void HttpModuleBase::_sendData(json& result) { result["success"] = 1; result["error"] = ""; result["error_ext"] = json::object(); - result["warning"] = _warningOnVersionMissing; + result["warning"] = ::packWarnings(_warnings); resp()->send(result.dump(), "application/json"); } diff --git a/src/replica/HttpModuleBase.h b/src/replica/HttpModuleBase.h index 14c1128e12..7bcd31bc3a 100644 --- a/src/replica/HttpModuleBase.h +++ b/src/replica/HttpModuleBase.h @@ -23,6 +23,7 @@ #define LSST_QSERV_HTTPMODULEBASE_H // System headers +#include #include #include #include @@ -230,7 +231,7 @@ class HttpModuleBase { /// The optional warning message to be sent to a caller if the API version /// number wasn't mentoned in the request. - mutable std::string _warningOnVersionMissing; + mutable std::list _warnings; }; } // namespace lsst::qserv::replica From fbe0972adea0df36bd0bb7bcb98b93165b441287 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Mon, 19 Jun 2023 16:45:03 -0700 Subject: [PATCH 3/3] Eliminated the "local" index construction option from the REST API Any attempts to specify the removed options are now reported as warnings back to callers of the nodified services. The underlying REST framework was extended to return multiple warnings to the callers. Also, incremented the version number of the REST API to 20. --- .../python/lsst/qserv/admin/replicationInterface.py | 2 +- src/replica/HttpDirectorIndexModule.cc | 5 +++-- src/replica/HttpIngestModule.cc | 7 +++---- src/replica/HttpMetaModule.cc | 2 +- src/replica/HttpModule.cc | 12 ------------ src/replica/HttpModule.h | 13 ------------- src/www/qserv/js/Common.js | 2 +- 7 files changed, 9 insertions(+), 34 deletions(-) diff --git a/src/admin/python/lsst/qserv/admin/replicationInterface.py b/src/admin/python/lsst/qserv/admin/replicationInterface.py index 7593be258d..1a87ffbc6a 100644 --- a/src/admin/python/lsst/qserv/admin/replicationInterface.py +++ b/src/admin/python/lsst/qserv/admin/replicationInterface.py @@ -201,7 +201,7 @@ def __init__( self.repl_ctrl = urlparse(repl_ctrl_uri) self.auth_key = auth_key self.admin_auth_key = admin_auth_key - self.repl_api_version = 19 + self.repl_api_version = 20 _log.debug(f"ReplicationInterface %s", self.repl_ctrl) def version(self) -> str: diff --git a/src/replica/HttpDirectorIndexModule.cc b/src/replica/HttpDirectorIndexModule.cc index 6e78bfe8b7..d8e82097da 100644 --- a/src/replica/HttpDirectorIndexModule.cc +++ b/src/replica/HttpDirectorIndexModule.cc @@ -68,13 +68,14 @@ json HttpDirectorIndexModule::_buildDirectorIndex() { string const directorTableName = body().optional("director_table", string()); bool const allowForPublished = body().optional("allow_for_published", 0) != 0; bool const rebuild = body().optional("rebuild", 0) != 0; - bool const localFile = body().optional("local", 0) != 0; + if (body().has("local")) { + warn("Option 'local' is obsolete as of the version 20 of the API."); + } debug(__func__, "database=" + databaseName); debug(__func__, "director_table=" + directorTableName); debug(__func__, "allow_for_published=" + bool2str(allowForPublished)); debug(__func__, "rebuild=" + bool2str(rebuild)); - debug(__func__, "local=" + bool2str(localFile)); auto const database = config->databaseInfo(databaseName); if (database.isPublished and not allowForPublished) { diff --git a/src/replica/HttpIngestModule.cc b/src/replica/HttpIngestModule.cc index fbc4027472..315f5a034a 100644 --- a/src/replica/HttpIngestModule.cc +++ b/src/replica/HttpIngestModule.cc @@ -198,14 +198,15 @@ json HttpIngestModule::_addDatabase() { auto const numSubStripes = body().required("num_sub_stripes"); auto const overlap = body().required("overlap"); auto const enableAutoBuildDirectorIndex = body().optional("auto_build_secondary_index", 1); - auto const enableLocalLoadDirectorIndex = body().optional("local_load_secondary_index", 0); + if (body().has("local_load_secondary_index")) { + warn("Option 'local_load_secondary_index' is obsolete as of the version 20 of the API."); + } debug(__func__, "database=" + databaseName); debug(__func__, "num_stripes=" + to_string(numStripes)); debug(__func__, "num_sub_stripes=" + to_string(numSubStripes)); debug(__func__, "overlap=" + to_string(overlap)); debug(__func__, "auto_build_secondary_index=" + to_string(enableAutoBuildDirectorIndex ? 1 : 0)); - debug(__func__, "local_load_secondary_index=" + to_string(enableLocalLoadDirectorIndex ? 1 : 0)); if (overlap < 0) throw HttpError(__func__, "overlap can't have a negative value"); @@ -263,8 +264,6 @@ json HttpIngestModule::_addDatabase() { // the index. databaseServices->saveIngestParam(database.name, "secondary-index", "auto-build", to_string(enableAutoBuildDirectorIndex ? 1 : 0)); - databaseServices->saveIngestParam(database.name, "secondary-index", "local-load", - to_string(enableLocalLoadDirectorIndex ? 1 : 0)); // Tell workers to reload their configurations error = reconfigureWorkers(database, allWorkers, workerReconfigTimeoutSec()); diff --git a/src/replica/HttpMetaModule.cc b/src/replica/HttpMetaModule.cc index 18983cf76e..64b7aa263c 100644 --- a/src/replica/HttpMetaModule.cc +++ b/src/replica/HttpMetaModule.cc @@ -34,7 +34,7 @@ using json = nlohmann::json; namespace lsst::qserv::replica { -unsigned int const HttpMetaModule::version = 19; +unsigned int const HttpMetaModule::version = 20; void HttpMetaModule::process(ServiceProvider::Ptr const& serviceProvider, string const& context, qhttp::Request::Ptr const& req, qhttp::Response::Ptr const& resp, diff --git a/src/replica/HttpModule.cc b/src/replica/HttpModule.cc index 312c61a1f7..8234778a93 100644 --- a/src/replica/HttpModule.cc +++ b/src/replica/HttpModule.cc @@ -113,18 +113,6 @@ bool HttpModule::autoBuildDirectorIndex(string const& databaseName) const { return false; } -bool HttpModule::localLoadDirectorIndex(string const& database) const { - auto const databaseServices = controller()->serviceProvider()->databaseServices(); - try { - DatabaseIngestParam const paramInfo = - databaseServices->ingestParam(database, "secondary-index", "local-load"); - return paramInfo.value != "0"; - } catch (DatabaseServicesNotFound const& ex) { - info(__func__, "the director index local-load mode was not specified"); - } - return false; -} - DatabaseInfo HttpModule::getDatabaseInfo(string const& func, bool throwIfPublished) const { debug(func); auto const databaseServices = controller()->serviceProvider()->databaseServices(); diff --git a/src/replica/HttpModule.h b/src/replica/HttpModule.h index 2e28509261..10c089eaaf 100644 --- a/src/replica/HttpModule.h +++ b/src/replica/HttpModule.h @@ -115,19 +115,6 @@ class HttpModule : public EventLogger, public HttpModuleBase { */ bool autoBuildDirectorIndex(std::string const& database) const; - /** - * Fetch a mode of loading contributions into the "director" index as requested by - * a catalog ingest workflow and recorded at the database creation time. A value of - * the parameter is recorded in a database. - * - * @param database The name of a database for which a value of the parameter - * is requested. - * @return 'true' if the index was requested to be loaded using MySQL protocol - * "LOAD DATA LOCAL INFILE" instead of just "LOAD DATA INFILE". See MySQL - * documentation for further explanation of the protocol. - */ - bool localLoadDirectorIndex(std::string const& database) const; - /** * Get database info for a database that was specified in a request, either explicitly * in attribute "database" or implicitly in attribute "transation_id". The method may diff --git a/src/www/qserv/js/Common.js b/src/www/qserv/js/Common.js index 531d9dd521..1ec3f9eccc 100644 --- a/src/www/qserv/js/Common.js +++ b/src/www/qserv/js/Common.js @@ -3,7 +3,7 @@ define([ function(sqlFormatter) { class Common { - static RestAPIVersion = 19; + static RestAPIVersion = 20; static query2text(query, expanded) { if (expanded) { return sqlFormatter.format(query, Common._sqlFormatterConfig);