From 5bab7280f4f3b0906c30b7205c9e186522ed6caf Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Tue, 10 Dec 2024 20:05:06 -0800 Subject: [PATCH] The HTTP-based backend of the Replication worker services --- src/replica/apps/WorkerApp.cc | 5 +- src/replica/proto/CMakeLists.txt | 1 + src/replica/proto/Protocol.cc | 171 ++++++ src/replica/proto/Protocol.h | 139 +++++ src/replica/util/Common.cc | 40 +- src/replica/util/Common.h | 34 ++ src/replica/util/Performance.cc | 1 + src/replica/util/Performance.h | 40 +- src/replica/worker/CMakeLists.txt | 12 + .../worker/WorkerCreateReplicaHttpRequest.cc | 467 ++++++++++++++ .../worker/WorkerCreateReplicaHttpRequest.h | 186 ++++++ .../worker/WorkerDeleteReplicaHttpRequest.cc | 116 ++++ .../worker/WorkerDeleteReplicaHttpRequest.h | 99 +++ .../worker/WorkerDirectorIndexHttpRequest.cc | 292 +++++++++ .../worker/WorkerDirectorIndexHttpRequest.h | 149 +++++ src/replica/worker/WorkerEchoHttpRequest.cc | 90 +++ src/replica/worker/WorkerEchoHttpRequest.h | 99 +++ .../WorkerFindAllReplicasHttpRequest.cc | 157 +++++ .../worker/WorkerFindAllReplicasHttpRequest.h | 101 ++++ .../worker/WorkerFindReplicaHttpRequest.cc | 233 +++++++ .../worker/WorkerFindReplicaHttpRequest.h | 104 ++++ src/replica/worker/WorkerHttpProcessor.cc | 568 ++++++++++++++++++ src/replica/worker/WorkerHttpProcessor.h | 366 +++++++++++ .../worker/WorkerHttpProcessorThread.cc | 121 ++++ .../worker/WorkerHttpProcessorThread.h | 113 ++++ src/replica/worker/WorkerHttpRequest.cc | 275 +++++++++ src/replica/worker/WorkerHttpRequest.h | 352 +++++++++++ src/replica/worker/WorkerHttpSvc.cc | 149 +++++ src/replica/worker/WorkerHttpSvc.h | 84 +++ src/replica/worker/WorkerHttpSvcMod.cc | 242 ++++++++ src/replica/worker/WorkerHttpSvcMod.h | 172 ++++++ src/replica/worker/WorkerSqlHttpRequest.cc | 416 +++++++++++++ src/replica/worker/WorkerSqlHttpRequest.h | 183 ++++++ 33 files changed, 5547 insertions(+), 30 deletions(-) create mode 100644 src/replica/proto/Protocol.cc create mode 
100644 src/replica/proto/Protocol.h create mode 100644 src/replica/worker/WorkerCreateReplicaHttpRequest.cc create mode 100644 src/replica/worker/WorkerCreateReplicaHttpRequest.h create mode 100644 src/replica/worker/WorkerDeleteReplicaHttpRequest.cc create mode 100644 src/replica/worker/WorkerDeleteReplicaHttpRequest.h create mode 100644 src/replica/worker/WorkerDirectorIndexHttpRequest.cc create mode 100644 src/replica/worker/WorkerDirectorIndexHttpRequest.h create mode 100644 src/replica/worker/WorkerEchoHttpRequest.cc create mode 100644 src/replica/worker/WorkerEchoHttpRequest.h create mode 100644 src/replica/worker/WorkerFindAllReplicasHttpRequest.cc create mode 100644 src/replica/worker/WorkerFindAllReplicasHttpRequest.h create mode 100644 src/replica/worker/WorkerFindReplicaHttpRequest.cc create mode 100644 src/replica/worker/WorkerFindReplicaHttpRequest.h create mode 100644 src/replica/worker/WorkerHttpProcessor.cc create mode 100644 src/replica/worker/WorkerHttpProcessor.h create mode 100644 src/replica/worker/WorkerHttpProcessorThread.cc create mode 100644 src/replica/worker/WorkerHttpProcessorThread.h create mode 100644 src/replica/worker/WorkerHttpRequest.cc create mode 100644 src/replica/worker/WorkerHttpRequest.h create mode 100644 src/replica/worker/WorkerHttpSvc.cc create mode 100644 src/replica/worker/WorkerHttpSvc.h create mode 100644 src/replica/worker/WorkerHttpSvcMod.cc create mode 100644 src/replica/worker/WorkerHttpSvcMod.h create mode 100644 src/replica/worker/WorkerSqlHttpRequest.cc create mode 100644 src/replica/worker/WorkerSqlHttpRequest.h diff --git a/src/replica/apps/WorkerApp.cc b/src/replica/apps/WorkerApp.cc index 31c023640..f1362b91a 100644 --- a/src/replica/apps/WorkerApp.cc +++ b/src/replica/apps/WorkerApp.cc @@ -39,7 +39,7 @@ #include "replica/services/ServiceProvider.h" #include "replica/util/FileUtils.h" #include "replica/worker/FileServer.h" -#include "replica/worker/WorkerProcessor.h" +#include 
"replica/worker/WorkerHttpSvc.h" #include "replica/worker/WorkerServer.h" // LSST headers @@ -113,6 +113,9 @@ int WorkerApp::runImpl() { auto const reqProcSvr = WorkerServer::create(serviceProvider(), worker); thread reqProcSvrThread([reqProcSvr]() { reqProcSvr->run(); }); + auto const reqProcHttpSvr = WorkerHttpSvc::create(serviceProvider(), worker); + thread reqProcHttpSvrThread([reqProcHttpSvr]() { reqProcHttpSvr->run(); }); + auto const fileSvr = FileServer::create(serviceProvider(), worker); thread fileSvrThread([fileSvr]() { fileSvr->run(); }); diff --git a/src/replica/proto/CMakeLists.txt b/src/replica/proto/CMakeLists.txt index b61599d8c..7eb8d830d 100644 --- a/src/replica/proto/CMakeLists.txt +++ b/src/replica/proto/CMakeLists.txt @@ -4,4 +4,5 @@ add_library(replica_proto OBJECT) target_sources(replica_proto PRIVATE ${REPLICA_PB_SRCS} ${REPLICA_PB_HDRS} + Protocol.cc ) diff --git a/src/replica/proto/Protocol.cc b/src/replica/proto/Protocol.cc new file mode 100644 index 000000000..7d53155c5 --- /dev/null +++ b/src/replica/proto/Protocol.cc @@ -0,0 +1,171 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "replica/proto/Protocol.h" + +// System headers +#include + +using namespace std; + +namespace lsst::qserv::replica::protocol { + +string toString(SqlRequestType status) { + switch (status) { + case SqlRequestType::QUERY: + return "QUERY"; + case SqlRequestType::CREATE_DATABASE: + return "CREATE_DATABASE"; + case SqlRequestType::DROP_DATABASE: + return "DROP_DATABASE"; + case SqlRequestType::ENABLE_DATABASE: + return "ENABLE_DATABASE"; + case SqlRequestType::DISABLE_DATABASE: + return "DISABLE_DATABASE"; + case SqlRequestType::GRANT_ACCESS: + return "GRANT_ACCESS"; + case SqlRequestType::CREATE_TABLE: + return "CREATE_TABLE"; + case SqlRequestType::DROP_TABLE: + return "DROP_TABLE"; + case SqlRequestType::REMOVE_TABLE_PARTITIONING: + return "REMOVE_TABLE_PARTITIONING"; + case SqlRequestType::DROP_TABLE_PARTITION: + return "DROP_TABLE_PARTITION"; + case SqlRequestType::GET_TABLE_INDEX: + return "GET_TABLE_INDEX"; + case SqlRequestType::CREATE_TABLE_INDEX: + return "CREATE_TABLE_INDEX"; + case SqlRequestType::DROP_TABLE_INDEX: + return "DROP_TABLE_INDEX"; + case SqlRequestType::ALTER_TABLE: + return "ALTER_TABLE"; + case SqlRequestType::TABLE_ROW_STATS: + return "TABLE_ROW_STATS"; + default: + throw logic_error("Unhandled SQL request type: " + to_string(static_cast(status))); + } +} + +string toString(Status status) { + switch (status) { + case Status::CREATED: + return "CREATED"; + case Status::SUCCESS: + return "SUCCESS"; + case Status::QUEUED: + return "QUEUED"; + case Status::IN_PROGRESS: + return "IN_PROGRESS"; + case Status::IS_CANCELLING: + return "IS_CANCELLING"; + case Status::BAD: + return "BAD"; + case Status::FAILED: + return "FAILED"; + case Status::CANCELLED: + return "CANCELLED"; + default: + throw logic_error("Unhandled status: " + to_string(static_cast(status))); + } +} + +string toString(StatusExt extendedStatus) { + switch (extendedStatus) { + case StatusExt::NONE: + return "NONE"; + case StatusExt::INVALID_PARAM: 
+ return "INVALID_PARAM"; + case StatusExt::INVALID_ID: + return "INVALID_ID"; + case StatusExt::FOLDER_STAT: + return "FOLDER_STAT"; + case StatusExt::FOLDER_CREATE: + return "FOLDER_CREATE"; + case StatusExt::FILE_STAT: + return "FILE_STAT"; + case StatusExt::FILE_SIZE: + return "FILE_SIZE"; + case StatusExt::FOLDER_READ: + return "FOLDER_READ"; + case StatusExt::FILE_READ: + return "FILE_READ"; + case StatusExt::FILE_ROPEN: + return "FILE_ROPEN"; + case StatusExt::FILE_CREATE: + return "FILE_CREATE"; + case StatusExt::FILE_OPEN: + return "FILE_OPEN"; + case StatusExt::FILE_RESIZE: + return "FILE_RESIZE"; + case StatusExt::FILE_WRITE: + return "FILE_WRITE"; + case StatusExt::FILE_COPY: + return "FILE_COPY"; + case StatusExt::FILE_DELETE: + return "FILE_DELETE"; + case StatusExt::FILE_RENAME: + return "FILE_RENAME"; + case StatusExt::FILE_EXISTS: + return "FILE_EXISTS"; + case StatusExt::SPACE_REQ: + return "SPACE_REQ"; + case StatusExt::NO_FOLDER: + return "NO_FOLDER"; + case StatusExt::NO_FILE: + return "NO_FILE"; + case StatusExt::NO_ACCESS: + return "NO_ACCESS"; + case StatusExt::NO_SPACE: + return "NO_SPACE"; + case StatusExt::FILE_MTIME: + return "FILE_MTIME"; + case StatusExt::MYSQL_ERROR: + return "MYSQL_ERROR"; + case StatusExt::LARGE_RESULT: + return "LARGE_RESULT"; + case StatusExt::NO_SUCH_TABLE: + return "NO_SUCH_TABLE"; + case StatusExt::NOT_PARTITIONED_TABLE: + return "NOT_PARTITIONED_TABLE"; + case StatusExt::NO_SUCH_PARTITION: + return "NO_SUCH_PARTITION"; + case StatusExt::MULTIPLE: + return "MULTIPLE"; + case StatusExt::OTHER_EXCEPTION: + return "OTHER_EXCEPTION"; + case StatusExt::FOREIGN_INSTANCE: + return "FOREIGN_INSTANCE"; + case StatusExt::DUPLICATE_KEY: + return "DUPLICATE_KEY"; + case StatusExt::CANT_DROP_KEY: + return "CANT_DROP_KEY"; + default: + throw logic_error("Unhandled extended status: " + to_string(static_cast(extendedStatus))); + } +} + +string toString(Status status, StatusExt extendedStatus) { + return toString(status) + "::" 
+ toString(extendedStatus); +} + +} // namespace lsst::qserv::replica::protocol diff --git a/src/replica/proto/Protocol.h b/src/replica/proto/Protocol.h new file mode 100644 index 000000000..b6ca3f916 --- /dev/null +++ b/src/replica/proto/Protocol.h @@ -0,0 +1,139 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_REPLICA_PROTOCOL_H +#define LSST_QSERV_REPLICA_PROTOCOL_H + +// System headers +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// This header declarations +namespace lsst::qserv::replica::protocol { + +/// Subtypes of the SQL requests. +enum class SqlRequestType : int { + + QUERY = 0, + CREATE_DATABASE = 1, + DROP_DATABASE = 2, + ENABLE_DATABASE = 3, ///< in Qserv + DISABLE_DATABASE = 4, ///< in Qserv + GRANT_ACCESS = 5, + CREATE_TABLE = 6, + DROP_TABLE = 7, + REMOVE_TABLE_PARTITIONING = 8, + DROP_TABLE_PARTITION = 9, + GET_TABLE_INDEX = 10, + CREATE_TABLE_INDEX = 11, + DROP_TABLE_INDEX = 12, + ALTER_TABLE = 13, + TABLE_ROW_STATS = 14 +}; + +/// @return the string representation of the SQL request type +std::string toString(SqlRequestType status); + +/// Types of the table indexes specified in the index management requests.
+enum class SqlIndexSpec : int { DEFAULT = 1, UNIQUE = 2, FULLTEXT = 3, SPATIAL = 4 }; + +/// Status values returned by all requests related to operations with +/// replicas. Request management operations always return messages whose types +/// match the return types of the corresponding (original) replica-related requests. +/// Service management requests have their own set of status values. +/// +enum class Status : int { + CREATED = 0, + SUCCESS = 1, + QUEUED = 2, + IN_PROGRESS = 3, + IS_CANCELLING = 4, + BAD = 5, + FAILED = 6, + CANCELLED = 7 +}; + +enum class StatusExt : int { + NONE = 0, ///< Unspecified problem. + INVALID_PARAM = 1, ///< Invalid parameter(s) of a request. + INVALID_ID = 2, ///< An invalid request identifier. + FOLDER_STAT = 4, ///< Failed to obtain fstat() for a folder. + FOLDER_CREATE = 5, ///< Failed to create a folder. + FILE_STAT = 6, ///< Failed to obtain fstat() for a file. + FILE_SIZE = 7, ///< Failed to obtain the size of a file. + FOLDER_READ = 8, ///< Failed to read the contents of a folder. + FILE_READ = 9, ///< Failed to read the contents of a file. + FILE_ROPEN = 10, ///< Failed to open a remote file. + FILE_CREATE = 11, ///< Failed to create a file. + FILE_OPEN = 12, ///< Failed to open a file. + FILE_RESIZE = 13, ///< Failed to resize a file. + FILE_WRITE = 14, ///< Failed to write into a file. + FILE_COPY = 15, ///< Failed to copy a file. + FILE_DELETE = 16, ///< Failed to delete a file. + FILE_RENAME = 17, ///< Failed to rename a file. + FILE_EXISTS = 18, ///< File already exists. + SPACE_REQ = 19, ///< Space availability check failed. + NO_FOLDER = 20, ///< Folder doesn't exist. + NO_FILE = 21, ///< File doesn't exist. + NO_ACCESS = 22, ///< No access to a file or a folder. + NO_SPACE = 23, ///< No space left on a device as required by an operation. + FILE_MTIME = 24, ///< Get/set 'mtime' operation failed. + MYSQL_ERROR = 25, ///< General MySQL error (other than any specific ones listed here).
+ LARGE_RESULT = 26, ///< Result exceeds a limit set in a request. + NO_SUCH_TABLE = 27, ///< No table found while performing a MySQL operation. + NOT_PARTITIONED_TABLE = 28, ///< The table is not MySQL partitioned as it was expected. + NO_SUCH_PARTITION = 29, ///< No MySQL partition found in a table as it was expected. + MULTIPLE = 30, ///< Multiple unspecified errors encountered when processing a request. + OTHER_EXCEPTION = 31, ///< Other exception not listed here. + FOREIGN_INSTANCE = 32, ///< Detected a request from a Controller serving an unrelated Qserv. + DUPLICATE_KEY = 33, ///< Duplicate key found when creating an index or altering a table schema. + CANT_DROP_KEY = 34 ///< Can't drop a field or a key which doesn't exist. +}; + +/// @return the string representation of the status +std::string toString(Status status); + +/// @return the string representation of the extended status +std::string toString(StatusExt extendedStatus); + +/// @return the string representation of the full status +std::string toString(Status status, StatusExt extendedStatus); + +/// Status of a replica. +enum class ReplicaStatus : int { NOT_FOUND = 0, CORRUPT = 1, INCOMPLETE = 2, COMPLETE = 3 }; + +/// Status of a service. +enum class ServiceState : int { SUSPEND_IN_PROGRESS = 0, SUSPENDED = 1, RUNNING = 2 }; + +/// The header to be sent with the requests processed through the worker's queueing system. 
+struct QueuedRequestHdr { + std::string id; + int priority; + unsigned int timeout; + QueuedRequestHdr(std::string const& id_, int priority_, unsigned int timeout_) + : id(id_), priority(priority_), timeout(timeout_) {} + nlohmann::json toJson() const { return {{"id", id}, {"priority", priority}, {"timeout", timeout}}; }; +}; + +} // namespace lsst::qserv::replica::protocol + +#endif // LSST_QSERV_REPLICA_PROTOCOL_H diff --git a/src/replica/util/Common.cc b/src/replica/util/Common.cc index 11c08df7c..0c9830944 100644 --- a/src/replica/util/Common.cc +++ b/src/replica/util/Common.cc @@ -29,10 +29,9 @@ #include "boost/uuid/uuid.hpp" #include "boost/uuid/uuid_generators.hpp" #include "boost/uuid/uuid_io.hpp" -#include "nlohmann/json.hpp" using namespace std; -using namespace nlohmann; +using json = nlohmann::json; namespace lsst::qserv::replica { @@ -80,6 +79,43 @@ string Generators::uniqueId() { return boost::uuids::to_string(id); } +/////////////////////////////////////////// +// SqlColDef // +/////////////////////////////////////////// + +list parseSqlColumns(json const& columnsJsonArray) { + if (!columnsJsonArray.is_array()) { + throw invalid_argument("lsst::qserv::replica::" + string(__func__) + + " columnsJsonArray is not an array"); + } + list columns; + for (auto const& column : columnsJsonArray) { + columns.emplace_back(column.at("name"), column.at("type")); + } + return columns; +} + +/////////////////////////////////////////// +// SqlIndexDef // +/////////////////////////////////////////// + +SqlIndexDef::SqlIndexDef(json const& indexSpecJson) { + if (!indexSpecJson.is_object()) { + throw invalid_argument("lsst::qserv::replica::" + string(__func__) + + " indexSpecJson is not an object"); + } + spec = indexSpecJson.value("spec", "DEFAULT"); + name = indexSpecJson.at("name"); + comment = indexSpecJson.value("comment", ""); + auto const keysJsonArray = indexSpecJson.at("keys"); + if (!keysJsonArray.is_array()) { + throw 
+ invalid_argument("lsst::qserv::replica::" + string(__func__) + " keys is not an array"); + } + for (auto const& key : keysJsonArray) { + keys.emplace_back(key.at("name"), key.at("length"), key.at("ascending")); + } +} + //////////////////////////////////////////// // Parameters of requests // //////////////////////////////////////////// diff --git a/src/replica/util/Common.h b/src/replica/util/Common.h index 3970f771a..ee1e1fc2d 100644 --- a/src/replica/util/Common.h +++ b/src/replica/util/Common.h @@ -36,6 +36,9 @@ #include #include +// Third party headers +#include "nlohmann/json.hpp" + // Qserv headers #include "replica/proto/protocol.pb.h" #include "replica/util/Mutex.h" @@ -112,6 +115,13 @@ inline bool operator==(SqlColDef const& lhs, SqlColDef const& rhs) { inline bool operator!=(SqlColDef const& lhs, SqlColDef const& rhs) { return !operator==(lhs, rhs); } +/** + * @param columnsJsonArray The JSON array containing the column definitions. + * @return The list of column definitions. + * @throw std::invalid_argument If the input JSON array is not valid. + */ +std::list parseSqlColumns(nlohmann::json const& columnsJsonArray); + /** * This class is an abstraction for columns within table index * specifications. @@ -130,6 +140,30 @@ class SqlIndexColumn { bool ascending = true; }; +/** + * This class is an abstraction for the index definitions. + */ +class SqlIndexDef { +public: + SqlIndexDef() = default; + + /** + * Parse the definition from the input JSON object. + * @param indexSpecJson The JSON object containing the index definitions. + * @throw std::invalid_argument If the input JSON object is not valid.
+ */ + SqlIndexDef(nlohmann::json const& indexSpecJson); + + SqlIndexDef(SqlIndexDef const&) = default; + SqlIndexDef& operator=(SqlIndexDef const&) = default; + ~SqlIndexDef() = default; + + std::string spec; + std::string name; + std::string comment; + std::list> keys; +}; + /** * Class ReplicationRequestParams encapsulates parameters of the replica * creation requests. diff --git a/src/replica/util/Performance.cc b/src/replica/util/Performance.cc index 8e3292d68..ae30b0ac3 100644 --- a/src/replica/util/Performance.cc +++ b/src/replica/util/Performance.cc @@ -30,6 +30,7 @@ #include "lsst/log/Log.h" using namespace std; +using json = nlohmann::json; namespace { diff --git a/src/replica/util/Performance.h b/src/replica/util/Performance.h index fcbfd394a..15320d08b 100644 --- a/src/replica/util/Performance.h +++ b/src/replica/util/Performance.h @@ -33,6 +33,9 @@ #include #include +// Third party headers +#include "nlohmann/json.hpp" + // Forward declarations namespace lsst::qserv::replica { class ProtocolPerformance; @@ -56,7 +59,6 @@ class Performance { * All (but the request creation one) timestamps will be initialized with 0. 
*/ Performance(); - Performance(Performance const&) = default; Performance& operator=(Performance const&) = default; @@ -64,45 +66,28 @@ class Performance { /** * Update object state with counters from the protocol buffer object - * - * @param workerPerformanceInfo - * counters to be carried over into an internal state + * @param workerPerformanceInfo counters to be carried over into an internal state */ void update(ProtocolPerformance const& workerPerformanceInfo); /** * Update the Controller's 'start' time - * - * @return - * the previous state of the counter + * @return the previous state of the counter */ uint64_t setUpdateStart(); /** * Update the Controller's 'finish' time - * - * @return - * the previous state of the counter + * @return the previous state of the counter */ uint64_t setUpdateFinish(); - /// Created by the Controller - uint64_t c_create_time; - - /// Started by the Controller - uint64_t c_start_time; - - /// Received by a worker service - uint64_t w_receive_time; - - /// Execution started by a worker service - uint64_t w_start_time; - - /// Execution finished by a worker service - uint64_t w_finish_time; - - /// A subscriber notified by the Controller - uint64_t c_finish_time; + uint64_t c_create_time; ///< Created by the Controller + uint64_t c_start_time; ///< Started by the Controller + uint64_t w_receive_time; ///< Received by a worker service + uint64_t w_start_time; ///< Execution started by a worker service + uint64_t w_finish_time; ///< Execution finished by a worker service + uint64_t c_finish_time; ///< A subscriber notified by the Controller }; /// Overloaded streaming operator for class Performance @@ -127,6 +112,7 @@ class WorkerPerformance { uint64_t setUpdateFinish(); std::unique_ptr info() const; + nlohmann::json toJson() const; std::atomic receive_time; ///< Received by a worker service std::atomic start_time; ///< Execution started by a worker service diff --git a/src/replica/worker/CMakeLists.txt 
b/src/replica/worker/CMakeLists.txt index a37868d82..7b4d8ff46 100644 --- a/src/replica/worker/CMakeLists.txt +++ b/src/replica/worker/CMakeLists.txt @@ -4,17 +4,29 @@ target_sources(replica_worker PRIVATE FileClient.cc FileServer.cc FileServerConnection.cc + WorkerCreateReplicaHttpRequest.cc + WorkerDeleteReplicaHttpRequest.cc WorkerDeleteRequest.cc + WorkerDirectorIndexHttpRequest.cc WorkerDirectorIndexRequest.cc + WorkerEchoHttpRequest.cc WorkerEchoRequest.cc WorkerFindAllRequest.cc + WorkerFindAllReplicasHttpRequest.cc + WorkerFindReplicaHttpRequest.cc WorkerFindRequest.cc + WorkerHttpProcessor.cc + WorkerHttpProcessorThread.cc + WorkerHttpRequest.cc + WorkerHttpSvc.cc + WorkerHttpSvcMod.cc WorkerProcessor.cc WorkerProcessorThread.cc WorkerReplicationRequest.cc WorkerRequest.cc WorkerServer.cc WorkerServerConnection.cc + WorkerSqlHttpRequest.cc WorkerSqlRequest.cc ) target_link_libraries(replica_worker PUBLIC diff --git a/src/replica/worker/WorkerCreateReplicaHttpRequest.cc b/src/replica/worker/WorkerCreateReplicaHttpRequest.cc new file mode 100644 index 000000000..582b7c959 --- /dev/null +++ b/src/replica/worker/WorkerCreateReplicaHttpRequest.cc @@ -0,0 +1,467 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "replica/worker/WorkerCreateReplicaHttpRequest.h" + +// System headers +#include +#include +#include +#include + +// Qserv headers +#include "replica/config/Configuration.h" +#include "replica/proto/Protocol.h" +#include "replica/services/ServiceProvider.h" +#include "replica/util/FileUtils.h" +#include "replica/worker/FileClient.h" +#include "util/TimeUtils.h" + +// LSST headers +#include "lsst/log/Log.h" + +#define CONTEXT context("WorkerCreateReplicaHttpRequest", __func__) + +using namespace std; +namespace fs = boost::filesystem; +using json = nlohmann::json; + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.WorkerCreateReplicaHttpRequest"); + +} // namespace + +namespace lsst::qserv::replica { + +shared_ptr WorkerCreateReplicaHttpRequest::create( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const& onExpired) { + auto ptr = shared_ptr( + new WorkerCreateReplicaHttpRequest(serviceProvider, worker, hdr, req, onExpired)); + ptr->init(); + return ptr; +} + +WorkerCreateReplicaHttpRequest::WorkerCreateReplicaHttpRequest( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const& onExpired) + : WorkerHttpRequest(serviceProvider, worker, "REPLICATE", hdr, req, onExpired), + _databaseInfo(serviceProvider->config()->databaseInfo(req.at("database"))), + _chunk(req.at("chunk")), + _sourceWorker(req.at("worker")), + _sourceWorkerHost(req.at("worker_host")), + _sourceWorkerPort(req.at("worker_port")), + _sourceWorkerHostPort(_sourceWorkerHost + ":" + to_string(_sourceWorkerPort)), + _initialized(false), + _files(FileUtils::partitionedFiles(_databaseInfo, _chunk)), + _tmpFilePtr(nullptr), + _buf(0), + _bufSize(serviceProvider->config()->get("worker", "fs-buf-size-bytes")) { + if (worker == _sourceWorker) { + throw invalid_argument(CONTEXT + " 
+ workers are the same in the request."); + } + if (_sourceWorkerHost.empty()) { + throw invalid_argument(CONTEXT + " the DNS name or an IP address of the worker not provided."); + } +} + +WorkerCreateReplicaHttpRequest::~WorkerCreateReplicaHttpRequest() { + replica::Lock lock(_mtx, CONTEXT); + _releaseResources(lock); +} + +void WorkerCreateReplicaHttpRequest::getResult(json& result) const { + // No locking is needed here since the method is called only after + // the request is completed. + result["replica_info"] = _replicaInfo.toJson(); +} + +bool WorkerCreateReplicaHttpRequest::execute() { + LOGS(_log, LOG_LVL_DEBUG, + CONTEXT << " sourceWorkerHostPort: " << _sourceWorkerHostPort << " database: " << _databaseInfo.name + << " chunk: " << _chunk); + + replica::Lock lock(_mtx, CONTEXT); + checkIfCancelling(lock, CONTEXT); + + // Obtain the list of files to be migrated + // + // IMPLEMENTATION NOTES: + // + // - Note the use of the overloaded operator '/' to form the path names + // of folders and files below. The operator will concatenate + // names and also insert a file separator for an operating system + // on which this code will get compiled. + // + // - Temporary file names at the destination folder are prepended with the + // prefix '_' to prevent colliding with the canonical names. They will + // be renamed in the last step. + // + // - All operations with the file system namespace (creating new non-temporary + // files, checking for folders and files, renaming files, creating folders, etc.) + // are guarded by acquiring replica::Lock lock(_mtxDataFolderOperations) where it's needed.
+ + WorkerHttpRequest::ErrorContext errorContext; + + /////////////////////////////////////////////////////// + // Initialization phase (runs only once) // + /////////////////////////////////////////////////////// + + if (!_initialized) { + _initialized = true; + + fs::path const outDir = + fs::path(serviceProvider()->config()->get("worker", "data-dir")) / _databaseInfo.name; + + vector tmpFiles; + vector outFiles; + for (auto&& file : _files) { + fs::path const tmpFile = outDir / ("_" + file); + tmpFiles.push_back(tmpFile); + + fs::path const outFile = outDir / file; + outFiles.push_back(outFile); + + _file2descr[file].inSizeBytes = 0; + _file2descr[file].outSizeBytes = 0; + _file2descr[file].mtime = 0; + _file2descr[file].cs = 0; + _file2descr[file].tmpFile = tmpFile; + _file2descr[file].outFile = outFile; + _file2descr[file].beginTransferTime = 0; + _file2descr[file].endTransferTime = 0; + } + + // Check input files, check and sanitize the destination folder + + boost::system::error_code ec; + { + replica::Lock dataFolderLock(_mtxDataFolderOperations, CONTEXT); + + // Check for a presence of input files and calculate space requirement + + uintmax_t totalBytes = 0; // the total number of bytes in all input files to be moved + map file2size; // the number of bytes in each file + + for (auto&& file : _files) { + // Open the file on the remote server in the no-content-read mode + auto const inFilePtr = FileClient::stat(_serviceProvider, _sourceWorkerHost, + _sourceWorkerPort, _databaseInfo.name, file); + errorContext = + errorContext or + reportErrorIf(inFilePtr == nullptr, protocol::StatusExt::FILE_ROPEN, + "failed to open input file on remote worker: " + _sourceWorker + " (" + + _sourceWorkerHostPort + "), database: " + _databaseInfo.name + + ", file: " + file); + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + return true; + } + file2size[file] = inFilePtr->size(); + totalBytes += inFilePtr->size(); + 
_file2descr[file].inSizeBytes = inFilePtr->size(); + _file2descr[file].mtime = inFilePtr->mtime(); + } + + // Check and sanitize the output directory + + bool const outDirExists = fs::exists(outDir, ec); + errorContext = + errorContext or + reportErrorIf(ec.value() != 0, protocol::StatusExt::FOLDER_STAT, + "failed to check the status of output directory: " + outDir.string()) or + reportErrorIf(!outDirExists, protocol::StatusExt::NO_FOLDER, + "the output directory doesn't exist: " + outDir.string()); + + // The files with canonical(!) names should NOT exist at the destination + // folder. + for (auto&& file : outFiles) { + fs::file_status const stat = fs::status(file, ec); + errorContext = errorContext or + reportErrorIf(stat.type() == fs::status_error, protocol::StatusExt::FILE_STAT, + "failed to check the status of output file: " + file.string()) or + reportErrorIf(fs::exists(stat), protocol::StatusExt::FILE_EXISTS, + "the output file already exists: " + file.string()); + } + + // Check if there are any files with the temporary names at the destination + // folder and if so then get rid of them. 
+ for (auto&& file : tmpFiles) { + fs::file_status const stat = fs::status(file, ec); + errorContext = + errorContext or + reportErrorIf(stat.type() == fs::status_error, protocol::StatusExt::FILE_STAT, + "failed to check the status of temporary file: " + file.string()); + if (fs::exists(stat)) { + fs::remove(file, ec); + errorContext = errorContext or + reportErrorIf(ec.value() != 0, protocol::StatusExt::FILE_DELETE, + "failed to remove temporary file: " + file.string()); + } + } + + // Make sure a file system at the destination has enough space + // to accommodate new files + // + // NOTE: this operation runs after cleaning up temporary files + fs::space_info const space = fs::space(outDir, ec); + errorContext = + errorContext or + reportErrorIf( + ec.value() != 0, protocol::StatusExt::SPACE_REQ, + "failed to obtaine space information at output folder: " + outDir.string()) or + reportErrorIf(space.available < totalBytes, protocol::StatusExt::NO_SPACE, + "not enough free space availble at output folder: " + outDir.string()); + + // Pre-create temporary files with the final size to assert disk space + // availability before filling these files with the actual payload. 
+ for (auto&& file : _files) { + fs::path const tmpFile = _file2descr[file].tmpFile; + + // Create a file of size 0 + FILE* tmpFilePtr = fopen(tmpFile.string().c_str(), "wb"); + errorContext = errorContext or + reportErrorIf(tmpFilePtr == nullptr, protocol::StatusExt::FILE_CREATE, + "failed to open/create temporary file: " + tmpFile.string() + + ", error: " + strerror(errno)); + if (tmpFilePtr) { + fflush(tmpFilePtr); + fclose(tmpFilePtr); + } + + // Resize the file (will be filled with \0) + fs::resize_file(tmpFile, file2size[file], ec); + errorContext = errorContext or + reportErrorIf(ec.value() != 0, protocol::StatusExt::FILE_RESIZE, + "failed to resize the temporary file: " + tmpFile.string()); + } + } + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + return true; + } + + // Allocate the record buffer + _buf = new uint8_t[_bufSize]; + if (_buf == nullptr) throw runtime_error(CONTEXT + " buffer allocation failed"); + + // Setup the iterator for the name of the very first file to be copied + _fileItr = _files.begin(); + if (!_openFiles(lock)) return true; + } + + // Copy the next record from the currently open remote file + // into the corresponding temporary files at the destination folder + // w/o acquiring the directory lock. 
+ // + // NOTE: the while loop below is meant to skip files which are empty + while (_files.end() != _fileItr) { + // Copy the next record if any is available + size_t num = 0; + try { + num = _inFilePtr->read(_buf, _bufSize); + if (num) { + if (num == fwrite(_buf, sizeof(uint8_t), num, _tmpFilePtr)) { + // Update the descriptor (the number of bytes copied so far + // and the control sum) + _file2descr[*_fileItr].outSizeBytes += num; + uint64_t& cs = _file2descr[*_fileItr].cs; + for (uint8_t *ptr = _buf, *end = _buf + num; ptr != end; ++ptr) { + cs += *ptr; + } + + // Keep updating this stats while copying the files + _file2descr[*_fileItr].endTransferTime = util::TimeUtils::now(); + _updateInfo(lock); + + // Keep copying the same file + return false; + } + errorContext = errorContext or reportErrorIf(true, protocol::StatusExt::FILE_WRITE, + "failed to write into temporary file: " + + _file2descr[*_fileItr].tmpFile.string() + + ", error: " + strerror(errno)); + } + } catch (FileClientError const& ex) { + errorContext = + errorContext or + reportErrorIf(true, protocol::StatusExt::FILE_READ, + "failed to read input file from remote worker: " + _sourceWorker + " (" + + _sourceWorkerHostPort + "), database: " + _databaseInfo.name + + ", file: " + *_fileItr); + } + + // Make sure the number of bytes copied from the remote server + // matches expectations. 
+ errorContext = + errorContext or + reportErrorIf(_file2descr[*_fileItr].inSizeBytes != _file2descr[*_fileItr].outSizeBytes, + protocol::StatusExt::FILE_READ, + "short read of the input file from remote worker: " + _sourceWorker + " (" + + _sourceWorkerHostPort + "), database: " + _databaseInfo.name + + ", file: " + *_fileItr); + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + _releaseResources(lock); + return true; + } + + // Flush and close the current file + fflush(_tmpFilePtr); + fclose(_tmpFilePtr); + _tmpFilePtr = 0; + + // Keep updating this stats after finishing to copy each file + _file2descr[*_fileItr].endTransferTime = util::TimeUtils::now(); + _updateInfo(lock); + + // Move the iterator to the name of the next file to be copied + ++_fileItr; + if (_files.end() != _fileItr) { + if (!_openFiles(lock)) { + _releaseResources(lock); + return true; + } + } + } + + // Finalize the operation, de-allocate resources, etc. + return _finalize(lock); +} + +bool WorkerCreateReplicaHttpRequest::_openFiles(replica::Lock const& lock) { + LOGS(_log, LOG_LVL_DEBUG, + CONTEXT << " sourceWorkerHostPort: " << _sourceWorkerHostPort << " database: " << _databaseInfo.name + << " chunk: " << _chunk << " file: " << *_fileItr); + + WorkerHttpRequest::ErrorContext errorContext; + + // Open the input file on the remote server + _inFilePtr = FileClient::open(_serviceProvider, _sourceWorkerHost, _sourceWorkerPort, _databaseInfo.name, + *_fileItr); + errorContext = errorContext or + reportErrorIf(_inFilePtr == nullptr, protocol::StatusExt::FILE_ROPEN, + "failed to open input file on remote worker: " + _sourceWorker + " (" + + _sourceWorkerHostPort + "), database: " + _databaseInfo.name + + ", file: " + *_fileItr); + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + return false; + } + + // Reopen a temporary output file locally in the 'append binary mode' + // then 'rewind' to 
the beginning of the file before writing into it. + fs::path const tmpFile = _file2descr[*_fileItr].tmpFile; + + _tmpFilePtr = fopen(tmpFile.string().c_str(), "wb"); + errorContext = errorContext or reportErrorIf(_tmpFilePtr == nullptr, protocol::StatusExt::FILE_OPEN, + "failed to open temporary file: " + tmpFile.string() + + ", error: " + strerror(errno)); + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + return false; + } + rewind(_tmpFilePtr); + _file2descr[*_fileItr].beginTransferTime = util::TimeUtils::now(); + return true; +} + +bool WorkerCreateReplicaHttpRequest::_finalize(replica::Lock const& lock) { + LOGS(_log, LOG_LVL_DEBUG, + CONTEXT << " sourceWorkerHostPort: " << _sourceWorkerHostPort << " database: " << _databaseInfo.name + << " chunk: " << _chunk); + + // Unconditionally regardless of the completion of the file renaming attempt + _releaseResources(lock); + + // Rename temporary files into the canonical ones + // Note that this operation changes the directory namespace in a way + // which may affect other users (like replica lookup operations, etc.). Hence we're + // acquiring the directory lock to guarantee a consistent view onto the folder. + replica::Lock dataFolderLock(_mtxDataFolderOperations, CONTEXT); + + // ATTENTION: as per ISO/IEC 9945 the file rename operation will + // remove empty files. Not sure if this should be treated + // in a special way? 
+ WorkerHttpRequest::ErrorContext errorContext; + boost::system::error_code ec; + for (auto&& file : _files) { + fs::path const tmpFile = _file2descr[file].tmpFile; + fs::path const outFile = _file2descr[file].outFile; + + fs::rename(tmpFile, outFile, ec); + errorContext = errorContext or reportErrorIf(ec.value() != 0, protocol::StatusExt::FILE_RENAME, + "failed to rename file: " + tmpFile.string()); + fs::last_write_time(outFile, _file2descr[file].mtime, ec); + errorContext = errorContext or reportErrorIf(ec.value() != 0, protocol::StatusExt::FILE_MTIME, + "failed to change 'mtime' of file: " + tmpFile.string()); + } + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + return true; + } + setStatus(lock, protocol::Status::SUCCESS); + return true; +} + +void WorkerCreateReplicaHttpRequest::_updateInfo(replica::Lock const& lock) { + size_t totalInSizeBytes = 0; + size_t totalOutSizeBytes = 0; + ReplicaInfo::FileInfoCollection fileInfoCollection; + for (auto&& file : _files) { + fileInfoCollection.emplace_back( + ReplicaInfo::FileInfo({file, _file2descr[file].outSizeBytes, _file2descr[file].mtime, + to_string(_file2descr[file].cs), _file2descr[file].beginTransferTime, + _file2descr[file].endTransferTime, _file2descr[file].inSizeBytes})); + totalInSizeBytes += _file2descr[file].inSizeBytes; + totalOutSizeBytes += _file2descr[file].outSizeBytes; + } + ReplicaInfo::Status const status = + (_files.size() == fileInfoCollection.size()) and (totalInSizeBytes == totalOutSizeBytes) + ? 
ReplicaInfo::Status::COMPLETE + : ReplicaInfo::Status::INCOMPLETE; + + // Fill in the info on the chunk before finishing the operation + WorkerCreateReplicaHttpRequest::_replicaInfo = ReplicaInfo(status, worker(), _databaseInfo.name, _chunk, + util::TimeUtils::now(), fileInfoCollection); +} + +void WorkerCreateReplicaHttpRequest::_releaseResources(replica::Lock const& lock) { + // Drop a connection to the remote server + _inFilePtr.reset(); + + // Close the output file + if (_tmpFilePtr) { + fflush(_tmpFilePtr); + fclose(_tmpFilePtr); + _tmpFilePtr = nullptr; + } + + // Release the record buffer + if (_buf) { + delete[] _buf; + _buf = nullptr; + } +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerCreateReplicaHttpRequest.h b/src/replica/worker/WorkerCreateReplicaHttpRequest.h new file mode 100644 index 000000000..364a92934 --- /dev/null +++ b/src/replica/worker/WorkerCreateReplicaHttpRequest.h @@ -0,0 +1,186 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_REPLICA_WORKERCREATEREPLICAHTTPREQUEST_H +#define LSST_QSERV_REPLICA_WORKERCREATEREPLICAHTTPREQUEST_H + +// System headers +#include +#include +#include +#include +#include +#include + +// Third party headers +#include "boost/filesystem.hpp" +#include "nlohmann/json.hpp" + +// Qserv headers +#include "replica/config/ConfigDatabase.h" +#include "replica/util/ReplicaInfo.h" +#include "replica/worker/WorkerHttpRequest.h" + +// Forward declarations +namespace lsst::qserv::replica { +class FileClient; +class ServiceProvider; +} // namespace lsst::qserv::replica + +namespace lsst::qserv::replica::protocol { +struct QueuedRequestHdr; +} // namespace lsst::qserv::replica::protocol + +// This header declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerCreateReplicaHttpRequest represents a context and a state of replication + * requests within the worker servers. + */ +class WorkerCreateReplicaHttpRequest : public WorkerHttpRequest { +public: + /** + * Static factory method is needed to prevent issue with the lifespan + * and memory management of instances created otherwise (as values or via + * low-level pointers). + * + * @param serviceProvider provider is needed to access the Configuration + * of a setup and for validating the input parameters + * @param worker the name of a worker. The name must match the worker which + * is going to execute the request. 
+ * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @param onExpired request expiration callback function + * @return pointer to the created object + */ + static std::shared_ptr create( + std::shared_ptr const& serviceProvider, std::string const& worker, + protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req, + ExpirationCallbackType const& onExpired); + + WorkerCreateReplicaHttpRequest() = delete; + WorkerCreateReplicaHttpRequest(WorkerCreateReplicaHttpRequest const&) = delete; + WorkerCreateReplicaHttpRequest& operator=(WorkerCreateReplicaHttpRequest const&) = delete; + + /// Non-trivial destructor is needed to relese resources + ~WorkerCreateReplicaHttpRequest() override; + + bool execute() override; + +protected: + void getResult(nlohmann::json& result) const override; + +private: + WorkerCreateReplicaHttpRequest(std::shared_ptr const& serviceProvider, + std::string const& worker, protocol::QueuedRequestHdr const& hdr, + nlohmann::json const& req, ExpirationCallbackType const& onExpired); + + /** + * Open files associated with the current state of iterator _fileItr. + * @param lock lock which must be acquired before calling this method + * @return 'false' in case of any error + */ + bool _openFiles(replica::Lock const& lock); + + /** + * The final stage to be executed just once after copying the content + * of the remote files into the local temporary ones. It will rename + * the temporary files into the standard ones. Resources will also be released. + * @param lock A lock to be acquired before calling this method + * @return always 'true' + */ + bool _finalize(replica::Lock const& lock); + + /** + * Close connections, de-allocate resources, etc. + * + * Any connections and open files will be closed, the buffers will be + * released to prevent unnecessary resource utilization. 
Note that + * request objects can stay in the server's memory for an extended + * period of time. + * @param lock A lock to be acquired before calling this method + */ + void _releaseResources(replica::Lock const& lock); + + /** + * Update file migration statistics + * @param lock A lock to be acquired before calling this method + */ + void _updateInfo(replica::Lock const& lock); + + // Input parameters (extracted from the request object) + + DatabaseInfo const _databaseInfo; ///< Database descriptor obtained from the Configuration + unsigned int const _chunk; + std::string const _sourceWorker; + std::string const _sourceWorkerHost; + uint16_t const _sourceWorkerPort; + std::string const _sourceWorkerHostPort; + std::string const _sourceWorkerDataDir; + + /// Result of the operation + ReplicaInfo _replicaInfo; + + /// The flag indicating if the initialization phase of the operation + /// has already completed + bool _initialized; + + std::vector const _files; ///< Short names of files to be copied + + /// The iterator pointing to the currently processed file. + /// If it's set to _files.end() then it means the operation + /// has finished. + std::vector::const_iterator _fileItr; + + /// This object represents the currently open (if any) input file + /// on the source worker node + std::shared_ptr _inFilePtr; + + std::FILE* _tmpFilePtr; ///< The file pointer for the temporary output file + + /// The FileDescr structure encapsulates various parameters of a file + struct FileDescr { + size_t inSizeBytes = 0; ///< The input file size as reported by a remote server + size_t outSizeBytes = 0; ///< Num. 
bytes read so far (changes during processing) + std::time_t mtime = 0; ///< The last modification time of the file (sec, UNIX Epoch) + uint64_t cs = 0; ///< Control sum computed locally while copying the file + + boost::filesystem::path tmpFile; /// The absolute path to the temporary file + + /// The final (canonic) file name the temporary file will be renamed as + /// upon a successful completion of the operation. + boost::filesystem::path outFile; + + uint64_t beginTransferTime = 0; ///< When the file transfer started + uint64_t endTransferTime = 0; ///< When the file transfer ended + }; + + /// Cached file descriptions mapping from short file names into + /// the corresponding parameters. + std::map _file2descr; + + uint8_t* _buf; ///< The buffer for storing file payload read from the remote service + size_t _bufSize; ///< The size of the buffer +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_REPLICA_WORKERCREATEREPLICAHTTPREQUEST_H diff --git a/src/replica/worker/WorkerDeleteReplicaHttpRequest.cc b/src/replica/worker/WorkerDeleteReplicaHttpRequest.cc new file mode 100644 index 000000000..065671a3f --- /dev/null +++ b/src/replica/worker/WorkerDeleteReplicaHttpRequest.cc @@ -0,0 +1,116 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "replica/worker/WorkerDeleteReplicaHttpRequest.h" + +// System headers +#include + +// Third party headers +#include "boost/filesystem.hpp" + +// Qserv headers +#include "replica/config/Configuration.h" +#include "replica/proto/Protocol.h" +#include "replica/services/ServiceProvider.h" +#include "replica/util/FileUtils.h" +#include "util/TimeUtils.h" + +// LSST headers +#include "lsst/log/Log.h" + +#define CONTEXT context("WorkerDeleteReplicaHttpRequest", __func__) + +using namespace std; +namespace fs = boost::filesystem; +using json = nlohmann::json; + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.WorkerDeleteReplicaHttpRequest"); + +} // namespace + +namespace lsst::qserv::replica { + +shared_ptr WorkerDeleteReplicaHttpRequest::create( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const& onExpired) { + auto ptr = shared_ptr( + new WorkerDeleteReplicaHttpRequest(serviceProvider, worker, hdr, req, onExpired)); + ptr->init(); + return ptr; +} + +WorkerDeleteReplicaHttpRequest::WorkerDeleteReplicaHttpRequest( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const& onExpired) + : WorkerHttpRequest(serviceProvider, worker, "DELETE", hdr, req, onExpired), + _databaseInfo(serviceProvider->config()->databaseInfo(req.at("database"))), + _chunk(req.at("chunk")), + // This status will be returned in all contexts + _replicaInfo(ReplicaInfo::Status::NOT_FOUND, worker, _databaseInfo.name, _chunk, + util::TimeUtils::now(), ReplicaInfo::FileInfoCollection{}) {} + +void WorkerDeleteReplicaHttpRequest::getResult(json& result) const { + // No locking is needed here since the method is called only after 
+ // the request is completed. + result["replica_info"] = _replicaInfo.toJson(); +} +bool WorkerDeleteReplicaHttpRequest::execute() { + LOGS(_log, LOG_LVL_DEBUG, CONTEXT << " db: " << _databaseInfo.name << " chunk: " << _chunk); + + replica::Lock lock(_mtx, CONTEXT); + checkIfCancelling(lock, CONTEXT); + + vector const files = FileUtils::partitionedFiles(_databaseInfo, _chunk); + + // The data folder will be locked while performing the operation + int numFilesDeleted = 0; + WorkerHttpRequest::ErrorContext errorContext; + boost::system::error_code ec; + { + replica::Lock dataFolderLock(_mtxDataFolderOperations, CONTEXT); + fs::path const dataDir = + fs::path(_serviceProvider->config()->get("worker", "data-dir")) / _databaseInfo.name; + fs::file_status const stat = fs::status(dataDir, ec); + errorContext = errorContext or + reportErrorIf(stat.type() == fs::status_error, protocol::StatusExt::FOLDER_STAT, + "failed to check the status of directory: " + dataDir.string()) or + reportErrorIf(!fs::exists(stat), protocol::StatusExt::NO_FOLDER, + "the directory does not exists: " + dataDir.string()); + for (const auto& name : files) { + const fs::path file = dataDir / fs::path(name); + if (fs::remove(file, ec)) ++numFilesDeleted; + errorContext = errorContext or reportErrorIf(ec.value() != 0, protocol::StatusExt::FILE_DELETE, + "failed to delete file: " + file.string()); + } + } + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + return true; + } + setStatus(lock, protocol::Status::SUCCESS); + return true; +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerDeleteReplicaHttpRequest.h b/src/replica/worker/WorkerDeleteReplicaHttpRequest.h new file mode 100644 index 000000000..a862f082c --- /dev/null +++ b/src/replica/worker/WorkerDeleteReplicaHttpRequest.h @@ -0,0 +1,99 @@ +// -*- LSST-C++ -*- +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST 
Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_REPLICA_WORKERDELETEREPLICAHTTPREQUEST_H +#define LSST_QSERV_REPLICA_WORKERDELETEREPLICAHTTPREQUEST_H + +// System headers +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "replica/config/ConfigDatabase.h" +#include "replica/util/ReplicaInfo.h" +#include "replica/worker/WorkerHttpRequest.h" + +// Forward declarations +namespace lsst::qserv::replica { +class ServiceProvider; +} // namespace lsst::qserv::replica + +namespace lsst::qserv::replica::protocol { +struct QueuedRequestHdr; +} // namespace lsst::qserv::replica::protocol + +// This header declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerDeleteReplicaHttpRequest represents a context and a state of replica deletion + * requests within the worker servers. + */ +class WorkerDeleteReplicaHttpRequest : public WorkerHttpRequest { +public: + /** + * Static factory method is needed to prevent issue with the lifespan + * and memory management of instances created otherwise (as values or via + * low-level pointers). + * + * @param serviceProvider provider is needed to access the Configuration + * of a setup and for validating the input parameters + * @param worker the name of a worker. 
The name must match the worker which + * is going to execute the request. + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @param onExpired request expiration callback function + * @return pointer to the created object + */ + static std::shared_ptr create( + std::shared_ptr const& serviceProvider, std::string const& worker, + protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req, + ExpirationCallbackType const& onExpired); + + WorkerDeleteReplicaHttpRequest() = delete; + WorkerDeleteReplicaHttpRequest(WorkerDeleteReplicaHttpRequest const&) = delete; + WorkerDeleteReplicaHttpRequest& operator=(WorkerDeleteReplicaHttpRequest const&) = delete; + + ~WorkerDeleteReplicaHttpRequest() override = default; + + bool execute() override; + +protected: + void getResult(nlohmann::json& result) const override; + +private: + WorkerDeleteReplicaHttpRequest(std::shared_ptr const& serviceProvider, + std::string const& worker, protocol::QueuedRequestHdr const& hdr, + nlohmann::json const& req, ExpirationCallbackType const& onExpired); + + // Input parameters + DatabaseInfo const _databaseInfo; ///< Database descriptor obtained from the Configuration + unsigned int _chunk; + + /// Extended status of the replica deletion request + ReplicaInfo _replicaInfo; +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_REPLICA_WORKERDELETEREPLICAHTTPREQUEST_H diff --git a/src/replica/worker/WorkerDirectorIndexHttpRequest.cc b/src/replica/worker/WorkerDirectorIndexHttpRequest.cc new file mode 100644 index 000000000..fec8eeec2 --- /dev/null +++ b/src/replica/worker/WorkerDirectorIndexHttpRequest.cc @@ -0,0 +1,292 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "replica/worker/WorkerDirectorIndexHttpRequest.h" + +// System headers +#include +#include +#include +#include + +// Third party headers +#include "boost/filesystem.hpp" + +// Qserv headers +#include "global/constants.h" +#include "replica/config/Configuration.h" +#include "replica/mysql/DatabaseMySQL.h" +#include "replica/proto/Protocol.h" +#include "replica/services/ServiceProvider.h" +#include "replica/util/Performance.h" +#include "util/String.h" + +// LSST headers +#include "lsst/log/Log.h" + +#define CONTEXT context("WorkerDirectorIndexHttpRequest", __func__) + +using namespace std; +namespace fs = boost::filesystem; +using json = nlohmann::json; + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.WorkerDirectorIndexHttpRequest"); + +} // namespace + +namespace lsst::qserv::replica { + +using namespace database::mysql; + +shared_ptr WorkerDirectorIndexHttpRequest::create( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const& onExpired, + shared_ptr const& connectionPool) { + auto ptr = shared_ptr( + new WorkerDirectorIndexHttpRequest(serviceProvider, worker, hdr, req, onExpired, connectionPool)); + ptr->init(); + return ptr; +} + 
+WorkerDirectorIndexHttpRequest::WorkerDirectorIndexHttpRequest( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const& onExpired, + shared_ptr const& connectionPool) + : WorkerHttpRequest(serviceProvider, worker, "INDEX", hdr, req, onExpired), + _databaseInfo(serviceProvider->config()->databaseInfo(req.at("database"))), + _tableInfo(_databaseInfo.findTable(req.at("director_table"))), + _hasTransactions(req.at("has_transaction")), + _transactionId(req.at("transaction_id")), + _chunk(req.at("chunk")), + _offset(req.at("offset")), + _connectionPool(connectionPool), + _tmpDirName(serviceProvider->config()->get("worker", "loader-tmp-dir") + "/" + + _databaseInfo.name), + _fileName(_tmpDirName + "/" + _tableInfo.name + "-" + to_string(_chunk) + + (_hasTransactions ? "-p" + to_string(_transactionId) : "") + "-" + hdr.id) {} + +void WorkerDirectorIndexHttpRequest::getResult(json& result) const { + // No locking is needed here since the method is called only after + // the request is completed. + result["error"] = _error; + result["data"] = util::String::toHex(_data.data(), _data.size()); + result["total_bytes"] = _fileSizeBytes; +} + +bool WorkerDirectorIndexHttpRequest::execute() { + LOGS(_log, LOG_LVL_DEBUG, CONTEXT); + + replica::Lock lock(_mtx, CONTEXT); + checkIfCancelling(lock, CONTEXT); + + try { + // The table will be scanned only when the offset is set to 0. + if (_offset == 0) { + // Create a folder (if it still doesn't exist) where the temporary files will be placed + // NOTE: this folder is supposed to be seen by the worker's MySQL/MariaDB server, and it + // must be write-enabled for an account under which the service is run. 
+ boost::system::error_code ec; + fs::create_directory(fs::path(_tmpDirName), ec); + if (ec.value() != 0) { + _error = "failed to create folder '" + _tmpDirName; + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " " << _error); + setStatus(lock, protocol::Status::FAILED, protocol::StatusExt::FOLDER_CREATE); + } + + // Make sure no file exists from any previous attempt to harvest the index data + // in a scope of the request. Otherwise MySQL query will fail. + _removeFile(); + + // Connect to the worker database + // Manage the new connection via the RAII-style handler to ensure the transaction + // is automatically rolled-back in case of exceptions. + ConnectionHandler const h(_connectionPool); + + // A scope of the query depends on parameters of the request + h.conn->executeInOwnTransaction([self = shared_from_base()]( + auto conn) { conn->execute(self->_query(conn)); }); + } + if (auto const status = _readFile(_offset); status != protocol::StatusExt::NONE) { + setStatus(lock, protocol::Status::FAILED, status); + } else { + setStatus(lock, protocol::Status::SUCCESS); + } + } catch (ER_NO_SUCH_TABLE_ const& ex) { + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " MySQL error: " << ex.what()); + _error = ex.what(); + setStatus(lock, protocol::Status::FAILED, protocol::StatusExt::NO_SUCH_TABLE); + } catch (database::mysql::ER_PARTITION_MGMT_ON_NONPARTITIONED_ const& ex) { + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " MySQL error: " << ex.what()); + _error = ex.what(); + setStatus(lock, protocol::Status::FAILED, protocol::StatusExt::NOT_PARTITIONED_TABLE); + } catch (database::mysql::ER_UNKNOWN_PARTITION_ const& ex) { + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " MySQL error: " << ex.what()); + _error = ex.what(); + setStatus(lock, protocol::Status::FAILED, protocol::StatusExt::NO_SUCH_PARTITION); + } catch (database::mysql::Error const& ex) { + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " MySQL error: " << ex.what()); + _error = ex.what(); + setStatus(lock, protocol::Status::FAILED, 
protocol::StatusExt::MYSQL_ERROR); + } catch (invalid_argument const& ex) { + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " exception: " << ex.what()); + _error = ex.what(); + setStatus(lock, protocol::Status::FAILED, protocol::StatusExt::INVALID_PARAM); + } catch (out_of_range const& ex) { + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " exception: " << ex.what()); + _error = ex.what(); + setStatus(lock, protocol::Status::FAILED, protocol::StatusExt::LARGE_RESULT); + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " exception: " << ex.what()); + _error = "Exception: " + string(ex.what()); + setStatus(lock, protocol::Status::FAILED); + } + return true; +} + +string WorkerDirectorIndexHttpRequest::_query(shared_ptr const& conn) const { + if (!_tableInfo.isDirector()) { + throw invalid_argument("table '" + _tableInfo.name + + "' is not been configured as director in database '" + _databaseInfo.name + + "'"); + } + if (_tableInfo.directorTable.primaryKeyColumn().empty()) { + throw invalid_argument("director table '" + _tableInfo.name + + "' has not been properly configured in database '" + _databaseInfo.name + "'"); + } + if (_tableInfo.columns.empty()) { + throw invalid_argument("no schema found for director table '" + _tableInfo.name + "' of database '" + + _databaseInfo.name + "'"); + } + + // Find types required by the "director" index table's columns + + string const qservTransId = _hasTransactions ? 
"qserv_trans_id" : string(); + string qservTransIdType; + string primaryKeyColumnType; + string subChunkIdColNameType; + + for (auto&& column : _tableInfo.columns) { + if (!qservTransId.empty() && column.name == qservTransId) + qservTransIdType = column.type; + else if (column.name == _tableInfo.directorTable.primaryKeyColumn()) + primaryKeyColumnType = column.type; + else if (column.name == lsst::qserv::SUB_CHUNK_COLUMN) + subChunkIdColNameType = column.type; + } + if ((!qservTransId.empty() && qservTransIdType.empty()) || primaryKeyColumnType.empty() or + subChunkIdColNameType.empty()) { + throw invalid_argument( + "column definitions for the Object identifier or sub-chunk identifier" + " columns are missing in the director table schema for table '" + + _tableInfo.name + "' of database '" + _databaseInfo.name + "'"); + } + + // NOTE: injecting the chunk number into each row of the result set because + // the chunk-id column is optional. + QueryGenerator const g(conn); + DoNotProcess const chunk = g.val(_chunk); + SqlId const sqlTableId = g.id(_databaseInfo.name, _tableInfo.name + "_" + to_string(_chunk)); + string query; + if (qservTransId.empty()) { + query = g.select(_tableInfo.directorTable.primaryKeyColumn(), chunk, lsst::qserv::SUB_CHUNK_COLUMN) + + g.from(sqlTableId) + g.orderBy(make_pair(_tableInfo.directorTable.primaryKeyColumn(), "")); + } else { + query = g.select(qservTransId, _tableInfo.directorTable.primaryKeyColumn(), chunk, + lsst::qserv::SUB_CHUNK_COLUMN) + + g.from(sqlTableId) + g.inPartition(g.partId(_transactionId)) + + g.orderBy(make_pair(qservTransId, ""), + make_pair(_tableInfo.directorTable.primaryKeyColumn(), "")); + } + return query + g.intoOutfile(_fileName); +} + +protocol::StatusExt WorkerDirectorIndexHttpRequest::_readFile(size_t offset) { + LOGS(_log, LOG_LVL_DEBUG, CONTEXT); + + // Open the the file. 
+ ifstream f(_fileName, ios::binary); + if (!f.good()) { + _error = "failed to open file '" + _fileName + "'"; + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " " << _error); + return protocol::StatusExt::FILE_ROPEN; + } + + // Get the file size. + boost::system::error_code ec; + _fileSizeBytes = fs::file_size(_fileName, ec); + if (ec.value() != 0) { + _error = "failed to get file size '" + _fileName + "'"; + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " " << _error); + return protocol::StatusExt::FILE_SIZE; + } + + // Validate a value of the offset and position indicator as requested. + if (offset == _fileSizeBytes) { + _removeFile(); + return protocol::StatusExt::NONE; + } else if (offset > _fileSizeBytes) { + _error = "attempted to read the file '" + _fileName + "' at the offset " + to_string(offset) + + " that is beyond the file size of " + to_string(_fileSizeBytes) + " bytes."; + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " " << _error); + return protocol::StatusExt::INVALID_PARAM; + } else if (offset != 0) { + f.seekg(offset, ios::beg); + } + + // Resize the memory buffer for the efficiency of the following read. + size_t const recordSize = + std::min(_fileSizeBytes - offset, + serviceProvider()->config()->get("worker", "director-index-record-size")); + _data.resize(recordSize, ' '); + + // Read the specified number of bytes into the buffer. + protocol::StatusExt result = protocol::StatusExt::NONE; + f.read(&_data[0], recordSize); + if (f.bad()) { + _error = "failed to read " + to_string(recordSize) + " bytes from the file '" + _fileName + + "' at the offset " + to_string(offset) + "."; + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " " << _error); + result = protocol::StatusExt::FILE_READ; + } + f.close(); + + // If this was the last record read from the file then delete the file. + if (offset + recordSize >= _fileSizeBytes) { + _removeFile(); + } + return result; +} + +void WorkerDirectorIndexHttpRequest::_removeFile() const { + // Make the best attempt to get rid of the temporary file.
Ignore any errors + // for now. Just report them (along with the reason). Note that 'remove_all' won't complain if the file + // didn't exist. + boost::system::error_code ec; + fs::remove_all(fs::path(_fileName), ec); + if (ec.value() != 0) { + LOGS(_log, LOG_LVL_WARN, CONTEXT << " failed to remove the temporary file '" << _fileName << "', error: " << ec.message()); + } +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerDirectorIndexHttpRequest.h b/src/replica/worker/WorkerDirectorIndexHttpRequest.h new file mode 100644 index 000000000..1aa26715f --- /dev/null +++ b/src/replica/worker/WorkerDirectorIndexHttpRequest.h @@ -0,0 +1,149 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ +#ifndef LSST_QSERV_REPLICA_WORKERDIRECTORINDEXHTTPREQUEST_H +#define LSST_QSERV_REPLICA_WORKERDIRECTORINDEXHTTPREQUEST_H + +// System headers +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "replica/config/ConfigDatabase.h" +#include "replica/proto/Protocol.h" +#include "replica/util/Common.h" +#include "replica/worker/WorkerHttpRequest.h" + +// Forward declarations +namespace lsst::qserv::replica { +class ServiceProvider; +} // namespace lsst::qserv::replica + +namespace lsst::qserv::replica::database::mysql { +class Connection; +class ConnectionPool; +} // namespace lsst::qserv::replica::database::mysql + +// This header declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerDirectorIndexHttpRequest queries a director table (the whole or just one MySQL + * partition, depending on parameters of the request) of a database + * to extract data to be loaded into the "director" index. + */ +class WorkerDirectorIndexHttpRequest : public WorkerHttpRequest { +public: + /** + * Static factory method is needed to prevent issues with the lifespan + * and memory management of instances created otherwise (as values or via + * low-level pointers). + * + * @param serviceProvider provider is needed to access the Configuration + * of a setup and for validating the input parameters + * @param worker the name of a worker. The name must match the worker which + * is going to execute the request.
+ * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @param onExpired request expiration callback function + * @param connectionPool a pool of connections to the MySQL/MariaDB server + * @return pointer to the created object + */ + static std::shared_ptr create( + std::shared_ptr const& serviceProvider, std::string const& worker, + protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req, + ExpirationCallbackType const& onExpired, + std::shared_ptr const& connectionPool); + + WorkerDirectorIndexHttpRequest() = delete; + WorkerDirectorIndexHttpRequest(WorkerDirectorIndexHttpRequest const&) = delete; + WorkerDirectorIndexHttpRequest& operator=(WorkerDirectorIndexHttpRequest const&) = delete; + + ~WorkerDirectorIndexHttpRequest() override = default; + + bool execute() override; + +protected: + void getResult(nlohmann::json& result) const override; + +private: + WorkerDirectorIndexHttpRequest(std::shared_ptr const& serviceProvider, + std::string const& worker, protocol::QueuedRequestHdr const& hdr, + nlohmann::json const& req, ExpirationCallbackType const& onExpired, + std::shared_ptr const& connectionPool); + + /** + * The query generator uses parameters of a request to compose + * a desired query. + * + * @param conn a reference to the database connector is needed to process + * arguments to meet requirements of the database query processing engine. + * @return a query as per the input request + * @throws std::invalid_argument if the input parameters are not supported + */ + std::string _query(std::shared_ptr const& conn) const; + + /** + * Read the content of the file into memory starting from the given offset. + * @note The maximum number of bytes to read is set in the Configuration + * parameter (worker, director-index-record-size). + * @param offset A position of the first byte in the file to read.
+ * @return The completion status to be returned to the Controller (StatusExt::NONE if no error was detected). + */ + protocol::StatusExt _readFile(size_t offset); + + /// Get rid of the temporary file if it's still there. A failure to remove the file is logged and otherwise ignored. + void _removeFile() const; + + // Input parameters + DatabaseInfo const _databaseInfo; ///< Database descriptor obtained from the Configuration + TableInfo const _tableInfo; ///< Director table descriptor obtained from the Configuration + bool const _hasTransactions; + TransactionId const _transactionId; + unsigned int const _chunk; + std::size_t const _offset; + std::shared_ptr const _connectionPool; ///< A pool of connections to the MySQL/MariaDB server + + /// The path name of a temporary folder where the file will be stored. + /// The folder gets created before extracting data from the MySQL table + /// into the file. + std::string const _tmpDirName; + + /// The full path name of a temporary file into which the TSV/CSV dump will be made. + /// This file will get deleted when its whole content is sent to the Controller. + std::string const _fileName; + + /// The size of the file is determined each time before reading it. + size_t _fileSizeBytes = 0; + + /// Cached error to be sent to a client + std::string _error; + + /// In-memory storage for the content of the file upon a successful completion + /// of the data extraction query. + std::string _data; +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_REPLICA_WORKERDIRECTORINDEXHTTPREQUEST_H diff --git a/src/replica/worker/WorkerEchoHttpRequest.cc b/src/replica/worker/WorkerEchoHttpRequest.cc new file mode 100644 index 000000000..f75665161 --- /dev/null +++ b/src/replica/worker/WorkerEchoHttpRequest.cc @@ -0,0 +1,90 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/).
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "replica/worker/WorkerEchoHttpRequest.h" + +// System headers +#include + +// Qserv headers +#include "util/BlockPost.h" + +// LSST headers +#include "lsst/log/Log.h" + +#define CONTEXT context("WorkerEchoHttpRequest", __func__) + +using namespace std; +using json = nlohmann::json; + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.WorkerEchoHttpRequest"); + +} // namespace + +namespace lsst::qserv::replica { + +shared_ptr WorkerEchoHttpRequest::create( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const& onExpired) { + auto ptr = shared_ptr( + new WorkerEchoHttpRequest(serviceProvider, worker, hdr, req, onExpired)); + ptr->init(); + return ptr; +} + +WorkerEchoHttpRequest::WorkerEchoHttpRequest(shared_ptr const& serviceProvider, + string const& worker, protocol::QueuedRequestHdr const& hdr, + json const& req, ExpirationCallbackType const& onExpired) + : WorkerHttpRequest(serviceProvider, worker, "TEST_ECHO", hdr, req, onExpired), + _delay(req.at("delay")), + _data(req.at("data")), + _delayLeft(_delay) {} // nothing of the delay has been consumed yet + +void WorkerEchoHttpRequest::getResult(json& result) const { + // No locking is needed here since the method is called only after + // the request is completed.
+ result["data"] = _data; + } + +bool WorkerEchoHttpRequest::execute() { + LOGS(_log, LOG_LVL_DEBUG, CONTEXT << " _delay:" << _delay << " _delayLeft:" << _delayLeft); + + replica::Lock lock(_mtx, CONTEXT); + checkIfCancelling(lock, CONTEXT); + + // Block the thread for a random number of milliseconds in the interval + // below (the lock acquired above stays held while waiting). Then update the amount of time which is still left. + util::BlockPost blockPost(1000, 2000); + uint64_t const span = blockPost.wait(); + _delayLeft -= (span < _delayLeft) ? span : _delayLeft; // clamped: never subtract more than what is left + + // Done if the initial delay has been reached or exceeded + if (0 == _delayLeft) { + setStatus(lock, protocol::Status::SUCCESS); + return true; + } + return false; // a part of the delay is still left + } + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerEchoHttpRequest.h b/src/replica/worker/WorkerEchoHttpRequest.h new file mode 100644 index 000000000..c5c7787f1 --- /dev/null +++ b/src/replica/worker/WorkerEchoHttpRequest.h @@ -0,0 +1,99 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ +#ifndef LSST_QSERV_REPLICA_WORKERECHOHTTPREQUEST_H +#define LSST_QSERV_REPLICA_WORKERECHOHTTPREQUEST_H + +// System headers +#include +#include +#include + +// Qserv headers +#include "replica/worker/WorkerHttpRequest.h" + +// Third party headers +#include "nlohmann/json.hpp" + +// Forward declarations + +namespace lsst::qserv::replica { +class ServiceProvider; +} // namespace lsst::qserv::replica + +namespace lsst::qserv::replica::protocol { +struct QueuedRequestHdr; +} // namespace lsst::qserv::replica::protocol + +// This header declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerEchoHttpRequest implements test requests within the worker servers. + * Requests of this type don't have any side effects (in terms of modifying + * any files or databases). + */ +class WorkerEchoHttpRequest : public WorkerHttpRequest { +public: + /** + * Static factory method is needed to prevent issues with the lifespan + * and memory management of instances created otherwise (as values or via + * low-level pointers). + * + * @param serviceProvider provider is needed to access the Configuration + * of a setup and for validating the input parameters + * @param worker the name of a worker. The name must match the worker which + * is going to execute the request.
+ * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters: attributes "delay" and "data") + * @param onExpired request expiration callback function + * @return pointer to the created object + */ + static std::shared_ptr create( + std::shared_ptr const& serviceProvider, std::string const& worker, + protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req, + ExpirationCallbackType const& onExpired); + + WorkerEchoHttpRequest() = delete; + WorkerEchoHttpRequest(WorkerEchoHttpRequest const&) = delete; + WorkerEchoHttpRequest& operator=(WorkerEchoHttpRequest const&) = delete; + + ~WorkerEchoHttpRequest() override = default; + + bool execute() override; ///< @return 'true' once no delay is left, 'false' if a part of the delay is still left + +protected: + void getResult(nlohmann::json& result) const override; ///< Put the echoed message into attribute "data" of the result + +private: + WorkerEchoHttpRequest(std::shared_ptr const& serviceProvider, std::string const& worker, + protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req, + ExpirationCallbackType const& onExpired); + + // Input parameters (extracted from the request object) + uint64_t const _delay; ///< The amount of the initial delay (milliseconds) + std::string const _data; ///< The message to be echoed back to the client + + uint64_t _delayLeft; ///< The amount of the initial delay which is still left (milliseconds) +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_REPLICA_WORKERECHOHTTPREQUEST_H diff --git a/src/replica/worker/WorkerFindAllReplicasHttpRequest.cc b/src/replica/worker/WorkerFindAllReplicasHttpRequest.cc new file mode 100644 index 000000000..85094bdda --- /dev/null +++ b/src/replica/worker/WorkerFindAllReplicasHttpRequest.cc @@ -0,0 +1,157 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/).
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "replica/worker/WorkerFindAllReplicasHttpRequest.h" + +// System headers +#include + +// Third party headers +#include "boost/filesystem.hpp" + +// Qserv headers +#include "replica/config/Configuration.h" +#include "replica/proto/Protocol.h" +#include "replica/util/FileUtils.h" +#include "replica/services/ServiceProvider.h" +#include "util/TimeUtils.h" + +// LSST headers +#include "lsst/log/Log.h" + +#define CONTEXT context("WorkerFindAllReplicasHttpRequest", __func__) + +using namespace std; +namespace fs = boost::filesystem; +using json = nlohmann::json; + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.WorkerFindAllReplicasHttpRequest"); + +} // namespace + +namespace lsst::qserv::replica { + +shared_ptr WorkerFindAllReplicasHttpRequest::create( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const& onExpired) { + auto ptr = shared_ptr( + new WorkerFindAllReplicasHttpRequest(serviceProvider, worker, hdr, req, onExpired)); + ptr->init(); + return ptr; +} + +WorkerFindAllReplicasHttpRequest::WorkerFindAllReplicasHttpRequest( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const&
onExpired) + : WorkerHttpRequest(serviceProvider, worker, "FIND-ALL", hdr, req, onExpired), + _database(req.at("database")), + _databaseInfo(serviceProvider->config()->databaseInfo(_database)) {} + +void WorkerFindAllReplicasHttpRequest::getResult(json& result) const { + // No locking is needed here since the method is called only after + // the request is completed. + result["replica_info_many"] = json::array(); + for (auto const& replicaInfo : _replicaInfoCollection) { + result["replica_info_many"].push_back(replicaInfo.toJson()); + } +} + +bool WorkerFindAllReplicasHttpRequest::execute() { + LOGS(_log, LOG_LVL_DEBUG, CONTEXT << " database: " << _databaseInfo.name); + + replica::Lock lock(_mtx, CONTEXT); + checkIfCancelling(lock, CONTEXT); + + // Scan the data directory to find all files which match the expected pattern(s) + // and group them by their chunk number + WorkerHttpRequest::ErrorContext errorContext; + boost::system::error_code ec; + + map chunk2fileInfoCollection; + { + replica::Lock dataFolderLock(_mtxDataFolderOperations, CONTEXT); + fs::path const dataDir = + fs::path(_serviceProvider->config()->get("worker", "data-dir")) / _databaseInfo.name; + fs::file_status const stat = fs::status(dataDir, ec); + errorContext = errorContext or + reportErrorIf(stat.type() == fs::status_error, protocol::StatusExt::FOLDER_STAT, + "failed to check the status of directory: " + dataDir.string()) or + reportErrorIf(!fs::exists(stat), protocol::StatusExt::NO_FOLDER, + "the directory does not exists: " + dataDir.string()); + try { + for (fs::directory_entry& entry : fs::directory_iterator(dataDir)) { + tuple parsed; + if (FileUtils::parsePartitionedFile(parsed, entry.path().filename().string(), + _databaseInfo)) { + LOGS(_log, LOG_LVL_DEBUG, + CONTEXT << " database: " << _databaseInfo.name + << " file: " << entry.path().filename() << " table: " << get<0>(parsed) + << " chunk: " << get<1>(parsed) << " ext: " << get<2>(parsed)); + + uint64_t const size =
fs::file_size(entry.path(), ec); + errorContext = errorContext or + reportErrorIf(ec.value() != 0, protocol::StatusExt::FILE_SIZE, + "failed to read file size: " + entry.path().string()); + + time_t const mtime = fs::last_write_time(entry.path(), ec); + errorContext = errorContext or + reportErrorIf(ec.value() != 0, protocol::StatusExt::FILE_MTIME, + "failed to read file mtime: " + entry.path().string()); + + unsigned const chunk = get<1>(parsed); + chunk2fileInfoCollection[chunk].emplace_back(ReplicaInfo::FileInfo({ + entry.path().filename().string(), size, mtime, + "", /* cs is never computed for this type of requests */ + 0, /* beginTransferTime */ + 0, /* endTransferTime */ + size /* inSize */ + })); + } + } + } catch (fs::filesystem_error const& ex) { + errorContext = errorContext or reportErrorIf(true, protocol::StatusExt::FOLDER_READ, + "failed to read the directory: " + dataDir.string() + + ", error: " + string(ex.what())); + } + } + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + return true; + } + + // Analyze results to see which chunks are complete using chunk 0 as an example + // of the total number of files which are normally associated with each chunk. + size_t const numFilesPerChunkRequired = FileUtils::partitionedFiles(_databaseInfo, 0).size(); + for (auto&& entry : chunk2fileInfoCollection) { + unsigned int const chunk = entry.first; + size_t const numFiles = entry.second.size(); + _replicaInfoCollection.emplace_back( + numFiles < numFilesPerChunkRequired ?
ReplicaInfo::INCOMPLETE : ReplicaInfo::COMPLETE, + worker(), _databaseInfo.name, chunk, util::TimeUtils::now(), entry.second); // the files of the chunk are already at hand: no map lookup is needed + } + setStatus(lock, protocol::Status::SUCCESS); + return true; +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerFindAllReplicasHttpRequest.h b/src/replica/worker/WorkerFindAllReplicasHttpRequest.h new file mode 100644 index 000000000..649cdfc82 --- /dev/null +++ b/src/replica/worker/WorkerFindAllReplicasHttpRequest.h @@ -0,0 +1,101 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ +#ifndef LSST_QSERV_REPLICA_WORKERFINDALLREPLICASHTTPREQUEST_H +#define LSST_QSERV_REPLICA_WORKERFINDALLREPLICASHTTPREQUEST_H + +// System headers +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "replica/config/ConfigDatabase.h" +#include "replica/util/ReplicaInfo.h" +#include "replica/worker/WorkerHttpRequest.h" + +// Forward declarations +namespace lsst::qserv::replica { +class ServiceProvider; +} // namespace lsst::qserv::replica + +namespace lsst::qserv::replica::protocol { +struct QueuedRequestHdr; +} // namespace lsst::qserv::replica::protocol + +// This header declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerFindAllReplicasHttpRequest represents a context and a state of replicas lookup + * requests within the worker servers. + */ +class WorkerFindAllReplicasHttpRequest : public WorkerHttpRequest { +public: + /** + * Static factory method is needed to prevent issues with the lifespan + * and memory management of instances created otherwise (as values or via + * low-level pointers). + * + * @param serviceProvider provider is needed to access the Configuration + * of a setup and for validating the input parameters + * @param worker the name of a worker. The name must match the worker which + * is going to execute the request.
+ * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters: attribute "database") + * @param onExpired request expiration callback function + * @return pointer to the created object + */ + static std::shared_ptr create( + std::shared_ptr const& serviceProvider, std::string const& worker, + protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req, + ExpirationCallbackType const& onExpired); + + WorkerFindAllReplicasHttpRequest() = delete; + WorkerFindAllReplicasHttpRequest(WorkerFindAllReplicasHttpRequest const&) = delete; + WorkerFindAllReplicasHttpRequest& operator=(WorkerFindAllReplicasHttpRequest const&) = delete; + + ~WorkerFindAllReplicasHttpRequest() override = default; + + bool execute() override; + +protected: + void getResult(nlohmann::json& result) const override; ///< Put the replicas found into attribute "replica_info_many" of the result + +private: + WorkerFindAllReplicasHttpRequest(std::shared_ptr const& serviceProvider, + std::string const& worker, protocol::QueuedRequestHdr const& hdr, + nlohmann::json const& req, ExpirationCallbackType const& onExpired); + + // Input parameters + std::string const _database; ///< The name of a database (attribute "database" of the request) + + /// Cached descriptor of the database obtained from the Configuration + DatabaseInfo const _databaseInfo; + + /// Result of the operation (one entry per chunk found in the data directory of the database) + ReplicaInfoCollection _replicaInfoCollection; +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_REPLICA_WORKERFINDALLREPLICASHTTPREQUEST_H diff --git a/src/replica/worker/WorkerFindReplicaHttpRequest.cc b/src/replica/worker/WorkerFindReplicaHttpRequest.cc new file mode 100644 index 000000000..c4e2d728b --- /dev/null +++ b/src/replica/worker/WorkerFindReplicaHttpRequest.cc @@ -0,0 +1,233 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/).
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "replica/worker/WorkerFindReplicaHttpRequest.h" + +// System headers + +// Third party headers +#include "boost/filesystem.hpp" + +// Qserv headers +#include "replica/config/Configuration.h" +#include "replica/services/ServiceProvider.h" +#include "replica/util/FileUtils.h" +#include "util/TimeUtils.h" + +// LSST headers +#include "lsst/log/Log.h" + +#define CONTEXT context("WorkerFindReplicaHttpRequest", __func__) + +using namespace std; +namespace fs = boost::filesystem; +using json = nlohmann::json; + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.WorkerFindReplicaHttpRequest"); + +} // namespace + +namespace lsst::qserv::replica { + +shared_ptr WorkerFindReplicaHttpRequest::create( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const& onExpired) { + auto ptr = shared_ptr( + new WorkerFindReplicaHttpRequest(serviceProvider, worker, hdr, req, onExpired)); + ptr->init(); + return ptr; +} + +WorkerFindReplicaHttpRequest::WorkerFindReplicaHttpRequest(shared_ptr const& serviceProvider, + string const& worker, + protocol::QueuedRequestHdr const& hdr, + json const& req, + ExpirationCallbackType const& onExpired) + : WorkerHttpRequest(serviceProvider, worker, "FIND", hdr, req,
onExpired), + _databaseInfo(serviceProvider->config()->databaseInfo(req.at("database"))), + _chunk(req.at("chunk")), + _computeCheckSum(req.at("compute_cs")) {} + +void WorkerFindReplicaHttpRequest::getResult(json& result) const { + // No locking is needed here since the method is called only after + // the request is completed. + result["replica_info"] = _replicaInfo.toJson(); +} + +bool WorkerFindReplicaHttpRequest::execute() { + LOGS(_log, LOG_LVL_DEBUG, CONTEXT << " database: " << _databaseInfo.name << " chunk: " << _chunk); + + replica::Lock lock(_mtx, CONTEXT); + checkIfCancelling(lock, CONTEXT); + + // There are two modes of operation of the code which would depend + // on a presence (or a lack of that) of the request to calculate control/check sums + // for the found files. + // + // - if the control/check sum is NOT requested then the request will + // be executed immediately within this call. + // + // - otherwise the incremental approach will be used (which will require + // setting up the incremental engine if this is the first call to the method) + // + // Both methods are combined within the same code block to avoid + // code duplication.
+ WorkerHttpRequest::ErrorContext errorContext; + boost::system::error_code ec; + + if (!_computeCheckSum or (_csComputeEnginePtr == nullptr)) { + // Check if the data directory exists and it can be read + replica::Lock dataFolderLock(_mtxDataFolderOperations, CONTEXT); + fs::path const dataDir = + fs::path(_serviceProvider->config()->get("worker", "data-dir")) / _databaseInfo.name; + fs::file_status const stat = fs::status(dataDir, ec); + errorContext = errorContext or + reportErrorIf(stat.type() == fs::status_error, protocol::StatusExt::FOLDER_STAT, + "failed to check the status of directory: " + dataDir.string()) or + reportErrorIf(!fs::exists(stat), protocol::StatusExt::NO_FOLDER, + "the directory does not exists: " + dataDir.string()); + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + return true; + } + + // For each file associated with the chunk check if the file is present in + // the data directory. + // + // - not finding a file is not a failure for this operation. Just reporting + // those files which are present. + // + // - assume the request failure for any file system operation failure + // + // - assume the successful completion otherwise and adjust the replica + // information record accordingly, depending on the findings.
+ ReplicaInfo::FileInfoCollection + fileInfoCollection; // file info if not using the incremental processing + vector files; // file paths registered for the incremental processing + + for (auto&& file : FileUtils::partitionedFiles(_databaseInfo, _chunk)) { + fs::path const path = dataDir / file; + fs::file_status const stat = fs::status(path, ec); + errorContext = errorContext or + reportErrorIf(stat.type() == fs::status_error, protocol::StatusExt::FILE_STAT, + "failed to check the status of file: " + path.string()); + if (fs::exists(stat)) { + if (!_computeCheckSum) { + // Get file size & mtime right away + uint64_t const size = fs::file_size(path, ec); + errorContext = + errorContext or reportErrorIf(ec.value() != 0, protocol::StatusExt::FILE_SIZE, + "failed to read file size: " + path.string()); + const time_t mtime = fs::last_write_time(path, ec); + errorContext = + errorContext or reportErrorIf(ec.value() != 0, protocol::StatusExt::FILE_MTIME, + "failed to read file mtime: " + path.string()); + fileInfoCollection.emplace_back(ReplicaInfo::FileInfo({ + file, size, mtime, "", /* cs */ + 0, /* beginTransferTime */ + 0, /* endTransferTime */ + size /* inSize */ + })); + } else { + // Register this file for the incremental processing + files.push_back(path.string()); + } + } + } + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + return true; + } + + // If that's so then finalize the operation right away + if (!_computeCheckSum) { + ReplicaInfo::Status status = ReplicaInfo::Status::NOT_FOUND; + if (fileInfoCollection.size()) + status = + FileUtils::partitionedFiles(_databaseInfo, _chunk).size() == fileInfoCollection.size() + ?
ReplicaInfo::Status::COMPLETE + : ReplicaInfo::Status::INCOMPLETE; + + // Fill in the info on the chunk before finishing the operation + _replicaInfo = ReplicaInfo(status, worker(), _databaseInfo.name, _chunk, util::TimeUtils::now(), + fileInfoCollection); + setStatus(lock, protocol::Status::SUCCESS); + return true; + } + + // Otherwise proceed with the incremental approach + _csComputeEnginePtr.reset(new MultiFileCsComputeEngine(files)); + } + + // Next (or the first) iteration in the incremental approach + bool finished = true; + try { + finished = _csComputeEnginePtr->execute(); + if (finished) { + // Extract statistics + ReplicaInfo::FileInfoCollection fileInfoCollection; + auto const fileNames = _csComputeEnginePtr->fileNames(); + for (auto&& file : fileNames) { + const fs::path path(file); + uint64_t const size = _csComputeEnginePtr->bytes(file); + time_t const mtime = fs::last_write_time(path, ec); + errorContext = errorContext or reportErrorIf(ec.value() != 0, protocol::StatusExt::FILE_MTIME, + "failed to read file mtime: " + path.string()); + fileInfoCollection.emplace_back(ReplicaInfo::FileInfo({ + path.filename().string(), size, mtime, to_string(_csComputeEnginePtr->cs(file)), + 0, /* beginTransferTime */ + 0, /* endTransferTime */ + size /* inSize */ + })); + } + if (errorContext.failed) { + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + return true; // NOTE(review): this early exit bypasses the engine reset made at the end of the method + } + + // Finalize the operation + ReplicaInfo::Status status = ReplicaInfo::Status::NOT_FOUND; + if (fileInfoCollection.size()) + status = FileUtils::partitionedFiles(_databaseInfo, _chunk).size() == fileNames.size() + ?
ReplicaInfo::Status::COMPLETE + : ReplicaInfo::Status::INCOMPLETE; + + // Fill in the info on the chunk before finishing the operation + _replicaInfo = ReplicaInfo(status, worker(), _databaseInfo.name, _chunk, util::TimeUtils::now(), + fileInfoCollection); + setStatus(lock, protocol::Status::SUCCESS); + } + } catch (exception const& ex) { + WorkerHttpRequest::ErrorContext errorContext; // NOTE(review): shadows the method-level errorContext declared above + errorContext = errorContext or reportErrorIf(true, protocol::StatusExt::FILE_READ, ex.what()); + setStatus(lock, protocol::Status::FAILED, errorContext.extendedStatus); + } + + // If done (either way) then get rid of the engine right away because + // it may still have allocated buffers + if (finished) _csComputeEnginePtr.reset(); + return finished; +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerFindReplicaHttpRequest.h b/src/replica/worker/WorkerFindReplicaHttpRequest.h new file mode 100644 index 000000000..a0ad3c12f --- /dev/null +++ b/src/replica/worker/WorkerFindReplicaHttpRequest.h @@ -0,0 +1,104 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ +#ifndef LSST_QSERV_REPLICA_WORKERFINDREPLICAHTTPREQUEST_H +#define LSST_QSERV_REPLICA_WORKERFINDREPLICAHTTPREQUEST_H + +// System headers +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "replica/config/ConfigDatabase.h" +#include "replica/util/ReplicaInfo.h" +#include "replica/worker/WorkerHttpRequest.h" + +// Forward declarations +namespace lsst::qserv::replica { +class MultiFileCsComputeEngine; +class ServiceProvider; +} // namespace lsst::qserv::replica + +namespace lsst::qserv::replica::protocol { +struct QueuedRequestHdr; +} // namespace lsst::qserv::replica::protocol + +// This header declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerFindReplicaHttpRequest represents a context and a state of replica lookup + * requests within the worker servers. + */ +class WorkerFindReplicaHttpRequest : public WorkerHttpRequest { +public: + /** + * Static factory method is needed to prevent issue with the lifespan + * and memory management of instances created otherwise (as values or via + * low-level pointers). + * + * @param serviceProvider provider is needed to access the Configuration + * of a setup and for validating the input parameters + * @param worker the name of a worker. The name must match the worker which + * is going to execute the request. 
+ * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @param onExpired request expiration callback function + * @return pointer to the created object + */ + static std::shared_ptr create( + std::shared_ptr const& serviceProvider, std::string const& worker, + protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req, + ExpirationCallbackType const& onExpired); + + WorkerFindReplicaHttpRequest() = delete; + WorkerFindReplicaHttpRequest(WorkerFindReplicaHttpRequest const&) = delete; + WorkerFindReplicaHttpRequest& operator=(WorkerFindReplicaHttpRequest const&) = delete; + + ~WorkerFindReplicaHttpRequest() override = default; + + bool execute() override; + +protected: + void getResult(nlohmann::json& result) const override; + +private: + WorkerFindReplicaHttpRequest(std::shared_ptr const& serviceProvider, + std::string const& worker, protocol::QueuedRequestHdr const& hdr, + nlohmann::json const& req, ExpirationCallbackType const& onExpired); + + // Input parameters + DatabaseInfo const _databaseInfo; ///< Database descriptor obtained from the Configuration + unsigned int _chunk; + bool const _computeCheckSum; + + /// Result of the operation + ReplicaInfo _replicaInfo; + + /// The engine for incremental control sum calculation + std::unique_ptr _csComputeEnginePtr; +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_REPLICA_WORKERFINDREPLICAHTTPREQUEST_H diff --git a/src/replica/worker/WorkerHttpProcessor.cc b/src/replica/worker/WorkerHttpProcessor.cc new file mode 100644 index 000000000..a3f290d61 --- /dev/null +++ b/src/replica/worker/WorkerHttpProcessor.cc @@ -0,0 +1,568 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "replica/worker/WorkerHttpProcessor.h" + +// System headers +#include +#include +#include + +// Qserv headers +#include "replica/config/Configuration.h" +#include "replica/mysql/DatabaseMySQL.h" +#include "replica/services/ServiceProvider.h" +#include "replica/worker/WorkerHttpProcessorThread.h" +#include "replica/worker/WorkerHttpRequest.h" +#include "replica/worker/WorkerCreateReplicaHttpRequest.h" +#include "replica/worker/WorkerDeleteReplicaHttpRequest.h" +#include "replica/worker/WorkerDirectorIndexHttpRequest.h" +#include "replica/worker/WorkerEchoHttpRequest.h" +#include "replica/worker/WorkerFindReplicaHttpRequest.h" +#include "replica/worker/WorkerFindAllReplicasHttpRequest.h" +#include "replica/worker/WorkerSqlHttpRequest.h" +#include "util/BlockPost.h" +#include "util/TimeUtils.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace std::placeholders; +using json = nlohmann::json; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.WorkerHttpProcessor"); +} // namespace + +namespace lsst::qserv::replica { + +bool WorkerHttpProcessor::PriorityQueueType::remove(string const& id) { + auto itr = find_if(c.begin(), c.end(), + [&id](shared_ptr const& ptr) { return ptr->id() == id; }); + if (itr != c.end()) { + c.erase(itr); + 
make_heap(c.begin(), c.end(), comp); + return true; + } + return false; +} + +shared_ptr WorkerHttpProcessor::create( + shared_ptr const& serviceProvider, string const& worker) { + return shared_ptr(new WorkerHttpProcessor(serviceProvider, worker)); +} + +WorkerHttpProcessor::WorkerHttpProcessor(shared_ptr const& serviceProvider, + string const& worker) + : _serviceProvider(serviceProvider), + _worker(worker), + _connectionPool(database::mysql::ConnectionPool::create( + Configuration::qservWorkerDbParams(), + serviceProvider->config()->get("database", "services-pool-size"))), + _state(protocol::ServiceState::SUSPENDED), + _startTime(util::TimeUtils::now()) {} + +void WorkerHttpProcessor::run() { + LOGS(_log, LOG_LVL_DEBUG, _context(__func__)); + replica::Lock lock(_mtx, _context(__func__)); + + if (_state == protocol::ServiceState::SUSPENDED) { + size_t const numThreads = + _serviceProvider->config()->get("worker", "num-svc-processing-threads"); + if (numThreads == 0) { + throw out_of_range(_classMethodContext(__func__) + + "invalid configuration parameter for the number of processing threads. " + "The value of the parameter must be greater than 0"); + } + + // Create threads if needed + if (_threads.empty()) { + auto const self = shared_from_this(); + for (size_t i = 0; i < numThreads; ++i) { + _threads.push_back(WorkerHttpProcessorThread::create(self)); + } + } + + // Tell each thread to run + for (auto&& t : _threads) { + t->run(); + } + _state = protocol::ServiceState::RUNNING; + } +} + +void WorkerHttpProcessor::stop() { + LOGS(_log, LOG_LVL_DEBUG, _context(__func__)); + replica::Lock lock(_mtx, _context(__func__)); + + if (_state == protocol::ServiceState::RUNNING) { + // Tell each thread to stop. + for (auto&& t : _threads) { + t->stop(); + } + + // Begin transitioning to the final state via this intermediate one. + // The transition will finish asynchronously when all threads report + // desired changes in their states. 
+ _state = protocol::ServiceState::SUSPEND_IN_PROGRESS; + } +} + +void WorkerHttpProcessor::drain() { + LOGS(_log, LOG_LVL_DEBUG, _context(__func__)); + replica::Lock lock(_mtx, _context(__func__)); + + // Collect identifiers of requests to be affected by the operation + list ids; + for (auto&& ptr : _newRequests) ids.push_back(ptr->id()); + for (auto&& entry : _inProgressRequests) ids.push_back(entry.first); + for (auto&& id : ids) _stopRequestImpl(lock, id); +} + +void WorkerHttpProcessor::reconfig() { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context); + replica::Lock lock(_mtx, context); + _serviceProvider->config()->reload(); +} + +json WorkerHttpProcessor::createReplica(protocol::QueuedRequestHdr const& hdr, json const& req) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << hdr.id); + return _submit(replica::Lock(_mtx, context), context, hdr, req); +} + +json WorkerHttpProcessor::deleteReplica(protocol::QueuedRequestHdr const& hdr, json const& req) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << hdr.id); + return _submit(replica::Lock(_mtx, context), context, hdr, req); +} + +json WorkerHttpProcessor::findReplica(protocol::QueuedRequestHdr const& hdr, json const& req) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << hdr.id); + return _submit(replica::Lock(_mtx, context), context, hdr, req); +} + +json WorkerHttpProcessor::findAllReplicas(protocol::QueuedRequestHdr const& hdr, json const& req) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << hdr.id); + return _submit(replica::Lock(_mtx, context), context, hdr, req); +} + +json WorkerHttpProcessor::echo(protocol::QueuedRequestHdr const& hdr, json const& req) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << hdr.id); + return 
_submit(replica::Lock(_mtx, context), context, hdr, req); +} + +json WorkerHttpProcessor::sql(protocol::QueuedRequestHdr const& hdr, json const& req) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << hdr.id); + return _submit(replica::Lock(_mtx, context), context, hdr, req); +} + +json WorkerHttpProcessor::index(protocol::QueuedRequestHdr const& hdr, json const& req) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << hdr.id); + return _submit(replica::Lock(_mtx, context), context, hdr, req, + _connectionPool); +} + +json WorkerHttpProcessor::requestStatus(string const& id) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << id); + + replica::Lock lock(_mtx, context); + + // Still waiting in the queue? + shared_ptr targetRequestPtr; + for (auto ptr : _newRequests) { + if (ptr->id() == id) { + targetRequestPtr = ptr; + break; + } + } + if (targetRequestPtr == nullptr) { + // Is it already being processed? + auto itrInProgress = _inProgressRequests.find(id); + if (itrInProgress != _inProgressRequests.end()) { + targetRequestPtr = itrInProgress->second; + } + if (targetRequestPtr == nullptr) { + // Has it finished? + auto itrFinished = _finishedRequests.find(id); + if (itrFinished != _finishedRequests.end()) { + targetRequestPtr = itrFinished->second; + } + // No such request? 
+ if (targetRequestPtr == nullptr) { + return json::object({ + {"status", protocol::Status::BAD}, + {"status_ext", protocol::StatusExt::INVALID_ID}, + }); + } + } + } + return targetRequestPtr->toJson(); +} + +json WorkerHttpProcessor::stopRequest(string const& id) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << id); + + replica::Lock lock(_mtx, context); + + json response = json::object(); + auto const request = _stopRequestImpl(lock, id); + if (request == nullptr) { + response["status"] = protocol::Status::BAD; + response["status_ext"] = protocol::StatusExt::INVALID_ID; + } else { + response = request->toJson(); + } + return response; +} + +json WorkerHttpProcessor::trackRequest(string const& id) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << id); + + replica::Lock lock(_mtx, context); + + json response = json::object(); + auto const request = _trackRequestImpl(lock, id); + if (request == nullptr) { + response["status"] = protocol::Status::BAD; + response["status_ext"] = protocol::StatusExt::INVALID_ID; + } else { + bool const includeResultIfFinished = true; + response = request->toJson(includeResultIfFinished); + } + return response; +} + +bool WorkerHttpProcessor::disposeRequest(string const& id) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << id); + + replica::Lock lock(_mtx, context); + + // Note that only the finished requests are allowed to be disposed. 
+ if (auto itr = _finishedRequests.find(id); itr != _finishedRequests.end()) { + itr->second->dispose(); + _finishedRequests.erase(itr); + return true; + } + return false; +} + +size_t WorkerHttpProcessor::numNewRequests() const { + replica::Lock lock(_mtx, _context(__func__)); + return _newRequests.size(); +} + +size_t WorkerHttpProcessor::numInProgressRequests() const { + replica::Lock lock(_mtx, _context(__func__)); + return _inProgressRequests.size(); +} + +size_t WorkerHttpProcessor::numFinishedRequests() const { + replica::Lock lock(_mtx, _context(__func__)); + return _finishedRequests.size(); +} + +json WorkerHttpProcessor::toJson(protocol::Status status, bool includeRequests) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context); + + replica::Lock lock(_mtx, context); + + json response; + response["status"] = status; + response["status_ext"] = protocol::StatusExt::NONE; + response["service_state"] = state(); + response["num_new_requests"] = _newRequests.size(); + response["num_in_progress_requests"] = _inProgressRequests.size(); + response["num_finished_requests"] = _finishedRequests.size(); + response["new_requests"] = json::array(); + response["in_progress_requests"] = json::array(); + response["finished_requests"] = json::array(); + + if (includeRequests) { + for (auto const& request : _newRequests) { + response["new_requests"].push_back(request->toJson()); + } + for (auto const& entry : _inProgressRequests) { + response["in_progress_requests"].push_back(entry.second->toJson()); + } + for (auto const& entry : _finishedRequests) { + response["finished_requests"].push_back(entry.second->toJson()); + } + } + return response; +} + +string WorkerHttpProcessor::_classMethodContext(string const& func) { return "WorkerHttpProcessor::" + func; } + +void WorkerHttpProcessor::_logError(string const& context, string const& message) const { + LOGS(_log, LOG_LVL_ERROR, context << " " << message); +} + +shared_ptr 
WorkerHttpProcessor::_stopRequestImpl(replica::Lock const& lock, + string const& id) { + LOGS(_log, LOG_LVL_DEBUG, _context(__func__) << " id: " << id); + + // Still waiting in the queue? + // + // ATTENTION: the loop variable is a copy of (not a reference to) a shared + // pointer to allow removing (if needed) the corresponding entry from the + // input collection while retaining a valid copy of the pointer to be placed + // into the next stage collection. + + for (auto ptr : _newRequests) { + if (ptr->id() == id) { + // Cancel it and move it into the final queue in case if a client + // won't be able to receive the desired status of the request due to + // a protocol failure, etc. + ptr->cancel(); + switch (ptr->status()) { + case protocol::Status::CANCELLED: { + _newRequests.remove(id); + _finishedRequests[ptr->id()] = ptr; + return ptr; + } + default: + throw logic_error(_classMethodContext(__func__) + " unexpected request status " + + protocol::toString(ptr->status()) + " in new requests"); + } + } + } + + // Is it already being processed? + auto itrInProgress = _inProgressRequests.find(id); + if (itrInProgress != _inProgressRequests.end()) { + auto ptr = itrInProgress->second; + // Tell the request to begin the cancelling protocol. The protocol + // will take care of moving the request into the final queue when + // the cancellation will finish. + // + // In the meantime we just notify the client about the cancellation status + // of the request and let it come back later to check the updated status. 
+ ptr->cancel(); + switch (ptr->status()) { + // These are the most typical states for request in this queue + case protocol::Status::CANCELLED: + case protocol::Status::IS_CANCELLING: + + // The following two states are also allowed here because + // in-progress requests are still allowed to progress to the completed + // states before reporting their new state via method: + // WorkerHttpProcessor::_processingFinished() + // Sometimes, the request just can't finish this in time due to + // replica::Lock lock(_mtx) held by the current method. We shouldn't worry + // about this situation here. The request will be moved into the next + // queue as soon as replica::Lock lock(_mtx) will be released. + case protocol::Status::SUCCESS: + case protocol::Status::FAILED: + return ptr; + default: + throw logic_error(_classMethodContext(__func__) + " unexpected request status " + + protocol::toString(ptr->status()) + " in in-progress requests"); + } + } + + // Has it finished? + auto itrFinished = _finishedRequests.find(id); + if (itrFinished != _finishedRequests.end()) { + auto ptr = itrFinished->second; + // There is nothing else we can do here other than just + // reporting the completion status of the request. It's up to a client + // to figure out what to do about this situation. + switch (ptr->status()) { + case protocol::Status::CANCELLED: + case protocol::Status::SUCCESS: + case protocol::Status::FAILED: + return ptr; + default: + throw logic_error(_classMethodContext(__func__) + " unexpected request status " + + protocol::toString(ptr->status()) + " in finished requests"); + } + } + + // No request found! + return nullptr; +} + +shared_ptr WorkerHttpProcessor::_trackRequestImpl(replica::Lock const& lock, + string const& id) { + LOGS(_log, LOG_LVL_DEBUG, _context(__func__) << " id: " << id); + + // Still waiting in the queue? 
+ for (auto&& ptr : _newRequests) { + if (ptr->id() == id) { + switch (ptr->status()) { + // This state requirement is strict for the non-active requests + case protocol::Status::CREATED: + return ptr; + default: + throw logic_error(_classMethodContext(__func__) + " unexpected request status " + + protocol::toString(ptr->status()) + " in new requests"); + } + } + } + + // Is it already being processed? + auto itrInProgress = _inProgressRequests.find(id); + if (itrInProgress != _inProgressRequests.end()) { + auto ptr = itrInProgress->second; + switch (ptr->status()) { + // These are the most typical states for request in this queue + case protocol::Status::IS_CANCELLING: + case protocol::Status::IN_PROGRESS: + + // The following three states are also allowed here because + // in-progress requests are still allowed to progress to the completed + // states before reporting their new state via method: + // WorkerHttpProcessor::_processingFinished() + // Sometimes, the request just can't finish this in time due to + // replica::Lock lock(_mtx) held by the current method. We shouldn't worry + // about this situation here. The request will be moved into the next + // queue as soon as replica::Lock lock(_mtx) will be released. + case protocol::Status::CANCELLED: + case protocol::Status::SUCCESS: + case protocol::Status::FAILED: + return ptr; + default: + throw logic_error(_classMethodContext(__func__) + " unexpected request status " + + protocol::toString(ptr->status()) + " in in-progress requests"); + } + } + + // Has it finished? 
auto itrFinished = _finishedRequests.find(id); + if (itrFinished != _finishedRequests.end()) { + auto ptr = itrFinished->second; + switch (ptr->status()) { + // This state requirement is strict for the completed requests + case protocol::Status::CANCELLED: + case protocol::Status::SUCCESS: + case protocol::Status::FAILED: + return ptr; + default: + throw logic_error(_classMethodContext(__func__) + " unexpected request status " + + protocol::toString(ptr->status()) + " in finished requests"); + } + } + + // No request found! + return nullptr; +} + +shared_ptr WorkerHttpProcessor::_fetchNextForProcessing( + shared_ptr const& processorThread, unsigned int timeoutMilliseconds) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_TRACE, + context << " thread: " << processorThread->id() << " timeout: " << timeoutMilliseconds); + + // For generating random intervals within the maximum range of seconds + // requested by a client. + // + // TODO: Re-implement this loop to use a condition variable instead. + // This will improve the performance of the processor which is limited + // by the half-latency of the wait interval. + util::BlockPost blockPost(0, min(10U, timeoutMilliseconds)); + + unsigned int totalElapsedTime = 0; + while (totalElapsedTime < timeoutMilliseconds) { + // IMPORTANT: make sure no wait is happening within the same + // scope where the thread safe block is defined. Otherwise + // the queue will be locked for all threads for the duration of + // the wait. + { + replica::Lock lock(_mtx, context); + if (!_newRequests.empty()) { + shared_ptr request = _newRequests.top(); + _newRequests.pop(); + request->start(); + _inProgressRequests[request->id()] = request; + return request; + } + } + totalElapsedTime += blockPost.wait(); + } + + // Return null pointer since nothing has been found within the specified + // timeout. 
+ return nullptr; +} + +void WorkerHttpProcessor::_processingRefused(shared_ptr const& request) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " id: " << request->id()); + + replica::Lock lock(_mtx, context); + + // Note that disposed requests won't be found in any queue. + auto itr = _inProgressRequests.find(request->id()); + if (itr != _inProgressRequests.end()) { + // Update request's state before moving it back into + // the input queue. + itr->second->stop(); + _newRequests.push(itr->second); + _inProgressRequests.erase(itr); + } +} + +void WorkerHttpProcessor::_processingFinished(shared_ptr const& request) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, + context << " id: " << request->id() << " status: " << protocol::toString(request->status())); + + replica::Lock lock(_mtx, context); + + // Note that disposed requests won't be found in any queue. + auto itr = _inProgressRequests.find(request->id()); + if (itr != _inProgressRequests.end()) { + _finishedRequests[itr->first] = itr->second; + _inProgressRequests.erase(itr); + } +} + +void WorkerHttpProcessor::_processorThreadStopped( + shared_ptr const& processorThread) { + string const context = _context(__func__); + LOGS(_log, LOG_LVL_DEBUG, context << " thread: " << processorThread->id()); + + replica::Lock lock(_mtx, context); + + if (_state == protocol::ServiceState::SUSPEND_IN_PROGRESS) { + // Complete state transition if all threads are stopped + for (auto&& t : _threads) { + if (t->isRunning()) return; + } + _state = protocol::ServiceState::SUSPENDED; + } +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerHttpProcessor.h b/src/replica/worker/WorkerHttpProcessor.h new file mode 100644 index 000000000..0a5308082 --- /dev/null +++ b/src/replica/worker/WorkerHttpProcessor.h @@ -0,0 +1,366 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project 
(http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_REPLICA_WORKERHTTPPROCESSOR_H +#define LSST_QSERV_REPLICA_WORKERHTTPPROCESSOR_H + +// System headers +#include +#include +#include +#include +#include +#include +#include +#include + +// Qserv headers +#include "replica/proto/Protocol.h" +#include "replica/util/Mutex.h" +#include "replica/worker/WorkerHttpRequest.h" + +// Third party headers +#include "nlohmann/json.hpp" + +// Forward declarations + +namespace lsst::qserv::replica { +class ServiceProvider; +class WorkerHttpProcessorThread; +} // namespace lsst::qserv::replica + +namespace lsst::qserv::replica::database::mysql { +class ConnectionPool; +} // namespace lsst::qserv::replica::database::mysql + +// This header declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerHttpProcessor is a front-end interface for processing + * requests from remote clients within worker-side services. + */ +class WorkerHttpProcessor : public std::enable_shared_from_this { +public: + // The thread-based processor class is allowed to access the internal API + friend class WorkerHttpProcessorThread; + + /** + * Structure PriorityQueueType extends the standard priority queue for pointers + * to the new (unprocessed) requests. 
+ * + * Its design relies upon the inheritance to get access to the protected + * data members 'c' representing the internal container of the base queue + * in order to implement the iterator protocol. + */ + struct PriorityQueueType + : std::priority_queue, + std::vector>, WorkerHttpRequestCompare> { + /// @return iterator to the beginning of the container + decltype(c.begin()) begin() { return c.begin(); } + + /// @return iterator to the end of the container + decltype(c.end()) end() { return c.end(); } + + /** + * Remove a request from the queue by its identifier + * @param id an identifier of a request + * @return 'true' if the object was actually removed + */ + bool remove(std::string const& id); + }; + + /** + * The factory method for objects of the class + * + * @param serviceProvider provider is needed to access the Configuration of + * a setup in order to get a number of the processing threads to be launched + * by the processor. + * @param worker the name of a worker + * @return a pointer to the created object + */ + static std::shared_ptr create( + std::shared_ptr const& serviceProvider, std::string const& worker); + + WorkerHttpProcessor() = delete; + WorkerHttpProcessor(WorkerHttpProcessor const&) = delete; + WorkerHttpProcessor& operator=(WorkerHttpProcessor const&) = delete; + + ~WorkerHttpProcessor() = default; + + /// @return the state of the processor + protocol::ServiceState state() const { return _state; } + + /// Begin processing requests + void run(); + + /// Stop processing all requests, and stop all threads + void stop(); + + /// Drain (cancel) all queued and in-progress requests + void drain(); + + /// Reload Configuration + void reconfig(); + + /** + * Enqueue the replica creation request for processing + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @return the response object to be sent back to a client + */ + nlohmann::json 
createReplica(protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req); + + /** + * Enqueue the replica deletion request for processing + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @return the response object to be sent back to a client + */ + nlohmann::json deleteReplica(protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req); + + /** + * Enqueue the replica lookup request for processing + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @return the response object to be sent back to a client + */ + nlohmann::json findReplica(protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req); + + /** + * Enqueue the multi-replica lookup request for processing + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @return the response object to be sent back to a client + */ + nlohmann::json findAllReplicas(protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req); + + /** + * Enqueue the worker-side testing request for processing + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @return the response object to be sent back to a client + */ + nlohmann::json echo(protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req); + + /** + * Enqueue a request for querying the worker database + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @return the response object to be sent back to a client + */ + nlohmann::json sql(protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req); + + 
/** + * Enqueue a request for extracting the "director" index data from + * the director tables. + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @return the response object to be sent back to a client + */ + nlohmann::json index(protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req); + + /** + * Get a status of the request + * @param id an identifier of a request affected by the operation + * @return the response object to be sent back to a client + */ + nlohmann::json requestStatus(std::string const& id); + + /** + * Dequeue replication request + * @note If the request is not being processed yet then it will be simply removed + * from the ready-to-be-processed queue. If it's being processed an attempt + * to cancel processing will be made. If it has already been processed this will + * be reported. + * @param id an identifier of a request affected by the operation + * @return the response object to be sent back to a client + */ + nlohmann::json stopRequest(std::string const& id); + + /** + * Return the tracking info on the on-going request + * @param id an identifier of a request affected by the operation + * @return the response object to be sent back to a client + */ + nlohmann::json trackRequest(std::string const& id); + + /** + * Find the request in any queue, and "garbage collect" it to release resources + * associated with the request. If the request is still in the "in-progress" + * state then it will be "drained" before disposing. If the request isn't found + * in any queue then nothing will happen (no exception thrown, no side effects). 
+ * + * @param id an identifier of a request affected by the operation + * @return 'true' if the request was found and actually removed from any queue + */ + bool disposeRequest(std::string const& id); + + size_t numNewRequests() const; + size_t numInProgressRequests() const; + size_t numFinishedRequests() const; + + /** + * Capture the processor's state and counters. + * @param status desired status to set in the response object + * @param includeRequests (optional) flag to return detailed info on all known requests + * @return the response object to be sent back to a client + */ + nlohmann::json toJson(protocol::Status status, bool includeRequests = false); + +private: + WorkerHttpProcessor(std::shared_ptr const& serviceProvider, std::string const& worker); + + static std::string _classMethodContext(std::string const& func); + + /** + * Submit a request for processing + * @param lock a lock on _mtx to be acquired before calling this method + * @param context the logging context (including the name of a function/method) + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @return the response object to be sent back to a client + */ + template + nlohmann::json _submit(replica::Lock const& lock, std::string const& context, + protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req, Args... args) { + try { + auto const ptr = REQUEST_TYPE::create( + _serviceProvider, _worker, hdr, req, + [self = shared_from_this()](std::string const& id) { self->disposeRequest(id); }, + args...); + _newRequests.push(ptr); + return ptr->toJson(); + } catch (std::exception const& ec) { + _logError(context, ec.what()); + return nlohmann::json::object({ + {"status", protocol::Status::BAD}, + {"status_ext", protocol::StatusExt::INVALID_PARAM}, + }); + } + } + + /** + * Log the error message. 
+ * @param context the logging context (including the name of a function/method) + * @param message the error message to be reported + */ + void _logError(std::string const& context, std::string const& message) const; + + /** + * Return the next request which is ready to be processed + * and, if one is found, assign it to the specified thread. The request + * will be removed from the ready-to-be-processed queue. + * + * If one is available within the specified timeout then such request + * will be moved into the in-progress queue, assigned to the processor thread + * and returned to a caller. Otherwise an empty pointer (pointing to nullptr) + * will be returned. + * + * This method is supposed to be called by one of the processing threads + * when it becomes available. + * + * @note this method will block for a duration of time not exceeding + * the client-specified timeout unless it's set to 0. In the latter + * case the method will block indefinitely. + * @param processorThread reference to a thread which fetches the next request + * @param timeoutMilliseconds (optional) amount of time to wait before finishing if + * no suitable requests are available for processing + * @return the request to be processed or an empty pointer as explained above + */ + std::shared_ptr _fetchNextForProcessing( + std::shared_ptr const& processorThread, + unsigned int timeoutMilliseconds = 0); + + /** + * Implement the operation for the specified identifier if such request + * is still known to the Processor. Return a reference to the request object + * whose state will be properly updated. + * @param lock a lock on _mtx to be acquired before calling this method + * @param id an identifier of a request + * @return the request object (if found) or nullptr otherwise + */ + std::shared_ptr _stopRequestImpl(replica::Lock const& lock, std::string const& id); + + /** + * Find and return a reference to the request object.
+ * @param lock a lock on _mtx to be acquired before calling this method + * @param id an identifier of a request + * @return the request object (if found) or nullptr otherwise + */ + std::shared_ptr _trackRequestImpl(replica::Lock const& lock, std::string const& id); + + /** + * Report a decision not to process a request + * + * This method is supposed to be called by one of the processing threads + * after it fetches the next ready-to-process request and then decides + * not to proceed with processing. Normally this should happen when + * the thread was asked to stop. In that case the request will be put + * back into the ready-to-be-processed queue and be picked up later + * by some other thread. + * + * @param request a pointer to the request + */ + void _processingRefused(std::shared_ptr const& request); + + /** + * Report a request which has been processed or cancelled. + * + * The method is called by a thread which was processing the request. + * The request will be moved into the corresponding queue. A proper + * completion status is expected to be stored within the request. + * + * @param request a pointer to the request + */ + void _processingFinished(std::shared_ptr const& request); + + /** + * For threads reporting their completion + * + * This method is used by threads to report a change in their state. + * It's meant to be used during the gradual and asynchronous state transition + * of this processor from the combined State::STATE_IS_STOPPING to + * State::STATE_IS_STOPPED. The latter is achieved when all threads are stopped.
+ * + * @param processorThread reference to the processing thread which finished + */ + void _processorThreadStopped(std::shared_ptr const& processorThread); + + /// @return the logging context: the prefix "PROCESSOR " followed by the (optional) function name + std::string _context(std::string const& func = std::string()) const { return "PROCESSOR " + func; } + + std::shared_ptr const _serviceProvider; + std::string const _worker; + std::shared_ptr const _connectionPool; + + protocol::ServiceState _state; + uint64_t _startTime; /// When the processor started (milliseconds since UNIX Epoch) + + std::vector> _threads; + + mutable replica::Mutex _mtx; /// Mutex guarding the queues + + PriorityQueueType _newRequests; + std::map> _inProgressRequests; + std::map> _finishedRequests; +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_REPLICA_WORKERHTTPPROCESSOR_H diff --git a/src/replica/worker/WorkerHttpProcessorThread.cc b/src/replica/worker/WorkerHttpProcessorThread.cc new file mode 100644 index 000000000..c2cd307d9 --- /dev/null +++ b/src/replica/worker/WorkerHttpProcessorThread.cc @@ -0,0 +1,121 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ + +// Class header +#include "replica/worker/WorkerHttpProcessorThread.h" + +// System headers +#include + +// Qserv headers +#include "replica/proto/Protocol.h" +#include "replica/worker/WorkerHttpProcessor.h" +#include "replica/worker/WorkerHttpRequest.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.WorkerHttpProcessorThread"); + +} // namespace + +namespace lsst::qserv::replica { + +// Each new object gets the next value of a counter shared by all calls to the factory. +// NOTE(review): the counter is a plain function-local static incremented without any +// synchronization; presumably create() is never called concurrently - confirm. +shared_ptr WorkerHttpProcessorThread::create( + shared_ptr const& processor) { + static unsigned int id = 0; + return shared_ptr(new WorkerHttpProcessorThread(processor, id++)); +} + +WorkerHttpProcessorThread::WorkerHttpProcessorThread(shared_ptr const& processor, + unsigned int id) + : _processor(processor), _id(id), _stop(false) {} + +// The thread is reported as running for as long as the thread object exists. +bool WorkerHttpProcessorThread::isRunning() const { return _thread != nullptr; } + +// Launch the detached thread which keeps fetching requests from the processor and +// executing them until the stop flag is raised. The call is ignored if the thread +// object already exists. +void WorkerHttpProcessorThread::run() { + if (isRunning()) return; + + // The thread function captures a shared pointer to this object. + // NOTE(review): _thread is reset from within the thread function (see _stopped()) while + // isRunning() reads it without synchronization - confirm this is safe for the callers. + _thread = make_unique([self = shared_from_this()]() { + LOGS(_log, LOG_LVL_DEBUG, self->context() << "start"); + while (!self->_stop) { + // Get the next request to process if any. This operation will block + // until either the next request is available (returned a valid pointer) + // or the specified timeout expires. In either case this thread has a chance + // to re-evaluate the stopping condition.
+ auto const request = self->_processor->_fetchNextForProcessing(self, 1000); + if (self->_stop) { + // Asked to stop: give the request (if any) back to the processor untouched. + if (request) self->_processor->_processingRefused(request); + continue; + } + if (request) { + LOGS(_log, LOG_LVL_DEBUG, + self->context() << "begin processing" + << " id: " << request->id()); + bool finished = false; // just to report the request completion + // NOTE(review): only WorkerHttpRequestCancelled is caught below, while the base + // class's rollback() can also throw std::logic_error. Any other exception would + // escape the thread function - confirm this can't happen. + try { + while (!(finished = request->execute())) { + if (self->_stop) { + // Asked to stop in the middle of processing: roll the request back + // and return it to the processor. + LOGS(_log, LOG_LVL_DEBUG, + self->context() << "rollback processing" + << " id: " << request->id()); + request->rollback(); + self->_processor->_processingRefused(request); + break; + } + } + } catch (WorkerHttpRequestCancelled const& ex) { + LOGS(_log, LOG_LVL_DEBUG, + self->context() << "cancel processing" + << " id: " << request->id()); + self->_processor->_processingFinished(request); + } + if (finished) { + LOGS(_log, LOG_LVL_DEBUG, + self->context() << "finish processing" + << " id: " << request->id() + << " status: " << protocol::toString(request->status())); + self->_processor->_processingFinished(request); + } + } + } + LOGS(_log, LOG_LVL_DEBUG, self->context() << "stop"); + + self->_stopped(); + }); + _thread->detach(); +} + +// Raise the stop flag. The flag is only set if the thread object exists. +void WorkerHttpProcessorThread::stop() { + if (isRunning()) _stop = true; +} + +// Called at the end of the thread function: lower the stop flag, release the thread +// object and notify the processor. +void WorkerHttpProcessorThread::_stopped() { + _stop = false; + _thread.reset(nullptr); + _processor->_processorThreadStopped(shared_from_this()); +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerHttpProcessorThread.h b/src/replica/worker/WorkerHttpProcessorThread.h new file mode 100644 index 000000000..388a30faf --- /dev/null +++ b/src/replica/worker/WorkerHttpProcessorThread.h @@ -0,0 +1,113 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/).
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_REPLICA_WORKERHTTPPROCESSORTHREAD_H +#define LSST_QSERV_REPLICA_WORKERHTTPPROCESSORTHREAD_H + +// System headers +#include +#include +#include +#include + +// Forward declarations +namespace lsst::qserv::replica { +class WorkerHttpProcessor; +} // namespace lsst::qserv::replica + +// This header's declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerHttpProcessorThread is a thread-based request processing engine + * for replication requests within worker-side services. + */ +class WorkerHttpProcessorThread : public std::enable_shared_from_this { +public: + /** + * Static factory method is needed to prevent issues with the lifespan + * and memory management of instances created otherwise (as values or via + * low-level pointers). + * + * @param processor A pointer to the processor which launched this thread. This pointer + * will be used for making callbacks to the processor on the completed or rejected requests.
+ * @return a pointer to the created object + */ + static std::shared_ptr create( + std::shared_ptr const& processor); + + WorkerHttpProcessorThread() = delete; + WorkerHttpProcessorThread(WorkerHttpProcessorThread const&) = delete; + WorkerHttpProcessorThread& operator=(WorkerHttpProcessorThread const&) = delete; + + ~WorkerHttpProcessorThread() = default; + + /// @return identifier of this thread object + unsigned int id() const { return _id; } + + /// @return 'true' if the processing thread is still running (the thread object exists) + bool isRunning() const; + + /** + * Create and run the thread (if none is still running) fetching + * and processing requests until method stop() is called. + */ + void run(); + + /** + * Tell the running thread to abort processing the current + * request (if any), put that request back into the input queue, + * stop fetching new requests and finish. The thread can be resumed + * later by calling method run(). + * + * @note This is an asynchronous operation. + */ + void stop(); + + /// @return context string for logs + std::string context() const { return "THREAD: " + std::to_string(_id) + " "; } + +private: + /// @see WorkerHttpProcessorThread::create() + WorkerHttpProcessorThread(std::shared_ptr const& processor, unsigned int id); + + /** + * Event handler called by the thread when it's about to stop. The handler + * resets the stop flag, releases the thread object and notifies the processor. + */ + void _stopped(); + + // Input parameters + + std::shared_ptr const _processor; + + /// The identifier of this thread object + unsigned int const _id; + + /// The processing thread is created on demand when calling method run() + std::unique_ptr _thread; + + /// The flag to be raised to tell the running thread to stop. + /// The thread will reset this flag when it finishes.
+ std::atomic _stop; +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_REPLICA_WORKERHTTPPROCESSORTHREAD_H diff --git a/src/replica/worker/WorkerHttpRequest.cc b/src/replica/worker/WorkerHttpRequest.cc new file mode 100644 index 000000000..4fd50f7b0 --- /dev/null +++ b/src/replica/worker/WorkerHttpRequest.cc @@ -0,0 +1,275 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ + +// Class header +#include "replica/worker/WorkerHttpRequest.h" + +// System headers +#include + +// Third party headers +#include "boost/date_time/posix_time/posix_time.hpp" + +// Qserv headers +#include "replica/config/Configuration.h" +#include "replica/services/ServiceProvider.h" + +// LSST headers +#include "lsst/log/Log.h" + +#define CONTEXT context("WorkerHttpRequest", __func__) + +using namespace std; +using namespace std::placeholders; +using json = nlohmann::json; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.WorkerHttpRequest"); +} // namespace + +namespace lsst::qserv::replica { + +replica::Mutex WorkerHttpRequest::_mtxDataFolderOperations; + +atomic WorkerHttpRequest::_numInstances{0}; + +// The expiration timeout is taken from the request header. If the header carries 0 then +// the configuration parameter ("controller", "request-timeout-sec") is used instead. +// The expiration timer is only constructed here; it is started by method init(). +WorkerHttpRequest::WorkerHttpRequest(shared_ptr const& serviceProvider, string const& worker, + string const& type, protocol::QueuedRequestHdr const& hdr, + json const& req, ExpirationCallbackType const& onExpired) + : _serviceProvider(serviceProvider), + _worker(worker), + _type(type), + _hdr(hdr), + _req(req), + _onExpired(onExpired), + _expirationTimeoutSec(hdr.timeout == 0 ? 
serviceProvider->config()->get( + "controller", "request-timeout-sec") + : hdr.timeout), + _expirationTimer(serviceProvider->io_service()), + _status(protocol::Status::CREATED), + _extendedStatus(protocol::StatusExt::NONE), + _performance() { + _numInstances++; + LOGS(_log, LOG_LVL_TRACE, CONTEXT << " numInstances: " << _numInstances); +} + +WorkerHttpRequest::~WorkerHttpRequest() { + _numInstances--; + LOGS(_log, LOG_LVL_TRACE, CONTEXT << " numInstances: " << _numInstances); + dispose(); +} + +// NOTE(review): parameter 'context_' is not used by this method, the exception message +// is built from CONTEXT - confirm this is intended. +void WorkerHttpRequest::checkIfCancelling(replica::Lock const& lock, string const& context_) { + switch (status()) { + case protocol::Status::IN_PROGRESS: + break; + case protocol::Status::IS_CANCELLING: + setStatus(lock, protocol::Status::CANCELLED); + throw WorkerHttpRequestCancelled(); + default: + throw logic_error(CONTEXT + " not allowed while in status: " + protocol::toString(status())); + } +} + +WorkerHttpRequest::ErrorContext WorkerHttpRequest::reportErrorIf(bool errorCondition, + protocol::StatusExt extendedStatus, + string const& errorMsg) { + WorkerHttpRequest::ErrorContext errorContext; + if (errorCondition) { + errorContext.failed = true; + errorContext.extendedStatus = extendedStatus; + // NOTE(review): there is no separator between " execute" and the message - confirm. + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " execute" << errorMsg); + } + return errorContext; +} + +void WorkerHttpRequest::init() { + LOGS(_log, LOG_LVL_TRACE, CONTEXT); + replica::Lock lock(_mtx, CONTEXT); + if (status() != protocol::Status::CREATED) return; + + // Start the expiration timer. The handler is bound to a shared pointer to this request. + if (_expirationTimeoutSec != 0) { + _expirationTimer.cancel(); + _expirationTimer.expires_from_now(boost::posix_time::seconds(_expirationTimeoutSec)); + _expirationTimer.async_wait(bind(&WorkerHttpRequest::_expired, shared_from_this(), _1)); + LOGS(_log, LOG_LVL_TRACE, + CONTEXT << " started timer with _expirationTimeoutSec: " << _expirationTimeoutSec); + } +} + +void WorkerHttpRequest::start() { + LOGS(_log, LOG_LVL_TRACE, CONTEXT); + replica::Lock lock(_mtx, CONTEXT); + switch (status()) { 
+ case protocol::Status::CREATED: + setStatus(lock, protocol::Status::IN_PROGRESS); + break; + default: + throw logic_error(CONTEXT + " not allowed while in status: " + protocol::toString(status())); + } +} + +void WorkerHttpRequest::cancel() { + LOGS(_log, LOG_LVL_TRACE, CONTEXT); + replica::Lock lock(_mtx, CONTEXT); + // There is no 'default' branch: no exception is thrown by this method for any status. + switch (status()) { + case protocol::Status::QUEUED: + case protocol::Status::CREATED: + case protocol::Status::CANCELLED: + setStatus(lock, protocol::Status::CANCELLED); + break; + case protocol::Status::IN_PROGRESS: + case protocol::Status::IS_CANCELLING: + setStatus(lock, protocol::Status::IS_CANCELLING); + break; + + // Nothing to be done to the completed requests + case protocol::Status::SUCCESS: + case protocol::Status::BAD: + case protocol::Status::FAILED: + break; + } +} + +void WorkerHttpRequest::rollback() { + LOGS(_log, LOG_LVL_TRACE, CONTEXT); + replica::Lock lock(_mtx, CONTEXT); + switch (status()) { + case protocol::Status::CREATED: + case protocol::Status::IN_PROGRESS: + setStatus(lock, protocol::Status::CREATED); + break; + case protocol::Status::IS_CANCELLING: + setStatus(lock, protocol::Status::CANCELLED); + throw WorkerHttpRequestCancelled(); + break; + default: + throw logic_error(CONTEXT + " not allowed while in status: " + protocol::toString(status())); + } +} + +void WorkerHttpRequest::stop() { + LOGS(_log, LOG_LVL_TRACE, CONTEXT); + replica::Lock lock(_mtx, CONTEXT); + setStatus(lock, protocol::Status::CREATED); +} + +void WorkerHttpRequest::dispose() noexcept { + LOGS(_log, LOG_LVL_TRACE, CONTEXT); + replica::Lock lock(_mtx, CONTEXT); + if (_expirationTimeoutSec != 0) { + try { + _expirationTimer.cancel(); + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, + CONTEXT << " request expiration couldn't be cancelled, ex: " << ex.what()); + } + } +} + +json WorkerHttpRequest::toJson(bool includeResultIfFinished) const { + LOGS(_log, LOG_LVL_TRACE, CONTEXT); + + // IMPORTANT: the lock is not needed here because the data 
read by the method + // are safe to read w/o any synchronization. The only exception is the results + // which is not a problem since results are only read after the request is finished. + + json response = _hdr.toJson(); + response["req"] = _req; + response["type"] = _type; + response["status"] = protocol::toString(_status); + response["status_ext"] = protocol::toString(_extendedStatus); + response["expiration_timeout_sec"] = _expirationTimeoutSec; + response["performance"] = _performance.toJson(); + response["result"] = json::object(); + if (includeResultIfFinished && _status == protocol::Status::SUCCESS) { + getResult(response["result"]); + } + return response; +} + +string WorkerHttpRequest::context(string const& className, string const& func) const { + return id() + " " + type() + " " + protocol::toString(status()) + " " + className + "::" + func; +} + +void WorkerHttpRequest::setStatus(replica::Lock const& lock, protocol::Status status, + protocol::StatusExt extendedStatus) { + LOGS(_log, LOG_LVL_TRACE, + CONTEXT << " " << protocol::toString(_status, _extendedStatus) << " -> " + << protocol::toString(status, extendedStatus)); + switch (status) { + case protocol::Status::CREATED: + _performance.start_time = 0; + _performance.finish_time = 0; + break; + case protocol::Status::IN_PROGRESS: + _performance.setUpdateStart(); + _performance.finish_time = 0; + break; + case protocol::Status::IS_CANCELLING: + break; + case protocol::Status::CANCELLED: + + // Set the start time to some meaningful value in case the request was + // cancelled while sitting in the input queue + if (0 == _performance.start_time) _performance.setUpdateStart(); + _performance.setUpdateFinish(); + break; + + case protocol::Status::SUCCESS: + case protocol::Status::FAILED: + _performance.setUpdateFinish(); + break; + // NOTE(review): statuses QUEUED and BAD (both handled by cancel()) are not listed + // above and end up here - confirm that throwing is intended for them. + default: + throw logic_error(CONTEXT + " unhandled status: " + protocol::toString(status)); + } + + // ATTENTION: the top-level status is the last to be modified in + // the 
state transition to ensure clients will see a consistent state + // of the object. + _extendedStatus = extendedStatus; + _status = status; +} + +void WorkerHttpRequest::_expired(boost::system::error_code const& ec) { + LOGS(_log, LOG_LVL_TRACE, + CONTEXT << (ec == boost::asio::error::operation_aborted ? " ** ABORTED **" : "")); + + replica::Lock lock(_mtx, CONTEXT); + + // Clearing the stored callback after finishing the up-stream notification + // has two purposes: + // + // 1. it guarantees no more than a one-time notification + // 2. it breaks the up-stream dependency on a caller object if a shared + // pointer to the object was mentioned as the lambda-function's closure + + // Ignore this event if the timer was aborted. Otherwise the callback is moved + // into a task posted to the I/O service rather than being called from here. + if (ec != boost::asio::error::operation_aborted) { + if (_onExpired != nullptr) { + serviceProvider()->io_service().post(bind(move(_onExpired), _hdr.id)); + } + } + _onExpired = nullptr; +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerHttpRequest.h b/src/replica/worker/WorkerHttpRequest.h new file mode 100644 index 000000000..6b9921e98 --- /dev/null +++ b/src/replica/worker/WorkerHttpRequest.h @@ -0,0 +1,352 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ +#ifndef LSST_QSERV_REPLICA_WORKERHTTPREQUEST_H +#define LSST_QSERV_REPLICA_WORKERHTTPREQUEST_H + +// System headers +#include +#include +#include +#include +#include + +// Third party headers +#include "boost/asio.hpp" + +// Qserv headers +#include "replica/proto/Protocol.h" +#include "replica/util/Common.h" +#include "replica/util/Mutex.h" +#include "replica/util/Performance.h" + +// Forward declarations +namespace lsst::qserv::replica { +class ServiceProvider; +} // namespace lsst::qserv::replica + +// This header's declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerHttpRequestCancelled represents an exception thrown when + * a replication request is cancelled + */ +class WorkerHttpRequestCancelled : public std::exception { +public: + /// @return a short description of the exception + char const* what() const noexcept override { return "cancelled"; } +}; + +/** + * Class WorkerHttpRequest is the base class for a family of the worker-side + * requests which require non-deterministic interactions with the server's + * environment (network, disk I/O, etc.). Generally speaking, all requests + * which can't be implemented instantaneously fall into this category. + */ +class WorkerHttpRequest : public std::enable_shared_from_this { +public: + /// The function type for notifications on the expiration of the request + /// given its unique identifier. + typedef std::function ExpirationCallbackType; + + WorkerHttpRequest() = delete; + WorkerHttpRequest(WorkerHttpRequest const&) = delete; + WorkerHttpRequest& operator=(WorkerHttpRequest const&) = delete; + + /// Destructor (can't 'override' because the base class's one is not virtual) + /// Also, non-trivial destructor is needed to stop the request expiration + /// timer (if any was started by method init()).
+ virtual ~WorkerHttpRequest(); + + std::shared_ptr const& serviceProvider() const { return _serviceProvider; } + std::string const& worker() const { return _worker; } + std::string const& type() const { return _type; } + std::string const& id() const { return _hdr.id; } + int priority() const { return _hdr.priority; } + nlohmann::json const& req() const { return _req; } + protocol::Status status() const { return _status; } + protocol::StatusExt extendedStatus() const { return _extendedStatus; } + + WorkerPerformance const& performance() const { return _performance; } + + /** + * This method is called from the initial state protocol::Status::CREATED in order + * to start the request expiration timer. It's safe to call this operation + * multiple times. Each invocation of the method will result in cancelling + * the previously set timer (if any) and starting a new one. + * + * @note the call is ignored if the request is not in the state + * protocol::Status::CREATED. No timer is started if the expiration timeout is 0. + */ + void init(); + + /** + * This method is called from the initial state protocol::Status::CREATED in order + * to prepare the request for processing (to respond to methods 'execute', + * 'cancel', 'rollback' or 'stop'). The final state upon the completion + * of the method should be protocol::Status::IN_PROGRESS. + * + * @throws std::logic_error if the request is not in the state protocol::Status::CREATED + */ + void start(); + + /** + * This method should be invoked (repeatedly) to execute the request until + * it returns 'true' or throws an exception. Note that returning 'true' + * may mean both success or failure, depending on the completion status + * of the request. + * + * This method is required to be called while the request state is protocol::Status::IN_PROGRESS. + * The method will throw custom exception WorkerHttpRequestCancelled when it detects a cancellation + * request. + * + * @return result of the operation as explained above + */ + virtual bool execute() = 0; + + /** + * Cancel execution of the request. + * + * The effect of the operation varies depending on the current state of + * the request. 
The default (the base class's implementation) assumes + * the following transitions: + * + * {protocol::Status::QUEUED,protocol::Status::CREATED,protocol::Status::CANCELLED} -> protocol::Status::CANCELLED + * {protocol::Status::IN_PROGRESS,protocol::Status::IS_CANCELLING} -> protocol::Status::IS_CANCELLING + * {protocol::Status::SUCCESS,protocol::Status::BAD,protocol::Status::FAILED} -> no change (no exception is thrown) + */ + virtual void cancel(); + + /** + * Roll back the request into its initial state and cleanup partial results + * if possible. + * + * The effect of the operation varies depending on the current state of + * the request. The default (the base class's implementation) assumes + * the following transitions: + * + * {protocol::Status::CREATED, protocol::Status::IN_PROGRESS} -> protocol::Status::CREATED + * {protocol::Status::IS_CANCELLING} -> protocol::Status::CANCELLED -> throw WorkerHttpRequestCancelled + * {*} -> throw std::logic_error + */ + virtual void rollback(); + + /** + * This method is called from *ANY* initial state in order to turn + * the request back into the initial protocol::Status::CREATED. + */ + void stop(); + + /** + * This method should be used to cancel the request expiration timer. + * Normally this method is initiated during the external "garbage collection" + * of requests to ensure all resources (including a copy of a smart pointer onto + * objects of the request classes) held by timers get released. + * + * @note this method won't throw any exceptions so that it could + * be invoked from the destructor. Exceptions derived from std::exception + * which are thrown while cancelling the timer will be intercepted + * and reported as warnings to the message logger. + */ + void dispose() noexcept; + + /** + * Extract the extra data from the request and put it into the response object. 
+ * @param includeResultIfFinished (optional) flag to include results if the request has finished + * with status protocol::Status::SUCCESS + * @return the JSON object describing the request + */ + nlohmann::json toJson(bool includeResultIfFinished = false) const; + + /// @return the context string + std::string context(std::string const& className, std::string const& func) const; + +protected: + /** + * The normal constructor of the class + * + * @param serviceProvider provider is needed to access the Configuration of + * a setup and for validating the input parameters + * @param worker the name of a worker. It must be the same worker as the one + * where the request is going to be processed. + * @param type the type name of a request + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @param onExpired request expiration callback function + * @throws std::invalid_argument if the worker is unknown + * (NOTE(review): no such check is made by the constructor itself - confirm where it's done) + */ + WorkerHttpRequest(std::shared_ptr const& serviceProvider, std::string const& worker, + std::string const& type, protocol::QueuedRequestHdr const& hdr, + nlohmann::json const& req, ExpirationCallbackType const& onExpired); + + /** + * The method is used to check if the request has entered the cancellation state. + * The implementation assumes the following transitions: + * + * {protocol::Status::IN_PROGRESS} -> protocol::Status::IN_PROGRESS + * {protocol::Status::IS_CANCELLING} -> protocol::Status::CANCELLED -> throw WorkerHttpRequestCancelled + * {*} -> throw std::logic_error + * + * @param lock a lock on _mtx which was acquired before calling this method + * @param context_ a scope class/method from where the method was called + * @throws WorkerHttpRequestCancelled if the request is being cancelled. + * @throws std::logic_error if the state is not as expected. 
+ */ + void checkIfCancelling(replica::Lock const& lock, std::string const& context_); + + /** Set the status + * + * @note this method needs to be called within a thread-safe context + * when moving requests between different queues. + * + * @param lock a lock which was acquired before calling this method + * @param status primary status to be set + * @param extendedStatus secondary status to be set + * @throws std::logic_error if the primary status is not among those handled by the method + */ + void setStatus(replica::Lock const& lock, protocol::Status status, + protocol::StatusExt extendedStatus = protocol::StatusExt::NONE); + + /** + * Fill in the information object for the specified request based on its + * actual type. + * @param result an object to be filled + */ + virtual void getResult(nlohmann::json& result) const = 0; + + /** + * Structure ErrorContext is used for tracking errors reported by + * method 'reportErrorIf' + */ + struct ErrorContext { + // State of the object + bool failed; + protocol::StatusExt extendedStatus; + + ErrorContext() : failed(false), extendedStatus(protocol::StatusExt::NONE) {} + + /** + * Merge the context of another object into the current one. + * + * @note Only the first error code will be stored when an error condition + * is detected. An assumption is that the first error would usually cause + * a "chain reaction", hence only the first one typically matters. + * Other details could be found in the log files if needed. + * @param rhs input context to be merged with the current state + * @return a reference to the current (modified) object + */ + ErrorContext& operator||(const ErrorContext& rhs) { + if (&rhs != this) { + if (rhs.failed and not failed) { + failed = true; + extendedStatus = rhs.extendedStatus; + } + } + return *this; + } + }; + + /** + * Check if the error condition is set and report the error. + * The error message will be sent to the corresponding logging + * stream. 
+ * + * @param condition if set to 'true' then there is an error condition + * @param extendedStatus extended status corresponding to the condition + * (will be ignored if no error condition is present) + * @param errorMsg a message to be reported into the log stream + * @return the context object encapsulating values passed in parameters + * 'condition' and 'extendedStatus' + */ + ErrorContext reportErrorIf(bool condition, protocol::StatusExt extendedStatus, + std::string const& errorMsg); + + /// Return shared pointer of the desired subclass (no dynamic type checking) + template + std::shared_ptr shared_from_base() { + return std::static_pointer_cast(shared_from_this()); + } + + // Input parameters + + std::shared_ptr const _serviceProvider; + + std::string const _worker; + std::string const _type; + protocol::QueuedRequestHdr const _hdr; + nlohmann::json const _req; + + ExpirationCallbackType _onExpired; ///< The callback is reset when the request gets expired + /// or explicitly disposed. + unsigned int const _expirationTimeoutSec; + + /// This timer is used (if configured) to limit the total duration of time + /// a request could exist from its creation till termination. The timer + /// is started by method init(). And it's explicitly cancelled by method + /// dispose(), which is also called when a request object gets destroyed. + /// + /// If the timer has a chance to expire then the request expiration callback + /// (if any) passed into the constructor will be invoked to notify the processor + /// on the expiration event. + boost::asio::deadline_timer _expirationTimer; + + // 2-layer state of a request + + std::atomic _status; + std::atomic _extendedStatus; + + /// Performance counters + WorkerPerformance _performance; + + /// Mutex guarding API calls where it's needed + mutable replica::Mutex _mtx; + + /// Mutex guarding operations with the worker's data folder + static replica::Mutex _mtxDataFolderOperations; + +private: + /** + * Request expiration timer's handler. 
The expiration interval (if any) + * is obtained from the Controller-side requests or obtained from + * the configuration service. When the request expires (and if the timer + * is not aborted due to request disposal) then an upstream callback + * is invoked. + * + * @param ec error code to be checked to see if the timer was aborted + * by the explicit request disposal operation. + */ + void _expired(boost::system::error_code const& ec); + + // For memory usage monitoring and memory leak diagnostic. + static std::atomic _numInstances; +}; + +/** + * Structure WorkerHttpRequestCompare is a functor representing a comparison type + * for strict weak ordering required by std::priority_queue + */ +struct WorkerHttpRequestCompare { + /** + * Sort requests by their priorities + * @param lhs pointer to a request on the left side of a logical comparison + * @param rhs pointer to a request on the right side of a logical comparison + * @return 'true' if the priority of 'lhs' is strictly less than the one of 'rhs' + */ + bool operator()(std::shared_ptr const& lhs, + std::shared_ptr const& rhs) const { + return lhs->priority() < rhs->priority(); + } +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_REPLICA_WORKERHTTPREQUEST_H diff --git a/src/replica/worker/WorkerHttpSvc.cc b/src/replica/worker/WorkerHttpSvc.cc new file mode 100644 index 000000000..249a2b9c5 --- /dev/null +++ b/src/replica/worker/WorkerHttpSvc.cc @@ -0,0 +1,149 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "replica/worker/WorkerHttpSvc.h" + +// System headers +#include +#include + +// Qserv headers +#include "http/ChttpMetaModule.h" +#include "replica/config/Configuration.h" +#include "replica/services/ServiceProvider.h" +#include "replica/util/Common.h" +#include "replica/worker/WorkerHttpProcessor.h" +#include "replica/worker/WorkerHttpSvcMod.h" + +// LSST headers +#include "lsst/log/Log.h" + +// Third party headers +#include "httplib.h" +#include "nlohmann/json.hpp" + +using namespace nlohmann; +using namespace std; + +namespace { +string const context_ = "WORKER-HTTP-SVC "; +LOG_LOGGER _log = LOG_GET("lsst.qserv.worker.WorkerHttpSvc"); +} // namespace + +namespace lsst::qserv::replica { + +shared_ptr WorkerHttpSvc::create(shared_ptr const& serviceProvider, + string const& workerName) { + return shared_ptr(new WorkerHttpSvc(serviceProvider, workerName)); +} + +WorkerHttpSvc::WorkerHttpSvc(shared_ptr const& serviceProvider, string const& workerName) + : ChttpSvc(context_, serviceProvider, + serviceProvider->config()->get("worker", "http-svc-port"), + serviceProvider->config()->get("worker", "http-svc-max-queued-requests"), + serviceProvider->config()->get("worker", "num-http-svc-threads")), + _workerName(workerName), + _processor(WorkerHttpProcessor::create(serviceProvider, workerName)) { + // Start the processor to allow processing requests.
+ _processor->run(); +} + +void WorkerHttpSvc::registerServices(unique_ptr const& server) { + throwIf(server == nullptr, context_ + "the server is not initialized"); + auto const self = shared_from_base(); /* every handler registered below captures a copy of 'self'; NOTE(review): confirm this cannot form an ownership cycle with the server */ + server->Get("/meta/version", [self](httplib::Request const& req, httplib::Response& resp) { + json const info = json::object({{"kind", "replication-worker-svc"}, + {"id", self->_workerName}, + {"instance_id", self->serviceProvider()->instanceId()}}); + http::ChttpMetaModule::process(context_, info, req, resp, "VERSION"); + }); + server->Post("/worker/echo", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "ECHO", http::AuthType::REQUIRED); + }); + server->Post("/worker/replica/create", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "REPLICA-CREATE", http::AuthType::REQUIRED); + }); + server->Post("/worker/replica/delete", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "REPLICA-DELETE", http::AuthType::REQUIRED); + }); + server->Post("/worker/replica/find", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "REPLICA-FIND", http::AuthType::REQUIRED); + }); + server->Post("/worker/replica/find-all", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "REPLICA-FIND-ALL", http::AuthType::REQUIRED); + }); + server->Post("/worker/index", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, 
resp, + "INDEX", http::AuthType::REQUIRED); + }); + server->Post("/worker/sql", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "SQL", http::AuthType::REQUIRED); + }); + server->Get("/worker/request/track/:id", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "REQUEST-TRACK"); /* no authType passed: process() falls back to its declared default, http::AuthType::NONE */ + }); + server->Get("/worker/request/status/:id", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "REQUEST-STATUS"); /* default authType */ + }); + server->Put("/worker/request/stop/:id", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "REQUEST-STOP", http::AuthType::REQUIRED); + }); + server->Put("/worker/request/dispose", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "REQUEST-DISPOSE", http::AuthType::REQUIRED); + }); + server->Get("/worker/service/status", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "SERVICE-STATUS"); /* default authType */ + }); + server->Get("/worker/service/requests", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "SERVICE-REQUESTS"); /* default authType */ + }); + server->Put("/worker/service/suspend", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "SERVICE-SUSPEND", http::AuthType::REQUIRED); + }); + 
server->Put("/worker/service/resume", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "SERVICE-RESUME", http::AuthType::REQUIRED); + }); + server->Put("/worker/service/drain", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "SERVICE-DRAIN", http::AuthType::REQUIRED); + }); + server->Put("/worker/service/reconfig", [self](httplib::Request const& req, httplib::Response& resp) { + WorkerHttpSvcMod::process(self->serviceProvider(), self->_processor, self->_workerName, req, resp, + "SERVICE-RECONFIG", http::AuthType::REQUIRED); + }); +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerHttpSvc.h b/src/replica/worker/WorkerHttpSvc.h new file mode 100644 index 000000000..0e204649e --- /dev/null +++ b/src/replica/worker/WorkerHttpSvc.h @@ -0,0 +1,84 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ +#ifndef LSST_QSERV_REPLICA_WORKERHTTPSVC_H +#define LSST_QSERV_REPLICA_WORKERHTTPSVC_H + +// System headers +#include +#include + +// Qserv headers +#include "replica/util/ChttpSvc.h" + +// Forward declarations +namespace lsst::qserv::replica { +class ServiceProvider; +class WorkerHttpProcessor; +} // namespace lsst::qserv::replica + +namespace httplib { +class Server; +} // namespace httplib + +// This header's declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerHttpSvc is the HTTP frontend to the Replication Worker Service. + * Each instance of this class will be running in its own thread. + */ +class WorkerHttpSvc : public ChttpSvc { +public: + /** + * Create an instance of the service. + * + * @param serviceProvider For configuration, etc. services. + * @param workerName The name of a worker this service is acting upon (used for + * checking consistency of the protocol). + * @return A pointer to the created object. + */ + static std::shared_ptr create(std::shared_ptr const& serviceProvider, + std::string const& workerName); + + WorkerHttpSvc() = delete; + WorkerHttpSvc(WorkerHttpSvc const&) = delete; + WorkerHttpSvc& operator=(WorkerHttpSvc const&) = delete; + + virtual ~WorkerHttpSvc() = default; + +protected: + /// @see HttpSvc::registerServices() + virtual void registerServices(std::unique_ptr const& server) override; + +private: + /// @see WorkerHttpSvc::create() + WorkerHttpSvc(std::shared_ptr const& serviceProvider, std::string const& workerName); + + // Input parameters + std::string const _workerName; + + /// The request processor (the constructor calls its run() method).
+ std::shared_ptr _processor; +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_REPLICA_WORKERHTTPSVC_H diff --git a/src/replica/worker/WorkerHttpSvcMod.cc b/src/replica/worker/WorkerHttpSvcMod.cc new file mode 100644 index 000000000..e6934381a --- /dev/null +++ b/src/replica/worker/WorkerHttpSvcMod.cc @@ -0,0 +1,242 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ + +// Class header +#include "replica/worker/WorkerHttpSvcMod.h" + +// System headers +#include + +// Third-party headers +#include + +// Qserv headers +#include "http/Method.h" +#include "replica/proto/Protocol.h" +#include "replica/worker/WorkerHttpProcessor.h" +#include "replica/services/ServiceProvider.h" + +using namespace std; +using json = nlohmann::json; + +namespace lsst::qserv::replica { + +void WorkerHttpSvcMod::process(shared_ptr const& serviceProvider, + shared_ptr const& processor, string const& workerName, + httplib::Request const& req, httplib::Response& resp, + string const& subModuleName, http::AuthType const authType) { + WorkerHttpSvcMod module(serviceProvider, processor, workerName, req, resp); + module.execute(subModuleName, authType); +} + +WorkerHttpSvcMod::WorkerHttpSvcMod(shared_ptr const& serviceProvider, + shared_ptr const& processor, string const& workerName, + httplib::Request const& req, httplib::Response& resp) + : http::ChttpModule(serviceProvider->authKey(), serviceProvider->adminAuthKey(), req, resp), + _serviceProvider(serviceProvider), + _processor(processor), + _workerName(workerName) {} + +string WorkerHttpSvcMod::context() const { return "WORKER-HTTP-SVC "; } + +json WorkerHttpSvcMod::executeImpl(string const& subModuleName) { + debug(__func__, "subModuleName: '" + subModuleName + "'"); + enforceInstanceId(__func__, _serviceProvider->instanceId()); + if (subModuleName == "ECHO") + return _echo(); + else if (subModuleName == "REPLICA-CREATE") + return _replicaCreate(); + else if (subModuleName == "REPLICA-DELETE") + return _replicaDelete(); + else if (subModuleName == "REPLICA-FIND") + return _replicaFind(); + else if (subModuleName == "REPLICA-FIND-ALL") + return _replicaFindAll(); + else if (subModuleName == "SQL") + return _sql(); + else if (subModuleName == "INDEX") + return _index(); + else if (subModuleName == "REQUEST-TRACK") + return _requestTrack(); + else if (subModuleName == "REQUEST-STATUS") + return
_requestStatus(); + else if (subModuleName == "REQUEST-STOP") + return _requestStop(); + else if (subModuleName == "REQUEST-DISPOSE") + return _requestDispose(); + else if (subModuleName == "SERVICE-SUSPEND") + return _serviceSuspend(); + else if (subModuleName == "SERVICE-RESUME") + return _serviceResume(); + else if (subModuleName == "SERVICE-STATUS") + return _serviceStatus(); + else if (subModuleName == "SERVICE-REQUESTS") + return _serviceRequests(); + else if (subModuleName == "SERVICE-DRAIN") + return _serviceDrain(); + else if (subModuleName == "SERVICE-RECONFIG") + return _serviceReconfig(); + throw invalid_argument(context() + "::" + string(__func__) + " unsupported sub-module: '" + + subModuleName + "'"); /* reached only when none of the sub-module names above matched */ +} + +protocol::QueuedRequestHdr WorkerHttpSvcMod::_parseHdr(string const& func) const { + protocol::QueuedRequestHdr const hdr(body().required("id"), body().optional("priority", 0), + body().optional("timeout", 0)); /* 'id' is mandatory; 'priority' and 'timeout' presumably fall back to 0 when absent (TODO confirm optional() semantics) */ + debug(func, "id: '" + hdr.id + "'"); + debug(func, "priority: " + to_string(hdr.priority)); + debug(func, "timeout: " + to_string(hdr.timeout)); + return hdr; +} + +json WorkerHttpSvcMod::_echo() const { + debug(__func__); + checkApiVersion(__func__, 40); + return _processor->echo(_parseHdr(__func__), body().required("req")); +} + +json WorkerHttpSvcMod::_replicaCreate() { + debug(__func__); + checkApiVersion(__func__, 40); + return _processor->createReplica(_parseHdr(__func__), body().required("req")); +} + +json WorkerHttpSvcMod::_replicaDelete() { + debug(__func__); + checkApiVersion(__func__, 40); + return _processor->deleteReplica(_parseHdr(__func__), body().required("req")); +} + +json WorkerHttpSvcMod::_replicaFind() { + debug(__func__); + checkApiVersion(__func__, 40); + return _processor->findReplica(_parseHdr(__func__), body().required("req")); +} + +json WorkerHttpSvcMod::_replicaFindAll() { + debug(__func__); + checkApiVersion(__func__, 40); + return _processor->findAllReplicas(_parseHdr(__func__), body().required("req")); +}
+ +json WorkerHttpSvcMod::_index() { + debug(__func__); + checkApiVersion(__func__, 40); + return _processor->index(_parseHdr(__func__), body().required("req")); +} + +json WorkerHttpSvcMod::_sql() { + debug(__func__); + checkApiVersion(__func__, 40); + return _processor->sql(_parseHdr(__func__), body().required("req")); +} + +json WorkerHttpSvcMod::_requestTrack() { + debug(__func__); + checkApiVersion(__func__, 40); + string const id = params().at("id"); + debug(__func__, "id: '" + id + "'"); + return _processor->trackRequest(id); +} + +json WorkerHttpSvcMod::_requestStatus() { + debug(__func__); + checkApiVersion(__func__, 40); + string const id = params().at("id"); + debug(__func__, "id: '" + id + "'"); + return _processor->requestStatus(id); +} + +json WorkerHttpSvcMod::_requestStop() { + debug(__func__); + checkApiVersion(__func__, 40); + string const id = params().at("id"); + debug(__func__, "id: '" + id + "'"); + return _processor->stopRequest(id); +} + +json WorkerHttpSvcMod::_requestDispose() { + debug(__func__); + checkApiVersion(__func__, 40); + auto const idsJson = body().required("ids"); + if (!idsJson.is_array()) + throw invalid_argument(context() + "::" + string(__func__) + " 'ids' is not an array"); + + json idsDisposedJson = json::object(); + for (auto const& idJson : idsJson) { + string const id = idJson.get(); + idsDisposedJson[id] = _processor->disposeRequest(id) ? 1 : 0; /* per-id flag: 1 when disposeRequest() returned true, 0 otherwise */ + } + return json::object({{"status", protocol::Status::SUCCESS}, + {"status_ext", protocol::StatusExt::NONE}, + {"ids_disposed", idsDisposedJson}}); +} + +json WorkerHttpSvcMod::_serviceSuspend() { + debug(__func__); + checkApiVersion(__func__, 40); + + // This operation is allowed to be asynchronous as it may take + // extra time for the processor's threads to finish on-going processing + _processor->stop(); + return _processor->toJson(_processor->state() == protocol::ServiceState::RUNNING + ? 
protocol::Status::FAILED + : protocol::Status::SUCCESS); /* FAILED is reported only if the processor still says RUNNING right after stop() */ +} + +json WorkerHttpSvcMod::_serviceResume() { + debug(__func__); + checkApiVersion(__func__, 40); + _processor->run(); + return _processor->toJson(_processor->state() == protocol::ServiceState::RUNNING + ? protocol::Status::SUCCESS + : protocol::Status::FAILED); +} + +json WorkerHttpSvcMod::_serviceStatus() { + debug(__func__); + checkApiVersion(__func__, 40); + return _processor->toJson(protocol::Status::SUCCESS); +} + +json WorkerHttpSvcMod::_serviceRequests() { + debug(__func__); + checkApiVersion(__func__, 40); + const bool includeRequests = true; + return _processor->toJson(protocol::Status::SUCCESS, includeRequests); +} + +json WorkerHttpSvcMod::_serviceDrain() { + debug(__func__); + checkApiVersion(__func__, 40); + _processor->drain(); + const bool includeRequests = true; + return _processor->toJson(protocol::Status::SUCCESS, includeRequests); +} + +json WorkerHttpSvcMod::_serviceReconfig() { + debug(__func__); + checkApiVersion(__func__, 40); + _processor->reconfig(); + return _processor->toJson(protocol::Status::SUCCESS); +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerHttpSvcMod.h b/src/replica/worker/WorkerHttpSvcMod.h new file mode 100644 index 000000000..bf72ad0bd --- /dev/null +++ b/src/replica/worker/WorkerHttpSvcMod.h @@ -0,0 +1,172 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_WORKERHTTPSVCMOD_H +#define LSST_QSERV_WORKERHTTPSVCMOD_H + +// System headers +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "http/ChttpModule.h" + +// Forward declarations + +namespace lsst::qserv::replica { +class ServiceProvider; +class WorkerHttpProcessor; +} // namespace lsst::qserv::replica + +namespace lsst::qserv::replica::protocol { +struct QueuedRequestHdr; +} // namespace lsst::qserv::replica::protocol + +// This header's declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerHttpSvcMod processes the Replication Controller's requests. + * The class is used by the HTTP server built into the worker Replication service. + */ +class WorkerHttpSvcMod : public http::ChttpModule { +public: + WorkerHttpSvcMod() = delete; + WorkerHttpSvcMod(WorkerHttpSvcMod const&) = delete; + WorkerHttpSvcMod& operator=(WorkerHttpSvcMod const&) = delete; + + virtual ~WorkerHttpSvcMod() = default; + + /** + * Process a request.
+ * + * Supported values for parameter 'subModuleName': + * + * ECHO for testing the worker-side framework + * REPLICA-CREATE for creating a replica of a chunk + * REPLICA-DELETE for deleting an existing replica of a chunk + * REPLICA-FIND for finding out if a replica is present, and reporting its state + * REPLICA-FIND-ALL for finding all replicas and reporting their states + * INDEX for extracting and returning a collection of the "director" index data + * SQL for executing various SQL statements against the worker's database + * REQUEST-TRACK for tracking status and retrieving results of the previously submitted request + * REQUEST-STATUS for checking the status of the previously submitted request + * REQUEST-STOP for stopping the previously submitted request + * REQUEST-DISPOSE for garbage collecting the request + * SERVICE-STATUS for checking the status of the worker replication service + * SERVICE-SUSPEND for suspending the worker replication service + * SERVICE-RESUME for resuming the worker replication service + * SERVICE-REQUESTS for listing the outstanding requests + * SERVICE-DRAIN for draining the worker replication service + * SERVICE-RECONFIG for reconfiguring the worker replication service + * + * @param serviceProvider The provider of services is needed to access + * the configuration and the database services. + * @param processor Request processor. + * @param workerName The name of a worker this service is acting upon (used to pull + * worker-specific configuration options for the service). + * @param req The HTTP request. + * @param resp The HTTP response channel. + * @param subModuleName The name of a submodule to be called.
+ * @param authType The authorization requirements for the module (defaults to http::AuthType::NONE) + * @throws std::invalid_argument for unknown values of parameter 'subModuleName' + */ + static void process(std::shared_ptr const& serviceProvider, + std::shared_ptr const& processor, std::string const& workerName, + httplib::Request const& req, httplib::Response& resp, + std::string const& subModuleName, + http::AuthType const authType = http::AuthType::NONE); + +protected: + virtual std::string context() const final; + virtual nlohmann::json executeImpl(std::string const& subModuleName) final; + +private: + WorkerHttpSvcMod(std::shared_ptr const& serviceProvider, + std::shared_ptr const& processor, std::string const& workerName, + httplib::Request const& req, httplib::Response& resp); + + /// Parse common parameters of the queued requests + /// @param func The name of the function to be used in the log messages + /// @return The parsed header + protocol::QueuedRequestHdr _parseHdr(std::string const& func) const; + + /// Process the ECHO request + nlohmann::json _echo() const; + + /// Process the REPLICA-CREATE request + nlohmann::json _replicaCreate(); + + /// Process the REPLICA-DELETE request + nlohmann::json _replicaDelete(); + + /// Process the REPLICA-FIND request + nlohmann::json _replicaFind(); + + /// Process the REPLICA-FIND-ALL request + nlohmann::json _replicaFindAll(); + + /// Process the INDEX request + nlohmann::json _index(); + + /// Process the SQL request + nlohmann::json _sql(); + + /// Process the REQUEST-TRACK request + nlohmann::json _requestTrack(); + + /// Process the REQUEST-STATUS request + nlohmann::json _requestStatus(); + + /// Process the REQUEST-STOP request + nlohmann::json _requestStop(); + + /// Process the REQUEST-DISPOSE request + nlohmann::json _requestDispose(); + + /// Process the SERVICE-SUSPEND request + nlohmann::json _serviceSuspend(); + + /// Process the SERVICE-RESUME request + nlohmann::json _serviceResume(); + + /// Process the SERVICE-STATUS request + 
nlohmann::json _serviceStatus(); + + /// Process the SERVICE-REQUESTS request + nlohmann::json _serviceRequests(); + + /// Process the SERVICE-DRAIN request + nlohmann::json _serviceDrain(); + + /// Process the SERVICE-RECONFIG request + nlohmann::json _serviceReconfig(); + + // Input parameters + std::shared_ptr const _serviceProvider; + std::shared_ptr _processor; + std::string const _workerName; +}; + +} // namespace lsst::qserv::replica + +#endif // LSST_QSERV_WORKERHTTPSVCMOD_H diff --git a/src/replica/worker/WorkerSqlHttpRequest.cc b/src/replica/worker/WorkerSqlHttpRequest.cc new file mode 100644 index 000000000..e27d3284f --- /dev/null +++ b/src/replica/worker/WorkerSqlHttpRequest.cc @@ -0,0 +1,416 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see .
+ */ + +// Class header +#include "replica/worker/WorkerSqlHttpRequest.h" + +// System headers +#include +#include + +// Qserv headers +#include "replica/config/Configuration.h" +#include "replica/mysql/DatabaseMySQLUtils.h" +#include "replica/services/ServiceProvider.h" +#include "replica/util/Performance.h" +#include "replica/util/Mutex.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using json = nlohmann::json; + +#define CONTEXT context("WorkerSqlHttpRequest", __func__) + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.replica.WorkerSqlHttpRequest"); + +} // namespace + +namespace lsst::qserv::replica { + +using namespace database::mysql; + +shared_ptr WorkerSqlHttpRequest::create( + shared_ptr const& serviceProvider, string const& worker, + protocol::QueuedRequestHdr const& hdr, json const& req, ExpirationCallbackType const& onExpired) { + auto ptr = shared_ptr( + new WorkerSqlHttpRequest(serviceProvider, worker, hdr, req, onExpired)); + ptr->init(); + return ptr; +} + +WorkerSqlHttpRequest::WorkerSqlHttpRequest(shared_ptr const& serviceProvider, + string const& worker, protocol::QueuedRequestHdr const& hdr, + json const& req, ExpirationCallbackType const& onExpired) + : WorkerHttpRequest(serviceProvider, worker, + "SQL:" + protocol::toString(protocol::SqlRequestType(req.at("type"))), hdr, req, + onExpired), + _sqlRequestType(req.at("type")), + _user(req.at("user")), + _password(req.at("password")), + _databaseInfo(serviceProvider->config()->databaseInfo(req.at("database"))), + _maxRows(req.at("max_rows")), + _batchMode(req.at("batch_mode")), + _resultSets(json::array()) { + // Parse the request-specific parameters.
+ switch (_sqlRequestType) { + case protocol::SqlRequestType::QUERY: + _query = req.at("query"); + break; + case protocol::SqlRequestType::CREATE_TABLE: + if (!_batchMode) _table = req.at("table"); + _engine = req.at("engine"); + _columns = replica::parseSqlColumns(req.at("columns")); + _partitionByColumn = req.at("partition_by_column"); + break; + case protocol::SqlRequestType::CREATE_TABLE_INDEX: + if (!_batchMode) _table = req.at("table"); + _index = SqlIndexDef(req.at("index")); + break; + case protocol::SqlRequestType::DROP_TABLE_PARTITION: + if (!_batchMode) _table = req.at("table"); + _transactionId = req.at("transaction_id"); + break; + case protocol::SqlRequestType::DROP_TABLE_INDEX: + if (!_batchMode) _table = req.at("table"); + _indexName = req.at("index_name"); + break; + case protocol::SqlRequestType::ALTER_TABLE: + if (!_batchMode) _table = req.at("table"); + _alterTableSpec = req.at("alter_spec"); + break; + default: + break; + } +} + +void WorkerSqlHttpRequest::getResult(json& result) const { + // No locking is needed here since the method is called only after + // the request is completed. + result["result_sets"] = _resultSets; +} + +bool WorkerSqlHttpRequest::execute() { + LOGS(_log, LOG_LVL_DEBUG, CONTEXT); + + replica::Lock lock(_mtx, CONTEXT); + checkIfCancelling(lock, __func__); + + try { + // Pre-create the default result-set message before any operations with + // the database service. This is needed to report errors in method _reportFailure. + json* resultSet = &_currentResultSet(lock, true); + + // Open the connection once and then manage transactions via + // the connection handlers down below to ensure no lingering transactions + // are left after the completion of the request's execution (whether it's + // successful or not). + auto const connection = _connector(); + + // Check if this is the "batch" request which involves executing + // a series of queries.
This kind of requests needs to be processed + // slightly differently since we need to intercept and properly handle + // a few known (and somewhat expected) MySQL errors w/o aborting + // the whole request. + if (_batchMode) { + // Count the number of failures for proper error reporting on + // the current request. NOTE(review): the constructor above does not fill '_tables'; confirm where it is populated. + size_t numFailures = 0; + bool first = true; + for (string const& table : _tables) { + // The very first iteration of the loop uses the default result set created + // earlier, any other iteration creates a new one. The pointer is re-seated here: + // assigning through a reference would overwrite the previous table's result set. + if (exchange(first, false) == false) { + resultSet = &_currentResultSet(lock, true); + } + (*resultSet)["scope"] = table; + try { + ConnectionHandler const h(connection); + h.conn->execute([&](decltype(h.conn) const& conn_) { + conn_->begin(); + auto const query = _generateQuery(conn_, table); + if (query.mutexName.empty()) { + conn_->execute(query.query); + } else { + replica::Lock const lock(serviceProvider()->getNamedMutex(query.mutexName), + CONTEXT); + conn_->execute(query.query); + } + _extractResultSet(lock, conn_); + conn_->commit(); + }); + } catch (database::mysql::ER_NO_SUCH_TABLE_ const& ex) { + ++numFailures; + (*resultSet)["status_ext"] = protocol::StatusExt::NO_SUCH_TABLE; + (*resultSet)["error"] = string(ex.what()); + } catch (database::mysql::ER_PARTITION_MGMT_ON_NONPARTITIONED_ const& ex) { + ++numFailures; + (*resultSet)["status_ext"] = protocol::StatusExt::NOT_PARTITIONED_TABLE; + (*resultSet)["error"] = string(ex.what()); + } catch (database::mysql::ER_DUP_KEYNAME_ const& ex) { + ++numFailures; + (*resultSet)["status_ext"] = protocol::StatusExt::DUPLICATE_KEY; + (*resultSet)["error"] = string(ex.what()); + } catch (database::mysql::ER_CANT_DROP_FIELD_OR_KEY_ const& ex) { + ++numFailures; + (*resultSet)["status_ext"] = protocol::StatusExt::CANT_DROP_KEY; + (*resultSet)["error"] = string(ex.what()); + } + } + if (numFailures > 0) { + setStatus(lock, protocol::Status::FAILED, protocol::StatusExt::MULTIPLE); + } else { + setStatus(lock, 
protocol::Status::SUCCESS); + } + } else { + // TODO: the algorithm will only report a result set of the last query + // from the multi-query collections. The implementations of the corresponding + // requests should take this into account. + ConnectionHandler const h(connection); + h.conn->execute([&](decltype(h.conn) const& conn_) { + conn_->begin(); + for (auto const& query : _queries(conn_)) { + if (query.mutexName.empty()) { + conn_->execute(query.query); + } else { + replica::Lock const lock(serviceProvider()->getNamedMutex(query.mutexName), CONTEXT); + conn_->execute(query.query); + } + _extractResultSet(lock, conn_); + } + conn_->commit(); + }); + setStatus(lock, protocol::Status::SUCCESS); + } + } catch (database::mysql::ER_NO_SUCH_TABLE_ const& ex) { + _reportFailure(lock, protocol::StatusExt::NO_SUCH_TABLE, ex.what()); + } catch (database::mysql::ER_PARTITION_MGMT_ON_NONPARTITIONED_ const& ex) { + _reportFailure(lock, protocol::StatusExt::NOT_PARTITIONED_TABLE, ex.what()); + } catch (database::mysql::ER_DUP_KEYNAME_ const& ex) { + _reportFailure(lock, protocol::StatusExt::DUPLICATE_KEY, ex.what()); + } catch (database::mysql::ER_CANT_DROP_FIELD_OR_KEY_ const& ex) { + _reportFailure(lock, protocol::StatusExt::CANT_DROP_KEY, ex.what()); + } catch (database::mysql::Error const& ex) { + _reportFailure(lock, protocol::StatusExt::MYSQL_ERROR, ex.what()); + } catch (invalid_argument const& ex) { + _reportFailure(lock, protocol::StatusExt::INVALID_PARAM, ex.what()); + } catch (out_of_range const& ex) { + _reportFailure(lock, protocol::StatusExt::LARGE_RESULT, ex.what()); + } catch (exception const& ex) { + _reportFailure(lock, protocol::StatusExt::OTHER_EXCEPTION, ex.what()); + } + return true; +} + +Connection::Ptr WorkerSqlHttpRequest::_connector() const { + // A choice of credential for connecting to the database service depends + // on a type of the request.
For the sake of greater security, arbitrary + // queries require a client to explicitly provide the credentials. + // Otherwise, using credentials from the worker's configuration. + bool const clientCredentials = _sqlRequestType == protocol::SqlRequestType::QUERY; + auto connectionParams = Configuration::qservWorkerDbParams(); + if (clientCredentials) { + connectionParams.user = _user; + connectionParams.password = _password; + } + return Connection::open(connectionParams); +} + +vector WorkerSqlHttpRequest::_queries(Connection::Ptr const& conn) const { + QueryGenerator const g(conn); + vector queries; + switch (_sqlRequestType) { + case protocol::SqlRequestType::QUERY: + queries.emplace_back(Query(_query)); + break; + case protocol::SqlRequestType::CREATE_DATABASE: { + bool const ifNotExists = true; + string const query = g.createDb(_databaseInfo.name, ifNotExists); + queries.emplace_back(Query(query)); + break; + } + case protocol::SqlRequestType::DROP_DATABASE: { + bool const ifExists = true; + string const query = g.dropDb(_databaseInfo.name, ifExists); + queries.emplace_back(Query(query)); + break; + } + case protocol::SqlRequestType::ENABLE_DATABASE: { + // Using REPLACE instead of INSERT to avoid hitting the DUPLICATE KEY error + // if such entry already exists in the table.
+ string const query = g.replace("qservw_worker", "Dbs", _databaseInfo.name); + queries.emplace_back(Query(query)); + break; + } + case protocol::SqlRequestType::DISABLE_DATABASE: { + string const where = g.where(g.eq("db", _databaseInfo.name)); + queries.emplace_back(Query(g.delete_(g.id("qservw_worker", "Chunks")) + where)); + queries.emplace_back(Query(g.delete_(g.id("qservw_worker", "Dbs")) + where)); + break; + } + case protocol::SqlRequestType::GRANT_ACCESS: { + string const query = g.grant("ALL", _databaseInfo.name, _user, "localhost"); + queries.emplace_back(Query(query)); + break; + } + default: + // The remaining remaining types of requests require the name of a table + // affected by the operation. + queries.emplace_back(_generateQuery(conn, _table)); + break; + } + return queries; +} + +Query WorkerSqlHttpRequest::_generateQuery(Connection::Ptr const& conn, string const& table) const { + QueryGenerator const g(conn); + SqlId const databaseTable = g.id(_databaseInfo.name, table); + switch (_sqlRequestType) { + case protocol::SqlRequestType::CREATE_TABLE: { + list const keys; + bool const ifNotExists = true; + string query = g.createTable(databaseTable, ifNotExists, _columns, keys, _engine); + + // If MySQL partitioning was requested for the table then configure partitioning + // parameters and add the initial partition corresponding to the default + // transaction identifier. The table will be partitioned based on values of + // the transaction identifiers in the specified column. 
+ string const partitionByColumn = _partitionByColumn; + if (!partitionByColumn.empty()) { + TransactionId const defaultTransactionId = 0; + query += g.partitionByList(partitionByColumn) + g.partition(defaultTransactionId); + } + return Query(query, databaseTable.str); + } + case protocol::SqlRequestType::DROP_TABLE: { + bool const ifExists = true; + string const query = g.dropTable(databaseTable, ifExists); + return Query(query, databaseTable.str); + } + case protocol::SqlRequestType::DROP_TABLE_PARTITION: { + bool const ifExists = true; + string const query = g.alterTable(databaseTable) + g.dropPartition(_transactionId, ifExists); + return Query(query, databaseTable.str); + } + case protocol::SqlRequestType::REMOVE_TABLE_PARTITIONING: { + string const query = g.alterTable(databaseTable) + g.removePartitioning(); + return Query(query, databaseTable.str); + } + case protocol::SqlRequestType::CREATE_TABLE_INDEX: { + string const query = + g.createIndex(databaseTable, _index.name, _index.spec, _index.keys, _index.comment); + return Query(query, databaseTable.str); + } + case protocol::SqlRequestType::DROP_TABLE_INDEX: { + string const query = g.dropIndex(databaseTable, _indexName); + return Query(query, databaseTable.str); + } + case protocol::SqlRequestType::GET_TABLE_INDEX: { + return Query(g.showIndexes(databaseTable)); + } + case protocol::SqlRequestType::ALTER_TABLE: { + string const query = g.alterTable(databaseTable, _alterTableSpec); + return Query(query, databaseTable.str); + } + case protocol::SqlRequestType::TABLE_ROW_STATS: { + // The transaction identifier column is not required to be present in + // the legacy catalogs (ingested w/o super-transactions), or in (the narrow) tables + // in which the column was removed to save disk space. The query generator + // implemented below accounts for this scenario by consulting MySQL's + // information schema. 
If the column isn't present then the default transaction + // identifier 0 will be injected into the result set. + string query = g.select(Sql::COUNT_STAR) + + g.from(DoNotProcess(g.id("information_schema", "COLUMNS"))) + + g.where(g.eq("TABLE_SCHEMA", _databaseInfo.name), g.eq("TABLE_NAME", table), + g.eq("COLUMN_NAME", "qserv_trans_id")); + int count = 0; + selectSingleValue(conn, query, count); + if (count == 0) { + string const query = + g.select(g.as(g.val(0), "qserv_trans_id"), g.as(Sql::COUNT_STAR, "num_rows")) + + g.from(DoNotProcess(databaseTable)); + return Query(query); + } + query = g.select("qserv_trans_id", g.as(Sql::COUNT_STAR, "num_rows")) + + g.from(DoNotProcess(databaseTable)) + g.groupBy("qserv_trans_id"); + return Query(query); + } + default: + throw invalid_argument( + CONTEXT + " not the table-scope request type: " + protocol::toString(_sqlRequestType)); + } +} + +void WorkerSqlHttpRequest::_extractResultSet(replica::Lock const& lock, Connection::Ptr const& conn) { + LOGS(_log, LOG_LVL_DEBUG, CONTEXT); + + json& resultSet = _currentResultSet(lock); + + // This will explicitly reset the default failure mode as it was + // initialized by the constructor of the result set class. + resultSet["status_ext"] = protocol::StatusExt::NONE; + + // Now carry over the actual rest set (if any) + resultSet["char_set_name"] = conn->charSetName(); + resultSet["has_result"] = conn->hasResult() ? 
1 : 0; + if (conn->hasResult()) { + resultSet["fields"] = conn->fieldsToJson(); + resultSet["rows"] = json::array(); + json& rowsJson = resultSet["rows"]; + size_t numRowsProcessed = 0; + Row row; + while (conn->next(row)) { + if (_maxRows != 0) { + if (numRowsProcessed >= _maxRows) { + throw out_of_range(CONTEXT + " max_rows=" + to_string(_maxRows) + " limit exceeded"); + } + ++numRowsProcessed; + } + rowsJson.push_back(row.toJson()); + } + } +} + +void WorkerSqlHttpRequest::_reportFailure(replica::Lock const& lock, protocol::StatusExt statusExt, + string const& error) { + LOGS(_log, LOG_LVL_ERROR, CONTEXT << " exception: " << error); + + // Note that the actual reason for a query to fail is recorded in its + // result set, while the final state of the whole request may vary + // depending on a kind of the request - if it's a simple or the "batch" + // request. + json& resultSet = _currentResultSet(lock); + resultSet["status_ext"] = statusExt; + resultSet["error"] = error; + setStatus(lock, protocol::Status::FAILED, _batchMode ? statusExt : protocol::StatusExt::MULTIPLE); +} + +json& WorkerSqlHttpRequest::_currentResultSet(replica::Lock const& lock, bool create) { + if (create) _resultSets.push_back(json::object()); + if (_resultSets.size() != 0) return _resultSets.back(); + throw logic_error(CONTEXT + " the operation is not allowed in this state"); +} + +} // namespace lsst::qserv::replica diff --git a/src/replica/worker/WorkerSqlHttpRequest.h b/src/replica/worker/WorkerSqlHttpRequest.h new file mode 100644 index 000000000..3db660d01 --- /dev/null +++ b/src/replica/worker/WorkerSqlHttpRequest.h @@ -0,0 +1,183 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_REPLICA_WORKERSQLHTTPREQUEST_H +#define LSST_QSERV_REPLICA_WORKERSQLHTTPREQUEST_H + +// System headers +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "replica/config/ConfigDatabase.h" +#include "replica/mysql/DatabaseMySQL.h" +#include "replica/proto/Protocol.h" +#include "replica/util/Common.h" +#include "replica/worker/WorkerHttpRequest.h" + +// Forward declarations +namespace lsst::qserv::replica { +class ServiceProvider; +} // namespace lsst::qserv::replica + +namespace lsst::qserv::replica::database::mysql { +class Connection; +} // namespace lsst::qserv::replica::database::mysql + +// This header declarations +namespace lsst::qserv::replica { + +/** + * Class WorkerSqlHttpRequest executes queries against the worker database + * and return results sets (if any) back to a caller. + * + * @note Queries passed into this operation are supposed to be well formed. + * If a MySQL error would occur during an attempt to execute an incorrectly + * formed query then the corresponding MySQL error will be recorded + * and reported to a caller in the response structure which is set + * by method WorkerSqlHttpRequest::setInfo(). 
+ */ +class WorkerSqlHttpRequest : public WorkerHttpRequest { +public: + /** + * Static factory method is needed to prevent issue with the lifespan + * and memory management of instances created otherwise (as values or via + * low-level pointers). + * + * @param serviceProvider provider is needed to access the Configuration + * of a setup and for validating the input parameters + * @param worker the name of a worker. The name must match the worker which + * is going to execute the request. + * @param hdr request header (common parameters of the queued request) + * @param req the request object received from a client (request-specific parameters) + * @param onExpired request expiration callback function + * @return pointer to the created object + */ + static std::shared_ptr create( + std::shared_ptr const& serviceProvider, std::string const& worker, + protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req, + ExpirationCallbackType const& onExpired); + + WorkerSqlHttpRequest() = delete; + WorkerSqlHttpRequest(WorkerSqlHttpRequest const&) = delete; + WorkerSqlHttpRequest& operator=(WorkerSqlHttpRequest const&) = delete; + + ~WorkerSqlHttpRequest() override = default; + + bool execute() override; + +protected: + void getResult(nlohmann::json& result) const override; + +private: + WorkerSqlHttpRequest(std::shared_ptr const& serviceProvider, std::string const& worker, + protocol::QueuedRequestHdr const& hdr, nlohmann::json const& req, + ExpirationCallbackType const& onExpired); + + /// @return A connector as per the input request + std::shared_ptr _connector() const; + + /** + * The query generator for simple requests uses parameters of a request + * to compose a collection of desired queries. + * @note this method is capable of generating a single or multiple queries + * as needed by the corresponding non-batch requests. 
+ * @param conn A reference to the database connector is needed to process + * arguments to meet requirements of the database query processing engine. + * @return A collection of queries to be executed as per the input request. + * @throw std::invalid_argument For unsupported requests types supported. + */ + std::vector _queries(std::shared_ptr const& conn) const; + + /** + * The query generator for queries which have a target table. + * @param conn A reference to the database connector is needed to process + * arguments to meet requirements of the database query processing engine. + * @param table The name of table affected by the query. + * @return A query as per the input request and the name of a table. + * @throw std::invalid_argument For unsupported requests types. + */ + Query _generateQuery(std::shared_ptr const& conn, + std::string const& table) const; + + /** + * Extract a result set (if any) via the database connector into + * the Protobuf response object. + * @param lock The lock must be held before calling the method since it's + * going to access a protected state of the object. + * @param conn a valid database connector for extracting a result set + */ + void _extractResultSet(replica::Lock const& lock, + std::shared_ptr const& conn); + + /** + * Report & record a failure + * + * @param lock The lock must be held before calling the method since it's + * going to modify a protected state of the object. + * @param statusExt An extended status to be reported to Controllers and + * set in the current (most recently processed query if any) result set. + * @param error A message to be logged and returned to Controllers. + * @throw std::logic_error Is thrown when the method is called before + * creating a result set. 
+ */ + void _reportFailure(replica::Lock const& lock, protocol::StatusExt statusExt, std::string const& error); + + /// @param lock The lock must be held before calling the method since it's + /// going to modify a protected state of the object. + /// @param create A flag to indicate if a new result set should be created + /// @return A mutable pointer to the current result set + nlohmann::json& _currentResultSet(replica::Lock const& lock, bool create = false); + + // Input parameters (mandatory) + + protocol::SqlRequestType const _sqlRequestType; ///< The type of the SQL request + std::string const _user; ///< The name of the MySQL user (queries or grants) + std::string const _password; ///< The MySQL password for the user account (queries only) + DatabaseInfo const _databaseInfo; ///< Database descriptor obtained from the Configuration + std::size_t const _maxRows; ///< The maximum number of rows to be returned in a result set + + // Input parameters (of batch nmode requested) + bool const _batchMode; ///< A flag to indicate if the request is targeting many tables + std::vector _tables; ///< A list of tables to be affected by the request + + // Input parameters (request-specific, see the constructor for further details) + + std::string _query; ///< The query to be executed + std::string _table; ///< The name of the table to be affected by the request + std::list _columns; ///< The list of columns for a table to be created + std::string _partitionByColumn; ///< The name of the column to be used for partitioning + SqlIndexDef _index; ///< The index definition + std::string _engine; ///< The name of the table engine to be used + TransactionId _transactionId; ///< The transaction identifier + std::string _indexName; ///< The name of the index to be dropped + std::string _alterTableSpec; ///< The specification for the ALTER TABLE request + + /// Cached result to be sent to a client upon a request + nlohmann::json _resultSets; +}; + +} // namespace lsst::qserv::replica 
+ +#endif // LSST_QSERV_REPLICA_WORKERSQLHTTPREQUEST_H