From 14696fb514a554ab9f9394260368654e6a6a2421 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Wed, 3 Apr 2024 02:26:06 +0000 Subject: [PATCH] Extend the MySQL API of the Qserv Replication Framework The new extension allows generating collections of multi-row INSERT queries where the maximum length of each query is constrained by the specified limit. --- src/replica/mysql/DatabaseMySQLGenerator.cc | 52 +++++++++++++++++++-- src/replica/mysql/DatabaseMySQLGenerator.h | 40 +++++++++++++++- src/replica/tests/testQueryGenerator.cc | 42 +++++++++++++++++ 3 files changed, 127 insertions(+), 7 deletions(-) diff --git a/src/replica/mysql/DatabaseMySQLGenerator.cc b/src/replica/mysql/DatabaseMySQLGenerator.cc index d267c5e3a..cc48cdc6a 100644 --- a/src/replica/mysql/DatabaseMySQLGenerator.cc +++ b/src/replica/mysql/DatabaseMySQLGenerator.cc @@ -104,11 +104,7 @@ string QueryGenerator::createTable(SqlId const& sqlId, bool ifNotExists, list const& packedValues) const { - if (packedValues.empty()) { - string const msg = "QueryGenerator::" + string(__func__) + - " the collection of the packed values can not be empty."; - throw invalid_argument(msg); - } + _assertNotEmpty(__func__, packedValues); string sql = "INSERT INTO " + id(tableName).str + " (" + packedColumns + ") VALUES "; for (size_t i = 0, size = packedValues.size(); i < size; ++i) { if (i != 0) sql += ","; @@ -117,6 +113,52 @@ string QueryGenerator::insertPacked(string const& tableName, string const& packe return sql; } +vector QueryGenerator::insertPacked(string const& tableName, string const& packedColumns, + vector const& packedValues, + size_t const maxQueryLength) const { + _assertNotEmpty(__func__, packedValues); + vector queries; + string sql; + size_t numRowsPacked = 0; + for (vector::const_iterator itr = packedValues.cbegin(); itr != packedValues.cend();) { + string const& row = *itr; + if (sql.empty()) { + sql = "INSERT INTO " + id(tableName).str + " (" + packedColumns + ") VALUES "; + } + // 2 more 
characters are needed for injecting the first row: "(" + row + ")" + // And 1 more - for subsequent rows: ",(" + row + ")" + size_t const extraSpacePerRow = (numRowsPacked == 0 ? 2 : 3); + size_t const projectedQueryLength = sql.size() + extraSpacePerRow + row.size(); + if (projectedQueryLength <= maxQueryLength) { + // -- Extend the current query and move on to the next row (if any) + if (numRowsPacked != 0) sql += ","; + sql += "(" + row + ")"; + numRowsPacked++; + ++itr; + } else { + // -- Flush the current query and start building the next one + if (numRowsPacked == 0) { + string const msg = "QueryGenerator::" + string(__func__) + " the generated query length " + + to_string(projectedQueryLength) + " exceeds the limit " + + to_string(maxQueryLength); + throw invalid_argument(msg); + } + queries.push_back(move(sql)); + sql = string(); + numRowsPacked = 0; + } + } + // -- Flush the current query + if (!sql.empty()) queries.push_back(move(sql)); + return queries; +} + +void QueryGenerator::_assertNotEmpty(string const& func, vector const& coll) { + if (coll.empty()) { + throw invalid_argument("QueryGenerator::" + func + " the input collection is empty."); + } +} + string QueryGenerator::showVars(SqlVarScope scope, string const& pattern) const { string const like = pattern.empty() ? 
string() : " LIKE " + val(pattern).str; switch (scope) { diff --git a/src/replica/mysql/DatabaseMySQLGenerator.h b/src/replica/mysql/DatabaseMySQLGenerator.h index 4ae38d56b..994c69d7d 100644 --- a/src/replica/mysql/DatabaseMySQLGenerator.h +++ b/src/replica/mysql/DatabaseMySQLGenerator.h @@ -22,6 +22,7 @@ #define LSST_QSERV_REPLICA_DATABASEMYSQLGENERATOR_H // System headers +#include #include #include #include @@ -402,6 +403,33 @@ class QueryGenerator { std::string insertPacked(std::string const& tableName, std::string const& packedColumns, std::vector const& packedValues) const; + /** + * Generate a collection of complete INSERT statements for the given input, where + * the maximum size of each query string is determined by a value of + * the parameter 'maxQueryLength'. + * + * Here is an example: + * @code + * std::size_t const maxQueryLength = 1024*1024; + * QueryGenerator const g(conn); + * std::vector const queries = + * g.insertPacked("table", + * g.packIds("id", "timestamp", "name"), + * {g.packVals(Sql::NULL_, Sql::NOW, "John Smith"), + * g.packVals(Sql::NULL_, Sql::NOW, "Vera Rubin"), + * g.packVals(Sql::NULL_, Sql::NOW, "Igor Gaponenko")}, maxQueryLength); + * @endcode + * @param tableName The name of a table where the rows will be inserted. + * @param packedColumns A collection of column names packed into a string. + * @param packedValues A collection of the packed rows. + * @return A collection of the generated queries + * @throws std::invalid_argument If the collection of rows is empty, or if it has + * rows which are too large for generating queries constrained by the given limit. + */ + std::vector insertPacked(std::string const& tableName, std::string const& packedColumns, + std::vector const& packedValues, + std::size_t const maxQueryLength) const; + /** * @brief Generate and return an SQL expression for a binary operator applied * over a pair of the pre-processed expressions. 
@@ -1083,6 +1111,14 @@ class QueryGenerator { std::string call(DoNotProcess const& packedProcAndArgs) const; private: + /** + * Check if the specified collection is not empty. + * @param func A scope from which the check was requested. + * @param coll A collection to be evaluated. + * @throws std::invalid_argument If the input collection is empty. + */ + static void _assertNotEmpty(std::string const& func, std::vector const& coll); + /// @return A string that's ready to be included into the queries. template std::string _values(Targs... Fargs) const { @@ -1151,8 +1187,8 @@ class QueryGenerator { /// @param scope The scope of the variable (SESSION, GLOBAL, etc.) /// @param packedVars Partial SQL for setting values of the variables. /// @return The well-formed SQL for setting the variables - /// @throws std::invalid_argument If a value of \param packedVars is empty, - /// or in case if the specified value of \param scope is not supported. + /// @throws std::invalid_argument If a value of the parameter 'packedVars' is empty, + /// or in case if the specified value of the parameter 'scope' is not supported. 
std::string _setVars(SqlVarScope scope, std::string const& packedVars) const; std::string _createIndex(SqlId const& tableId, std::string const& indexName, std::string const& spec, diff --git a/src/replica/tests/testQueryGenerator.cc b/src/replica/tests/testQueryGenerator.cc index 9ffa61f8d..234821c88 100644 --- a/src/replica/tests/testQueryGenerator.cc +++ b/src/replica/tests/testQueryGenerator.cc @@ -23,7 +23,9 @@ #include "lsst/log/Log.h" // System headers +#include #include +#include #include #include #include @@ -435,6 +437,46 @@ BOOST_AUTO_TEST_CASE(QueryGeneratorTest) { BOOST_CHECK_EQUAL(test.first, test.second); } + // Test bulk insert of many rows + vector const expectedInsertQueries1 = {"INSERT INTO `Table` (`num`,`str`) VALUES (1,'a')", + "INSERT INTO `Table` (`num`,`str`) VALUES (2,'b')", + "INSERT INTO `Table` (`num`,`str`) VALUES (3,'c')", + "INSERT INTO `Table` (`num`,`str`) VALUES (4,'d')"}; + vector const expectedInsertQueries2 = { + "INSERT INTO `Table` (`num`,`str`) VALUES (1,'a'),(2,'b')", + "INSERT INTO `Table` (`num`,`str`) VALUES (3,'c'),(4,'d')"}; + vector const expectedInsertQueries3 = { + "INSERT INTO `Table` (`num`,`str`) VALUES (1,'a'),(2,'b'),(3,'c')", + "INSERT INTO `Table` (`num`,`str`) VALUES (4,'d')"}; + vector const expectedInsertQueries4 = { + "INSERT INTO `Table` (`num`,`str`) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d')"}; + + // The test should throw because the collection of rows is empty + auto const packedIds = g.packIds("num", "str"); + vector const emptyInsertData; + size_t maxQueryLength = expectedInsertQueries4[0].size(); + BOOST_CHECK_THROW(g.insertPacked("Table", packedIds, emptyInsertData, maxQueryLength), invalid_argument); + + // The test should throw because the generated statements would exceed a limit. 
+ vector const insertData = {g.packVals(1, "a"), g.packVals(2, "b"), g.packVals(3, "c"), + g.packVals(4, "d")}; + maxQueryLength = expectedInsertQueries1[0].size() - 1; + BOOST_CHECK_THROW(g.insertPacked("Table", packedIds, insertData, maxQueryLength), invalid_argument); + LOGS_INFO("QueryGenerator #1"); + + // None of the following tests should throw + vector> const expectedInsertQueries = {expectedInsertQueries1, expectedInsertQueries2, + expectedInsertQueries3, expectedInsertQueries4}; + for (auto const& expectedQueries : expectedInsertQueries) { + LOGS_INFO("QueryGenerator #2"); + size_t const maxQueryLength = expectedQueries[0].size(); + vector const generatedQueries = + g.insertPacked("Table", packedIds, insertData, maxQueryLength); + BOOST_CHECK_EQUAL(generatedQueries.size(), expectedQueries.size()); + for (size_t i = 0; i < min(generatedQueries.size(), expectedQueries.size()); ++i) { + BOOST_CHECK_EQUAL(generatedQueries[i], expectedQueries[i]); + } + } LOGS_INFO("QueryGenerator test ends"); }