Skip to content

Commit

Permalink
Merge branch 'tickets/DM-43625'
Browse files Browse the repository at this point in the history
  • Loading branch information
iagaponenko committed Apr 4, 2024
2 parents 3214c1e + 14696fb commit 91eec42
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 7 deletions.
52 changes: 47 additions & 5 deletions src/replica/mysql/DatabaseMySQLGenerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,7 @@ string QueryGenerator::createTable(SqlId const& sqlId, bool ifNotExists, list<Sq

string QueryGenerator::insertPacked(string const& tableName, string const& packedColumns,
vector<string> const& packedValues) const {
if (packedValues.empty()) {
string const msg = "QueryGenerator::" + string(__func__) +
" the collection of the packed values can not be empty.";
throw invalid_argument(msg);
}
_assertNotEmpty(__func__, packedValues);
string sql = "INSERT INTO " + id(tableName).str + " (" + packedColumns + ") VALUES ";
for (size_t i = 0, size = packedValues.size(); i < size; ++i) {
if (i != 0) sql += ",";
Expand All @@ -117,6 +113,52 @@ string QueryGenerator::insertPacked(string const& tableName, string const& packe
return sql;
}

vector<string> QueryGenerator::insertPacked(string const& tableName, string const& packedColumns,
vector<string> const& packedValues,
size_t const maxQueryLength) const {
_assertNotEmpty(__func__, packedValues);
vector<string> queries;
string sql;
size_t numRowsPacked = 0;
for (vector<string>::const_iterator itr = packedValues.cbegin(); itr != packedValues.cend();) {
string const& row = *itr;
if (sql.empty()) {
sql = "INSERT INTO " + id(tableName).str + " (" + packedColumns + ") VALUES ";
}
// 2 more characters are needed for injecting the first row: "(" + row + ")"
// And 1 more - for subsequent rows: ",(" + row + ")"
size_t const extraSpacePerRow = (numRowsPacked == 0 ? 2 : 3);
size_t const projectedQueryLength = sql.size() + extraSpacePerRow + row.size();
if (projectedQueryLength <= maxQueryLength) {
// -- Extend the current query and move on to the next row (if any)
if (numRowsPacked != 0) sql += ",";
sql += "(" + row + ")";
numRowsPacked++;
++itr;
} else {
// -- Flush the current query and start building the next one
if (numRowsPacked == 0) {
string const msg = "QueryGenerator::" + string(__func__) + " the generated query length " +
to_string(projectedQueryLength) + " exceeds the limit " +
to_string(maxQueryLength);
throw invalid_argument(msg);
}
queries.push_back(move(sql));
sql = string();
numRowsPacked = 0;
}
}
// -- Flush the current query
if (!sql.empty()) queries.push_back(move(sql));
return queries;
}

// Throw std::invalid_argument if the collection 'coll' has no elements.
// The name passed in 'func' is embedded into the exception message to tell
// which method requested the check.
void QueryGenerator::_assertNotEmpty(string const& func, vector<string> const& coll) {
    if (!coll.empty()) return;
    string const msg = "QueryGenerator::" + func + " the input collection is empty.";
    throw invalid_argument(msg);
}

string QueryGenerator::showVars(SqlVarScope scope, string const& pattern) const {
string const like = pattern.empty() ? string() : " LIKE " + val(pattern).str;
switch (scope) {
Expand Down
40 changes: 38 additions & 2 deletions src/replica/mysql/DatabaseMySQLGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#define LSST_QSERV_REPLICA_DATABASEMYSQLGENERATOR_H

// System headers
#include <list>
#include <memory>
#include <string>
#include <tuple>
Expand Down Expand Up @@ -402,6 +403,33 @@ class QueryGenerator {
std::string insertPacked(std::string const& tableName, std::string const& packedColumns,
std::vector<std::string> const& packedValues) const;

/**
 * Generate a collection of complete INSERT statements for the given input, where
 * the length of each query string does not exceed a value of the parameter
 * 'maxQueryLength'. The rows are distributed between the statements in the order
 * they appear in the input collection.
 *
 * Here is an example:
 * @code
 *   std::size_t const maxQueryLength = 1024*1024;
 *   QueryGenerator const g(conn);
 *   std::vector<std::string> const queries =
 *       g.insertPacked("table",
 *                      g.packIds("id", "timestamp", "name"),
 *                      {g.packVals(Sql::NULL_, Sql::NOW, "John Smith"),
 *                       g.packVals(Sql::NULL_, Sql::NOW, "Vera Rubin"),
 *                       g.packVals(Sql::NULL_, Sql::NOW, "Igor Gaponenko")},
 *                      maxQueryLength);
 * @endcode
 * @param tableName The name of a table where the rows will be inserted.
 * @param packedColumns A collection of column names packed into a string.
 * @param packedValues A collection of the packed rows.
 * @param maxQueryLength The maximum length of each generated query string.
 * @return A collection of the generated queries
 * @throws std::invalid_argument If the collection of rows is empty, or if it has
 *   rows which are too large for generating queries constrained by the given limit.
 */
std::vector<std::string> insertPacked(std::string const& tableName, std::string const& packedColumns,
std::vector<std::string> const& packedValues,
std::size_t const maxQueryLength) const;

/**
* @brief Generate and return an SQL expression for a binary operator applied
* over a pair of the pre-processed expressions.
Expand Down Expand Up @@ -1083,6 +1111,14 @@ class QueryGenerator {
std::string call(DoNotProcess const& packedProcAndArgs) const;

private:
/**
 * Ensure that the specified collection is not empty.
 * @param func The name of a function from which the check was requested. The name
 *   is included into the message of the exception.
 * @param coll A collection to be evaluated.
 * @throws std::invalid_argument If the input collection is empty.
 */
static void _assertNotEmpty(std::string const& func, std::vector<std::string> const& coll);

/// @return A string that's ready to be included into the queries.
template <typename... Targs>
std::string _values(Targs... Fargs) const {
Expand Down Expand Up @@ -1151,8 +1187,8 @@ class QueryGenerator {
/// @param scope The scope of the variable (SESSION, GLOBAL, etc.)
/// @param packedVars Partial SQL for setting values of the variables.
/// @return The well-formed SQL for setting the variables
/// @throws std::invalid_argument If a value of the parameter 'packedVars' is empty,
///   or in case if the specified value of the parameter 'scope' is not supported.
std::string _setVars(SqlVarScope scope, std::string const& packedVars) const;

std::string _createIndex(SqlId const& tableId, std::string const& indexName, std::string const& spec,
Expand Down
42 changes: 42 additions & 0 deletions src/replica/tests/testQueryGenerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
#include "lsst/log/Log.h"

// System headers
#include <algorithm>
#include <list>
#include <stdexcept>
#include <string>
#include <tuple>
#include <vector>
Expand Down Expand Up @@ -435,6 +437,46 @@ BOOST_AUTO_TEST_CASE(QueryGeneratorTest) {
BOOST_CHECK_EQUAL(test.first, test.second);
}

// Test bulk insert of many rows
vector<string> const expectedInsertQueries1 = {"INSERT INTO `Table` (`num`,`str`) VALUES (1,'a')",
"INSERT INTO `Table` (`num`,`str`) VALUES (2,'b')",
"INSERT INTO `Table` (`num`,`str`) VALUES (3,'c')",
"INSERT INTO `Table` (`num`,`str`) VALUES (4,'d')"};
vector<string> const expectedInsertQueries2 = {
"INSERT INTO `Table` (`num`,`str`) VALUES (1,'a'),(2,'b')",
"INSERT INTO `Table` (`num`,`str`) VALUES (3,'c'),(4,'d')"};
vector<string> const expectedInsertQueries3 = {
"INSERT INTO `Table` (`num`,`str`) VALUES (1,'a'),(2,'b'),(3,'c')",
"INSERT INTO `Table` (`num`,`str`) VALUES (4,'d')"};
vector<string> const expectedInsertQueries4 = {
"INSERT INTO `Table` (`num`,`str`) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d')"};

// The test should throw because the collection of rows is empty
auto const packedIds = g.packIds("num", "str");
vector<string> const emptyInsertData;
size_t maxQueryLength = expectedInsertQueries4[0].size();
BOOST_CHECK_THROW(g.insertPacked("Table", packedIds, emptyInsertData, maxQueryLength), invalid_argument);

// The test should throw because the generated statements would exceed a limit.
vector<string> const insertData = {g.packVals(1, "a"), g.packVals(2, "b"), g.packVals(3, "c"),
g.packVals(4, "d")};
maxQueryLength = expectedInsertQueries1[0].size() - 1;
BOOST_CHECK_THROW(g.insertPacked("Table", packedIds, insertData, maxQueryLength), invalid_argument);
LOGS_INFO("QueryGenerator #1");

// None of the following tests should throw
vector<vector<string>> const expectedInsertQueries = {expectedInsertQueries1, expectedInsertQueries2,
expectedInsertQueries3, expectedInsertQueries4};
for (auto const& expectedQueries : expectedInsertQueries) {
LOGS_INFO("QueryGenerator #2");
size_t const maxQueryLength = expectedQueries[0].size();
vector<string> const generatedQueries =
g.insertPacked("Table", packedIds, insertData, maxQueryLength);
BOOST_CHECK_EQUAL(generatedQueries.size(), expectedQueries.size());
for (size_t i = 0; i < min(generatedQueries.size(), expectedQueries.size()); ++i) {
BOOST_CHECK_EQUAL(generatedQueries[i], expectedQueries[i]);
}
}
LOGS_INFO("QueryGenerator test ends");
}

Expand Down

0 comments on commit 91eec42

Please sign in to comment.