diff --git a/common/asyncdbupdater.h b/common/asyncdbupdater.h index 25c6ef70..cade1a0f 100644 --- a/common/asyncdbupdater.h +++ b/common/asyncdbupdater.h @@ -7,7 +7,7 @@ #include "table.h" #define MQ_SIZE 100 -#define MQ_MAX_RETRY 10 +#define MQ_MAX_RETRY 1000 #define MQ_POLL_TIMEOUT (1000) namespace swss { diff --git a/common/redispipeline.h b/common/redispipeline.h index be7561b6..96f97ab8 100644 --- a/common/redispipeline.h +++ b/common/redispipeline.h @@ -164,7 +164,7 @@ class RedisPipeline { return; m_channels.insert(channel); - m_luaPub += "redis.call('PUBLISH', '" + channel + "', 'G');"; + m_luaPub += "redis.call('PUBLISH', '" + channel + "', 'G')\n"; m_shaPub = loadRedisScript(m_luaPub); } diff --git a/common/zmqclient.cpp b/common/zmqclient.cpp index d112cc55..7e77f5f8 100644 --- a/common/zmqclient.cpp +++ b/common/zmqclient.cpp @@ -208,11 +208,13 @@ bool ZmqClient::wait(std::string& dbName, rc = zmq_recv(m_socket, m_sendbuffer.data(), m_sendbuffer.size(), 0); if (rc < 0) { + SWSS_LOG_DEBUG("MQ_MAX_RETRY value is: %d", MQ_MAX_RETRY); if (zmq_errno() == EINTR && i <= MQ_MAX_RETRY) { continue; } SWSS_LOG_THROW("zmq_recv failed, zmqerrno: %d", zmq_errno()); + return false; } if (rc >= (int)m_sendbuffer.size()) { diff --git a/common/zmqconsumerstatetable.h b/common/zmqconsumerstatetable.h index dece60bd..bf90311a 100644 --- a/common/zmqconsumerstatetable.h +++ b/common/zmqconsumerstatetable.h @@ -10,6 +10,8 @@ #include "table.h" #include "zmqserver.h" +#define MQ_MAX_RETRY 1000 + namespace swss { class ZmqConsumerStateTable : public Selectable, public TableBase, public ZmqMessageHandler diff --git a/common/zmqserver.cpp b/common/zmqserver.cpp index ab7cc43e..2753cc80 100644 --- a/common/zmqserver.cpp +++ b/common/zmqserver.cpp @@ -171,4 +171,70 @@ void ZmqServer::mqPollThread() SWSS_LOG_NOTICE("mqPollThread end"); } +void ZmqServer::sendMsg(const std::string& dbName, const std::string& tableName, + const std::vector& values) +{ + int serializedlen = (int)BinarySerializer::serializeBuffer( + m_buffer.data(), + m_buffer.size(), + dbName, + tableName, + values); + SWSS_LOG_DEBUG("sending: %d", serializedlen); + int zmq_err = 0; + int retry_delay = 10; + int rc = 0; + for (int i = 0; i <= MQ_MAX_RETRY; ++i) + { + rc = zmq_send(m_socket, m_buffer.data(), serializedlen, 0); + + if (rc >= 0) + { + m_allowZmqPoll = true; + SWSS_LOG_DEBUG("zmq sent %d bytes", serializedlen); + return; + } + zmq_err = zmq_errno(); + // sleep (2 ^ retry time) * 10 ms + retry_delay *= 2; + if (zmq_err == EINTR + || zmq_err== EFSM) + { + // EINTR: interrupted by signal + // EFSM: socket state not ready + // For example when ZMQ socket still not receive reply message from last sended package. + // There was state machine inside ZMQ socket, when the socket is not in ready to send state, this + // error will happen. + // for more detail, please check: http://api.zeromq.org/2-1:zmq-send + SWSS_LOG_DEBUG("zmq send retry, endpoint: %s, error: %d", m_endpoint.c_str(), zmq_err); + + retry_delay = 0; + } + else if (zmq_err == EAGAIN) + { + // EAGAIN: ZMQ is full to need try again + SWSS_LOG_WARN("zmq is full, will retry in %d ms, endpoint: %s, error: %d", retry_delay, m_endpoint.c_str(), zmq_err); + } + else if (zmq_err == ETERM) + { + auto message = "zmq connection break, endpoint: " + m_endpoint + ", error: " + to_string(rc); + SWSS_LOG_ERROR("%s", message.c_str()); + throw system_error(make_error_code(errc::connection_reset), message); + } + else + { + // for other error, send failed immediately. + auto message = "zmq send failed, endpoint: " + m_endpoint + ", error: " + to_string(rc); + SWSS_LOG_ERROR("%s", message.c_str()); + // throw system_error(make_error_code(errc::io_error), message); + } + usleep(retry_delay * 1000); + } + + // failed after retry + auto message = "zmq send failed, endpoint: " + m_endpoint + ", zmqerrno: " + to_string(zmq_err) + ":" + zmq_strerror(zmq_err) + ", msg length:" + to_string(serializedlen); + SWSS_LOG_ERROR("%s", message.c_str()); + // throw system_error(make_error_code(errc::io_error), message); +} + } diff --git a/common/zmqserver.h b/common/zmqserver.h index 79ea4f6f..9b1e8510 100644 --- a/common/zmqserver.h +++ b/common/zmqserver.h @@ -8,7 +8,7 @@ #define MQ_RESPONSE_MAX_COUNT (16*1024*1024) #define MQ_SIZE 100 -#define MQ_MAX_RETRY 10 +#define MQ_MAX_RETRY 1000 #define MQ_POLL_TIMEOUT (1000) #define MQ_WATERMARK 10000 @@ -39,6 +39,9 @@ class ZmqServer const std::string tableName, ZmqMessageHandler* handler); + void sendMsg(const std::string& dbName, const std::string& tableName, + const std::vector& values); + private: void handleReceivedData(const char* buffer, const size_t size); @@ -56,6 +59,8 @@ class ZmqServer std::string m_vrf; + void* m_socket; + bool m_allowZmqPoll; std::map> m_HandlerMap; diff --git a/tests/zmq_state_ut.cpp b/tests/zmq_state_ut.cpp index c4dcc748..d4869ee1 100644 --- a/tests/zmq_state_ut.cpp +++ b/tests/zmq_state_ut.cpp @@ -288,9 +288,6 @@ static void testMethod(bool producerPersistence) // start consumer first, SHM can only have 1 consumer per table. thread *consumerThread = new thread(consumerWorker, testTableName, pullEndpoint, !producerPersistence); - // Wait for the consumer to start. - sleep(1); - cout << "Starting " << NUMBER_OF_THREADS << " producers" << endl; /* Starting the producer before the producer */ for (int i = 0; i < NUMBER_OF_THREADS; i++) @@ -354,9 +351,6 @@ static void testBatchMethod(bool producerPersistence) // start consumer first, SHM can only have 1 consumer per table. thread *consumerThread = new thread(consumerWorker, testTableName, pullEndpoint, !producerPersistence); - // Wait for the consumer to start. - sleep(1); - cout << "Starting " << NUMBER_OF_THREADS << " producers" << endl; /* Starting the producer before the producer */ for (int i = 0; i < NUMBER_OF_THREADS; i++) @@ -471,3 +465,101 @@ TEST(ZmqProducerStateTableDeleteAfterSend, test) table.getKeys(keys); EXPECT_EQ(keys.front(), testKey); } + +static bool zmq_done = false; + +static void zmqConsumerWorker(string tableName, string endpoint, bool dbPersistence) +{ + cout << "Consumer thread started: " << tableName << endl; + DBConnector db(TEST_DB, 0, true); + ZmqServer server(endpoint); + ZmqConsumerStateTable c(&db, tableName, server, 128, 0, dbPersistence); + Select cs; + cs.addSelectable(&c); + //validate received data + Selectable *selectcs; + std::deque vkco; + int ret = 0; + while (!zmq_done) + { + ret = cs.select(&selectcs, 10, true); + if (ret == Select::OBJECT) + { + c.pops(vkco); + std::vector values; + values.push_back(KeyOpFieldsValuesTuple{"k", SET_COMMAND, std::vector{FieldValueTuple{"f", "v"}}}); + server.sendMsg(TEST_DB, tableName, values); + } + } + + allDataReceived = true; + if (dbPersistence) + { + // wait all persist data write to redis + while (c.dbUpdaterQueueSize() > 0) + { + sleep(1); + } + } + + cout << "Consumer thread ended: " << tableName << endl; +} + +static void ZmqWithResponse(bool producerPersistence) +{ + std::string testTableName = "ZMQ_PROD_CONS_UT"; + std::string pushEndpoint = "tcp://localhost:1234"; + std::string pullEndpoint = "tcp://*:1234"; + // start consumer first, SHM can only have 1 consumer per table. + thread *consumerThread = new thread(zmqConsumerWorker, testTableName, pullEndpoint, !producerPersistence); + + // Wait for the consumer to be ready. + sleep(1); + DBConnector db(TEST_DB, 0, true); + ZmqClient client(pushEndpoint); + ZmqProducerStateTable p(&db, testTableName, client, true); + std::vector kcos; + kcos.push_back(KeyOpFieldsValuesTuple{"k", SET_COMMAND, std::vector{FieldValueTuple{"f", "v"}}}); + std::vector> kcos_p; + std::string dbName, tableName; + for (int i =0; i < 3; ++i) + { + p.send(kcos); + ASSERT_TRUE(p.wait(dbName, tableName, kcos_p)); + EXPECT_EQ(dbName, TEST_DB); + EXPECT_EQ(tableName, testTableName); + ASSERT_EQ(kcos_p.size(), 1); + EXPECT_EQ(kfvKey(*kcos_p[0]), "k"); + EXPECT_EQ(kfvOp(*kcos_p[0]), SET_COMMAND); + std::vector cos = std::vector{FieldValueTuple{"f", "v"}}; + EXPECT_EQ(kfvFieldsValues(*kcos_p[0]), cos); + } + + zmq_done = true; + consumerThread->join(); + delete consumerThread; +} + +TEST(ZmqWithResponse, test) +{ + // test with persist by consumer + ZmqWithResponse(false); +} + +TEST(ZmqWithResponseClientError, test) +{ + std::string testTableName = "ZMQ_PROD_CONS_UT"; + std::string pushEndpoint = "tcp://localhost:1234"; + DBConnector db(TEST_DB, 0, true); + ZmqClient client(pushEndpoint); + ZmqProducerStateTable p(&db, testTableName, client, true); + std::vector kcos; + kcos.push_back(KeyOpFieldsValuesTuple{"k", SET_COMMAND, std::vector{}}); + std::vector> kcos_p; + std::string dbName, tableName; + p.send(kcos); + // Wait will timeout without server reply. + EXPECT_FALSE(p.wait(dbName, tableName, kcos_p)); + // Send will return error without server reply. +} +