Skip to content

Commit

Permalink
Merge pull request #810 from Expensify/rafe-bedrock-healthcheck-132972
Browse files: browse the repository at this point in the history.
Respond 200 to STATUS_HANDLING_COMMANDS if LEADING or STANDINGDOWN
  • Loading branch information
tylerkaraszewski authored Jun 15, 2020
2 parents fed8267 + fc5b212 commit 1d7eb9a
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 5 deletions.
16 changes: 11 additions & 5 deletions BedrockServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1901,13 +1901,19 @@ void BedrockServer::_status(unique_ptr<BedrockCommand>& command) {
}
} else if (SIEquals(request.methodLine, STATUS_HANDLING_COMMANDS)) {
// This is similar to the above check, and is used for letting HAProxy load-balance commands.
SQLiteNode::State state = _replicationState.load();
if (state != SQLiteNode::FOLLOWING) {
response.methodLine = "HTTP/1.1 500 Not following. State=" + SQLiteNode::stateName(state);
} else if (_version != _leaderVersion.load()) {

if (_version != _leaderVersion.load()) {
response.methodLine = "HTTP/1.1 500 Mismatched version. Version=" + _version;
} else {
response.methodLine = "HTTP/1.1 200 Following";
SQLiteNode::State state = _replicationState.load();
string method = "HTTP/1.1 ";

if (state == SQLiteNode::FOLLOWING || state == SQLiteNode::LEADING || state == SQLiteNode::STANDINGDOWN) {
method += "200";
} else {
method += "500";
}
response.methodLine = method + " " + SQLiteNode::stateName(state);
}
}

Expand Down
6 changes: 6 additions & 0 deletions sqlitecluster/SQLiteNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ bool SQLiteNode::update() {
// There are no peers, jump straight to leading
SHMMM("No peers configured, jumping to LEADING");
_changeState(LEADING);
_leaderVersion = _version;
return true; // Re-update immediately
}

Expand Down Expand Up @@ -640,6 +641,7 @@ bool SQLiteNode::update() {
// Complete standup
SINFO("All peers approved standup, going LEADING.");
_changeState(LEADING);
_leaderVersion = _version;
return true; // Re-update
}

Expand Down Expand Up @@ -1806,6 +1808,10 @@ void SQLiteNode::_changeState(SQLiteNode::State newState) {

// Additional logic for some old states
if (SWITHIN(LEADING, oldState, STANDINGDOWN) && !SWITHIN(LEADING, newState, STANDINGDOWN)) {
// If we stop leading, unset _leaderVersion from our own _version.
// It will get re-set to the version on the new leader.
_leaderVersion = "";

// We are no longer leading. Are we processing a command?
if (commitInProgress()) {
// Abort this command
Expand Down
67 changes: 67 additions & 0 deletions test/clustertest/tests/StatusHandlingCommandsTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include "../BedrockClusterTester.h"

// Cluster test for the `GET /status/handlingCommands` health check.
//
// The test polls the node at index 1 (initially a follower) while the node at
// index 0 (initially the leader) is stopped and restarted, and expects the
// polled node to answer with a 200 status line in each of the LEADING,
// FOLLOWING and STANDINGDOWN states at some point within the polling window.
struct StatusHandlingCommandsTest : tpunit::TestFixture {
    StatusHandlingCommandsTest()
        : tpunit::TestFixture("StatusHandlingCommandsTest",
                              BEFORE_CLASS(StatusHandlingCommandsTest::setup),
                              AFTER_CLASS(StatusHandlingCommandsTest::teardown),
                              TEST(StatusHandlingCommandsTest::test)) { }

    // Cluster under test. Created in setup() and destroyed in teardown().
    // Initialised to nullptr so that teardown() is harmless if setup() never
    // completed.
    BedrockClusterTester* tester = nullptr;

    // Brings up a fresh cluster for this fixture.
    void setup() {
        tester = new BedrockClusterTester();
    }

    // Tears the cluster down and clears the pointer so it cannot dangle.
    void teardown() {
        delete tester;
        tester = nullptr;
    }

    void test() {
        // One slot per expected response: [0] LEADING, [1] FOLLOWING, [2] STANDINGDOWN.
        // Written only by healthCheckThread and read only after it has been joined.
        vector<string> results(3);
        BedrockTester& leader = tester->getTester(0);
        BedrockTester& follower = tester->getTester(1);

        // Repeatedly poll the health check on the (original) follower until all
        // three expected responses have been observed, or 60 seconds elapse.
        thread healthCheckThread([&results, &follower]() {
            SData cmd("GET /status/handlingCommands HTTP/1.1");
            string result;
            bool foundLeader = false;
            bool foundFollower = false;
            bool foundStandingdown = false;
            const auto deadline = chrono::steady_clock::now() + 60s;

            while (chrono::steady_clock::now() < deadline && !(foundLeader && foundFollower && foundStandingdown)) {
                result = follower.executeWaitMultipleData({cmd}, 1, false)[0].methodLine;
                if (result == "HTTP/1.1 200 LEADING") {
                    results[0] = result;
                    foundLeader = true;
                } else if (result == "HTTP/1.1 200 FOLLOWING") {
                    results[1] = result;
                    foundFollower = true;
                } else if (result == "HTTP/1.1 200 STANDINGDOWN") {
                    results[2] = result;
                    foundStandingdown = true;
                }
            }
        });

        // Take the original leader down; the test expects the polled follower
        // to report LEADING while it is away.
        leader.stopServer();

        // Execute a slow query on the follower while it is leading, so that when
        // the original leader is brought back up, the follower is expected to
        // report STANDINGDOWN until the query finishes.
        thread slowQueryThread([&follower]() {
            SData slow("slowquery");
            slow["processTimeout"] = "5000"; // 5s
            follower.executeWaitVerifyContent(slow, "555 Timeout peeking command");
        });

        leader.startServer(true);
        slowQueryThread.join();
        healthCheckThread.join();

        ASSERT_EQUAL(results[0], "HTTP/1.1 200 LEADING");
        ASSERT_EQUAL(results[1], "HTTP/1.1 200 FOLLOWING");
        ASSERT_EQUAL(results[2], "HTTP/1.1 200 STANDINGDOWN");
    }

} __StatusHandlingCommandsTest;

0 comments on commit 1d7eb9a

Please sign in to comment.