From 857ec155f9b94c5c309b75216c9f5fa260845112 Mon Sep 17 00:00:00 2001
From: Xiaoxi Chen <xiaoxchen@ebay.com>
Date: Sat, 28 Sep 2024 01:13:35 +0800
Subject: [PATCH] Start data service after log replay done.

Signed-off-by: Xiaoxi Chen <xiaoxchen@ebay.com>
---
 src/lib/replication/service/raft_repl_service.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/lib/replication/service/raft_repl_service.cpp b/src/lib/replication/service/raft_repl_service.cpp
index 974984ca3..96047a5a8 100644
--- a/src/lib/replication/service/raft_repl_service.cpp
+++ b/src/lib/replication/service/raft_repl_service.cpp
@@ -128,8 +128,19 @@ void RaftReplService::start() {
     m_config_sb_bufs.clear();
 
     // Step 5: Start the data and logstore service now. This step is essential before we can ask Raft to join groups etc
-    hs()->data_service().start();
+
+    // It is crucial to start the logstore before the data service. This is because during log replay,
+    // the commit_blks() function is called, which interacts with the allocator.
+    // Starting the data service before the log replay is complete can lead to a race condition between
+    // PUSHDATA operations and log replay.
+    // For example, suppose LSN 100 in the log store is associated with PBA1. After a restart, the allocator
+    // is only aware of allocations up to the last checkpoint and may consider PBA1 as available.
+    // If a PUSHDATA request is received during this time, PBA1 could be allocated again to a new request,
+    // leading to data corruption by overwriting the data associated with LSN 100.
+
     hs()->logstore_service().start(hs()->is_first_time_boot());
+    // All log stores have been replayed, so it is now safe to start the data service.
+    hs()->data_service().start();
 
     // Step 6: Iterate all the repl dev and ask each one of the join the raft group.
     for (auto it = m_rd_map.begin(); it != m_rd_map.end();) {