Skip to content

Commit

Permalink
[scheduler] Fix a race during transiting fake leave to leave.
Browse files Browse the repository at this point in the history
  • Loading branch information
gaomy3832 committed Dec 23, 2019
1 parent 5d0cb34 commit 6222f1f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,19 @@ void Scheduler::watchdogThreadFunc() {

uint64_t pc = fl->pc;
do {
// NOTE(gaomy): avoid a race with join() when a thread is not actually blocked but has merely waited too long (e.g., on a heavily loaded host).
// Because we finish the fake leave and release the lock before doing the actual leave, a join could happen in between,
// before the leave has been done.
th->flWord = 1;
finishFakeLeave(th);

futex_unlock(&schedLock);
leave(pid, tid, cid);
futex_lock(&schedLock);

th->flWord = 0;
syscall(SYS_futex, &th->flWord, FUTEX_WAKE, 1, nullptr, nullptr, 0);

// also do real leave for other threads blocked at the same pc ...
fl = fakeLeaves.front();
if (fl == nullptr || getPid(th->gid) != pid || fl->pc != pc)
Expand Down
10 changes: 10 additions & 0 deletions src/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class Scheduler : public GlobAlloc, public Callee {
g_vector<bool> mask;

FakeLeaveInfo* fakeLeave; // for accurate join-leaves, see below
volatile uint32_t flWord; // if non-zero, currently transiting fake leave to true leave

FutexJoinInfo futexJoin;

Expand All @@ -124,6 +125,7 @@ class Scheduler : public GlobAlloc, public Callee {
for (auto b : mask) if (b) count++;
if (count == 0) panic("Empty mask on gid %d!", gid);
fakeLeave = nullptr;
flWord = 0;
futexJoin.action = FJA_NONE;
}
};
Expand Down Expand Up @@ -294,6 +296,14 @@ class Scheduler : public GlobAlloc, public Callee {
uint32_t cid = th->cid;
futex_unlock(&schedLock);
return cid;
} else if (th->flWord) {
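// We are just finishing the fake leave and transitioning into a true leave. Wait until that is done.
// NOTE(review): the wait loop below sleeps on th->flWord but re-checks th->futexWord; the check presumably should read th->flWord -- confirm.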
futex_unlock(&schedLock);
while (true) {
int futex_res = syscall(SYS_futex, &th->flWord, FUTEX_WAIT, 1, nullptr, nullptr, 0);
if (futex_res == 0 || th->futexWord != 1) break;
}
futex_lock(&schedLock);
}

assert(!th->markedForSleep);
Expand Down

0 comments on commit 6222f1f

Please sign in to comment.