Skip to content

Commit

Permalink
force shutdown if connections don't close
Browse files Browse the repository at this point in the history
There is a hard to reproduce bug in the shutdown logic where the
application hangs on SIGINT shutdown. I think this might occur when the
server blocks on writing to a dead TCP connection. I will explore the
root cause at a later date. For now, I'm adding a forced shutdown that
occurs 5 seconds after SIGINT is processed.
  • Loading branch information
mk6i committed Nov 3, 2024
1 parent 9665333 commit 1c7cc69
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 8 deletions.
8 changes: 6 additions & 2 deletions server/oscar/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,12 @@ func (rt AdminServer) Start(ctx context.Context) error {
}()
}

wg.Wait()
rt.Logger.Info("shutdown complete")
if !waitForShutdown(&wg) {
rt.Logger.Error("shutdown complete, but connections didn't close cleanly")
} else {
rt.Logger.Info("shutdown complete")
}

return nil
}

Expand Down
8 changes: 6 additions & 2 deletions server/oscar/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,12 @@ func (rt AuthServer) Start(ctx context.Context) error {
}()
}

wg.Wait()
rt.Logger.Info("shutdown complete")
if !waitForShutdown(&wg) {
rt.Logger.Error("shutdown complete, but connections didn't close cleanly")
} else {
rt.Logger.Info("shutdown complete")
}

return nil
}

Expand Down
29 changes: 27 additions & 2 deletions server/oscar/bos.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"log/slog"
"net"
"sync"
"time"

"github.com/mk6i/retro-aim-server/config"
"github.com/mk6i/retro-aim-server/wire"
Expand Down Expand Up @@ -69,11 +70,35 @@ func (rt BOSServer) Start(ctx context.Context) error {
}()
}

wg.Wait()
rt.Logger.Info("shutdown complete")
if !waitForShutdown(&wg) {
rt.Logger.Error("shutdown complete, but connections didn't close cleanly")
} else {
rt.Logger.Info("shutdown complete")
}

return nil
}

// waitForShutdown returns when either the wg completes or 5 seconds has
// passed. This is a temporary hack to ensure that the server shuts down even
// if all the TCP connections do not drain. Return true if the shutdown is
// clean.
func waitForShutdown(wg *sync.WaitGroup) bool {
ch := make(chan struct{})

go func() {
wg.Wait() // goroutine leak if wg never completes
close(ch)
}()

select {
case <-ch:
return true
case <-time.After(time.Second * 5):
return false
}
}

func (rt BOSServer) handleNewConnection(ctx context.Context, rwc io.ReadWriteCloser) error {
flapc := wire.NewFlapClient(100, rwc, rwc)

Expand Down
8 changes: 6 additions & 2 deletions server/oscar/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,12 @@ func (rt ChatServer) Start(ctx context.Context) error {
}()
}

wg.Wait()
rt.Logger.Info("shutdown complete")
if !waitForShutdown(&wg) {
rt.Logger.Error("shutdown complete, but connections didn't close cleanly")
} else {
rt.Logger.Info("shutdown complete")
}

return nil
}

Expand Down

0 comments on commit 1c7cc69

Please sign in to comment.