Skip to content

Commit

Permalink
WIP: Fix disk add race
Browse files Browse the repository at this point in the history
Get rid of the racy OSD id calculation, make the database the single
source of truth for OSD ids.

Signed-off-by: Peter Sabaini <[email protected]>
  • Loading branch information
sabaini committed Oct 30, 2023
1 parent 895c58a commit 7f6ff10
Show file tree
Hide file tree
Showing 6 changed files with 416 additions and 504 deletions.
70 changes: 4 additions & 66 deletions microceph/ceph/osd.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ import (
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"

Expand All @@ -31,56 +29,6 @@ import (
"github.com/canonical/microceph/microceph/database"
)

func nextOSD(s *state.State) (int64, error) {
// Get the used OSD ids from Ceph.
osds, err := cephRun("osd", "ls")
if err != nil {
return -1, err
}

cephIds := []int64{}
for _, line := range strings.Split(osds, "\n") {
if line == "" {
continue
}

id, err := strconv.ParseInt(line, 10, 64)
if err != nil {
continue
}

cephIds = append(cephIds, id)
}

// Get the used OSD ids from the database.
dbIds := []int64{}
err = s.Database.Transaction(s.Context, func(ctx context.Context, tx *sql.Tx) error {
disks, err := database.GetDisks(ctx, tx)
if err != nil {
return fmt.Errorf("Failed to fetch disks: %w", err)
}

for _, disk := range disks {
dbIds = append(dbIds, int64(disk.OSD))
}

return nil
})
if err != nil {
return -1, err
}

// Find next available.
nextID := int64(0)
for {
if !shared.Int64InSlice(nextID, cephIds) && !shared.Int64InSlice(nextID, dbIds) {
return nextID, nil
}

nextID++
}
}

func prepareDisk(disk *types.DiskParameter, suffix string, osdPath string, osdID int64) error {
if disk.Wipe {
err := timeoutWipe(disk.Path)
Expand Down Expand Up @@ -362,20 +310,13 @@ func AddOSD(s *state.State, data types.DiskParameter, wal *types.DiskParameter,
return fmt.Errorf("Failed to set stable disk path: %w", err)
}

// Get a OSD number.
nr, err := nextOSD(s)
if err != nil {
return fmt.Errorf("Failed to find next OSD number: %w", err)
}
logger.Debugf("nextOSD number is %d for disk %s", nr, data.Path)

// Record the disk.
var nr int64
err = s.Database.Transaction(s.Context, func(ctx context.Context, tx *sql.Tx) error {
_, err := database.CreateDisk(ctx, tx, database.Disk{Member: s.Name(), Path: data.Path, OSD: int(nr)})
nr, err = database.CreateDiskPath(ctx, tx, database.DiskPath{Member: s.Name(), Path: data.Path})
if err != nil {
return fmt.Errorf("Failed to record disk: %w", err)
}

return nil
})
if err != nil {
Expand All @@ -384,17 +325,14 @@ func AddOSD(s *state.State, data types.DiskParameter, wal *types.DiskParameter,

logger.Debugf("Created disk record for osd.%d", nr)

// Keep the old path in case it changes after encrypting.
oldPath := data.Path

dataPath := filepath.Join(os.Getenv("SNAP_COMMON"), "data")
osdDataPath := filepath.Join(dataPath, "osd", fmt.Sprintf("ceph-%d", nr))

// if we fail later, make sure we free up the record
revert.Add(func() {
os.RemoveAll(osdDataPath)
s.Database.Transaction(s.Context, func(ctx context.Context, tx *sql.Tx) error {
database.DeleteDisk(ctx, tx, s.Name(), oldPath)
database.DeleteDiskPath(ctx, tx, s.Name(), data.Path)
return nil
})
})
Expand Down Expand Up @@ -514,7 +452,7 @@ func sanityCheck(s common.StateInterface, osd int64) error {
return err
}
if !exists {
return fmt.Errorf("ods.%d not found", osd)
return fmt.Errorf("osd.%d not found", osd)
}
return nil
}
Expand Down
Loading

0 comments on commit 7f6ff10

Please sign in to comment.