Skip to content

Commit

Permalink
Merge pull request juju#16647 from SimonRichardson/bootstrap-worker
Browse files Browse the repository at this point in the history
juju#16647

Adds the bootstrap worker scaffolding. The core concept
is to perform the deployment of the controller charm within 
the dependency engine and not outside of it. We currently
have issues with slide-loading various state functions in the
tests (machine legacy tests).
The new model is for bootstrapping a database with dqlite
to prevent that same modeling aspect. Instead, to access
the database you have two core concepts. 

 1. Insert bootstrap database functions
 2. Access the database from within the dependency engine.

The way we're planning to add a binary blob to the object
store is to be immutable once added. There is no concept
of updating the hash of an existing object (there are 
consequences to doing this after the fact which complicates
the model). So with that in mind, we calculate the hash of the 
blob when writing to the underlying storage and then populate
the database. This rules out doing point 1, unless we're willing
to swallow calculating the hash multiple times and I'm not even
sure it's possible to do it nicely for charmhub charms.
Moving it to the bootstrap worker allows us to keep the second
concept pure.

The only complication is ensuring that if we add the application
and fail at a later step, then it's still recoverable or exits in a clean
fashion.

<!-- Why this change is needed and what it does. -->

## Checklist

<!-- If an item is not applicable, use `~strikethrough~`. -->

- [x] Code style: imports ordered, good names, simple structure, etc
- [x] Comments saying why design decisions were made
- [x] Go unit tests, with comments saying what you're testing

## QA steps

We need to ensure that we can still bootstrap.

```sh
$ juju bootstrap lxd test --build-agent
$ juju enable-ha
$ juju add-model default
$ juju deploy ubuntu
```

## Links

**Jira card:** JUJU-4972
  • Loading branch information
jujubot authored Dec 4, 2023
2 parents 397f2e7 + 33b578a commit f97415f
Show file tree
Hide file tree
Showing 21 changed files with 1,474 additions and 86 deletions.
3 changes: 1 addition & 2 deletions caas/kubernetes/provider/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -1574,9 +1574,8 @@ func (c *controllerStack) buildContainerSpecForController() (*core.PodSpec, erro
if c.pcfg.ControllerId == agent.BootstrapControllerId {
// only do bootstrap-state on the bootstrap controller - controller-0.
bootstrapStateCmd := fmt.Sprintf(
"%s bootstrap-state %s --data-dir $JUJU_DATA_DIR %s --timeout %s",
"%s bootstrap-state --data-dir $JUJU_DATA_DIR %s --timeout %s",
c.pathJoin("$JUJU_TOOLS_DIR", "jujud"),
c.pathJoin("$JUJU_DATA_DIR", cloudconfig.FileNameBootstrapParams),
loggingOption,
c.timeout.String(),
)
Expand Down
2 changes: 1 addition & 1 deletion caas/kubernetes/provider/bootstrap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -835,7 +835,7 @@ export JUJU_TOOLS_DIR=$JUJU_DATA_DIR/tools
mkdir -p $JUJU_TOOLS_DIR
cp /opt/jujud $JUJU_TOOLS_DIR/jujud
test -e $JUJU_DATA_DIR/agents/controller-0/agent.conf || JUJU_DEV_FEATURE_FLAGS=developer-mode $JUJU_TOOLS_DIR/jujud bootstrap-state $JUJU_DATA_DIR/bootstrap-params --data-dir $JUJU_DATA_DIR --debug --timeout 10m0s
test -e $JUJU_DATA_DIR/agents/controller-0/agent.conf || JUJU_DEV_FEATURE_FLAGS=developer-mode $JUJU_TOOLS_DIR/jujud bootstrap-state --data-dir $JUJU_DATA_DIR --debug --timeout 10m0s
mkdir -p /var/lib/pebble/default/layers
cat > /var/lib/pebble/default/layers/001-jujud.yaml <<EOF
Expand Down
19 changes: 8 additions & 11 deletions cmd/jujud/agent/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"io"
"net"
"os"
"path"
"path/filepath"
"strings"
"time"
Expand Down Expand Up @@ -47,6 +48,7 @@ import (
"github.com/juju/juju/environs/imagemetadata"
"github.com/juju/juju/environs/simplestreams"
envtools "github.com/juju/juju/environs/tools"
"github.com/juju/juju/internal/cloudconfig"
"github.com/juju/juju/internal/cloudconfig/instancecfg"
"github.com/juju/juju/internal/database"
"github.com/juju/juju/internal/mongo"
Expand Down Expand Up @@ -74,10 +76,9 @@ const adminUserName = "admin"
type BootstrapCommand struct {
cmd.CommandBase
agentconf.AgentConf
BootstrapParamsFile string
Timeout time.Duration
BootstrapAgent BootstrapAgentFunc
DqliteInitializer agentbootstrap.DqliteInitializerFunc
Timeout time.Duration
BootstrapAgent BootstrapAgentFunc
DqliteInitializer agentbootstrap.DqliteInitializerFunc
}

// NewBootstrapCommand returns a new BootstrapCommand that has been initialized.
Expand Down Expand Up @@ -106,14 +107,10 @@ func (c *BootstrapCommand) SetFlags(f *gnuflag.FlagSet) {

// Init initializes the command for running.
func (c *BootstrapCommand) Init(args []string) error {
if len(args) == 0 {
return errors.New("bootstrap-params file must be specified")
}
if err := cmd.CheckEmpty(args[1:]); err != nil {
if err := cmd.CheckEmpty(args); err != nil {
return err
}
c.BootstrapParamsFile = args[0]
return c.AgentConf.CheckArgs(args[1:])
return c.AgentConf.CheckArgs(args)
}

func copyFile(dest, source string) error {
Expand Down Expand Up @@ -206,7 +203,7 @@ func (c cloudGetter) Get(_ stdcontext.Context, name string) (*jujucloud.Cloud, e

// Run initializes state for an environment.
func (c *BootstrapCommand) Run(ctx *cmd.Context) error {
bootstrapParamsData, err := os.ReadFile(c.BootstrapParamsFile)
bootstrapParamsData, err := os.ReadFile(path.Join(c.DataDir(), cloudconfig.FileNameBootstrapParams))
if err != nil {
return errors.Annotate(err, "reading bootstrap params file")
}
Expand Down
43 changes: 4 additions & 39 deletions cmd/jujud/agent/bootstrap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,7 @@ type BootstrapSuite struct {
testing.BaseSuite
mgotesting.MgoSuite

bootstrapParamsFile string
bootstrapParams instancecfg.StateInitializationParams
bootstrapParams instancecfg.StateInitializationParams

dataDir string
logDir string
Expand Down Expand Up @@ -122,7 +121,6 @@ func (s *BootstrapSuite) SetUpTest(c *gc.C) {
s.MgoSuite.SetUpTest(c)
s.dataDir = c.MkDir()
s.logDir = c.MkDir()
s.bootstrapParamsFile = filepath.Join(s.dataDir, "bootstrap-params")
s.mongoOplogSize = "1234"
s.fakeEnsureMongo = agenttest.InstallFakeEnsureMongo(s, s.dataDir)
s.PatchValue(&initiateMongoServer, s.fakeEnsureMongo.InitiateMongo)
Expand Down Expand Up @@ -383,9 +381,6 @@ func (s *BootstrapSuite) initBootstrapCommand(c *gc.C, jobs []model.MachineJob,
err = machineConf.Write()
c.Assert(err, jc.ErrorIsNil)

if len(args) == 0 {
args = []string{s.bootstrapParamsFile}
}
cmd = NewBootstrapCommand()
cmd.BootstrapAgent = s.bootstrapAgentFunc
cmd.DqliteInitializer = s.dqliteInitializerFunc
Expand Down Expand Up @@ -593,36 +588,6 @@ func (s *BootstrapSuite) TestInitialPassword(c *gc.C) {
c.Assert(node.HasVote(), jc.IsTrue)
}

var bootstrapArgTests = []struct {
input []string
err string
expectedBootstrapParamsFile string
}{
{
err: "bootstrap-params file must be specified",
input: []string{"--data-dir", "/tmp/juju/data/dir"},
}, {
input: []string{"/some/where"},
expectedBootstrapParamsFile: "/some/where",
},
}

func (s *BootstrapSuite) TestBootstrapArgs(c *gc.C) {
for i, t := range bootstrapArgTests {
c.Logf("test %d", i)
var args []string
args = append(args, t.input...)
_, cmd, err := s.initBootstrapCommand(c, nil, args...)
if t.err == "" {
c.Assert(cmd, gc.NotNil)
c.Assert(err, jc.ErrorIsNil)
c.Assert(cmd.BootstrapParamsFile, gc.Equals, t.expectedBootstrapParamsFile)
} else {
c.Assert(err, gc.ErrorMatches, t.err)
}
}
}

func (s *BootstrapSuite) TestInitializeStateArgs(c *gc.C) {
var called int
s.bootstrapAgentFunc = func(args agentbootstrap.AgentBootstrapArgs) (*agentbootstrap.AgentBootstrap, error) {
Expand All @@ -637,7 +602,7 @@ func (s *BootstrapSuite) TestInitializeStateArgs(c *gc.C) {
return nil, errors.New("failed to initialize state")
}

_, cmd, err := s.initBootstrapCommand(c, nil, "--timeout", "123s", s.bootstrapParamsFile)
_, cmd, err := s.initBootstrapCommand(c, nil, "--timeout", "123s")
c.Assert(err, jc.ErrorIsNil)
err = cmd.Run(cmdtesting.Context(c))
c.Assert(err, gc.ErrorMatches, "failed to initialize state")
Expand All @@ -652,7 +617,7 @@ func (s *BootstrapSuite) TestInitializeStateMinSocketTimeout(c *gc.C) {
c.Assert(args.MongoDialOpts.SocketTimeout, gc.Equals, 1*time.Minute)
return nil, errors.New("failed to initialize state")
}
_, cmd, err := s.initBootstrapCommand(c, nil, "--timeout", "13s", s.bootstrapParamsFile)
_, cmd, err := s.initBootstrapCommand(c, nil, "--timeout", "13s")
c.Assert(err, jc.ErrorIsNil)
err = cmd.Run(cmdtesting.Context(c))
c.Assert(err, gc.ErrorMatches, "failed to initialize state")
Expand Down Expand Up @@ -835,7 +800,7 @@ func (s *BootstrapSuite) makeTestModel(c *gc.C) {
func (s *BootstrapSuite) writeBootstrapParamsFile(c *gc.C) {
data, err := s.bootstrapParams.Marshal()
c.Assert(err, jc.ErrorIsNil)
err = os.WriteFile(s.bootstrapParamsFile, data, 0600)
err = os.WriteFile(filepath.Join(s.dataDir, "bootstrap-params"), data, 0600)
c.Assert(err, jc.ErrorIsNil)
}

Expand Down
22 changes: 11 additions & 11 deletions cmd/jujud/agent/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ var (
"api-caller",
"api-config-watcher",
"clock",
"is-responsible-flag",
"environ-upgrade-gate",
"environ-upgraded-flag",
"is-responsible-flag",
"not-alive-flag",
"not-dead-flag",
"valid-credential-flag",
Expand All @@ -37,6 +37,7 @@ var (
"charm-revision-updater", // tertiary dependency: will be inactive because migration workers will be inactive
"compute-provisioner",
"environ-tracker",
"environ-upgrader",
"firewaller",
"instance-mutater",
"instance-poller",
Expand All @@ -46,14 +47,13 @@ var (
"migration-fortress", // secondary dependency: will be inactive because depends on environ-upgrader
"migration-inactive-flag", // secondary dependency: will be inactive because depends on environ-upgrader
"migration-master", // secondary dependency: will be inactive because depends on environ-upgrader
"environ-upgrader",
"remote-relations", // tertiary dependency: will be inactive because migration workers will be inactive
"remote-relations", // tertiary dependency: will be inactive because migration workers will be inactive
"secrets-pruner",
"state-cleaner", // tertiary dependency: will be inactive because migration workers will be inactive
"status-history-pruner", // tertiary dependency: will be inactive because migration workers will be inactive
"storage-provisioner", // tertiary dependency: will be inactive because migration workers will be inactive
"undertaker",
"unit-assigner", // tertiary dependency: will be inactive because migration workers will be inactive
"secrets-pruner",
"user-secrets-drain-worker",
}
aliveModelWorkers = []string{
Expand All @@ -74,21 +74,21 @@ var (
"migration-inactive-flag",
"migration-master",
"remote-relations",
"secrets-pruner",
"state-cleaner",
"status-history-pruner",
"storage-provisioner",
"unit-assigner",
"secrets-pruner",
"user-secrets-drain-worker",
}
migratingModelWorkers = []string{
"environ-tracker",
"migration-fortress",
"migration-inactive-flag",
"migration-master",
"environ-upgrade-gate",
"environ-upgraded-flag",
"log-forwarder",
"migration-fortress",
"migration-inactive-flag",
"migration-master",
}
// ReallyLongTimeout should be long enough for the model-tracker
// tests that depend on a hosted model; its backing state is not
Expand Down Expand Up @@ -124,15 +124,15 @@ var (
"deployer",
"disk-manager",
"fan-configurer",
"is-bootstrap-flag",
"is-bootstrap-gate",
"is-controller-flag",
"is-not-controller-flag",
// "host-key-reporter", not stable, exits when done
"kvm-container-provisioner",
"log-sender",
"logging-config-updater",
"lxd-container-provisioner",
"kvm-container-provisioner",
"machine-action-runner",
//"machine-setup", exits when done
"machiner",
"proxy-config-updater",
"reboot-executor",
Expand Down
3 changes: 3 additions & 0 deletions cmd/jujud/agent/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ type MachineAgent struct {
preUpgradeSteps PreUpgradeStepsFunc
upgradeSteps UpgradeStepsFunc

bootstrapLock gate.Lock
upgradeDBLock gate.Lock
upgradeStepsLock gate.Lock

Expand Down Expand Up @@ -558,6 +559,7 @@ func (a *MachineAgent) Run(ctx *cmd.Context) (err error) {
}
a.machineLock = machineLock

a.bootstrapLock = gate.NewLock()
a.upgradeDBLock = internalupgrade.NewLock(agentConfig, jujuversion.Current)
a.upgradeStepsLock = internalupgrade.NewLock(agentConfig, jujuversion.Current)

Expand Down Expand Up @@ -625,6 +627,7 @@ func (a *MachineAgent) makeEngineCreator(
Agent: agent.APIHostPortsSetter{Agent: a},
RootDir: a.rootDir,
AgentConfigChanged: a.configChangedVal,
BootstrapLock: a.bootstrapLock,
UpgradeDBLock: a.upgradeDBLock,
UpgradeStepsLock: a.upgradeStepsLock,
UpgradeCheckLock: a.initialUpgradeCheckComplete,
Expand Down
58 changes: 57 additions & 1 deletion cmd/jujud/agent/machine/manifolds.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
"github.com/juju/juju/worker/apiservercertwatcher"
"github.com/juju/juju/worker/auditconfigupdater"
"github.com/juju/juju/worker/authenticationworker"
"github.com/juju/juju/worker/bootstrap"
"github.com/juju/juju/worker/caasunitsmanager"
"github.com/juju/juju/worker/caasupgrader"
"github.com/juju/juju/worker/centralhub"
Expand Down Expand Up @@ -134,6 +135,11 @@ type ManifoldsConfig struct {
// agent was running before the current restart.
PreviousAgentVersion version.Number

// BootstrapLock is passed to the bootstrap gate to coordinate
// workers that shouldn't do anything until the bootstrap worker
// is done.
BootstrapLock gate.Lock

// UpgradeDBLock is passed to the upgrade database gate to
// coordinate workers that shouldn't do anything until the
// upgrade-database worker is done.
Expand Down Expand Up @@ -318,6 +324,15 @@ func commonManifolds(config ManifoldsConfig) dependency.Manifolds {
// foundation stone on which most other manifolds ultimately depend.
agentName: agent.Manifold(config.Agent),

// The upgrade database gate is used to coordinate workers that should
// not do anything until the upgrade-database worker has finished
// running any required database upgrade steps.
isBootstrapGateName: gate.ManifoldEx(config.BootstrapLock),
isBootstrapFlagName: gate.FlagManifold(gate.FlagManifoldConfig{
GateName: isBootstrapGateName,
NewWorker: gate.NewFlagWorker,
}),

// The termination worker returns ErrTerminateAgent if a
// termination signal is received by the process it's running
// in. It has no inputs and its only output is the error it
Expand Down Expand Up @@ -406,7 +421,7 @@ func commonManifolds(config ManifoldsConfig) dependency.Manifolds {
// The multiwatcher manifold watches all the changes in the database
// through the AllWatcherBacking and manages notifying the multiwatchers.
// Note: ifDatabaseUpgradeComplete implies running on a controller.
multiwatcherName: ifDatabaseUpgradeComplete(multiwatcher.Manifold(multiwatcher.ManifoldConfig{
multiwatcherName: ifBootstrapComplete(multiwatcher.Manifold(multiwatcher.ManifoldConfig{
StateName: stateName,
Clock: config.Clock,
Logger: loggo.GetLogger("juju.worker.multiwatcher"),
Expand Down Expand Up @@ -818,6 +833,17 @@ func commonManifolds(config ManifoldsConfig) dependency.Manifolds {
// various responsibilities of a IAAS machine agent.
func IAASManifolds(config ManifoldsConfig) dependency.Manifolds {
manifolds := dependency.Manifolds{
// Bootstrap worker is responsible for setting up the initial machine.
bootstrapName: ifDatabaseUpgradeComplete(bootstrap.Manifold(bootstrap.ManifoldConfig{
AgentName: agentName,
StateName: stateName,
ObjectStoreName: objectStoreName,
BootstrapGateName: isBootstrapGateName,
AgentBinarySeeder: bootstrap.IAASAgentBinarySeeder,
RequiresBootstrap: bootstrap.RequiresBootstrap,
Logger: loggo.GetLogger("juju.worker.bootstrap"),
})),

toolsVersionCheckerName: ifNotMigrating(toolsversionchecker.Manifold(toolsversionchecker.ManifoldConfig{
AgentName: agentName,
APICallerName: apiCallerName,
Expand Down Expand Up @@ -1023,6 +1049,17 @@ func IAASManifolds(config ManifoldsConfig) dependency.Manifolds {
// various responsibilities of a CAAS machine agent.
func CAASManifolds(config ManifoldsConfig) dependency.Manifolds {
return mergeManifolds(config, dependency.Manifolds{
// Bootstrap worker is responsible for setting up the initial machine.
bootstrapName: ifDatabaseUpgradeComplete(bootstrap.Manifold(bootstrap.ManifoldConfig{
AgentName: agentName,
StateName: stateName,
ObjectStoreName: objectStoreName,
BootstrapGateName: isBootstrapGateName,
AgentBinarySeeder: bootstrap.CAASAgentBinarySeeder,
RequiresBootstrap: bootstrap.RequiresBootstrap,
Logger: loggo.GetLogger("juju.worker.bootstrap"),
})),

// TODO(caas) - when we support HA, only want this on primary
upgraderName: caasupgrader.Manifold(caasupgrader.ManifoldConfig{
AgentName: agentName,
Expand Down Expand Up @@ -1083,6 +1120,21 @@ func clockManifold(clock clock.Clock) dependency.Manifold {
}
}

// ifBootstrapComplete gates against the bootstrap worker completing.
// This ensures that all blobs (agent binaries and controller charm) are
// available before the machine agent starts.
// We currently use this to provide a happier experience for the user
// when bootstrapping a controller, before immediately going into HA. If the
// underlying object store storage is slow, then retrying for the agent binary
// against the controller can lead to slower HA deployment. It might be worth
// revisiting this in the future, so we release the gate as soon as the binaries
// are being uploaded.
var ifBootstrapComplete = engine.Housing{
Flags: []string{
isBootstrapFlagName,
},
}.Decorate

var ifFullyUpgraded = engine.Housing{
Flags: []string{
upgradeStepsFlagName,
Expand Down Expand Up @@ -1141,6 +1193,10 @@ const (
pubSubName = "pubsub-forwarder"
clockName = "clock"

bootstrapName = "bootstrap"
isBootstrapGateName = "is-bootstrap-gate"
isBootstrapFlagName = "is-bootstrap-flag"

upgradeDatabaseName = "upgrade-database-runner"
upgradeDatabaseGateName = "upgrade-database-gate"
upgradeDatabaseFlagName = "upgrade-database-flag"
Expand Down
Loading

0 comments on commit f97415f

Please sign in to comment.