From db83bfcd932bc7db25d6ecffc1f69d41b34bf9ed Mon Sep 17 00:00:00 2001 From: Jake Schuurmans Date: Tue, 24 Sep 2024 17:48:50 -0400 Subject: [PATCH] Huge Refactor Refactor CMD Replace old tasks and steps with new Task Replace slog with logrus. I know we want to move off logrus, but I think we need to do this at a later date. Move handler to bioscfg move publisher to bioscfg, in its own file --- chart/Chart.yaml | 2 +- cmd/bioscfg.go | 26 +++ cmd/root.go | 29 +--- cmd/run.go | 93 ---------- internal/bioscfg/bioscfg.go | 113 ++++++++++++ internal/bioscfg/errors.go | 9 + internal/bioscfg/handler.go | 154 +++++++++++++++++ internal/bioscfg/publish.go | 113 ++++++++++++ internal/bioscfg/run.go | 52 ++++++ internal/bioscfg/task.go | 89 ++++++++++ internal/config/config.go | 11 -- internal/handlers/handlers.go | 76 -------- internal/log/log.go | 84 --------- internal/model/model.go | 23 +-- internal/tasks/steps.go | 140 --------------- internal/tasks/tasks.go | 314 ---------------------------------- internal/tasks/tasks_test.go | 69 -------- internal/version/version.go | 17 ++ main.go | 2 - 19 files changed, 584 insertions(+), 832 deletions(-) create mode 100644 cmd/bioscfg.go delete mode 100644 cmd/run.go create mode 100644 internal/bioscfg/bioscfg.go create mode 100644 internal/bioscfg/errors.go create mode 100644 internal/bioscfg/handler.go create mode 100644 internal/bioscfg/publish.go create mode 100644 internal/bioscfg/run.go create mode 100644 internal/bioscfg/task.go delete mode 100644 internal/handlers/handlers.go delete mode 100644 internal/log/log.go delete mode 100644 internal/tasks/steps.go delete mode 100644 internal/tasks/tasks.go delete mode 100644 internal/tasks/tasks_test.go diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 2d651d6..11594b0 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: bioscfg description: A chart to control BMCs -version: v0.1.0 +version: v0.1.1 keywords: - bmc - bios diff --git a/cmd/bioscfg.go b/cmd/bioscfg.go new file mode 100644 index 0000000..ff5615e --- /dev/null +++ b/cmd/bioscfg.go @@ -0,0 +1,26 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/metal-toolbox/bioscfg/internal/bioscfg" + "github.com/spf13/cobra" +) + +// bioscfgCmd represents the bioscfg command +var bioscfgCmd = &cobra.Command{ + Use: "run", + Short: "Run the BiosCfg Controller", + Run: func(cmd *cobra.Command, _ []string) { + err := bioscfg.Run(cmd.Context(), ConfigFile, LogLevel, EnableProfiling) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + }, +} + +func init() { + rootCmd.AddCommand(bioscfgCmd) +} diff --git a/cmd/root.go b/cmd/root.go index 0b9f446..52fae87 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -18,26 +18,21 @@ package cmd import ( "fmt" - "log/slog" "os" - "github.com/metal-toolbox/bioscfg/internal/model" "github.com/spf13/cobra" ) var ( - args = &model.Args{} + LogLevel string + ConfigFile string + EnableProfiling bool ) // rootCmd represents the base command when called without any subcommands var rootCmd = &cobra.Command{ - Use: "bioscfg", - Short: "bioscfg remotely manages BIOS settings", - Run: func(cmd *cobra.Command, _ []string) { - if err := runWorker(cmd.Context(), args); err != nil { - os.Exit(1) - } - }, + Use: "controller", + Short: "Fleet Services Controllers", } // Execute adds all child commands to the root command and sets flags appropriately. @@ -51,19 +46,11 @@ func Execute() { func init() { rootCmd.PersistentFlags(). - StringVar(&args.ConfigFile, "config", "", "configuration file (default is $HOME/.bioscfg.yml)") + StringVar(&ConfigFile, "config", "", "configuration file (default is $HOME/.bioscfg.yml)") rootCmd.PersistentFlags(). - StringVar(&args.LogLevel, "log-level", "info", "set logging level - debug, trace") + StringVar(&LogLevel, "log-level", "info", "set logging level - debug, trace") rootCmd.PersistentFlags(). - BoolVarP(&args.EnableProfiling, "enable-pprof", "", false, "Enable profiling endpoint at: http://localhost:9091") - - rootCmd.PersistentFlags(). - StringVarP(&args.FacilityCode, "facility-code", "f", "", "The facility code this bioscfg instance is associated with") - - if err := rootCmd.MarkPersistentFlagRequired("facility-code"); err != nil { - slog.Error("failed to mark required flag", "error", err) - os.Exit(1) - } + BoolVarP(&EnableProfiling, "enable-pprof", "", false, "Enable profiling endpoint at: http://localhost:9091") } diff --git a/cmd/run.go b/cmd/run.go deleted file mode 100644 index bd347cb..0000000 --- a/cmd/run.go +++ /dev/null @@ -1,93 +0,0 @@ -package cmd - -import ( - "context" - "log/slog" - _ "net/http/pprof" // nolint:gosec // profiling endpoint listens on localhost. - "os" - "os/signal" - "syscall" - - "github.com/equinix-labs/otel-init-go/otelinit" - "github.com/metal-toolbox/bioscfg/internal/config" - "github.com/metal-toolbox/bioscfg/internal/handlers" - "github.com/metal-toolbox/bioscfg/internal/log" - "github.com/metal-toolbox/bioscfg/internal/metrics" - "github.com/metal-toolbox/bioscfg/internal/model" - "github.com/metal-toolbox/bioscfg/internal/profiling" - "github.com/metal-toolbox/bioscfg/internal/store/fleetdb" - "github.com/metal-toolbox/bioscfg/internal/version" - "github.com/metal-toolbox/ctrl" -) - -func runWorker(ctx context.Context, args *model.Args) error { - cfg, err := config.Load(args.ConfigFile, args.LogLevel) - if err != nil { - slog.Error("Failed to load configuration", "error", err) - return err - } - - slog.Info("Configuration loaded", cfg.AsLogFields()...) - - log.SetLevel(cfg.LogLevel) - - // serve metrics endpoint - metrics.ListenAndServe() - version.ExportBuildInfoMetric() - - if args.EnableProfiling { - profiling.Enable() - } - - ctx, otelShutdown := otelinit.InitOpenTelemetry(ctx, model.AppName) - defer otelShutdown(ctx) - - log.NewLogrusLogger(cfg.LogLevel) - repository, err := fleetdb.New(ctx, &cfg.Endpoints.FleetDB, log.NewLogrusLogger(cfg.LogLevel)) - if err != nil { - slog.Error("Failed to create repository", "error", err) - return err - } - - termChan := make(chan os.Signal, 1) - signal.Notify(termChan, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) - - ctx, cancel := context.WithCancel(ctx) - - // Cancel the context when we receive a termination signal. - go func() { - s := <-termChan - slog.Info("Received signal for termination, exiting...", "signal", s.String()) - cancel() - }() - - nc := ctrl.NewNatsController( - model.AppName, - cfg.FacilityCode, - model.AppSubject, - cfg.Endpoints.Nats.URL, - cfg.Endpoints.Nats.CredsFile, - model.AppSubject, - ctrl.WithConcurrency(cfg.Concurrency), - ctrl.WithKVReplicas(cfg.Endpoints.Nats.KVReplicationFactor), - ctrl.WithConnectionTimeout(cfg.Endpoints.Nats.ConnectTimeout), - ctrl.WithLogger(log.NewLogrusLogger(cfg.LogLevel)), - ) - - if err = nc.Connect(ctx); err != nil { - slog.Error("Failed to connect to NATS", "error", err) - return err - } - - slog.With(version.Current().AsLogFields()...).Info("bioscfg worker running") - - err = nc.ListenEvents(ctx, func() ctrl.TaskHandler { - return handlers.NewHandlerFactory(repository) - }) - if err != nil { - slog.Error("Failed to listen for events", "error", err) - return err - } - - return nil -} diff --git a/internal/bioscfg/bioscfg.go b/internal/bioscfg/bioscfg.go new file mode 100644 index 0000000..15ede94 --- /dev/null +++ b/internal/bioscfg/bioscfg.go @@ -0,0 +1,113 @@ +package bioscfg + +import ( + "context" + + "github.com/metal-toolbox/bioscfg/internal/config" + "github.com/metal-toolbox/bioscfg/internal/store/fleetdb" + "github.com/metal-toolbox/ctrl" + rctypes "github.com/metal-toolbox/rivets/condition" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +var ( + pkgName = "internal/bioscfg" +) + +// BiosCfg BiosCfg Controller Struct +type BiosCfg struct { + cfg *config.Configuration + logger *logrus.Entry + ctx context.Context + fleetdb *fleetdb.Store + nc *ctrl.NatsController +} + +// New create a new BiosCfg Controller +func New(ctx context.Context, cfg *config.Configuration, logger *logrus.Entry) (*BiosCfg, error) { + bc := &BiosCfg{ + cfg: cfg, + logger: logger, + ctx: ctx, + } + + err := bc.initDependences() + if err != nil { + return nil, err + } + + return bc, nil +} + +// Listen listen to Nats for tasks +func (bc *BiosCfg) Listen() error { + handleFactory := func() ctrl.TaskHandler { + return &TaskHandler{ + cfg: bc.cfg, + logger: bc.logger, + controllerID: bc.nc.ID(), + fleetdb: bc.fleetdb, + } + } + + err := bc.nc.ListenEvents(bc.ctx, handleFactory) + if err != nil { + return err + } + + return nil +} + +// initDependences Initialize network dependencies +func (bc *BiosCfg) initDependences() error { + err := bc.initNats() + if err != nil { + return errors.Wrap(err, "failed to initialize connection to nats") + } + + err = bc.initFleetDB() + if err != nil { + return errors.Wrap(err, "failed to initialize connection to fleetdb") + } + + return nil +} + +func (bc *BiosCfg) initNats() error { + bc.nc = ctrl.NewNatsController( + string(rctypes.BiosControl), + bc.cfg.FacilityCode, + string(rctypes.BiosControl), + bc.cfg.Endpoints.Nats.URL, + bc.cfg.Endpoints.Nats.CredsFile, + rctypes.BiosControl, + ctrl.WithConcurrency(bc.cfg.Concurrency), + ctrl.WithKVReplicas(bc.cfg.Endpoints.Nats.KVReplicationFactor), + ctrl.WithLogger(bc.logger.Logger), + ctrl.WithConnectionTimeout(bc.cfg.Endpoints.Nats.ConnectTimeout), + ) + + err := bc.nc.Connect(bc.ctx) + if err != nil { + bc.logger.Error(err) + return err + } + + return nil +} + +func (bc *BiosCfg) initFleetDB() error { + store, err := fleetdb.New( + bc.ctx, + &bc.cfg.Endpoints.FleetDB, + bc.logger.Logger, + ) + if err != nil { + return err + } + + bc.fleetdb = store + + return nil +} diff --git a/internal/bioscfg/errors.go b/internal/bioscfg/errors.go new file mode 100644 index 0000000..1d32181 --- /dev/null +++ b/internal/bioscfg/errors.go @@ -0,0 +1,9 @@ +package bioscfg + +import "errors" + +var ( + errInvalidConditionParams = errors.New("invalid condition parameters") + errTaskConv = errors.New("error in generic Task conversion") + errUnsupportedAction = errors.New("unsupported action") +) diff --git a/internal/bioscfg/handler.go b/internal/bioscfg/handler.go new file mode 100644 index 0000000..25d911f --- /dev/null +++ b/internal/bioscfg/handler.go @@ -0,0 +1,154 @@ +package bioscfg + +import ( + "context" + "time" + + "github.com/metal-toolbox/bioscfg/internal/config" + "github.com/metal-toolbox/bioscfg/internal/model" + "github.com/metal-toolbox/bioscfg/internal/store/bmc" + "github.com/metal-toolbox/bioscfg/internal/store/fleetdb" + "github.com/metal-toolbox/ctrl" + rctypes "github.com/metal-toolbox/rivets/condition" + "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/trace" +) + +type TaskHandler struct { + logger *logrus.Entry + cfg *config.Configuration + fleetdb *fleetdb.Store + bmcClient bmc.BMC + publisher ctrl.Publisher + server *model.Asset + task *Task + startTS time.Time + controllerID string +} + +func (th *TaskHandler) HandleTask(ctx context.Context, genTask *rctypes.Task[any, any], publisher ctrl.Publisher) error { + ctx, span := otel.Tracer(pkgName).Start( + ctx, + "bioscfg.HandleTask", + ) + defer span.End() + + var err error + th.publisher = publisher + + // Ungeneric the task + th.task, err = NewTask(genTask) + if err != nil { + th.logger.WithFields(logrus.Fields{ + "conditionID": genTask.ID, + "controllerID": th.controllerID, + "err": err.Error(), + }).Error("asset lookup error") + return err + } + + // Get Server + th.server, err = th.fleetdb.AssetByID(ctx, th.task.Parameters.AssetID) + if err != nil { + th.logger.WithFields(logrus.Fields{ + "assetID": th.task.Parameters.AssetID.String(), + "conditionID": th.task.ID, + "controllerID": th.controllerID, + "err": err.Error(), + }).Error("asset lookup error") + + return ctrl.ErrRetryHandler + } + + // New log entry for this condition + th.logger = th.logger.WithFields( + logrus.Fields{ + "controllerID": th.controllerID, + "conditionID": th.task.ID.String(), + "serverID": th.server.ID.String(), + "bmc": th.server.BmcAddress.String(), + "action": th.task.Parameters.Action, + }, + ) + + // Get BMC Client + if th.cfg.Dryrun { // Fake BMC + th.bmcClient = bmc.NewDryRunBMCClient(th.server) + th.logger.Warn("Running BMC in Dryrun mode") + } else { + th.bmcClient = bmc.NewBMCClient(th.server, th.logger) + } + + err = th.bmcClient.Open(ctx) + if err != nil { + th.logger.WithError(err).Error("bmc connection failed to connect") + return err + } + defer func() { + if err := th.bmcClient.Close(ctx); err != nil { + th.logger.WithError(err).Error("bmc connection close error") + } + }() + + return th.Run(ctx) +} + +func (th *TaskHandler) Run(ctx context.Context) error { + ctx, span := otel.Tracer(pkgName).Start( + ctx, + "TaskHandler.Run", + trace.WithSpanKind(trace.SpanKindConsumer), + ) + defer span.End() + + th.logger.Info("running condition action") + err := th.publishActive(ctx, "running condition action") + if err != nil { + return err + } + + switch th.task.Parameters.Action { + case rctypes.ResetSettings: + return th.ResetBios(ctx) + default: + return th.failedWithError(ctx, string(th.task.Parameters.Action), errUnsupportedAction) + } +} + +// ResetBios reset the bios of the server +func (th *TaskHandler) ResetBios(ctx context.Context) error { + // Get Power State + state, err := th.bmcClient.GetPowerState(ctx) + if err != nil { + return th.failedWithError(ctx, "error getting power state", err) + } + + err = th.publishActivef(ctx, "current power state: %s", state) + if err != nil { + return err + } + + // Reset Bios + err = th.bmcClient.ResetBios(ctx) + if err != nil { + return th.failedWithError(ctx, "error reseting bios", err) + } + + err = th.publishActive(ctx, "BIOS settings reset") + if err != nil { + return err + } + + // Reboot (if ON) + if state == model.PowerStateOn { + err = th.bmcClient.SetPowerState(ctx, model.PowerStateReset) + if err != nil { + return th.failedWithError(ctx, "failed to reboot server", err) + } + + return th.successful(ctx, "rebooting server") + } + + return th.successful(ctx, "skipping server reboot, not on") +} diff --git a/internal/bioscfg/publish.go b/internal/bioscfg/publish.go new file mode 100644 index 0000000..551ea3a --- /dev/null +++ b/internal/bioscfg/publish.go @@ -0,0 +1,113 @@ +package bioscfg + +import ( + "context" + "fmt" + "time" + + "github.com/metal-toolbox/bioscfg/internal/metrics" + rctypes "github.com/metal-toolbox/rivets/condition" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" +) + +func (th *TaskHandler) publish(ctx context.Context, status string, state rctypes.State) error { + th.task.State = state + th.task.Status.Append(status) + + genTask, err := th.task.ToGeneric() + if err != nil { + th.logger.WithError(errTaskConv).Error() + return err + } + + if errDelay := sleepInContext(ctx, 10*time.Second); errDelay != nil { + return context.Canceled + } + + return th.publisher.Publish(ctx, + genTask, + false, + ) +} + +func (th *TaskHandler) publishActive(ctx context.Context, status string) error { + err := th.publish(ctx, status, rctypes.Active) + if err != nil { + th.logger.Infof("failed to publish condition status: %s", status) + return err + } + + th.logger.Infof("condition active: %s", status) + return nil +} + +func (th *TaskHandler) publishActivef(ctx context.Context, status string, args ...interface{}) error { + if len(args) > 0 { + status = fmt.Sprintf(status, args) + } + + return th.publishActive(ctx, status) +} + +// failed condition helper method +func (th *TaskHandler) failed(ctx context.Context, status string) error { + err := th.publish(ctx, status, rctypes.Failed) + + th.registerConditionMetrics(string(rctypes.Failed)) + + if err != nil { + th.logger.Infof("failed to publish condition status: %s", status) + return err + } + + th.logger.Warnf("condition failed: %s", status) + return nil +} + +func (th *TaskHandler) failedWithError(ctx context.Context, status string, err error) error { + newError := th.failed(ctx, errors.Wrap(err, status).Error()) + if newError != nil { + if err != nil { + return errors.Wrap(newError, err.Error()) + } + + return newError + } + + return err +} + +// successful condition helper method +func (th *TaskHandler) successful(ctx context.Context, status string) error { + err := th.publish(ctx, status, rctypes.Succeeded) + + th.registerConditionMetrics(string(rctypes.Succeeded)) + + if err != nil { + th.logger.Warnf("failed to publish condition status: %s", status) + return err + } + + th.logger.Infof("condition complete: %s", status) + return nil +} + +func (th *TaskHandler) registerConditionMetrics(status string) { + metrics.ConditionRunTimeSummary.With( + prometheus.Labels{ + "condition": string(rctypes.ServerControl), + "state": status, + }, + ).Observe(time.Since(th.startTS).Seconds()) +} + +// sleepInContext +func sleepInContext(ctx context.Context, t time.Duration) error { + select { + case <-time.After(t): + return nil + case <-ctx.Done(): + return context.Canceled + } +} diff --git a/internal/bioscfg/run.go b/internal/bioscfg/run.go new file mode 100644 index 0000000..79ad1af --- /dev/null +++ b/internal/bioscfg/run.go @@ -0,0 +1,52 @@ +package bioscfg + +import ( + "context" + + "github.com/equinix-labs/otel-init-go/otelinit" + "github.com/metal-toolbox/bioscfg/internal/config" + "github.com/metal-toolbox/bioscfg/internal/metrics" + "github.com/metal-toolbox/bioscfg/internal/model" + "github.com/metal-toolbox/bioscfg/internal/profiling" + "github.com/metal-toolbox/bioscfg/internal/version" + "github.com/sirupsen/logrus" +) + +func Run(ctx context.Context, configFile, logLevel string, enableProfiling bool) error { + cfg, err := config.Load(configFile, logLevel) + if err != nil { + return err + } + + logger := logrus.New() + // TODO; Replace cfg.LogLevel with logrus.LogLevel, it should marshall/unmarshall? + logger.Level, err = logrus.ParseLevel(cfg.LogLevel) + if err != nil { + return err + } + + metrics.ListenAndServe() + version.ExportBuildInfoMetric() + if enableProfiling { + profiling.Enable() + } + + ctx, otelShutdown := otelinit.InitOpenTelemetry(ctx, model.Name) + defer otelShutdown(ctx) + + v, err := version.Current().AsMap() + if err != nil { + return err + } + loggerEntry := logger.WithFields(v) + loggerEntry.Infof("Initializing %s", model.Name) + + controller, err := New(ctx, cfg, loggerEntry) + if err != nil { + return err + } + + loggerEntry.Infof("Success! %s is starting to listen for conditions", model.Name) + + return controller.Listen() +} diff --git a/internal/bioscfg/task.go b/internal/bioscfg/task.go new file mode 100644 index 0000000..8f74604 --- /dev/null +++ b/internal/bioscfg/task.go @@ -0,0 +1,89 @@ +package bioscfg + +import ( + "encoding/json" + + rctypes "github.com/metal-toolbox/rivets/condition" + rtypes "github.com/metal-toolbox/rivets/types" + "github.com/mitchellh/copystructure" + "github.com/pkg/errors" +) + +type Task rctypes.Task[*rctypes.BiosControlTaskParameters, json.RawMessage] + +func NewTask(task *rctypes.Task[any, any]) (*Task, error) { + paramsJSON, ok := task.Parameters.(json.RawMessage) + if !ok { + return nil, errInvalidConditionParams + } + + params := rctypes.BiosControlTaskParameters{} + if err := json.Unmarshal(paramsJSON, ¶ms); err != nil { + return nil, err + } + + // deep copy fields referenced by pointer + asset, err := copystructure.Copy(task.Server) + if err != nil { + return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Server") + } + + fault, err := copystructure.Copy(task.Fault) + if err != nil { + return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Fault") + } + + return &Task{ + StructVersion: task.StructVersion, + ID: task.ID, + Kind: task.Kind, + State: task.State, + Status: task.Status, + Parameters: ¶ms, + Fault: fault.(*rctypes.Fault), + FacilityCode: task.FacilityCode, + Server: asset.(*rtypes.Server), + WorkerID: task.WorkerID, + TraceID: task.TraceID, + SpanID: task.SpanID, + CreatedAt: task.CreatedAt, + UpdatedAt: task.UpdatedAt, + CompletedAt: task.CompletedAt, + }, nil +} + +func (task *Task) ToGeneric() (*rctypes.Task[any, any], error) { + paramsJSON, err := task.Parameters.Marshal() + if err != nil { + return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Parameters") + } + + // deep copy fields referenced by pointer + asset, err := copystructure.Copy(task.Server) + if err != nil { + return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Server") + } + + fault, err := copystructure.Copy(task.Fault) + if err != nil { + return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Fault") + } + + return &rctypes.Task[any, any]{ + StructVersion: task.StructVersion, + ID: task.ID, + Kind: task.Kind, + State: task.State, + Status: task.Status, + Parameters: paramsJSON, + Fault: fault.(*rctypes.Fault), + FacilityCode: task.FacilityCode, + Server: asset.(*rtypes.Server), + WorkerID: task.WorkerID, + TraceID: task.TraceID, + SpanID: task.SpanID, + CreatedAt: task.CreatedAt, + UpdatedAt: task.UpdatedAt, + CompletedAt: task.CompletedAt, + }, nil +} diff --git a/internal/config/config.go b/internal/config/config.go index d78e9e4..7955856 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -136,14 +136,3 @@ func (cfg *Configuration) envBindVars(v *viper.Viper) error { return nil } - -func (cfg *Configuration) AsLogFields() []any { - return []any{ - "logLevel", cfg.LogLevel, - "concurrency", cfg.Concurrency, - "facilityCode", cfg.FacilityCode, - "authenticate", cfg.Endpoints.FleetDB.Authenticate, - "fleetDBUrl", cfg.Endpoints.FleetDB.URL, - "natsURL", cfg.Endpoints.Nats.URL, - } -} diff --git a/internal/handlers/handlers.go b/internal/handlers/handlers.go deleted file mode 100644 index f626ae8..0000000 --- a/internal/handlers/handlers.go +++ /dev/null @@ -1,76 +0,0 @@ -package handlers - -import ( - "context" - "log/slog" - _ "net/http/pprof" // nolint:gosec // pprof path is only exposed over localhost - - "github.com/bmc-toolbox/bmclib/v2" - "github.com/metal-toolbox/bioscfg/internal/model" - "github.com/metal-toolbox/bioscfg/internal/store/fleetdb" - "github.com/metal-toolbox/bioscfg/internal/tasks" - "github.com/metal-toolbox/ctrl" - rctypes "github.com/metal-toolbox/rivets/condition" -) - -// HandlerFactory has the data and business logic for the application -type HandlerFactory struct { - repository *fleetdb.Store -} - -// NewHandlerFactory returns a new instance of the Handler -func NewHandlerFactory(repository *fleetdb.Store) *HandlerFactory { - return &HandlerFactory{ - repository: repository, - } -} - -func (h *HandlerFactory) getAsset(ctx context.Context, params *rctypes.BiosControlTaskParameters) (*model.Asset, error) { - asset, err := h.repository.AssetByID(ctx, params.AssetID) - if err != nil { - // TODO: Check error type - return nil, ctrl.ErrRetryHandler - } - - slog.Debug("Found asset", asset.AsLogFields()...) - - return asset, nil -} - -// Handle will handle the received condition -func (h *HandlerFactory) HandleTask( - ctx context.Context, - genTask *rctypes.Task[any, any], - publisher ctrl.Publisher, -) error { - slog.Debug("Handling condition", "condition", genTask) - - task, err := tasks.NewTask(genTask) - if err != nil { - return err - } - - server, err := h.getAsset(ctx, task.Parameters) - if err != nil { - return err - } - - var oldTask tasks.Task - - switch task.Parameters.Action { - case rctypes.ResetSettings: - oldTask = tasks.NewBiosResetTask(server) - default: - slog.With(server.AsLogFields()...).Error("Invalid action", "action", task.Parameters.Action) - return model.ErrInvalidAction - } - - runner := tasks.NewTaskRunner(publisher, oldTask, task) - client := bmclib.NewClient(server.BmcAddress.String(), server.BmcUsername, server.BmcPassword) - - if err := runner.Run(ctx, client); err != nil { - slog.Error("Failed running task", "error", err, "task", oldTask.Name()) - } - - return nil -} diff --git a/internal/log/log.go b/internal/log/log.go deleted file mode 100644 index 872c8bc..0000000 --- a/internal/log/log.go +++ /dev/null @@ -1,84 +0,0 @@ -package log - -import ( - "log/slog" - "os" - - runtime "github.com/banzaicloud/logrus-runtime-formatter" - "github.com/sirupsen/logrus" -) - -type Level string - -const ( - LevelTrace Level = "trace" - LevelDebug Level = "debug" - LevelInfo Level = "info" - LevelWarn Level = "warn" - LevelError Level = "error" -) - -var levelVar *slog.LevelVar - -// InitLogger will initialize the default logger instance. -func InitLogger() { - levelVar = &slog.LevelVar{} - levelVar.Set(slog.LevelInfo) - - logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: levelVar})) - - slog.SetDefault(logger) -} - -// SetLevel will set the logging level of the default logger at runtime. -func SetLevel(loglevel string) { - switch Level(loglevel) { - case LevelDebug: - levelVar.Set(slog.LevelDebug) - case LevelInfo, "": - levelVar.Set(slog.LevelInfo) - case LevelWarn: - levelVar.Set(slog.LevelWarn) - case LevelError: - levelVar.Set(slog.LevelError) - default: - levelVar.Set(slog.LevelInfo) - slog.Warn("Unknown log level, defaulting to info", "loglevel", loglevel) - } -} - -// NewLogrusLogger will generate a new logrus logger instance -func NewLogrusLogger(logLevel string) *logrus.Logger { - logger := logrus.New() - - logger.SetOutput(os.Stdout) - - switch Level(logLevel) { - case LevelDebug: - logger.Level = logrus.DebugLevel - case LevelTrace: - logger.Level = logrus.TraceLevel - case LevelInfo, "": - logger.Level = logrus.InfoLevel - case LevelWarn: - logger.Level = logrus.WarnLevel - case LevelError: - logger.Level = logrus.ErrorLevel - default: - logger.Level = logrus.InfoLevel - logger.WithField("logLevel", logLevel).Warn("Unknown log level, defaulting to info") - } - - logger.Level = logrus.InfoLevel - - runtimeFormatter := &runtime.Formatter{ - ChildFormatter: &logrus.JSONFormatter{}, - File: true, - Line: true, - BaseNameOnly: true, - } - - logger.SetFormatter(runtimeFormatter) - - return logger -} diff --git a/internal/model/model.go b/internal/model/model.go index b393758..508c6f0 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -10,9 +10,8 @@ type ( StoreKind string ) -const ( - AppName = "bioscfg" - AppSubject = "biosControl" +var ( + Name = "bioscfg" ) // nolint:govet // prefer to keep field ordering as is @@ -32,21 +31,3 @@ type Asset struct { // Facility this Asset is hosted in. FacilityCode string } - -func (a *Asset) AsLogFields() []any { - return []any{ - "asset_id", a.ID.String(), - "address", a.BmcAddress.String(), - "vendor", a.Vendor, - "model", a.Model, - "serial", a.Serial, - "facility", a.FacilityCode, - } -} - -type Args struct { - LogLevel string - ConfigFile string - FacilityCode string - EnableProfiling bool -} diff --git a/internal/tasks/steps.go b/internal/tasks/steps.go deleted file mode 100644 index 5763d41..0000000 --- a/internal/tasks/steps.go +++ /dev/null @@ -1,140 +0,0 @@ -package tasks - -import ( - "context" - "log/slog" - - "github.com/bmc-toolbox/bmclib/v2" - "github.com/metal-toolbox/bioscfg/internal/model" - rctypes "github.com/metal-toolbox/rivets/condition" - "github.com/pkg/errors" -) - -// StepStatus has status about a step, to be reported as part of the overall task. -type StepStatus struct { - Step string `json:"step"` - Status string `json:"status"` - Details string `json:"details,omitempty"` - Error string `json:"error,omitempty"` -} - -// NewStepStatus will create a new step status struct -func NewStepStatus(stepName string, state rctypes.State, details string, err error) *StepStatus { - status := &StepStatus{ - Step: stepName, - Status: string(state), - Details: details, - } - - if err != nil { - status.Error = err.Error() - } - - return status -} - -func (s *StepStatus) AsLogFields() []any { - return []any{ - "task", s.Step, - "status", s.Status, - "details", s.Details, - "error", s.Error, - } -} - -// Step is a unit of work. Multiple steps accomplish a task. -type Step interface { - // Name of this step - Name() string - // Run will execute the code to accomplish this step - Run(ctx context.Context, client *bmclib.Client, data sharedData) (string, error) -} - -type getServerPowerStateStep struct { - name string -} - -// GetServerPowerStateStep will get the current power state of a server, -// and store it in sharedData. -func GetServerPowerStateStep() Step { - return &getServerPowerStateStep{ - name: "GetServerPowerState", - } -} - -func (t *getServerPowerStateStep) Name() string { - return t.name -} - -func (t *getServerPowerStateStep) Run(ctx context.Context, client *bmclib.Client, data sharedData) (string, error) { - state, err := client.GetPowerState(ctx) - if err != nil { - return "Failed to get current power state", err - } - - data[currentPowerStateKey] = state - - return "Current power state: " + state, nil -} - -type biosResetStep struct { - name string -} - -// BiosResetStep will use the client to reset the BIOS settings. -func BiosResetStep() Step { - return &biosResetStep{ - name: "BiosReset", - } -} - -func (t *biosResetStep) Name() string { - return t.name -} - -func (t *biosResetStep) Run(ctx context.Context, client *bmclib.Client, _ sharedData) (string, error) { - err := client.ResetBiosConfiguration(ctx) - if err != nil { - return "Failed to reset bios settings", err - } - - return "BIOS settings reset", nil -} - -type serverRebootStep struct { - name string -} - -// ServerRebootStep will reboot the server, if necessary, per the information in sharedData -func ServerRebootStep() Step { - return &serverRebootStep{ - name: "ServerReboot", - } -} - -func (t *serverRebootStep) Name() string { - return t.name -} - -func (t *serverRebootStep) Run(ctx context.Context, client *bmclib.Client, data sharedData) (string, error) { - powerState, ok := data[currentPowerStateKey].(string) - if !ok { - return "Reboot requirement unknown", errors.New("missing power state") - } - - var details string - - if powerState == model.PowerStateOn { - slog.Info("Rebooting server", "powerState", powerState) - _, err := client.SetPowerState(ctx, model.PowerStateReset) - if err != nil { - return "Failed to reset power state", err - } - details = "Rebooting server" - } else { - slog.Info("Skipping server reboot", "ok", ok, "powerState", powerState) - details = "Reboot not required" - } - - return details, nil -} diff --git a/internal/tasks/tasks.go b/internal/tasks/tasks.go deleted file mode 100644 index 1bd7480..0000000 --- a/internal/tasks/tasks.go +++ /dev/null @@ -1,314 +0,0 @@ -package tasks - -import ( - "context" - "encoding/json" - "log/slog" - "runtime/debug" - - "github.com/bmc-toolbox/bmclib/v2" - "github.com/metal-toolbox/bioscfg/internal/model" - "github.com/metal-toolbox/ctrl" - rctypes "github.com/metal-toolbox/rivets/condition" - rtypes "github.com/metal-toolbox/rivets/types" - "github.com/mitchellh/copystructure" - "github.com/pkg/errors" -) - -var ( - currentPowerStateKey = "currentPowerState" - errInvalidConditionParams = errors.New("invalid condition parameters") - errTaskConv = errors.New("error in generic Task conversion") -) - -// Miscellaneous -type sharedData map[string]interface{} - -// TaskStatus has status about a task, and it's steps. -type TaskStatus struct { - Task string `json:"task"` - Status string `json:"status"` - Details string `json:"details,omitempty"` - Error string `json:"error,omitempty"` - ActiveStep string `json:"active_step,omitempty"` - Steps []*StepStatus `json:"steps"` -} - -// NewTaskStatus will generate a new task status struct -func NewTaskStatus(taskName string, state rctypes.State) *TaskStatus { - return &TaskStatus{ - Task: taskName, - Status: string(state), - } -} - -func (r *TaskStatus) AsLogFields() []string { - return []string{ - "task", r.Task, - "status", r.Status, - "details", r.Details, - "error", r.Error, - } -} - -func (r *TaskStatus) Marshal() ([]byte, error) { - respBytes, err := json.Marshal(r) - if err != nil { - return nil, errors.Wrap(err, "failed to marshal response to json") - } - - return respBytes, nil -} - -// Task is a unit of work to address a condition from condition orchestrator. -// The task multiple steps to accomplish the task. -type Task interface { - // Name of the task - Name() string - // Asset is the server that will be affected by this task - Asset() *model.Asset - // Steps is the multiple units of work that will accomplish this task - Steps() []Step -} - -type biosResetTask struct { - name string - asset *model.Asset - steps []Step -} - -// NewBiosResetTask creates the task for resetting the BIOS of a server to default settings. -func NewBiosResetTask(asset *model.Asset) Task { - return &biosResetTask{ - name: "BiosResetSettings", - asset: asset, - steps: []Step{ - GetServerPowerStateStep(), - BiosResetStep(), - ServerRebootStep(), - }, - } -} - -func (j *biosResetTask) Name() string { - return j.name -} - -func (j *biosResetTask) Steps() []Step { - return j.steps -} - -func (j *biosResetTask) Asset() *model.Asset { - return j.asset -} - -type RCTask rctypes.Task[*rctypes.BiosControlTaskParameters, json.RawMessage] - -func NewTask(task *rctypes.Task[any, any]) (*RCTask, error) { - paramsJSON, ok := task.Parameters.(json.RawMessage) - if !ok { - return nil, errInvalidConditionParams - } - - params := rctypes.BiosControlTaskParameters{} - if err := json.Unmarshal(paramsJSON, ¶ms); err != nil { - return nil, err - } - - // deep copy fields referenced by pointer - asset, err := copystructure.Copy(task.Server) - if err != nil { - return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Server") - } - - fault, err := copystructure.Copy(task.Fault) - if err != nil { - return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Fault") - } - - return &RCTask{ - StructVersion: task.StructVersion, - ID: task.ID, - Kind: task.Kind, - State: task.State, - Status: task.Status, - Parameters: ¶ms, - Fault: fault.(*rctypes.Fault), - FacilityCode: task.FacilityCode, - Server: asset.(*rtypes.Server), - WorkerID: task.WorkerID, - TraceID: task.TraceID, - SpanID: task.SpanID, - CreatedAt: task.CreatedAt, - UpdatedAt: task.UpdatedAt, - CompletedAt: task.CompletedAt, - }, nil -} - -func (task *RCTask) ToGeneric() (*rctypes.Task[any, any], error) { - paramsJSON, err := task.Parameters.Marshal() - if err != nil { - return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Parameters") - } - - // deep copy fields referenced by pointer - asset, err := copystructure.Copy(task.Server) - if err != nil { - return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Server") - } - - fault, err := copystructure.Copy(task.Fault) - if err != nil { - return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Fault") - } - - return &rctypes.Task[any, any]{ - StructVersion: task.StructVersion, - ID: task.ID, - Kind: task.Kind, - State: task.State, - Status: task.Status, - Parameters: paramsJSON, - Fault: fault.(*rctypes.Fault), - FacilityCode: task.FacilityCode, - Server: asset.(*rtypes.Server), - WorkerID: task.WorkerID, - TraceID: task.TraceID, - SpanID: task.SpanID, - CreatedAt: task.CreatedAt, - UpdatedAt: task.UpdatedAt, - CompletedAt: task.CompletedAt, - }, nil -} - -// TaskRunner Will run the task by executing the individual steps in the task, -// and reports task status using the publisher. -type TaskRunner struct { - publisher ctrl.Publisher - task *RCTask - oldTask Task - taskStatus *TaskStatus -} - -// NewTaskRunner creates a TaskRunner to run a specific Task -func NewTaskRunner(publisher ctrl.Publisher, oldTask Task, task *RCTask) *TaskRunner { - return &TaskRunner{ - publisher: publisher, - task: task, - oldTask: oldTask, - taskStatus: NewTaskStatus(oldTask.Name(), rctypes.Pending), - } -} - -func (r *TaskRunner) Run(ctx context.Context, client *bmclib.Client) (err error) { - slog.With(r.oldTask.Asset().AsLogFields()...).Info("Running task", "task", r.oldTask.Name()) - - data := sharedData{} - r.initTaskLog() - - defer func() { - if rec := recover(); rec != nil { - err = r.handlePanic(ctx, rec) - } - }() - - r.publishTaskUpdate(ctx, rctypes.Active, "Opening client", nil) - - if err = client.Open(ctx); err != nil { - r.publishFailed(ctx, 0, "Failed to open client", err) - return errors.Wrap(err, "failed to open client") - } - defer client.Close(ctx) - - for stepID, step := range r.oldTask.Steps() { - r.publishStepUpdate(ctx, stepID, "Running step") - - details, err := step.Run(ctx, client, data) - if err != nil { - r.publishFailed(ctx, stepID, "Step failure", err) - return err - } - - r.publishStepSuccess(ctx, stepID, details) - } - - r.publishTaskSuccess(ctx) - - return nil -} - -func (r *TaskRunner) initTaskLog() { - steps := r.oldTask.Steps() - r.taskStatus.Steps = make([]*StepStatus, len(steps)) - - for i, step := range steps { - r.taskStatus.Steps[i] = NewStepStatus(step.Name(), rctypes.Pending, "", nil) - } -} - -func (r *TaskRunner) handlePanic(ctx context.Context, rec any) error { - msg := "Panic occurred while running task" - slog.Error("!!panic occurred", "rec", rec, "stack", string(debug.Stack())) - slog.Error(msg) - err := errors.New("Task fatal error, check logs for details") - - r.publishTaskUpdate(ctx, rctypes.Failed, msg, err) - - return err -} - -func (r *TaskRunner) publishStepUpdate(ctx context.Context, stepID int, details string) { - r.publish(ctx, stepID, rctypes.Active, rctypes.Active, details, nil) -} - -func (r *TaskRunner) publishStepSuccess(ctx context.Context, stepID int, details string) { - r.publish(ctx, stepID, rctypes.Succeeded, rctypes.Active, details, nil) -} - -func (r *TaskRunner) publishFailed(ctx context.Context, stepID int, details string, err error) { - slog.With(r.oldTask.Asset().AsLogFields()...).Error("Task failed", "task", r.oldTask.Name()) - r.publish(ctx, stepID, rctypes.Failed, rctypes.Failed, details, err) -} - -func (r *TaskRunner) publishTaskSuccess(ctx context.Context) { - slog.With(r.oldTask.Asset().AsLogFields()...).Info("Task completed successfully", "task", r.oldTask.Name()) - r.publishTaskUpdate(ctx, rctypes.Succeeded, "Task completed successfully", nil) -} - -func (r *TaskRunner) publish(ctx context.Context, stepID int, stepState, taskState rctypes.State, details string, err error) { - step := r.oldTask.Steps()[stepID] - stepStatus := NewStepStatus(step.Name(), stepState, details, err) - - slog.With(r.oldTask.Asset().AsLogFields()...).With(stepStatus.AsLogFields()...).Info(details, "step", step.Name()) - - r.taskStatus.Steps[stepID] = stepStatus - - var taskDetails string - if err != nil { - taskDetails = "Task failed at step " + step.Name() - } - - r.publishTaskUpdate(ctx, taskState, taskDetails, err) -} - -func (r *TaskRunner) publishTaskUpdate(ctx context.Context, state rctypes.State, details string, err error) { - r.task.State = state - r.task.Status.Append(details) - - if err != nil { - r.taskStatus.Error = err.Error() - } - - slog.With(r.oldTask.Asset().AsLogFields()...).Info("Task update", "task", r.oldTask.Name()) - - genTask, err := r.task.ToGeneric() - if err != nil { - r.taskStatus.Error = err.Error() - return - } - - err = r.publisher.Publish(ctx, genTask, false) - if err != nil { - return - } -} diff --git a/internal/tasks/tasks_test.go b/internal/tasks/tasks_test.go deleted file mode 100644 index 5627ba6..0000000 --- a/internal/tasks/tasks_test.go +++ /dev/null @@ -1,69 +0,0 @@ -package tasks - -import ( - "context" - "testing" - - "github.com/bmc-toolbox/bmclib/v2" - "github.com/metal-toolbox/bioscfg/internal/model" - "github.com/metal-toolbox/rivets/condition" - "github.com/stretchr/testify/assert" -) - -type fakeStep struct{} - -func (s *fakeStep) Name() string { - return "fake step" -} - -func (s *fakeStep) Run(_ context.Context, _ *bmclib.Client, _ sharedData) (string, error) { - return "", nil -} - -type fakeTask struct { - asset *model.Asset - steps []Step -} - -func newFakeTask() *fakeTask { - return &fakeTask{ - asset: &model.Asset{}, - steps: []Step{&fakeStep{}}, - } -} - -func newFakeRCTask() *RCTask { - return &RCTask{} -} - -func (t *fakeTask) Name() string { - return "fake task" -} - -func (t *fakeTask) Asset() *model.Asset { - return t.asset -} - -func (t *fakeTask) Steps() []Step { - return t.steps -} - -type fakePublisher struct { - t *testing.T -} - -func (m *fakePublisher) Publish(_ context.Context, _ *condition.Task[any, any], _ bool) error { - return nil -} - -func TestTaskRunnerHandlePanic(t *testing.T) { - task := newFakeTask() - rcTask := newFakeRCTask() - runner := NewTaskRunner(&fakePublisher{t: t}, task, rcTask) - - err := runner.Run(context.Background(), nil) - - if assert.NotNil(t, err) { - assert.Equal(t, "Task fatal error, check logs for details", err.Error()) - } -} diff --git a/internal/version/version.go b/internal/version/version.go index b5100d5..361d561 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -1,6 +1,7 @@ package version import ( + "encoding/json" "runtime" rdebug "runtime/debug" "strings" @@ -49,6 +50,22 @@ func Current() *Version { } } +func (v *Version) AsMap() (map[string]any, error) { + var asMap map[string]interface{} + + bytes, err := json.Marshal(v) + if err != nil { + return nil, err + } + + err = json.Unmarshal(bytes, &asMap) + if err != nil { + return nil, err + } + + return asMap, nil +} + func ExportBuildInfoMetric() { buildInfo := promauto.NewGaugeVec( prometheus.GaugeOpts{ diff --git a/main.go b/main.go index d899e73..30b0781 100644 --- a/main.go +++ b/main.go @@ -17,10 +17,8 @@ package main import ( "github.com/metal-toolbox/bioscfg/cmd" - "github.com/metal-toolbox/bioscfg/internal/log" ) func main() { - log.InitLogger() cmd.Execute() }