From 8dc2d91fdfacc9226f1a3d83d914359d2b03c297 Mon Sep 17 00:00:00 2001 From: John Mears Date: Thu, 7 Mar 2024 16:25:30 -0700 Subject: [PATCH] Add support for sending NMI Support for sending an NMI has been added to ipmi, redfish, redfishwrapper, and all providers that use the redfishwrapper. --- bmc/nmi.go | 66 +++++++++++++++ bmc/nmi_test.go | 124 ++++++++++++++++++++++++++++ client.go | 11 +++ internal/ipmi/ipmi.go | 10 +++ internal/redfishwrapper/power.go | 21 ++++- providers/dell/idrac.go | 5 ++ providers/ipmitool/ipmitool.go | 5 ++ providers/ipmitool/ipmitool_test.go | 18 ++++ providers/openbmc/openbmc.go | 5 ++ providers/redfish/redfish.go | 5 ++ providers/supermicro/supermicro.go | 5 ++ 11 files changed, 274 insertions(+), 1 deletion(-) create mode 100644 bmc/nmi.go create mode 100644 bmc/nmi_test.go diff --git a/bmc/nmi.go b/bmc/nmi.go new file mode 100644 index 00000000..9512a94e --- /dev/null +++ b/bmc/nmi.go @@ -0,0 +1,66 @@ +package bmc + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/hashicorp/go-multierror" +) + +type NMISender interface { + SendNMI(ctx context.Context) error +} + +func sendNMI(ctx context.Context, timeout time.Duration, sender NMISender, metadata *Metadata) error { + senderName := getProviderName(sender) + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + metadata.ProvidersAttempted = append(metadata.ProvidersAttempted, senderName) + + err := sender.SendNMI(ctx) + if err != nil { + metadata.FailedProviderDetail[senderName] = err.Error() + return err + } + + metadata.SuccessfulProvider = senderName + + return nil +} + +// SendNMIFromInterface will look for providers that implement NMISender +// and attempt to call SendNMI until a provider is successful, +// or all providers have been exhausted. +func SendNMIFromInterface( + ctx context.Context, + timeout time.Duration, + providers []interface{}, +) (metadata Metadata, err error) { + metadata = newMetadata() + + for _, provider := range providers { + sender, ok := provider.(NMISender) + if !ok { + err = multierror.Append(err, fmt.Errorf("not an NMISender implementation: %T", provider)) + continue + } + + sendNMIErr := sendNMI(ctx, timeout, sender, &metadata) + if sendNMIErr != nil { + err = multierror.Append(err, sendNMIErr) + continue + } + return metadata, nil + } + + if len(metadata.ProvidersAttempted) == 0 { + err = multierror.Append(err, errors.New("no NMISender implementations found")) + } else { + err = multierror.Append(err, errors.New("failed to send NMI")) + } + + return metadata, err +} diff --git a/bmc/nmi_test.go b/bmc/nmi_test.go new file mode 100644 index 00000000..c9a8e418 --- /dev/null +++ b/bmc/nmi_test.go @@ -0,0 +1,124 @@ +package bmc + +import ( + "context" + "testing" + "time" + + "github.com/pkg/errors" + "github.com/stretchr/testify/assert" +) + +type mockNMISender struct { + err error +} + +func (m *mockNMISender) SendNMI(ctx context.Context) error { + select { + case <-ctx.Done(): + return ctx.Err() + default: + return m.err + } +} + +func (m *mockNMISender) Name() string { + return "mock" +} + +func TestSendNMIFromInterface(t *testing.T) { + testCases := []struct { + name string + mockSenders []interface{} + errMsg string + isTimedout bool + expectedMetadata Metadata + }{ + { + name: "success", + mockSenders: []interface{}{&mockNMISender{}}, + expectedMetadata: Metadata{ + SuccessfulProvider: "mock", + ProvidersAttempted: []string{"mock"}, + FailedProviderDetail: make(map[string]string), + }, + }, + { + name: "success with multiple senders", + mockSenders: []interface{}{ + nil, + "foo", + &mockNMISender{err: errors.New("err from sender")}, + &mockNMISender{}, + }, + expectedMetadata: Metadata{ + SuccessfulProvider: "mock", + ProvidersAttempted: []string{"mock", "mock"}, + FailedProviderDetail: map[string]string{"mock": "err from sender"}, + }, + }, + { + name: "not an nmisender", + mockSenders: []interface{}{nil}, + errMsg: "not an NMISender", + expectedMetadata: Metadata{ + FailedProviderDetail: make(map[string]string), + }, + }, + { + name: "no nmisenders", + mockSenders: []interface{}{}, + errMsg: "no NMISender implementations found", + expectedMetadata: Metadata{ + FailedProviderDetail: make(map[string]string), + }, + }, + { + name: "timed out", + mockSenders: []interface{}{&mockNMISender{}}, + isTimedout: true, + errMsg: "context deadline exceeded", + expectedMetadata: Metadata{ + ProvidersAttempted: []string{"mock"}, + FailedProviderDetail: map[string]string{"mock": "context deadline exceeded"}, + }, + }, + { + name: "error from nmisender", + mockSenders: []interface{}{&mockNMISender{err: errors.New("foobar")}}, + errMsg: "foobar", + expectedMetadata: Metadata{ + ProvidersAttempted: []string{"mock"}, + FailedProviderDetail: map[string]string{"mock": "foobar"}, + }, + }, + { + name: "error when fail to send", + mockSenders: []interface{}{&mockNMISender{err: errors.New("err from sender")}}, + errMsg: "failed to send NMI", + expectedMetadata: Metadata{ + ProvidersAttempted: []string{"mock"}, + FailedProviderDetail: map[string]string{"mock": "err from sender"}, + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + timeout := time.Second * 60 + if tt.isTimedout { + timeout = 0 + } + + metadata, err := SendNMIFromInterface(context.Background(), timeout, tt.mockSenders) + + if tt.errMsg == "" { + assert.NoError(t, err) + } else { + assert.ErrorContains(t, err, tt.errMsg) + } + + assert.Equal(t, tt.expectedMetadata, metadata) + }) + } +} diff --git a/client.go b/client.go index f93d7602..a4444b3b 100644 --- a/client.go +++ b/client.go @@ -717,3 +717,14 @@ func (c *Client) GetSystemEventLogRaw(ctx context.Context) (eventlog string, err c.setMetadata(metadata) return eventlog, err } + +// SendNMI tells the BMC to issue an NMI to the device +func (c *Client) SendNMI(ctx context.Context) error { + ctx, span := c.traceprovider.Tracer(pkgName).Start(ctx, "SendNMI") + defer span.End() + + metadata, err := bmc.SendNMIFromInterface(ctx, c.perProviderTimeout(ctx), c.registry().GetDriverInterfaces()) + c.setMetadata(metadata) + + return err +} diff --git a/internal/ipmi/ipmi.go b/internal/ipmi/ipmi.go index c97c7a4e..35196178 100644 --- a/internal/ipmi/ipmi.go +++ b/internal/ipmi/ipmi.go @@ -437,3 +437,13 @@ func (i *Ipmi) DeactivateSOL(ctx context.Context) (err error) { } return err } + +// SendPowerDiag tells the BMC to issue an NMI to the device +func (i *Ipmi) SendPowerDiag(ctx context.Context) error { + _, err := i.run(ctx, []string{"chassis", "power", "diag"}) + if err != nil { + err = errors.Wrap(err, "failed sending power diag") + } + + return err +} diff --git a/internal/redfishwrapper/power.go b/internal/redfishwrapper/power.go index 7e91b8e7..0a700871 100644 --- a/internal/redfishwrapper/power.go +++ b/internal/redfishwrapper/power.go @@ -212,7 +212,6 @@ func (c *Client) SystemForceOff(ctx context.Context) (ok bool, err error) { system.DisableEtagMatch(c.disableEtagMatch) - err = system.Reset(rf.ForceOffResetType) if err != nil { return false, err @@ -221,3 +220,23 @@ func (c *Client) SystemForceOff(ctx context.Context) (ok bool, err error) { return true, nil } + +// SendNMI tells the BMC to issue an NMI to the device +func (c *Client) SendNMI(_ context.Context) error { + if err := c.SessionActive(); err != nil { + return errors.Wrap(bmclibErrs.ErrNotAuthenticated, err.Error()) + } + + ss, err := c.client.Service.Systems() + if err != nil { + return err + } + + for _, system := range ss { + if err = system.Reset(rf.NmiResetType); err != nil { + return err + } + } + + return nil +} diff --git a/providers/dell/idrac.go b/providers/dell/idrac.go index e4b2aa46..73c457ef 100644 --- a/providers/dell/idrac.go +++ b/providers/dell/idrac.go @@ -219,6 +219,11 @@ func (c *Conn) BmcReset(ctx context.Context, resetType string) (ok bool, err err return c.redfishwrapper.BMCReset(ctx, resetType) } +// SendNMI tells the BMC to issue an NMI to the device +func (c *Conn) SendNMI(ctx context.Context) error { + return c.redfishwrapper.SendNMI(ctx) +} + // deviceManufacturer returns the device manufacturer and model attributes func (c *Conn) deviceManufacturer(ctx context.Context) (vendor string, err error) { systems, err := c.redfishwrapper.Systems() diff --git a/providers/ipmitool/ipmitool.go b/providers/ipmitool/ipmitool.go index d285161f..69f37ccc 100644 --- a/providers/ipmitool/ipmitool.go +++ b/providers/ipmitool/ipmitool.go @@ -201,3 +201,8 @@ func (c *Conn) GetSystemEventLog(ctx context.Context) (entries [][]string, err e func (c *Conn) GetSystemEventLogRaw(ctx context.Context) (eventlog string, err error) { return c.ipmitool.GetSystemEventLogRaw(ctx) } + +// SendNMI tells the BMC to issue an NMI to the device +func (c *Conn) SendNMI(ctx context.Context) error { + return c.ipmitool.SendPowerDiag(ctx) +} diff --git a/providers/ipmitool/ipmitool_test.go b/providers/ipmitool/ipmitool_test.go index 90f563e8..de395bc1 100644 --- a/providers/ipmitool/ipmitool_test.go +++ b/providers/ipmitool/ipmitool_test.go @@ -178,3 +178,21 @@ func TestSystemEventLogGetRaw(t *testing.T) { t.Log(eventlog) t.Fatal() } + +func TestSendNMI(t *testing.T) { + t.Skip("need real ipmi server") + host := "127.0.0.1" + port := "623" + user := "ADMIN" + pass := "ADMIN" + i, err := New(host, user, pass, WithPort(port), WithLogger(logging.DefaultLogger())) + if err != nil { + t.Fatal(err) + } + err = i.SendNMI(context.Background()) + if err != nil { + t.Fatal(err) + } + t.Log("NMI sent") + t.Fatal() +} diff --git a/providers/openbmc/openbmc.go b/providers/openbmc/openbmc.go index f2de939f..8e9cde11 100644 --- a/providers/openbmc/openbmc.go +++ b/providers/openbmc/openbmc.go @@ -184,3 +184,8 @@ func (c *Conn) Inventory(ctx context.Context) (device *common.Device, err error) func (c *Conn) BmcReset(ctx context.Context, resetType string) (ok bool, err error) { return c.redfishwrapper.BMCReset(ctx, resetType) } + +// SendNMI tells the BMC to issue an NMI to the device +func (c *Conn) SendNMI(ctx context.Context) error { + return c.redfishwrapper.SendNMI(ctx) +} diff --git a/providers/redfish/redfish.go b/providers/redfish/redfish.go index 1dae9e80..9bfacc5a 100644 --- a/providers/redfish/redfish.go +++ b/providers/redfish/redfish.go @@ -217,3 +217,8 @@ func (c *Conn) Inventory(ctx context.Context) (device *common.Device, err error) func (c *Conn) GetBiosConfiguration(ctx context.Context) (biosConfig map[string]string, err error) { return c.redfishwrapper.GetBiosConfiguration(ctx) } + +// SendNMI tells the BMC to issue an NMI to the device +func (c *Conn) SendNMI(ctx context.Context) error { + return c.redfishwrapper.SendNMI(ctx) +} diff --git a/providers/supermicro/supermicro.go b/providers/supermicro/supermicro.go index 53b15929..ec22e08a 100644 --- a/providers/supermicro/supermicro.go +++ b/providers/supermicro/supermicro.go @@ -544,3 +544,8 @@ func hostIP(hostURL string) (string, error) { return hostURLParsed.Host, nil } + +// SendNMI tells the BMC to issue an NMI to the device +func (c *Client) SendNMI(ctx context.Context) error { + return c.serviceClient.redfish.SendNMI(ctx) +}