diff --git a/config/config.go b/config/config.go index 79b941a..ee3b859 100644 --- a/config/config.go +++ b/config/config.go @@ -7,11 +7,10 @@ package config import ( "fmt" "io" + "log/slog" "time" "golang.org/x/sys/unix" - - "github.com/sirupsen/logrus" "gopkg.in/yaml.v3" ) @@ -44,7 +43,7 @@ type InfluxDBConf struct { } type LoggingConf struct { - LogLevel LogLevel `yaml:"log_level"` + LogLevel slog.Level `yaml:"log_level"` } type TopologyConf struct { @@ -62,31 +61,12 @@ func (conf *TopologyConf) validate() error { return nil } -// LogLevel is a wrapper type for logrus.Level. -type LogLevel logrus.Level - -// String returns the string representation of the log level. -func (l LogLevel) String() string { - return logrus.Level(l).String() -} - -// UnmarshalText parses a byte slice value into a logrus.Level value. -func (l *LogLevel) UnmarshalText(text []byte) error { - level, err := logrus.ParseLevel(string(text)) - - if err == nil { - *l = LogLevel(level) - } - - return err -} - func ReadConfig(r io.Reader) (*FabricmonConf, error) { // Defaults conf := &FabricmonConf{ PollInterval: time.Second * 10, Logging: LoggingConf{ - LogLevel: LogLevel(logrus.InfoLevel), + LogLevel: slog.LevelInfo, }, } diff --git a/go.mod b/go.mod index 50d4d35..c3aefee 100644 --- a/go.mod +++ b/go.mod @@ -1,11 +1,10 @@ module github.com/dswarbrick/fabricmon -go 1.19 +go 1.21 require ( github.com/fsnotify/fsnotify v1.6.0 github.com/influxdata/influxdb v1.8.10 - github.com/sirupsen/logrus v1.9.0 golang.org/x/sys v0.3.0 gopkg.in/alecthomas/kingpin.v2 v2.2.6 gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index a85e6e2..1e4fc07 100644 --- a/go.sum +++ b/go.sum @@ -177,8 +177,6 @@ github.com/segmentio/kafka-go v0.1.0/go.mod h1:X6itGqS9L4jDletMsxZ7Dz+JFWxM6JHfP github.com/segmentio/kafka-go v0.2.0/go.mod h1:X6itGqS9L4jDletMsxZ7Dz+JFWxM6JHfPOCvTvk+EJo= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= -github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= -github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= @@ -288,7 +286,6 @@ golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200107162124-548cf772de50/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/infiniband/netdiscover.go b/infiniband/netdiscover.go index 90458eb..9c28dbf 100644 --- a/infiniband/netdiscover.go +++ b/infiniband/netdiscover.go @@ -12,11 +12,10 @@ import "C" import ( "fmt" + "log/slog" "os" "time" "unsafe" - - log "github.com/sirupsen/logrus" ) type HCA struct { @@ -46,14 +45,14 @@ func (h *HCA) NetDiscover(output chan Fabric, mkey uint64, resetThreshold uint) portNum := int(umad_port.portnum) linkLayer := C.GoString(&umad_port.link_layer[0]) - portLog := log.WithFields(log.Fields{"ca": h.Name, "port": portNum}) + portLog := slog.With("ca", h.Name, "port", portNum) if linkLayer != "InfiniBand" && linkLayer != "IB" { - portLog.Debugf("Skipping port with unsupported link layer %q", linkLayer) + portLog.Debug("skipping port with unsupported link layer", "link_layer", linkLayer) continue } - portLog.Debug("Polling port") + portLog.Debug("polling port") // ibnd_config_t specifies max hops, timeout, max SMPs etc config := C.ibnd_config_t{flags: C.IBND_CONFIG_MLX_EPI, mkey: C.uint64_t(mkey)} @@ -64,7 +63,7 @@ func (h *HCA) NetDiscover(output chan Fabric, mkey uint64, resetThreshold uint) fabric, err := C.ibnd_discover_fabric(&h.umad_ca.ca_name[0], umad_port.portnum, nil, &config) if err != nil { - portLog.WithError(err).Error("Unable to discover fabric") + portLog.Error("unable to discover fabric", "err", err) continue } @@ -91,23 +90,20 @@ func (h *HCA) NetDiscover(output chan Fabric, mkey uint64, resetThreshold uint) } } } else { - portLog.Error("Unable to open MAD port") + portLog.Error("unable to open MAD port") } C.ibnd_destroy_fabric(fabric) } - log.WithFields(log.Fields{ - "duration": time.Since(start), - "nodes": totalNodes, - "ports": totalPorts}, - ).Info("NetDiscover completed") + slog.Info("netdiscover complete", + "duration", time.Since(start), "nodes", totalNodes, "ports", totalPorts) } func (h *HCA) Release() { // Free associated memory from pointers in umad_ca_t.ports if C.umad_release_ca(h.umad_ca) < 0 { - log.Error("umad_release_ca: ", h.umad_ca) + slog.Error("umad_release_ca", "umad_ca", h.umad_ca) } } @@ -122,15 +118,14 @@ func GetCAs() []HCA { C.umad_get_ca(ca_name, &ca) C.free(unsafe.Pointer(ca_name)) - log.WithFields(log.Fields{ - "ca": C.GoString(&ca.ca_name[0]), - "type": C.GoString(&ca.ca_type[0]), - "ports": ca.numports, - "firmware": C.GoString(&ca.fw_ver[0]), - "hardware": C.GoString(&ca.hw_ver[0]), - "node_guid": fmt.Sprintf("%#016x", ntohll(uint64(ca.node_guid))), - "system_guid": fmt.Sprintf("%#016x", ntohll(uint64(ca.system_guid))), - }).Info("Found HCA") + slog.Info("found HCA", + "ca", C.GoString(&ca.ca_name[0]), + "type", C.GoString(&ca.ca_type[0]), + "ports", ca.numports, + "firmware", C.GoString(&ca.fw_ver[0]), + "hardware", C.GoString(&ca.hw_ver[0]), + "node_guid", fmt.Sprintf("%#016x", ntohll(uint64(ca.node_guid))), + "system_guid", fmt.Sprintf("%#016x", ntohll(uint64(ca.system_guid)))) hcas[i] = HCA{ Name: caName, @@ -143,7 +138,7 @@ func GetCAs() []HCA { type ibndNode struct { ibnd_node *C.struct_ibnd_node - log *log.Entry + slog *slog.Logger } // getPortCounters retrieves all counters for a specific port. @@ -154,7 +149,7 @@ func (n *ibndNode) getPortCounters(portId *C.ib_portid_t, portNum int, ibmadPort var buf [1024]byte counters := make(map[uint32]interface{}) - portLog := n.log.WithFields(log.Fields{"port": portNum}) + portLog := n.slog.With("port", portNum) // PerfMgt ClassPortInfo is a required attribute. See ClassPortInfo, IBTA spec v1.3, table 126. pmaBuf := C.pma_query_via(unsafe.Pointer(&buf), portId, C.int(portNum), PMA_TIMEOUT, C.CLASS_PORT_INFO, ibmadPort) @@ -181,10 +176,7 @@ func (n *ibndNode) getPortCounters(portId *C.ib_portid_t, portNum int, ibmadPort counters[field] = uint32(C.mad_get_field(unsafe.Pointer(&buf), 0, field)) if float64(counters[field].(uint32)) > (float64(counter.Limit) * float64(resetThreshold) / 100) { - portLog.WithFields(log.Fields{ - "counter": counter.Name, - "value": counters[field], - }).Warn("Counter exceeds threshold") + portLog.Warn("counter exceeds threshold", "counter", counter.Name, "value", counters[field]) selMask |= counter.Select } @@ -193,8 +185,8 @@ func (n *ibndNode) getPortCounters(portId *C.ib_portid_t, portNum int, ibmadPort if selMask > 0 { var pc [1024]byte - resetLog := portLog.WithFields(log.Fields{"select_mask": fmt.Sprintf("%#x", selMask)}) - resetLog.Warn("Resetting counters") + resetLog := portLog.With("select_mask", fmt.Sprintf("%#x", selMask)) + resetLog.Warn("resetting counters") if C.performance_reset_via(unsafe.Pointer(&pc), portId, C.int(portNum), C.uint(selMask), PMA_TIMEOUT, C.IB_GSI_PORT_COUNTERS, ibmadPort) == nil { resetLog.Error("performance_reset_via failed") @@ -205,7 +197,7 @@ func (n *ibndNode) getPortCounters(portId *C.ib_portid_t, portNum int, ibmadPort if (capMask&C.IB_PM_EXT_WIDTH_SUPPORTED == 0) && (capMask&C.IB_PM_EXT_WIDTH_NOIETF_SUP == 0) { // TODO: Fetch standard data / packet counters if extended counters are not supported // (pre-QDR hardware). - portLog.Warn("Port does not support extended counters") + portLog.Warn("port does not support extended counters") return counters, nil } @@ -254,10 +246,7 @@ func (n *ibndNode) simpleNode() Node { func (n *ibndNode) walkPorts(mad_port *C.struct_ibmad_port, resetThreshold uint) []Port { var portid C.ib_portid_t - n.log.WithFields(log.Fields{ - "node_type": n.ibnd_node._type, - "num_ports": n.ibnd_node.numports, - }).Debug("Walking ports for node") + n.slog.Debug("walking ports for node", "node_type", n.ibnd_node._type, "num_ports", n.ibnd_node.numports) ports := make([]Port, n.ibnd_node.numports+1) @@ -277,7 +266,7 @@ func (n *ibndNode) walkPorts(mad_port *C.struct_ibmad_port, resetThreshold uint) linkSpeedExt uint ) - portLog := n.log.WithFields(log.Fields{"port": portNum}) + portLog := n.slog.With("port", portNum) // Get pointer to port struct at portNum array offset pp := *(**C.ibnd_port_t)(unsafe.Pointer(arrayPtr + unsafe.Sizeof(arrayPtr)*uintptr(portNum))) @@ -325,12 +314,11 @@ func (n *ibndNode) walkPorts(mad_port *C.struct_ibmad_port, resetThreshold uint) } } - portLog.WithFields(log.Fields{ - "port_state": PortStateToStr(uint(portState)), - "phys_state": PortPhysStateToStr(uint(physState)), - "link_width": myPort.LinkWidth, - "link_speed": myPort.LinkSpeed, - }).Debugf("Port info") + portLog.Debug("port info", + "port_state", PortStateToStr(uint(portState)), + "phys_state", PortPhysStateToStr(uint(physState)), + "link_width", myPort.LinkWidth, + "link_speed", myPort.LinkSpeed) // Remote port may be nil if port state is polling / armed. rp := pp.remoteport @@ -347,7 +335,7 @@ func (n *ibndNode) walkPorts(mad_port *C.struct_ibmad_port, resetThreshold uint) uint(C.mad_get_field(unsafe.Pointer(&rp.info), 0, C.IB_PORT_LINK_WIDTH_SUPPORTED_F))) if uint(linkWidth) != maxWidth { - portLog.Warn("Link width is not the max width supported by both ports") + portLog.Warn("link width is not the max width supported by both ports") } // Determine max speed supported by both ends @@ -367,7 +355,7 @@ func (n *ibndNode) walkPorts(mad_port *C.struct_ibmad_port, resetThreshold uint) if counters, err := n.getPortCounters(&portid, portNum, mad_port, resetThreshold); err == nil { myPort.Counters = counters } else { - portLog.WithError(err).Error("Cannot get counters for port") + portLog.Error("cannot get counters for port", "err", err) } } } @@ -383,10 +371,10 @@ func walkFabric(fabric *C.struct_ibnd_fabric, mad_port *C.struct_ibmad_port, res for node := fabric.nodes; node != nil; node = node.next { n := ibndNode{ibnd_node: node} - n.log = log.WithFields(log.Fields{ - "node_desc": nnMap.RemapNodeName(n.guid(), n.nodeDesc()), - "node_guid": n.guidString(), - }) + n.slog = slog.With( + "node_desc", nnMap.RemapNodeName(n.guid(), n.nodeDesc()), + "node_guid", n.guidString(), + ) myNode := n.simpleNode() diff --git a/infiniband/nodenamemap.go b/infiniband/nodenamemap.go index d51f8c3..85b3670 100644 --- a/infiniband/nodenamemap.go +++ b/infiniband/nodenamemap.go @@ -7,6 +7,7 @@ package infiniband import ( "bufio" + "log/slog" "os" "strconv" "strings" @@ -14,7 +15,6 @@ import ( "unicode" "github.com/fsnotify/fsnotify" - log "github.com/sirupsen/logrus" ) const DEFAULT_NODE_NAME_MAP = "/etc/opensm/ib-node-name-map" @@ -42,10 +42,10 @@ func NewNodeNameMap(filePath string) (*NodeNameMap, error) { if watcher, err := fsnotify.NewWatcher(); err == nil { n.watcher = watcher if err := n.watcher.Add(n.filePath); err != nil { - log.WithError(err).Error("Cannot add fsnotify watch for node name map") + slog.Error("cannot add fsnotify watch for node name map", "err", err) } } else { - log.WithError(err).Error("Cannot create fsnotify watcher") + slog.Error("cannot create fsnotify watcher", "err", err) return n, err } @@ -58,23 +58,23 @@ func NewNodeNameMap(filePath string) (*NodeNameMap, error) { break } - log.Infof("NodeNameMap watcher event: %s", event.Op) + slog.Info("node name map watcher event", "event", event.Op) if event.Op == fsnotify.Remove { if err := n.watcher.Add(n.filePath); err != nil { - log.WithError(err).Error("Cannot re-add fsnotify watcher for node name map") + slog.Error("cannot re-add fsnotify watcher for node name map", "err", err) } } else { if err := n.reload(); err != nil { - log.WithError(err).Error("Failed to reload node name map") + slog.Error("failed to reload node name map", "err", err) } else { - log.Info("Node name map reloaded") + slog.Info("node name map reloaded") } } case err := <-n.watcher.Errors: if err != nil { - log.WithError(err).Error("Error watching node name map") + slog.Error("error watching node name map", "err", err) } } } @@ -154,6 +154,8 @@ func init() { var err error if nnMap, err = NewNodeNameMap(DEFAULT_NODE_NAME_MAP); err != nil { - log.WithError(err).Error("Cannot load node name map") + // FIXME: since init() functions are called before main(), this can resulting in + // inconsistent log formatting, as the default logger is not yet initialised. + slog.Error("cannot load node name map", "err", err) } } diff --git a/infiniband/sm.go b/infiniband/sm.go index 1778362..2f9cfc3 100644 --- a/infiniband/sm.go +++ b/infiniband/sm.go @@ -12,9 +12,8 @@ package infiniband import "C" import ( + "log/slog" "unsafe" - - log "github.com/sirupsen/logrus" ) const ( @@ -72,6 +71,11 @@ func smInfo(caName string, portNum int) { C.mad_decode_field(&sminfo[0], C.IB_SMINFO_PRIO_F, unsafe.Pointer(&prio)) C.mad_decode_field(&sminfo[0], C.IB_SMINFO_STATE_F, unsafe.Pointer(&state)) - log.Infof("sminfo: sm lid %d sm guid %#16x, activity count %d priority %d state %d %s", - portid.lid, guid, act, prio, state, smStateMap[state]) + slog.Info("sminfo", + "sm_lid", portid.lid, + "sm_guid", guid, + "activity_count", act, + "priority", prio, + "state", state, + "state_text", smStateMap[state]) } diff --git a/main.go b/main.go index 07b5246..b939fd3 100644 --- a/main.go +++ b/main.go @@ -6,16 +6,15 @@ // executed as root. // Package fabricmon is an InfiniBand fabric monitor daemon. -// package main import ( "fmt" + "log/slog" "os" "os/signal" "time" - log "github.com/sirupsen/logrus" "golang.org/x/sys/unix" "gopkg.in/alecthomas/kingpin.v2" @@ -49,7 +48,7 @@ func router(input chan infiniband.Fabric, writers []writer.FabricWriter) { close(c) } - log.Debug("Router input channel closed. Exiting function.") + slog.Debug("Router input channel closed. Exiting function.") } func main() { @@ -62,27 +61,27 @@ func main() { conf, err := config.ReadConfig(*configFile) if err != nil { - fmt.Println(err) + fmt.Fprintln(os.Stderr, err) os.Exit(1) } (*configFile).Close() + slog.SetDefault(slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: conf.Logging.LogLevel}))) + // Initialise umad library (also required in order to run under ibsim). if infiniband.UmadInit() < 0 { - fmt.Println("Error initialising umad library. Exiting.") + slog.Error("Error initialising umad library. Exiting.") os.Exit(1) } + slog.Info("FabricMon " + version.Info()) hcas := infiniband.GetCAs() if len(hcas) == 0 { - fmt.Println("No HCAs found in system. Exiting.") + slog.Error("No HCAs found in system. Exiting.") os.Exit(1) } - log.SetLevel(log.Level(conf.Logging.LogLevel)) - log.Info("FabricMon ", version.Info()) - // Channel to signal goroutines that we are shutting down. shutdownChan := make(chan bool) @@ -91,7 +90,7 @@ func main() { signal.Notify(sigChan, unix.SIGINT, unix.SIGTERM) go func() { s := <-sigChan - log.WithFields(log.Fields{"signal": s}).Debug("Shutting down due to signal.") + slog.Debug("shutting down due to signal", "signal", s) close(shutdownChan) }() @@ -129,7 +128,7 @@ func main() { hca.NetDiscover(splitter, conf.Mkey, conf.ResetThreshold) } case <-shutdownChan: - log.Debug("Shutdown received in polling loop.") + slog.Debug("shutdown received in polling loop") break Loop } } @@ -137,7 +136,7 @@ func main() { close(splitter) } - log.Debug("Cleaning up") + slog.Debug("cleaning up") // Free associated memory from pointers in umad_ca_t.ports for _, hca := range hcas { diff --git a/writer/forcegraph/forcegraph.go b/writer/forcegraph/forcegraph.go index 744d89b..c52bc12 100644 --- a/writer/forcegraph/forcegraph.go +++ b/writer/forcegraph/forcegraph.go @@ -8,11 +8,10 @@ package forcegraph import ( "encoding/json" "fmt" + "log/slog" "os" "path/filepath" - log "github.com/sirupsen/logrus" - "github.com/dswarbrick/fabricmon/infiniband" ) @@ -47,7 +46,7 @@ func (fg *ForceGraphWriter) Receiver(input chan infiniband.Fabric) { if fg.OutputDir != "" { if err := writeTopology(fg.OutputDir, fabric); err != nil { - log.WithError(err).Error("cannot marshal fabric to force graph topology") + slog.Error("cannot marshal fabric to force graph topology", "err", err) } } } diff --git a/writer/influxdb/influxdb.go b/writer/influxdb/influxdb.go index e797b2c..599cf54 100644 --- a/writer/influxdb/influxdb.go +++ b/writer/influxdb/influxdb.go @@ -7,11 +7,11 @@ package influxdb import ( "fmt" + "log/slog" "strconv" "time" "github.com/influxdata/influxdb/client/v2" - log "github.com/sirupsen/logrus" "github.com/dswarbrick/fabricmon/config" "github.com/dswarbrick/fabricmon/infiniband" @@ -47,32 +47,31 @@ func (w *InfluxDBWriter) Receiver(input chan infiniband.Fabric) { }) if err != nil { - log.Error(err) + slog.Error("cannot init InfluxDB client", "err", err) return } if rtt, version, err := c.Ping(0); err == nil { - log.WithFields(log.Fields{"version": version, "rtt": rtt}).Infof("InfluxDB ping reply") + slog.Info("InfluxDB ping reply", "version", version, "rtt", rtt) } // Loop indefinitely until input chan closed. for fabric := range input { if batch, err := w.makeBatch(fabric); err == nil { - log.WithFields(log.Fields{ - "hca": fabric.CAName, - "port": fabric.SourcePort, - "points": len(batch.Points()), - }).Debugf("InfluxDB batch created") + slog.Debug("InfluxDB batch created", + "hca", fabric.CAName, + "port", fabric.SourcePort, + "points", len(batch.Points())) if err := c.Write(batch); err != nil { - log.WithError(err).Error("InfluxDB batch write error") + slog.Error("InfluxDB batch write error", "err", err) } } else { - log.Error(err) + slog.Error("InfluxDB batch creation error", "err", err) } } - log.Debug("InfluxDBWriter input channel closed. Closing InfluxDB client connections.") + slog.Debug("InfluxDBWriter input channel closed. Closing InfluxDB client connections.") c.Close() }