diff --git a/cmd/collectors/keyperf/keyperf.go b/cmd/collectors/keyperf/keyperf.go index 111a8fb98..1540f15ec 100644 --- a/cmd/collectors/keyperf/keyperf.go +++ b/cmd/collectors/keyperf/keyperf.go @@ -5,6 +5,7 @@ import ( "github.com/netapp/harvest/v2/cmd/collectors/rest" "github.com/netapp/harvest/v2/cmd/poller/collector" "github.com/netapp/harvest/v2/cmd/poller/plugin" + "github.com/netapp/harvest/v2/pkg/conf" "github.com/netapp/harvest/v2/pkg/errs" "github.com/netapp/harvest/v2/pkg/matrix" "github.com/netapp/harvest/v2/pkg/slogx" @@ -20,8 +21,10 @@ const ( ) type KeyPerf struct { - *rest.Rest // provides: AbstractCollector, Client, Object, Query, TemplateFn, TemplateType - perfProp *perfProp + *rest.Rest // provides: AbstractCollector, Client, Object, Query, TemplateFn, TemplateType + perfProp *perfProp + pollDataCalls int + recordsToSave int // Number of records to save when using the recorder } type counter struct { @@ -91,6 +94,8 @@ func (kp *KeyPerf) Init(a *collector.AbstractCollector) error { kp.buildCounters() + kp.recordsToSave = collector.RecordKeepLast(kp.Params, kp.Logger) + kp.Logger.Debug( "initialized cache", slog.Int("numMetrics", len(kp.Prop.Metrics)), @@ -234,7 +239,25 @@ func (kp *KeyPerf) PollData() (map[string]*matrix.Matrix, error) { return nil, errs.New(errs.ErrConfig, "empty url") } - perfRecords, err = kp.GetRestData(href) + kp.pollDataCalls++ + if kp.pollDataCalls >= kp.recordsToSave { + kp.pollDataCalls = 0 + } + + var headers map[string]string + + poller, err := conf.PollerNamed(kp.Options.Poller) + if err != nil { + slog.Error("failed to find poller", slogx.Err(err), slog.String("poller", kp.Options.Poller)) + } + + if poller.IsRecording() { + headers = map[string]string{ + "From": strconv.Itoa(kp.pollDataCalls), + } + } + + perfRecords, err = kp.GetRestData(href, headers) if err != nil { return nil, fmt.Errorf("failed to fetch href=%s %w", href, err) } diff --git a/cmd/collectors/rest/rest.go b/cmd/collectors/rest/rest.go index fdc4d5528..0fd69c96c 100644 --- a/cmd/collectors/rest/rest.go +++ b/cmd/collectors/rest/rest.go @@ -679,13 +679,13 @@ func (r *Rest) HandleResults(mat *matrix.Matrix, result []gjson.Result, prop *pr return count, numPartials } -func (r *Rest) GetRestData(href string) ([]gjson.Result, error) { +func (r *Rest) GetRestData(href string, headers ...map[string]string) ([]gjson.Result, error) { r.Logger.Debug("Fetching data", slog.String("href", href)) if href == "" { return nil, errs.New(errs.ErrConfig, "empty url") } - result, err := rest.FetchAll(r.Client, href) + result, err := rest.FetchAll(r.Client, href, headers...) if err != nil { return r.handleError(err) } diff --git a/cmd/collectors/restperf/restperf.go b/cmd/collectors/restperf/restperf.go index 99677926f..cfb3e056b 100644 --- a/cmd/collectors/restperf/restperf.go +++ b/cmd/collectors/restperf/restperf.go @@ -18,6 +18,7 @@ import ( "github.com/netapp/harvest/v2/cmd/poller/collector" "github.com/netapp/harvest/v2/cmd/poller/plugin" "github.com/netapp/harvest/v2/cmd/tools/rest" + "github.com/netapp/harvest/v2/pkg/conf" "github.com/netapp/harvest/v2/pkg/errs" "github.com/netapp/harvest/v2/pkg/matrix" "github.com/netapp/harvest/v2/pkg/set" @@ -46,17 +47,15 @@ const ( ) var ( - constituentRegex = regexp.MustCompile(`^(.*)__(\d{4})$`) + constituentRegex = regexp.MustCompile(`^(.*)__(\d{4})$`) + qosQuery = "api/cluster/counter/tables/qos" + qosVolumeQuery = "api/cluster/counter/tables/qos_volume" + qosDetailQuery = "api/cluster/counter/tables/qos_detail" + qosDetailVolumeQuery = "api/cluster/counter/tables/qos_detail_volume" + qosWorkloadQuery = "api/storage/qos/workloads" + workloadDetailMetrics = []string{"resource_latency"} ) -var qosQuery = "api/cluster/counter/tables/qos" -var qosVolumeQuery = "api/cluster/counter/tables/qos_volume" -var qosDetailQuery = "api/cluster/counter/tables/qos_detail" -var qosDetailVolumeQuery = "api/cluster/counter/tables/qos_detail_volume" -var qosWorkloadQuery = "api/storage/qos/workloads" - -var workloadDetailMetrics = []string{"resource_latency"} - var qosQueries = map[string]string{ qosQuery: qosQuery, qosVolumeQuery: qosVolumeQuery, @@ -71,6 +70,9 @@ type RestPerf struct { perfProp *perfProp archivedMetrics map[string]*rest2.Metric // Keeps metric definitions that are not found in the counter schema. These metrics may be available in future ONTAP versions. hasInstanceSchedule bool + pollInstanceCalls int + pollDataCalls int + recordsToSave int // Number of records to save when using the recorder } type counter struct { @@ -147,6 +149,8 @@ func (r *RestPerf) Init(a *collector.AbstractCollector) error { r.InitSchedule() + r.recordsToSave = collector.RecordKeepLast(r.Params, r.Logger) + r.Logger.Debug( "initialized cache", slog.Int("numMetrics", len(r.Prop.Metrics)), @@ -721,7 +725,26 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) { return nil, errs.New(errs.ErrConfig, "empty url") } - err = rest.FetchRestPerfData(r.Client, href, &perfRecords) + r.pollDataCalls++ + if r.pollDataCalls >= r.recordsToSave { + r.pollDataCalls = 0 + } + + var headers map[string]string + + poller, err := conf.PollerNamed(r.Options.Poller) + if err != nil { + slog.Error("failed to find poller", slogx.Err(err), slog.String("poller", r.Options.Poller)) + } + + if poller.IsRecording() { + headers = map[string]string{ + "From": strconv.Itoa(r.pollDataCalls), + } + } + + err = rest.FetchRestPerfData(r.Client, href, &perfRecords, headers) + if err != nil { return nil, fmt.Errorf("failed to fetch href=%s %w", href, err) } @@ -1476,6 +1499,24 @@ func (r *RestPerf) PollInstance() (map[string]*matrix.Matrix, error) { } } + r.pollInstanceCalls++ + if r.pollInstanceCalls > r.recordsToSave/3 { + r.pollInstanceCalls = 0 + } + + var headers map[string]string + + poller, err := conf.PollerNamed(r.Options.Poller) + if err != nil { + slog.Error("failed to find poller", slogx.Err(err), slog.String("poller", r.Options.Poller)) + } + + if poller.IsRecording() { + headers = map[string]string{ + "From": strconv.Itoa(r.pollInstanceCalls), + } + } + href := rest.NewHrefBuilder(). APIPath(dataQuery). Fields([]string{fields}). @@ -1491,7 +1532,7 @@ func (r *RestPerf) PollInstance() (map[string]*matrix.Matrix, error) { apiT := time.Now() r.Client.Metadata.Reset() - records, err = rest.FetchAll(r.Client, href) + records, err = rest.FetchAll(r.Client, href, headers) if err != nil { return r.handleError(err, href) } diff --git a/cmd/collectors/storagegrid/rest/client.go b/cmd/collectors/storagegrid/rest/client.go index 7f9144a7b..1f8887572 100644 --- a/cmd/collectors/storagegrid/rest/client.go +++ b/cmd/collectors/storagegrid/rest/client.go @@ -75,7 +75,7 @@ func New(poller *conf.Poller, timeout time.Duration, c *auth.Credentials) (*Clie var ( client Client httpclient *http.Client - transport *http.Transport + transport http.RoundTripper addr string href string err error @@ -96,7 +96,7 @@ func New(poller *conf.Poller, timeout time.Duration, c *auth.Credentials) (*Clie client.baseURL = href client.Timeout = timeout - transport, err = c.Transport(nil) + transport, err = c.Transport(nil, poller) if err != nil { return nil, err } diff --git a/cmd/collectors/zapi/collector/zapi.go b/cmd/collectors/zapi/collector/zapi.go index 33fbbd6be..a332ebfa8 100644 --- a/cmd/collectors/zapi/collector/zapi.go +++ b/cmd/collectors/zapi/collector/zapi.go @@ -19,10 +19,15 @@ import ( "github.com/netapp/harvest/v2/cmd/collectors/zapi/plugins/systemnode" "github.com/netapp/harvest/v2/cmd/collectors/zapi/plugins/volume" "github.com/netapp/harvest/v2/cmd/collectors/zapi/plugins/workload" + "github.com/netapp/harvest/v2/cmd/poller/collector" "github.com/netapp/harvest/v2/cmd/poller/plugin" + client "github.com/netapp/harvest/v2/pkg/api/ontapi/zapi" "github.com/netapp/harvest/v2/pkg/conf" + "github.com/netapp/harvest/v2/pkg/errs" + "github.com/netapp/harvest/v2/pkg/matrix" "github.com/netapp/harvest/v2/pkg/set" "github.com/netapp/harvest/v2/pkg/slogx" + "github.com/netapp/harvest/v2/pkg/tree/node" "github.com/netapp/harvest/v2/pkg/util" "log/slog" "slices" @@ -30,13 +35,6 @@ import ( "strconv" "strings" "time" - - "github.com/netapp/harvest/v2/cmd/poller/collector" - "github.com/netapp/harvest/v2/pkg/errs" - "github.com/netapp/harvest/v2/pkg/matrix" - "github.com/netapp/harvest/v2/pkg/tree/node" - - client "github.com/netapp/harvest/v2/pkg/api/ontapi/zapi" ) const BatchSize = "500" diff --git a/cmd/collectors/zapiperf/zapiperf.go b/cmd/collectors/zapiperf/zapiperf.go index 91db51133..4472ca1b5 100644 --- a/cmd/collectors/zapiperf/zapiperf.go +++ b/cmd/collectors/zapiperf/zapiperf.go @@ -40,12 +40,15 @@ import ( "github.com/netapp/harvest/v2/cmd/collectors/zapiperf/plugins/vscan" "github.com/netapp/harvest/v2/cmd/poller/collector" "github.com/netapp/harvest/v2/cmd/poller/plugin" + "github.com/netapp/harvest/v2/pkg/conf" "github.com/netapp/harvest/v2/pkg/errs" "github.com/netapp/harvest/v2/pkg/matrix" "github.com/netapp/harvest/v2/pkg/set" "github.com/netapp/harvest/v2/pkg/slogx" "github.com/netapp/harvest/v2/pkg/tree/node" "log/slog" + "maps" + "slices" "strconv" "strings" "time" @@ -73,20 +76,23 @@ const ( var workloadDetailMetrics = []string{"resource_latency"} type ZapiPerf struct { - *zapi.Zapi // provides: AbstractCollector, Client, Object, Query, TemplateFn, TemplateType - object string - filter string - batchSize int - latencyIoReqd int - instanceKeys []string - instanceLabels map[string]string - histogramLabels map[string][]string - scalarCounters []string - qosLabels map[string]string - isCacheEmpty bool - keyName string - keyNameIndex int - testFilePath string // Used only from unit test + *zapi.Zapi // provides: AbstractCollector, Client, Object, Query, TemplateFn, TemplateType + object string + filter string + batchSize int + latencyIoReqd int + instanceKeys []string + instanceLabels map[string]string + histogramLabels map[string][]string + scalarCounters []string + qosLabels map[string]string + isCacheEmpty bool + keyName string + keyNameIndex int + testFilePath string // Used only from unit test + recordsToSave int // Number of records to save when using the recorder + pollDataCalls int + pollInstanceCalls int } func init() { @@ -122,6 +128,8 @@ func (z *ZapiPerf) Init(a *collector.AbstractCollector) error { z.InitQOS() + z.recordsToSave = collector.RecordKeepLast(z.Params, z.Logger) + z.Logger.Debug("initialized") return nil } @@ -416,18 +424,27 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { // load requested counters (metrics + labels) requestCounters := request.NewChildS("counters", "") // load scalar metrics + + // Sort the counters and instanceKeys so they are deterministic + for _, key := range z.scalarCounters { requestCounters.NewChildS("counter", key) } + // load histograms - for key := range z.histogramLabels { + sortedHistogramKeys := slices.Sorted(maps.Keys(z.histogramLabels)) + for _, key := range sortedHistogramKeys { requestCounters.NewChildS("counter", key) } + // load instance labels - for key := range z.instanceLabels { + sortedLabels := slices.Sorted(maps.Keys(z.instanceLabels)) + for _, key := range sortedLabels { requestCounters.NewChildS("counter", key) } + slices.Sort(instanceKeys) + // batch indices startIndex := 0 endIndex := 0 @@ -457,7 +474,7 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { key = v[z.keyNameIndex] } } - // avoid adding duplicate keys. It can happen for flex-cache case + // Avoid adding duplicate keys. It can happen for flex-cache case if !addedKeys[key] { requestInstances.NewChildS(z.keyName, key) addedKeys[key] = true @@ -472,7 +489,26 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { return nil, err } - response, rd, pd, err := z.Client.InvokeWithTimers(z.testFilePath) + z.pollDataCalls++ + if z.pollDataCalls >= z.recordsToSave { + z.pollDataCalls = 0 + } + + var headers map[string]string + + poller, err := conf.PollerNamed(z.Options.Poller) + if err != nil { + slog.Error("failed to find poller", slogx.Err(err), slog.String("poller", z.Options.Poller)) + } + + if poller.IsRecording() { + headers = map[string]string{ + "From": strconv.Itoa(z.pollDataCalls), + } + } + + response, rd, pd, err := z.Client.InvokeWithTimers(z.testFilePath, headers) + if err != nil { errMsg := strings.ToLower(err.Error()) // if ONTAP complains about batch size, use a smaller batch size @@ -927,6 +963,7 @@ func (z *ZapiPerf) getParentOpsCounters(data *matrix.Matrix, keyAttr string) (ti } instanceKeys = data.GetInstanceKeys() + slices.Sort(instanceKeys) // build ZAPI request request := node.NewXMLS("perf-object-get-instances") @@ -1324,6 +1361,7 @@ func (z *ZapiPerf) PollCounter() (map[string]*matrix.Matrix, error) { return nil, errs.New(errs.ErrNoMetric, "") } + slices.Sort(z.scalarCounters) return nil, nil } @@ -1608,7 +1646,26 @@ func (z *ZapiPerf) PollInstance() (map[string]*matrix.Matrix, error) { for { apiT = time.Now() - responseData, err := z.Client.InvokeBatchRequest(request, batchTag, z.testFilePath) + + z.pollInstanceCalls++ + if z.pollInstanceCalls >= z.recordsToSave/3 { + z.pollInstanceCalls = 0 + } + + var headers map[string]string + + poller, err := conf.PollerNamed(z.Options.Poller) + if err != nil { + slog.Error("failed to find poller", slogx.Err(err), slog.String("poller", z.Options.Poller)) + } + + if poller.IsRecording() { + headers = map[string]string{ + "From": strconv.Itoa(z.pollInstanceCalls), + } + } + + responseData, err := z.Client.InvokeBatchRequest(request, batchTag, z.testFilePath, headers) if err != nil { if errors.Is(err, errs.ErrAPIRequestRejected) { diff --git a/cmd/poller/collector/collector.go b/cmd/poller/collector/collector.go index 4984740fe..d4bc6d6fc 100644 --- a/cmd/poller/collector/collector.go +++ b/cmd/poller/collector/collector.go @@ -74,7 +74,8 @@ type Collector interface { } const ( - begin = "zBegin" + begin = "zBegin" + DefaultRecordsToSave = 60 ) // Status defines the possible states of a collector diff --git a/cmd/poller/collector/helpers.go b/cmd/poller/collector/helpers.go index ce421fc35..431b68ce4 100644 --- a/cmd/poller/collector/helpers.go +++ b/cmd/poller/collector/helpers.go @@ -27,6 +27,7 @@ import ( "path/filepath" "regexp" "sort" + "strconv" "strings" ) @@ -238,3 +239,29 @@ func GetBuiltinPlugin(name string, abc *plugin.AbstractPlugin) plugin.Plugin { return nil } + +func RecordKeepLast(n *node.Node, logger *slog.Logger) int { + r := n.GetChildS("recorder") + if r == nil { + return DefaultRecordsToSave + } + if !r.HasChildS("path") { + return DefaultRecordsToSave + } + + kl := r.GetChildContentS("keep_last") + if kl != "" { + keep, err := strconv.Atoi(kl) + if err != nil { + logger.Error( + "invalid keep_last value. Using default.", + slog.Int("default", DefaultRecordsToSave), + slog.String("value", kl), + ) + return DefaultRecordsToSave + } + return keep + } + + return DefaultRecordsToSave +} diff --git a/cmd/tools/rest/client.go b/cmd/tools/rest/client.go index 613335f98..678d4a9b7 100644 --- a/cmd/tools/rest/client.go +++ b/cmd/tools/rest/client.go @@ -15,7 +15,6 @@ import ( "github.com/tidwall/gjson" "io" "log/slog" - "net" "net/http" "net/http/httputil" "os" @@ -27,11 +26,9 @@ import ( const ( // DefaultTimeout should be > than ONTAP's default REST timeout, which is 15 seconds for GET requests DefaultTimeout = "30s" - // DefaultDialerTimeout limits the time spent establishing a TCP connection - DefaultDialerTimeout = 10 * time.Second - Message = "message" - Code = "code" - Target = "target" + Message = "message" + Code = "code" + Target = "target" ) type Client struct { @@ -52,7 +49,7 @@ func New(poller *conf.Poller, timeout time.Duration, credentials *auth.Credentia var ( client Client httpclient *http.Client - transport *http.Transport + transport http.RoundTripper addr string url string err error @@ -76,11 +73,11 @@ func New(poller *conf.Poller, timeout time.Duration, credentials *auth.Credentia client.baseURL = url client.Timeout = timeout - transport, err = credentials.Transport(nil) + transport, err = credentials.Transport(nil, poller) if err != nil { return nil, err } - transport.DialContext = (&net.Dialer{Timeout: DefaultDialerTimeout}).DialContext + httpclient = &http.Client{Transport: transport, Timeout: timeout} client.client = httpclient @@ -109,7 +106,7 @@ func (c *Client) printRequestAndResponse(req string, response []byte) { } // GetPlainRest makes a REST request to the cluster and returns a json response as a []byte -func (c *Client) GetPlainRest(request string, encodeURL bool) ([]byte, error) { +func (c *Client) GetPlainRest(request string, encodeURL bool, headers ...map[string]string) ([]byte, error) { var err error if strings.Index(request, "/") == 0 { request = request[1:] @@ -127,6 +124,13 @@ func (c *Client) GetPlainRest(request string, encodeURL bool) ([]byte, error) { return nil, err } c.request.Header.Set("Accept", "application/json") + + for _, hs := range headers { + for k, v := range hs { + c.request.Header.Set(k, v) + } + } + pollerAuth, err := c.auth.GetPollerAuth() if err != nil { return nil, err @@ -152,8 +156,8 @@ func (c *Client) GetPlainRest(request string, encodeURL bool) ([]byte, error) { } // GetRest makes a REST request to the cluster and returns a json response as a []byte -func (c *Client) GetRest(request string) ([]byte, error) { - return c.GetPlainRest(request, true) +func (c *Client) GetRest(request string, headers ...map[string]string) ([]byte, error) { + return c.GetPlainRest(request, true, headers...) } func (c *Client) invokeWithAuthRetry() ([]byte, error) { @@ -289,7 +293,7 @@ func downloadSwagger(poller *conf.Poller, path string, url string, verbose bool) timeout, _ := time.ParseDuration(DefaultTimeout) credentials := auth.NewCredentials(poller, slog.Default()) - transport, err := credentials.Transport(request) + transport, err := credentials.Transport(request, poller) if err != nil { return 0, err } diff --git a/cmd/tools/rest/rest.go b/cmd/tools/rest/rest.go index 3ca87f141..7b5d958c2 100644 --- a/cmd/tools/rest/rest.go +++ b/cmd/tools/rest/rest.go @@ -408,14 +408,14 @@ func FetchForCli(client *Client, href string, records *[]any, downloadAll bool, // FetchAll collects all records. // If you want to limit the number of records returned, use FetchSome. -func FetchAll(client *Client, href string) ([]gjson.Result, error) { +func FetchAll(client *Client, href string, headers ...map[string]string) ([]gjson.Result, error) { var ( records []gjson.Result result []gjson.Result err error ) - err = fetchAll(client, href, &records) + err = fetchAll(client, href, &records, headers...) if err != nil { return nil, err } @@ -463,8 +463,8 @@ func FetchAnalytics(client *Client, href string) ([]gjson.Result, gjson.Result, return result, *analytics, nil } -func fetchAll(client *Client, href string, records *[]gjson.Result) error { - getRest, err := client.GetRest(href) +func fetchAll(client *Client, href string, records *[]gjson.Result, headers ...map[string]string) error { + getRest, err := client.GetRest(href, headers...) if err != nil { return fmt.Errorf("error making request %w", err) } @@ -634,8 +634,8 @@ func fetchAnalytics(client *Client, href string, records *[]gjson.Result, analyt } // FetchRestPerfData This method is used in PerfRest collector. This method returns timestamp per batch -func FetchRestPerfData(client *Client, href string, perfRecords *[]PerfRecord) error { - getRest, err := client.GetRest(href) +func FetchRestPerfData(client *Client, href string, perfRecords *[]PerfRecord, headers ...map[string]string) error { + getRest, err := client.GetRest(href, headers...) if err != nil { return fmt.Errorf("error making request %w", err) } diff --git a/docs/configure-harvest-basic.md b/docs/configure-harvest-basic.md index e779e071a..d74c84086 100644 --- a/docs/configure-harvest-basic.md +++ b/docs/configure-harvest-basic.md @@ -25,6 +25,7 @@ All pollers are defined in `harvest.yml`, the main configuration file of Harvest | `log` | optional, list of collector names | Matching collectors log their ZAPI request/response | | | `prefer_zapi` | optional, bool | Use the ZAPI API if the cluster supports it, otherwise allow Harvest to choose REST or ZAPI, whichever is appropriate to the ONTAP version. See [rest-strategy](https://github.com/NetApp/harvest/blob/main/docs/architecture/rest-strategy.md) for details. | | | `conf_path` | optional, `:` separated list of directories | The search path Harvest uses to load its [templates](configure-templates.md). Harvest walks each directory in order, stopping at the first one that contains the desired template. | conf | +| `recorder` | optional, section | Section that determines if Harvest should record or replay HTTP requests. See [here](configure-harvest-basic.md#http-recorder) for details. | | ## Defaults @@ -195,6 +196,21 @@ node_vol_cifs_write_data{org="meg",ns="rtp",datacenter="DC-01",cluster="cluster- Keep in mind that each unique combination of key-value pairs increases the amount of stored data. Use them sparingly. See [PrometheusNaming](https://prometheus.io/docs/practices/naming/#labels) for details. +# HTTP Recorder + +When troubleshooting, it can be useful to record HTTP requests and responses to disk for later replay. + +Harvest removes `Authorization` and `Host` headers from recorded requests and responses +to prevent sensitive information from being stored on disk. + +The `recorder` section in the `harvest.yml` file allows you to configure the HTTP recorder. + +| parameter | type | description | default | +|-------------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------|--------:| +| `path` | string **required** | Path to a directory. Recorded requests and responses will be stored here. Replaying will read the requests and responses from this directory. | | +| `mode` | string **required** | `record` or `replay` | | +| `keep_last` | optional, int | When mode is `record`, the number of records to keep before overwriting | 60 | + # Authentication When authenticating with ONTAP and StorageGRID clusters, diff --git a/harvest.cue b/harvest.cue index c6c1261cd..b2bada7c4 100644 --- a/harvest.cue +++ b/harvest.cue @@ -62,6 +62,11 @@ label: [string]: string timeout?: string } +#Recorder: { + path: string + mode: "record" | "replay" +} + #CollectorDef: { [Name=_]: [...string] } @@ -87,6 +92,7 @@ Pollers: [Name=_]: #Poller log_max_files?: int password?: string prefer_zapi?: bool + recorder?: #Recorder ssl_cert?: string ssl_key?: string tls_min_version?: string diff --git a/pkg/api/ontapi/zapi/client.go b/pkg/api/ontapi/zapi/client.go index 1d78e2c87..e1876e897 100644 --- a/pkg/api/ontapi/zapi/client.go +++ b/pkg/api/ontapi/zapi/client.go @@ -6,7 +6,6 @@ package zapi import ( "bytes" - "crypto/tls" "errors" "fmt" "github.com/netapp/harvest/v2/pkg/auth" @@ -54,7 +53,7 @@ func New(poller *conf.Poller, c *auth.Credentials) (*Client, error) { client Client httpclient *http.Client request *http.Request - transport *http.Transport + transport http.RoundTripper timeout time.Duration url, addr string err error @@ -103,18 +102,11 @@ func New(poller *conf.Poller, c *auth.Credentials) (*Client, error) { request.Header.Set("Content-Type", "text/xml") request.Header.Set("Charset", "utf-8") - transport, err = c.Transport(request) + transport, err = c.Transport(request, poller) if err != nil { return nil, err } - if poller.TLSMinVersion != "" { - tlsVersion := client.tlsVersion(poller.TLSMinVersion) - if tlsVersion != 0 { - client.Logger.Info("Using TLS version", slog.Int("tlsVersion", int(tlsVersion))) - transport.TLSClientConfig.MinVersion = tlsVersion - } - } client.request = request // initialize http client @@ -332,7 +324,7 @@ func (c *Client) Invoke(testFilePath string) (*node.Node, error) { // Else -> will issue API requests in series, once there // are no more instances returned by the server, returned results will be nil // Use the returned tag for subsequent calls to this method -func (c *Client) InvokeBatchRequest(request *node.Node, tag string, testFilePath string) (Response, error) { +func (c *Client) InvokeBatchRequest(request *node.Node, tag string, testFilePath string, headers ...map[string]string) (Response, error) { if testFilePath != "" && tag != "" { testData, err := tree.ImportXML(testFilePath) if err != nil { @@ -341,13 +333,13 @@ func (c *Client) InvokeBatchRequest(request *node.Node, tag string, testFilePath return Response{Result: testData, Tag: "", Rd: time.Second, Pd: time.Second}, nil } // wasteful of course, need to rewrite later @TODO - results, tag, rd, pd, err := c.InvokeBatchWithTimers(request, tag) + results, tag, rd, pd, err := c.InvokeBatchWithTimers(request, tag, headers...) return Response{Result: results, Tag: tag, Rd: rd, Pd: pd}, err } // InvokeBatchWithTimers does the same as InvokeBatchRequest, but it also // returns API time and XML parse time -func (c *Client) InvokeBatchWithTimers(request *node.Node, tag string) (*node.Node, string, time.Duration, time.Duration, error) { +func (c *Client) InvokeBatchWithTimers(request *node.Node, tag string, headers ...map[string]string) (*node.Node, string, time.Duration, time.Duration, error) { var ( results *node.Node @@ -368,7 +360,7 @@ func (c *Client) InvokeBatchWithTimers(request *node.Node, tag string) (*node.No return nil, "", rd, pd, err } - if results, rd, pd, err = c.invokeWithAuthRetry(true); err != nil { + if results, rd, pd, err = c.invokeWithAuthRetry(true, headers...); err != nil { return nil, "", rd, pd, err } @@ -405,7 +397,7 @@ func (c *Client) InvokeRequest(request *node.Node) (*node.Node, error) { // Else -> invokes the request and returns parsed XML response and timers: // API wait time and XML parse time. // This method should only be called after building the request -func (c *Client) InvokeWithTimers(testFilePath string) (*node.Node, time.Duration, time.Duration, error) { +func (c *Client) InvokeWithTimers(testFilePath string, headers ...map[string]string) (*node.Node, time.Duration, time.Duration, error) { if testFilePath != "" { testData, err := tree.ImportXML(testFilePath) if err != nil { @@ -413,10 +405,10 @@ func (c *Client) InvokeWithTimers(testFilePath string) (*node.Node, time.Duratio } return testData, 0, 0, nil } - return c.invokeWithAuthRetry(true) + return c.invokeWithAuthRetry(true, headers...) } -func (c *Client) invokeWithAuthRetry(withTimers bool) (*node.Node, time.Duration, time.Duration, error) { +func (c *Client) invokeWithAuthRetry(withTimers bool, headers ...map[string]string) (*node.Node, time.Duration, time.Duration, error) { var buffer bytes.Buffer pollerAuth, err := c.auth.GetPollerAuth() if err != nil { @@ -428,7 +420,7 @@ func (c *Client) invokeWithAuthRetry(withTimers bool) (*node.Node, time.Duration buffer = *c.buffer } - resp, t1, t2, err := c.invoke(withTimers) + resp, t1, t2, err := c.invoke(withTimers, headers...) if err != nil { var he errs.HarvestError @@ -456,7 +448,7 @@ func (c *Client) invokeWithAuthRetry(withTimers bool) (*node.Node, time.Duration } // invokes the request that has been built with one of the BuildRequest* methods -func (c *Client) invoke(withTimers bool) (*node.Node, time.Duration, time.Duration, error) { +func (c *Client) invoke(withTimers bool, headers ...map[string]string) (*node.Node, time.Duration, time.Duration, error) { var ( root, result *node.Node @@ -486,6 +478,12 @@ func (c *Client) invoke(withTimers bool) (*node.Node, time.Duration, time.Durati zapiReq = c.buffer.String() } + for _, hs := range headers { + for k, v := range hs { + c.request.Header.Set(k, v) + } + } + if response, err = c.client.Do(c.request); err != nil { return result, responseT, parseT, errs.New(errs.ErrConnection, err.Error()) } @@ -581,23 +579,6 @@ func (c *Client) SetTimeout(timeout string) { c.client.Timeout = newTimeout } -func (c *Client) tlsVersion(version string) uint16 { - lower := strings.ToLower(version) - switch lower { - case "tls10": - return tls.VersionTLS10 - case "tls11": - return tls.VersionTLS11 - case "tls12": - return tls.VersionTLS12 - case "tls13": - return tls.VersionTLS13 - default: - c.Logger.Warn("Unknown TLS version, using default", slog.String("version", version)) - } - return 0 -} - // NewTestClient It's used for unit test only func NewTestClient() *Client { return &Client{ diff --git a/pkg/auth/auth.go b/pkg/auth/auth.go index 06b7c72f5..0196732ce 100644 --- a/pkg/auth/auth.go +++ b/pkg/auth/auth.go @@ -15,6 +15,7 @@ import ( "github.com/netapp/harvest/v2/third_party/mergo" "gopkg.in/yaml.v3" "log/slog" + "net" "net/http" "os" "os/exec" @@ -30,6 +31,8 @@ const ( defaultTimeout = "10s" certType = "CERTIFICATE" keyType = "PRIVATE KEY" + // DefaultDialerTimeout limits the time spent establishing a TCP connection + DefaultDialerTimeout = 10 * time.Second ) func NewCredentials(p *conf.Poller, logger *slog.Logger) *Credentials { @@ -438,7 +441,7 @@ func extractCertAndKey(blob string) ([]byte, []byte, error) { return nil, nil, fmt.Errorf("unexpected PEM block1Type=%s block2Type=%s", block1.Type, block2.Type) } -func (c *Credentials) Transport(request *http.Request) (*http.Transport, error) { +func (c *Credentials) Transport(request *http.Request, poller *conf.Poller) (http.RoundTripper, error) { var ( cert tls.Certificate transport *http.Transport @@ -475,6 +478,7 @@ func (c *Credentials) Transport(request *http.Request) (*http.Transport, error) if request != nil { request.SetBasicAuth(pollerAuth.Username, pollerAuth.Password) } + transport = &http.Transport{ Proxy: http.ProxyFromEnvironment, TLSClientConfig: &tls.Config{ @@ -483,5 +487,27 @@ func (c *Credentials) Transport(request *http.Request) (*http.Transport, error) }, } } - return transport, err + + transport.DialContext = (&net.Dialer{Timeout: DefaultDialerTimeout}).DialContext + + if poller.TLSMinVersion != "" { + tlsVersion := tlsVersion(poller.TLSMinVersion, c.logger) + if tlsVersion != 0 { + c.logger.Info("Using TLS version", slog.Int("tlsVersion", int(tlsVersion))) + transport.TLSClientConfig.MinVersion = tlsVersion + } + } + + if !poller.IsRecording() { + return transport, nil + } + + switch poller.Recorder.Mode { + case "record": + return recording(poller, transport), nil + case "replay": + return replaying(poller), nil + default: + return nil, errs.New(errs.ErrInvalidParam, "recorder mode") + } } diff --git a/pkg/auth/transport.go b/pkg/auth/transport.go new file mode 100644 index 000000000..10406f194 --- /dev/null +++ b/pkg/auth/transport.go @@ -0,0 +1,227 @@ +package auth + +import ( + "bufio" + "bytes" + "crypto/md5" //nolint:gosec + "crypto/tls" + "encoding/base64" + "fmt" + "github.com/netapp/harvest/v2/pkg/conf" + "github.com/netapp/harvest/v2/pkg/errs" + "io" + "io/fs" + "log/slog" + "net/http" + "net/http/httputil" + "os" + "path/filepath" + "strings" +) + +type RoundTripFunc func(req *http.Request) (res *http.Response, err error) + +func (rtf RoundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) { + return rtf(r) +} + +func tlsVersion(version string, logger *slog.Logger) uint16 { + lower := strings.ToLower(version) + switch lower { + case "tls10": + return tls.VersionTLS10 + case "tls11": + return tls.VersionTLS11 + case "tls12": + return tls.VersionTLS12 + case "tls13": + return tls.VersionTLS13 + default: + logger.Warn("Unknown TLS version, using default", slog.String("version", version)) + } + return 0 +} + +func recording(poller *conf.Poller, transport *http.Transport) http.RoundTripper { + + basePath := poller.Recorder.Path + + rtf := RoundTripFunc(func(req *http.Request) (*http.Response, error) { + var ( + err error + response *http.Response + ) + + err = os.MkdirAll(basePath, 0750) + if err != nil { + return nil, fmt.Errorf("problem while creating directories=%s transport: %w", basePath, err) + } + b, err := DumpRequest(req, true) + if err != nil { + return nil, err + } + + requestName, responseName := buildName(b) + name := filepath.Join(basePath, requestName) + if err := os.WriteFile(name, b, 0600); err != nil { + return nil, err + } + if response, err = transport.RoundTrip(req); err != nil { + return nil, err + } + b, err = httputil.DumpResponse(response, true) + if err != nil { + return nil, err + } + name = filepath.Join(basePath, responseName) + if err := os.WriteFile(name, b, 0600); err != nil { + return nil, err + } + return response, nil + }) + + return rtf +} + +func replaying(poller *conf.Poller) http.RoundTripper { + + aFs := os.DirFS(poller.Recorder.Path) + + rtf := RoundTripFunc(func(req *http.Request) (*http.Response, error) { + var ( + err error + ) + + defer func() { + if err != nil { + err = fmt.Errorf("problem while replaying transport: %w", err) + } + }() + + b, err := DumpRequest(req, true) + if err != nil { + return nil, err + } + _, name := buildName(b) + glob := "*" + name + matches, err := fs.Glob(aFs, glob) + if err != nil { + return nil, err + } + if len(matches) == 0 { + return nil, fmt.Errorf("%w: no replay file matches %q", errs.ErrResponseNotFound, glob) + } + if len(matches) > 1 { + return nil, fmt.Errorf("ambiguous response: multiple replay files match %q", glob) + } + b, err = fs.ReadFile(aFs, matches[0]) + if err != nil { + return nil, err + } + r := bufio.NewReader(bytes.NewReader(b)) + return http.ReadResponse(r, req) + }) + + return rtf +} + +// return request and response names +func buildName(b []byte) (string, string) { + h := md5.New() //nolint:gosec + h.Write(b) + s := base64.URLEncoding.EncodeToString(h.Sum(nil)) + return s[:8] + ".req.txt", s[:8] + ".res.txt" +} + +var reqHeadersToExclude = map[string]bool{ + "Authorization": true, + "User-Agent": true, +} + +// DumpRequest was copied from httputil.DumpRequest to remove the Host, Authorization, and User-Agent headers +func DumpRequest(req *http.Request, body bool) ([]byte, error) { + var err error + save := req.Body + if !body || req.Body == nil { + req.Body = nil + } else { + save, req.Body, err = drainBody(req.Body) + if err != nil { + return nil, err + } + } + + var b bytes.Buffer + + // By default, print out the unmodified req.RequestURI, which + // is always set for incoming server requests. But because we + // previously used req.URL.RequestURI and the docs weren't + // always so clear about when to use DumpRequest vs + // DumpRequestOut, fall back to the old way if the caller + // provides a non-server Request. + reqURI := req.RequestURI + if reqURI == "" { + reqURI = req.URL.RequestURI() + } + + _, _ = fmt.Fprintf(&b, "%s %s HTTP/%d.%d\r\n", valueOrDefault(req.Method, "GET"), + reqURI, req.ProtoMajor, req.ProtoMinor) + + chunked := len(req.TransferEncoding) > 0 && req.TransferEncoding[0] == "chunked" + if len(req.TransferEncoding) > 0 { + _, _ = fmt.Fprintf(&b, "Transfer-Encoding: %s\r\n", strings.Join(req.TransferEncoding, ",")) + } + + err = req.Header.WriteSubset(&b, reqHeadersToExclude) + if err != nil { + return nil, err + } + + b.WriteString("\r\n") + + if req.Body != nil { + var dest io.Writer = &b + if chunked { + dest = httputil.NewChunkedWriter(dest) + } + _, err = io.Copy(dest, req.Body) + if chunked { + _ = dest.(io.Closer).Close() + b.WriteString("\r\n") + } + } + + req.Body = save + if err != nil { + return nil, err + } + return b.Bytes(), nil +} + +// drainBody reads all of b to memory and then returns two equivalent +// ReadClosers yielding the same bytes. +// +// It returns an error if the initial slurp of all bytes fails. It does not attempt +// to make the returned ReadClosers have identical error-matching behavior. +func drainBody(b io.ReadCloser) (io.ReadCloser, io.ReadCloser, error) { + if b == nil || b == http.NoBody { + // No copying needed. Preserve the magic sentinel meaning of NoBody. + return http.NoBody, http.NoBody, nil + } + var buf bytes.Buffer + if _, err := buf.ReadFrom(b); err != nil { + return nil, b, err + } + if err := b.Close(); err != nil { + return nil, b, err + } + return io.NopCloser(&buf), io.NopCloser(bytes.NewReader(buf.Bytes())), nil +} + +// Return value if nonempty, def otherwise. +func valueOrDefault(value, def string) string { + if value != "" { + return value + } + return def +} diff --git a/pkg/conf/conf.go b/pkg/conf/conf.go index b5446a846..f9450ecb0 100644 --- a/pkg/conf/conf.go +++ b/pkg/conf/conf.go @@ -503,6 +503,12 @@ type ExportDef struct { Exporter } +type Recorder struct { + Path string `yaml:"path,omitempty"` + Mode string `yaml:"mode,omitempty"` // record or replay + KeepLast string `yaml:"keep_last,omitempty"` // number of records to keep before overwriting +} + func (e *ExportDef) UnmarshalYAML(n *yaml.Node) error { if n.Kind == yaml.MappingNode { var aExporter *Exporter @@ -518,34 +524,35 @@ func (e *ExportDef) UnmarshalYAML(n *yaml.Node) error { } type Poller struct { - Addr string `yaml:"addr,omitempty"` APIVersion string `yaml:"api_version,omitempty"` APIVfiler string `yaml:"api_vfiler,omitempty"` + Addr string `yaml:"addr,omitempty"` AuthStyle string `yaml:"auth_style,omitempty"` CaCertPath string `yaml:"ca_cert,omitempty"` + CertificateScript CertificateScript `yaml:"certificate_script,omitempty"` ClientTimeout string `yaml:"client_timeout,omitempty"` Collectors []Collector `yaml:"collectors,omitempty"` + ConfPath string `yaml:"conf_path,omitempty"` CredentialsFile string `yaml:"credentials_file,omitempty"` CredentialsScript CredentialsScript `yaml:"credentials_script,omitempty"` - CertificateScript CertificateScript `yaml:"certificate_script,omitempty"` Datacenter string `yaml:"datacenter,omitempty"` ExporterDefs []ExportDef `yaml:"exporters,omitempty"` + Exporters []string `yaml:"-"` IsKfs bool `yaml:"is_kfs,omitempty"` Labels *[]map[string]string `yaml:"labels,omitempty"` LogMaxBytes int64 `yaml:"log_max_bytes,omitempty"` LogMaxFiles int `yaml:"log_max_files,omitempty"` LogSet *[]string `yaml:"log,omitempty"` Password string `yaml:"password,omitempty"` - PollerSchedule string `yaml:"poller_schedule,omitempty"` PollerLogSchedule string `yaml:"poller_log_schedule,omitempty"` + PollerSchedule string `yaml:"poller_schedule,omitempty"` + PreferZAPI bool `yaml:"prefer_zapi,omitempty"` + Recorder Recorder `yaml:"recorder,omitempty"` SslCert string `yaml:"ssl_cert,omitempty"` SslKey string `yaml:"ssl_key,omitempty"` TLSMinVersion string `yaml:"tls_min_version,omitempty"` UseInsecureTLS *bool `yaml:"use_insecure_tls,omitempty"` Username string `yaml:"username,omitempty"` - PreferZAPI bool `yaml:"prefer_zapi,omitempty"` - ConfPath string `yaml:"conf_path,omitempty"` - Exporters []string `yaml:"-"` promIndex int Name string } @@ -578,6 +585,10 @@ func (p *Poller) Union(defaults *Poller) { p.CredentialsScript.Path = pCredentialsScript } +func (p *Poller) IsRecording() bool { + return p.Recorder.Path != "" +} + // ZapiPoller creates a poller out of a node, this is a bridge between the node and struct-based code // Used by ZAPI based code func ZapiPoller(n *node.Node) *Poller { @@ -639,6 +650,10 @@ func ZapiPoller(n *node.Node) *Poller { p.CertificateScript.Path = certificateScriptNode.GetChildContentS("path") p.CertificateScript.Timeout = certificateScriptNode.GetChildContentS("timeout") } + if recorderNode := n.GetChildS("recorder"); recorderNode != nil { + p.Recorder.Path = recorderNode.GetChildContentS("path") + p.Recorder.Mode = recorderNode.GetChildContentS("mode") + } if clientTimeout := n.GetChildContentS("client_timeout"); clientTimeout != "" { p.ClientTimeout = clientTimeout } else if p.ClientTimeout == "" { diff --git a/pkg/errs/errors.go b/pkg/errs/errors.go index 1233f0afc..b9b8f4c49 100644 --- a/pkg/errs/errors.go +++ b/pkg/errs/errors.go @@ -28,6 +28,7 @@ const ( ErrNoMetric = harvestError("no metrics") ErrPanic = harvestError("goroutine panic") ErrPermissionDenied = harvestError("Permission denied") + ErrResponseNotFound = harvestError("response not found") ErrWrongTemplate = harvestError("wrong template") ErrMetroClusterNotConfigured = harvestError("MetroCluster is not configured in cluster") )