Skip to content

Commit

Permalink
Merge pull request #51 from NETWAYS/feature/snapshot
Browse files Browse the repository at this point in the history
Add subcommand for snapshots
  • Loading branch information
martialblog authored Apr 12, 2024
2 parents 4dc8764 + 39769b1 commit ea9d866
Show file tree
Hide file tree
Showing 5 changed files with 348 additions and 0 deletions.
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,46 @@ check_elasticsearch ingest --pipeline foobar
\_[OK] Failed ingest operations for foobar: 5; | pipelines.foobar.failed=5c
```

### Snapshot

Checks status of Snapshots.

```
Checks the status of Elasticsearch snapshots
The plugin maps snapshot status to the following exit codes:
SUCCESS, Exit code 0
PARTIAL, Exit code 1
FAILED, Exit code 2
IN_PROGRESS, Exit code 3
If there are multiple snapshots the plugin uses the worst status
Usage:
check_elasticsearch snapshot [flags]
Flags:
-a, --all Check all retrieved snapshots. If not set only the latest snapshot is checked
-N, --number int Check latest N number snapshots. If not set only the latest snapshot is checked (default 1)
-r, --repository string Comma-separated list of snapshot repository names used to limit the request (default "*")
-s, --snapshot string Comma-separated list of snapshot names to retrieve. Wildcard (*) expressions are supported (default "*")
-h, --help help for snapshot
```

Examples:

```
$ check_elasticsearch snapshot
[OK] - All evaluated snapshots are in state SUCCESS
$ check_elasticsearch snapshot --all -r myrepo
[CRITICAL] - At least one evaluated snapshot is in state FAILED
$ check_elasticsearch snapshot --number 5 -s mysnapshot
[WARNING] - At least one evaluated snapshot is in state PARTIAL
```


## License

Copyright (c) 2022 [NETWAYS GmbH](mailto:[email protected])
Expand Down
126 changes: 126 additions & 0 deletions cmd/snapshot.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package cmd

import (
"fmt"
"strings"

"github.com/NETWAYS/go-check"
"github.com/NETWAYS/go-check/result"
"github.com/spf13/cobra"
)

// snapshotCmd implements the "snapshot" subcommand. It asks the client for the
// snapshots matching the --repository and --snapshot filters, evaluates the
// first N entries of the response (or all of them with --all) and exits with
// the worst resulting check state.
var snapshotCmd = &cobra.Command{
	Use:   "snapshot",
	Short: "Checks the status of Elasticsearch snapshots",
	Long: `Checks the status of Elasticsearch snapshots.
The plugin maps snapshot status to the following exit codes:
SUCCESS, Exit code 0
PARTIAL, Exit code 1
FAILED, Exit code 2
IN_PROGRESS, Exit code 3
If there are multiple snapshots the plugin uses the worst status.
`,
	Example: `
$ check_elasticsearch snapshot
[OK] - All evaluated snapshots are in state SUCCESS
$ check_elasticsearch snapshot --all
[CRITICAL] - At least one evaluated snapshot is in state FAILED
$ check_elasticsearch snapshot --number 5
[WARNING] - At least one evaluated snapshot is in state PARTIAL
`,
	Run: func(cmd *cobra.Command, args []string) {
		// All four flags are registered in init(), so the lookup errors are ignored.
		repository, _ := cmd.Flags().GetString("repository")
		snapshot, _ := cmd.Flags().GetString("snapshot")
		numberOfSnapshots, _ := cmd.Flags().GetInt("number")
		evalAllSnapshots, _ := cmd.Flags().GetBool("all")

		// A negative count would make the slice expression further down panic.
		if numberOfSnapshots < 0 {
			check.ExitError(fmt.Errorf("number of snapshots must not be negative: %d", numberOfSnapshots))
			return
		}

		client := cliConfig.NewClient()

		snapResponse, err := client.Snapshot(repository, snapshot)
		if err != nil {
			check.ExitError(err)
			return
		}

		// Evaluate everything when --all is set, and never more than was returned.
		available := len(snapResponse.Snapshots)
		if evalAllSnapshots || numberOfSnapshots > available {
			numberOfSnapshots = available
		}

		// With nothing to evaluate, claiming a snapshot is IN_PROGRESS would be
		// misleading, so report the empty result explicitly.
		if numberOfSnapshots == 0 {
			check.ExitRaw(check.Unknown, "No snapshots to evaluate.", "repository:", repository, "snapshot:", snapshot)
			return
		}

		// Evaluate snapshots given their states
		sStates := make([]int, 0, numberOfSnapshots)

		// One summary line per evaluated snapshot
		var summary strings.Builder

		for _, snap := range snapResponse.Snapshots[:numberOfSnapshots] {
			var (
				state int
				label string
			)

			switch snap.State {
			case "SUCCESS":
				state, label = check.OK, "OK"
			case "PARTIAL":
				state, label = check.Warning, "WARNING"
			case "FAILED":
				state, label = check.Critical, "CRITICAL"
			case "IN_PROGRESS":
				state, label = check.Unknown, "UNKNOWN"
			default:
				// Any state this plugin does not know is reported as UNKNOWN too.
				state, label = check.Unknown, "UNKNOWN"
			}

			sStates = append(sStates, state)
			fmt.Fprintf(&summary, "\n \\_[%s] Snapshot: %s, State %s, Repository: %s",
				label, snap.Snapshot, snap.State, snap.Repository)
		}

		rc := result.WorstState(sStates...)

		var output string

		switch rc {
		case check.OK:
			output = "All evaluated snapshots are in state SUCCESS."
		case check.Warning:
			output = "At least one evaluated snapshot is in state PARTIAL."
		case check.Critical:
			output = "At least one evaluated snapshot is in state FAILED."
		case check.Unknown:
			output = "At least one evaluated snapshot is in state IN_PROGRESS."
		default:
			output = "Could not evaluate status of snapshots"
		}

		check.ExitRaw(rc, output, "repository:", repository, "snapshot:", snapshot, summary.String())
	},
}

// init hooks the snapshot subcommand into the root command and declares the
// flags that the command's Run function reads.
func init() {
	rootCmd.AddCommand(snapshotCmd)

	flags := snapshotCmd.Flags()

	// Filters forwarded to the snapshot request.
	flags.StringP(
		"snapshot", "s", "*",
		"Comma-separated list of snapshot names to retrieve. Wildcard (*) expressions are supported",
	)
	flags.StringP(
		"repository", "r", "*",
		"Comma-separated list of snapshot repository names used to limit the request",
	)

	// Selection of how many of the returned snapshots get evaluated.
	flags.IntP(
		"number", "N", 1,
		"Check latest N number snapshots. If not set only the latest snapshot is checked",
	)
	flags.BoolP(
		"all", "a", false,
		"Check all retrieved snapshots. If not set only the latest snapshot is checked",
	)

	// --number and --all contradict each other, so only one may be given.
	snapshotCmd.MarkFlagsMutuallyExclusive("number", "all")
}
120 changes: 120 additions & 0 deletions cmd/snapshot_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package cmd

import (
"net/http"
"net/http/httptest"
"net/url"
"os/exec"
"strings"
"testing"
)

// TestSnapshot_ConnectionRefused runs the snapshot subcommand against port
// 9999 and expects the UNKNOWN output for a request that could not be sent.
func TestSnapshot_ConnectionRefused(t *testing.T) {
	const want = "[UNKNOWN] - could not fetch snapshots: Get \"http://localhost:9999/_snapshot/*/*?order=desc\": dial"

	// Only the printed output is checked, so the command's error is discarded.
	raw, _ := exec.Command("go", "run", "../main.go", "snapshot", "--port", "9999").CombinedOutput()

	if got := string(raw); !strings.Contains(got, want) {
		t.Error("\nActual: ", got, "\nExpected: ", want)
	}
}

// TestSnapshot_WithWrongFlags passes --all together with --number and expects
// the UNKNOWN output produced for flags from a mutually exclusive group.
func TestSnapshot_WithWrongFlags(t *testing.T) {
	const want = "[UNKNOWN] - if any flags in the group"

	// Only the printed output is checked, so the command's error is discarded.
	raw, _ := exec.Command("go", "run", "../main.go", "snapshot", "--all", "--number", "9999").CombinedOutput()

	if got := string(raw); !strings.Contains(got, want) {
		t.Error("\nActual: ", got, "\nExpected: ", want)
	}
}

// SnapshotTest describes one table-driven case for the snapshot subcommand.
type SnapshotTest struct {
// name is the subtest name passed to t.Run.
name string
// server is the stub HTTP server that answers the plugin's request.
server *httptest.Server
// args are the arguments handed to the "go" tool; the test appends --port.
args []string
// expected is a substring that the combined command output must contain.
expected string
}

// TestSnapshotCmd runs the snapshot subcommand against stub HTTP servers that
// return canned responses and checks that the combined output contains the
// expected status line.
func TestSnapshotCmd(t *testing.T) {
	tests := []SnapshotTest{
		{
			name: "no-snapshot",
			server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.Header().Set("X-Elastic-Product", "Elasticsearch")
				w.WriteHeader(http.StatusOK)
				w.Write([]byte(`Hey dude where my snapshot`))
			})),
			args:     []string{"run", "../main.go", "snapshot"},
			expected: "[UNKNOWN] - could not decode snapshot response",
		},
		{
			name: "snapshot-ok",
			server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.Header().Set("X-Elastic-Product", "Elasticsearch")
				w.WriteHeader(http.StatusOK)
				w.Write([]byte(`{"snapshots":[{"snapshot":"snapshot_1","uuid":"dKb54xw67gvdRctLCxSket","repository":"my_repository","version_id":1.1,"version":1,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"SUCCESS","start_time":"2020-07-06T21:55:18.129Z","start_time_in_millis":1593093628850,"end_time":"2020-07-06T21:55:18.129Z","end_time_in_millis":1593094752018,"duration_in_millis":0,"failures":[],"shards":{"total":0,"failed":0,"successful":0}},{"snapshot":"snapshot_2","uuid":"vdRctLCxSketdKb54xw67g","repository":"my_repository","version_id":2,"version":2,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"SUCCESS","start_time":"2020-07-06T21:55:18.130Z","start_time_in_millis":1593093628851,"end_time":"2020-07-06T21:55:18.130Z","end_time_in_millis":1593094752019,"duration_in_millis":1,"failures":[],"shards":{"total":0,"failed":0,"successful":0}}],"next":"c25hcHNob3RfMixteV9yZXBvc2l0b3J5LHNuYXBzaG90XzI=","total":3,"remaining":1}
`))
			})),
			args:     []string{"run", "../main.go", "snapshot"},
			expected: "[OK] - All evaluated snapshots are in state SUCCESS",
		},
		{
			name: "snapshot-inprogress",
			server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.Header().Set("X-Elastic-Product", "Elasticsearch")
				w.WriteHeader(http.StatusOK)
				w.Write([]byte(`{"snapshots":[{"snapshot":"snapshot_1","uuid":"dKb54xw67gvdRctLCxSket","repository":"my_repository","version_id":1,"version":1,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"IN_PROGRESS","start_time":"2020-07-06T21:55:18.129Z","start_time_in_millis":1593093628850,"end_time":"2020-07-06T21:55:18.129Z","end_time_in_millis":1593094752018,"duration_in_millis":0,"failures":[],"shards":{"total":0,"failed":0,"successful":0}},{"snapshot":"snapshot_2","uuid":"vdRctLCxSketdKb54xw67g","repository":"my_repository","version_id":2,"version":2,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"SUCCESS","start_time":"2020-07-06T21:55:18.130Z","start_time_in_millis":1593093628851,"end_time":"2020-07-06T21:55:18.130Z","end_time_in_millis":1593094752019,"duration_in_millis":1,"failures":[],"shards":{"total":0,"failed":0,"successful":0}}],"next":"c25hcHNob3RfMixteV9yZXBvc2l0b3J5LHNuYXBzaG90XzI=","total":3,"remaining":1}
`))
			})),
			args:     []string{"run", "../main.go", "snapshot"},
			expected: "[UNKNOWN] - At least one evaluated snapshot is in state IN_PROGRESS",
		},
		{
			name: "snapshot-failed-with-all",
			server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.Header().Set("X-Elastic-Product", "Elasticsearch")
				w.WriteHeader(http.StatusOK)
				w.Write([]byte(`{"snapshots":[{"snapshot":"snapshot_1","uuid":"dKb54xw67gvdRctLCxSket","repository":"my_repository","version_id":1,"version":1,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"IN_PROGRESS","start_time":"2020-07-06T21:55:18.129Z","start_time_in_millis":1593093628850,"end_time":"2020-07-06T21:55:18.129Z","end_time_in_millis":1593094752018,"duration_in_millis":0,"failures":[],"shards":{"total":0,"failed":0,"successful":0}},{"snapshot":"snapshot_2","uuid":"vdRctLCxSketdKb54xw67g","repository":"my_repository","version_id":2,"version":2,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"FAILED","start_time":"2020-07-06T21:55:18.130Z","start_time_in_millis":1593093628851,"end_time":"2020-07-06T21:55:18.130Z","end_time_in_millis":1593094752019,"duration_in_millis":1,"failures":[],"shards":{"total":0,"failed":0,"successful":0}}],"next":"c25hcHNob3RfMixteV9yZXBvc2l0b3J5LHNuYXBzaG90XzI=","total":3,"remaining":1}
`))
			})),
			args:     []string{"run", "../main.go", "snapshot", "--all"},
			expected: "[CRITICAL] - At least one evaluated snapshot is in state FAILED",
		},
		{
			name: "snapshot-partial-with-number",
			server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.Header().Set("X-Elastic-Product", "Elasticsearch")
				w.WriteHeader(http.StatusOK)
				w.Write([]byte(`{"snapshots":[{"snapshot":"snapshot_1","uuid":"dKb54xw67gvdRctLCxSket","repository":"my_repository","version_id":1,"version":1,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"SUCCESS","start_time":"2020-07-06T21:55:18.129Z","start_time_in_millis":1593093628850,"end_time":"2020-07-06T21:55:18.129Z","end_time_in_millis":1593094752018,"duration_in_millis":0,"failures":[],"shards":{"total":0,"failed":0,"successful":0}},{"snapshot":"snapshot_2","uuid":"vdRctLCxSketdKb54xw67g","repository":"my_repository","version_id":2,"version":2,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"PARTIAL","start_time":"2020-07-06T21:55:18.130Z","start_time_in_millis":1593093628851,"end_time":"2020-07-06T21:55:18.130Z","end_time_in_millis":1593094752019,"duration_in_millis":1,"failures":[],"shards":{"total":0,"failed":0,"successful":0}}],"next":"c25hcHNob3RfMixteV9yZXBvc2l0b3J5LHNuYXBzaG90XzI=","total":3,"remaining":1}
`))
			})),
			args:     []string{"run", "../main.go", "snapshot", "--number", "4"},
			expected: "[WARNING] - At least one evaluated snapshot is in state PARTIAL",
		},
	}

	// Every server is already listening once the table is built. Tie their
	// shutdown to the parent test so they are also closed when a subtest is
	// filtered out with -run or never reached.
	for _, test := range tests {
		t.Cleanup(test.server.Close)
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// We need the random Port extracted
			u, err := url.Parse(test.server.URL)
			if err != nil {
				t.Fatalf("could not parse test server URL %q: %v", test.server.URL, err)
			}

			// The plugin exits non-zero for every non-OK state, so only the
			// printed output is checked and the command's error is discarded.
			cmd := exec.Command("go", append(test.args, "--port", u.Port())...)
			out, _ := cmd.CombinedOutput()

			actual := string(out)

			if !strings.Contains(actual, test.expected) {
				t.Error("\nActual: ", actual, "\nExpected: ", test.expected)
			}
		})
	}
}
36 changes: 36 additions & 0 deletions internal/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,39 @@ func (c *Client) NodeStats() (r *es.ClusterStats, err error) {

return
}

// Snapshot requests the snapshots matching the given repository and snapshot
// expressions from "<c.URL>/_snapshot/<repository>/<snapshot>", sorted in
// descending order so the latest entries come first, and decodes the JSON
// body into an es.SnapshotResponse.
//
// The request is bound to a 10 second timeout. On failure a non-nil error is
// returned together with a response that should not be relied upon.
func (c *Client) Snapshot(repository string, snapshot string) (*es.SnapshotResponse, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	r := &es.SnapshotResponse{}

	u, err := url.JoinPath(c.URL, "/_snapshot/", repository, snapshot)
	if err != nil {
		return r, fmt.Errorf("error building snapshot URL: %w", err)
	}

	// Retrieve snapshots in descending order to get latest
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u+"?order=desc", nil)
	if err != nil {
		return r, fmt.Errorf("error creating request: %w", err)
	}

	resp, err := c.Client.Do(req)
	if err != nil {
		return r, fmt.Errorf("could not fetch snapshots: %w", err)
	}

	// Register the close before inspecting the status code, otherwise the
	// body would stay open on every non-200 response.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return r, fmt.Errorf("request failed for snapshots: %s", resp.Status)
	}

	if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
		return r, fmt.Errorf("could not decode snapshot response: %w", err)
	}

	return r, nil
}
26 changes: 26 additions & 0 deletions internal/elasticsearch/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,29 @@ type ClusterStats struct {
Nodes map[string]NodeInfo `json:"nodes"`
ClusterName string `json:"cluster_name"`
}

// Snapshot is a single entry of the "snapshots" array in a snapshot listing
// response. Only the fields listed here are decoded; other keys in the JSON
// are ignored.
type Snapshot struct {
	Snapshot    string   `json:"snapshot"`
	UUID        string   `json:"uuid"`
	Repository  string   `json:"repository"`
	Indices     []string `json:"indices"`
	DataStreams []string `json:"data_streams"`
	// FeatureStates is an array of objects in the Elasticsearch response, not
	// an array of strings; decoding it as []string fails once it is non-empty.
	FeatureStates []struct {
		FeatureName string   `json:"feature_name"`
		Indices     []string `json:"indices"`
	} `json:"feature_states"`
	IncludeGlobalState bool `json:"include_global_state"`
	// State is the snapshot state as reported by Elasticsearch, e.g.
	// "SUCCESS", "PARTIAL", "FAILED" or "IN_PROGRESS".
	State             string `json:"state"`
	StartTimeInMillis int    `json:"start_time_in_millis"`
	EndTimeInMillis   int    `json:"end_time_in_millis"`
	DurationInMillis  int    `json:"duration_in_millis"`
	// Failures holds one object per failed shard. It is populated for the
	// PARTIAL and FAILED snapshots this check reports on, so it has to decode
	// objects rather than strings.
	Failures []struct {
		Index     string `json:"index"`
		IndexUUID string `json:"index_uuid"`
		ShardID   int    `json:"shard_id"`
		Reason    string `json:"reason"`
		NodeID    string `json:"node_id"`
		Status    string `json:"status"`
	} `json:"failures"`
	Shards struct {
		Total      int `json:"total"`
		Failed     int `json:"failed"`
		Successful int `json:"successful"`
	} `json:"shards"`
}

// SnapshotResponse is the decoded JSON body of a snapshot listing request.
type SnapshotResponse struct {
// Snapshots holds the entries of the "snapshots" array, in the order returned by the server.
Snapshots []Snapshot `json:"snapshots"`
// Total is decoded from "total"; presumably the number of matching snapshots — confirm against the Elasticsearch API docs.
Total int `json:"total"`
// Remaining is decoded from "remaining"; presumably the number of snapshots not included in this response — confirm.
Remaining int `json:"remaining"`
}

0 comments on commit ea9d866

Please sign in to comment.