Skip to content

Commit

Permalink
new zookeeper logs and better gc log default (#253)
Browse files Browse the repository at this point in the history
* Default for gc logs is now "server*.gc*"
* Now collecting ZooKeeper container logs from the default helm chart
* Now collects the previous container logs as well
* remove the bootstrap of sjk as it is unused
  • Loading branch information
rsvihladremio authored Aug 23, 2024
1 parent 555a35d commit 17e78c3
Show file tree
Hide file tree
Showing 8 changed files with 191 additions and 140 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## [3.2.0] - 2024-08-23

### Changed

* Default for gc logs is now "server*.gc*"
* Now collecting ZooKeeper container logs from the default helm chart
* Now collects the previous container logs as well

## [3.1.2] - 2024-06-17

### Added
Expand Down Expand Up @@ -714,6 +722,7 @@ someone has added the PAT which is always available

- able to capture logs, configuration and diagnostic data from Dremio clusters deployed on Kubernetes and on-prem

[3.2.0]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.1.2...v3.2.0
[3.1.2]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.1.1...v3.1.2
[3.1.1]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.1.0...v3.1.1
[3.1.0]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.0.3...v3.1.0
Expand Down
2 changes: 1 addition & 1 deletion cmd/local/conf/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func SetViperDefaults(confData map[string]interface{}, hostName string, defaultC
setDefault(confData, KeyCollectOSConfig, true)
setDefault(confData, KeyCollectDiskUsage, true)

setDefault(confData, KeyDremioGCFilePattern, "gc*.log*")
setDefault(confData, KeyDremioGCFilePattern, "server*.gc*")
setDefault(confData, KeyCollectQueriesJSON, true)
setDefault(confData, KeyCollectServerLogs, true)
setDefault(confData, KeyCollectMetaRefreshLog, true)
Expand Down
6 changes: 3 additions & 3 deletions cmd/local/conf/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func TestSetViperDefaultsWithHealthCheck(t *testing.T) {
{conf.KeyCollectDiskUsage, true},
{conf.KeyDremioLogsNumDays, 7},
{conf.KeyDremioQueriesJSONNumDays, 30},
{conf.KeyDremioGCFilePattern, "gc*.log*"},
{conf.KeyDremioGCFilePattern, "server*.gc*"},
{conf.KeyCollectQueriesJSON, true},
{conf.KeyCollectServerLogs, true},
{conf.KeyCollectMetaRefreshLog, true},
Expand Down Expand Up @@ -121,7 +121,7 @@ func TestSetViperDefaultsQuickCollect(t *testing.T) {
{conf.KeyCollectDiskUsage, true},
{conf.KeyDremioLogsNumDays, 2},
{conf.KeyDremioQueriesJSONNumDays, 2},
{conf.KeyDremioGCFilePattern, "gc*.log*"},
{conf.KeyDremioGCFilePattern, "server*.gc*"},
{conf.KeyCollectQueriesJSON, true},
{conf.KeyCollectServerLogs, true},
{conf.KeyCollectMetaRefreshLog, true},
Expand Down Expand Up @@ -180,7 +180,7 @@ func TestSetViperDefaults(t *testing.T) {
{conf.KeyCollectDiskUsage, true},
{conf.KeyDremioLogsNumDays, 7},
{conf.KeyDremioQueriesJSONNumDays, 30},
{conf.KeyDremioGCFilePattern, "gc*.log*"},
{conf.KeyDremioGCFilePattern, "server*.gc*"},
{conf.KeyCollectQueriesJSON, true},
{conf.KeyCollectServerLogs, true},
{conf.KeyCollectMetaRefreshLog, true},
Expand Down
83 changes: 67 additions & 16 deletions cmd/root/collection/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ func ClusterK8sExecute(hook shutdown.CancelHook, namespace string, cs CopyStrate
return err
}

// zookeeper logs specifically
path, err := cs.CreatePath("kubernetes", "zookeeper-container-logs", "")
if err != nil {
simplelog.Errorf("trying to construct cluster container log path %v with error %v", path, err)
return err
}
if err := saveZookeeperPodLogs(hook, namespace, cs, ddfs, path); err != nil {
simplelog.Errorf("unable to save zookeeper pod logs: %v", err)
}
// everything else
for _, cmd := range cmds {
resource := cmd
out, err := clusterExecuteBytes(hook, namespace, resource)
Expand Down Expand Up @@ -80,31 +90,38 @@ func GetClusterLogs(hook shutdown.CancelHook, namespace string, cs CopyStrategy,
if err != nil {
return err
}
// Loop over dremio pods
// Loop over pods
for _, podname := range pods {
podObj, err := clientSet.CoreV1().Pods(namespace).Get(context.Background(), podname, metav1.GetOptions{})
if err != nil {
simplelog.Errorf("unable to get pod %v: %v", podname, err)
continue
}
var containers []string
for _, c := range podObj.Spec.Containers {
containers = append(containers, c.Name)
}
for _, c := range podObj.Spec.InitContainers {
containers = append(containers, c.Name)
}
// Loop over each container, construct a path and log file name
// write the output of the kubectl logs command to a file
for _, container := range containers {
copyContainerLog(hook, cs, ddfs, container, namespace, path, podname)
}
consoleprint.UpdateK8sFiles(fmt.Sprintf("pod %v logs", podname))
saveLogsFromPod(podObj, hook, cs, ddfs, namespace, path, podname)
}
return err
}

func copyContainerLog(hook shutdown.CancelHook, cs CopyStrategy, ddfs helpers.Filesystem, container, namespace, path, pod string) {
// saveLogsFromPod collects the logs of every container declared on podObj.
//
// Container names are gathered from the pod spec: regular containers first,
// then init containers. For each name, copyContainerLog is invoked twice:
// once requesting the previous container instance's logs and once requesting
// the current logs. After all containers have been handled the console
// progress display is updated with the pod name.
func saveLogsFromPod(podObj *corev1.Pod, hook shutdown.CancelHook, cs CopyStrategy, ddfs helpers.Filesystem, namespace, path, podname string) {
	regular := podObj.Spec.Containers
	initial := podObj.Spec.InitContainers

	names := make([]string, 0, len(regular)+len(initial))
	for i := range regular {
		names = append(names, regular[i].Name)
	}
	for i := range initial {
		names = append(names, initial[i].Name)
	}

	for _, name := range names {
		// previous logs (if present) are requested first, then the current logs
		for _, previous := range []bool{true, false} {
			copyContainerLog(hook, cs, ddfs, name, namespace, path, podname, previous)
		}
	}
	consoleprint.UpdateK8sFiles(fmt.Sprintf("pod %v logs", podname))
}

func copyContainerLog(hook shutdown.CancelHook, cs CopyStrategy, ddfs helpers.Filesystem, container, namespace, path, pod string, previous bool) {
client, _, err := kubernetes.GetClientset()
if err != nil {
simplelog.Errorf("unable to get k8s client for collecting logs on pod: %v container: %v with error: %v", pod, container, err)
Expand All @@ -115,6 +132,7 @@ func copyContainerLog(hook shutdown.CancelHook, cs CopyStrategy, ddfs helpers.Fi
defer timeout() // releases resources if slowOperation completes before timeout elapses
req := client.CoreV1().Pods(namespace).GetLogs(pod, &corev1.PodLogOptions{
Container: container,
Previous: previous,
})
r, err := req.Stream(ctx)
if err != nil {
Expand All @@ -140,7 +158,12 @@ func copyContainerLog(hook shutdown.CancelHook, cs CopyStrategy, ddfs helpers.Fi
}
}
out := buf.String()
outFile := filepath.Join(path, pod+"-"+container+".txt")
var outFile string
if previous {
outFile = filepath.Join(path, pod+"-"+container+"-previous.txt")
} else {
outFile = filepath.Join(path, pod+"-"+container+".txt")
}
simplelog.Debugf("getting logs for pod: %v container: %v", pod, container)
p, err := cs.CreatePath("kubernetes", "container-logs", "")
if err != nil {
Expand All @@ -154,6 +177,33 @@ func copyContainerLog(hook shutdown.CancelHook, cs CopyStrategy, ddfs helpers.Fi
}
}

// saveZookeeperPodLogs lists the pods in namespace that carry the label
// "app=zk" and hands each one to saveLogsFromPod so its container logs are
// collected under path.
//
// The pod listing is bounded by a 60 second timeout derived from the hook's
// context. It returns an error when the Kubernetes client cannot be obtained
// or when the listing fails; if the context deadline was exceeded, the
// context's cause is returned instead of the raw listing error.
func saveZookeeperPodLogs(hook shutdown.CancelHook, namespace string, cs CopyStrategy, ddfs helpers.Filesystem, path string) error {
	clientSet, _, err := kubernetes.GetClientset()
	if err != nil {
		return err
	}

	const listTimeout = 60 * time.Second
	ctx, cancel := context.WithTimeoutCause(hook.GetContext(), listTimeout, fmt.Errorf("while getting resource zk pod in namespace %s timeout exceeded %v", namespace, listTimeout))
	defer cancel()

	zkPods, err := clientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: "app=zk"})
	if err != nil {
		// surface the descriptive timeout cause rather than the generic error
		if ctx.Err() == context.DeadlineExceeded {
			return context.Cause(ctx)
		}
		return err
	}

	for i := range zkPods.Items {
		pod := &zkPods.Items[i]
		saveLogsFromPod(pod, hook, cs, ddfs, namespace, path, pod.Name)
	}
	return nil
}

// Execute commands at the cluster level
// Calls a raw execute function and simply writes out the byte array read from the response
// that comes in directly from kubectl
Expand Down Expand Up @@ -571,6 +621,7 @@ func clusterExecuteBytes(hook shutdown.CancelHook, namespace, resource string) (
default:
simplelog.Errorf("resource (%v) does not have an implementation", resource)
}

return b, nil

}
2 changes: 1 addition & 1 deletion default-ddc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# collect-disk-usage: true
# dremio-logs-num-days: 7
# dremio-queries-json-num-days: 30
# dremio-gc-file-pattern: "gc*.log*"
# dremio-gc-file-pattern: "server*.gc*"
# collect-queries-json: true
# collect-jvm-flags: true
# collect-server-logs: true
Expand Down
60 changes: 31 additions & 29 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,58 +2,60 @@ module github.com/dremio/dremio-diagnostic-collector/v3

go 1.22.0

require github.com/spf13/cobra v1.7.0 // direct
require github.com/spf13/cobra v1.8.1 // direct

require (
github.com/google/uuid v1.3.0
github.com/google/uuid v1.6.0
github.com/manifoldco/promptui v0.9.0
github.com/rogpeppe/go-internal v1.10.0
github.com/spf13/cast v1.5.1
github.com/rogpeppe/go-internal v1.12.0
github.com/spf13/cast v1.7.0
github.com/spf13/pflag v1.0.5
golang.org/x/sys v0.18.0
golang.org/x/sys v0.24.0
gopkg.in/yaml.v3 v3.0.1
k8s.io/api v0.30.1
k8s.io/apimachinery v0.30.1
k8s.io/client-go v0.30.1
k8s.io/kubectl v0.30.1
k8s.io/api v0.31.0
k8s.io/apimachinery v0.31.0
k8s.io/client-go v0.31.0
k8s.io/kubectl v0.31.0
)

require (
github.com/chzyer/readline v1.5.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/gorilla/websocket v1.5.3 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/moby/spdystream v0.2.0 // indirect
github.com/moby/spdystream v0.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
golang.org/x/net v0.23.0 // indirect
golang.org/x/oauth2 v0.10.0 // indirect
golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.33.0 // indirect
github.com/x448/float16 v0.8.4 // indirect
golang.org/x/net v0.28.0 // indirect
golang.org/x/oauth2 v0.22.0 // indirect
golang.org/x/term v0.23.0 // indirect
golang.org/x/text v0.17.0 // indirect
golang.org/x/time v0.6.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
k8s.io/klog/v2 v2.120.1 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240822171749-76de80e0abd9 // indirect
k8s.io/utils v0.0.0-20240821151609-f90d01438635 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)
Loading

0 comments on commit 17e78c3

Please sign in to comment.