From e96d7659a2b1a4f146cab3f74064678fdd218eb4 Mon Sep 17 00:00:00 2001 From: Ryan Svihla <105286284+rsvihladremio@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:55:41 +0200 Subject: [PATCH] updated gc logging logic (#261) --- CHANGELOG.md | 10 ++ cmd/local/conf/autodetect/awse.go | 22 ---- cmd/local/conf/autodetect/awse_test.go | 36 ------ cmd/local/conf/autodetect/gclog_finder.go | 114 +++++++++++------- .../conf/autodetect/gclog_finder_test.go | 36 ++++-- cmd/local/conf/autodetect/pid.go | 54 +++++---- cmd/local/conf/autodetect/pid_test.go | 65 +++++++--- cmd/local/conf/conf.go | 9 +- cmd/local/logcollect/logcollect.go | 3 + integrationtest/kube/testdata/dremio.yaml | 14 +-- pkg/jps/jps.go | 2 +- 11 files changed, 198 insertions(+), 167 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da1e8687..0df5cd16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,16 @@ # Changelog +## [3.2.5] - 2024-09-13 + +### Added + +* autodetection of the gc log name from the logging parameter; this removes the need to set a gc log matching pattern +* enhanced logging during file iteration while searching for logs in the gc logging folder + ## [3.2.4] - 2024-09-09 +### Added + * added support for using older kubectl clients since the kubectl cp interface is stable, by checking client version we can safely check if retries are supported and only add them if the are ## [3.2.3] - 2024-09-06 @@ -772,6 +781,7 @@ someone has added the PAT which is always available - able to capture logs, configuration and diagnostic data from Dremio clusters deployed on Kubernetes and on-prem +[3.2.5]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.4...v3.2.5 [3.2.4]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.3...v3.2.4 [3.2.3]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.2...v3.2.3 [3.2.2]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.1...v3.2.2 diff --git a/cmd/local/conf/autodetect/awse.go 
b/cmd/local/conf/autodetect/awse.go index bbbb0514..62119a7a 100644 --- a/cmd/local/conf/autodetect/awse.go +++ b/cmd/local/conf/autodetect/awse.go @@ -16,26 +16,13 @@ package autodetect import ( - "bytes" - "fmt" "os" "path/filepath" "strings" - "github.com/dremio/dremio-diagnostic-collector/v3/cmd/local/ddcio" - "github.com/dremio/dremio-diagnostic-collector/v3/pkg/shutdown" "github.com/dremio/dremio-diagnostic-collector/v3/pkg/simplelog" ) -func IsAWSEFromJPSOutput(jpsText string) (bool, error) { - if strings.Contains(jpsText, "DremioDaemon") && strings.Contains(jpsText, "preview") { - return true, nil - } else if strings.Contains(jpsText, "AwsDremioDaemon") { - return true, nil - } - return false, nil -} - func IsAWSEExecutorUsingDir(efsFolder, nodeName string) (bool, error) { dir, err := os.ReadDir(efsFolder) if err != nil { @@ -56,15 +43,6 @@ func IsAWSEExecutorUsingDir(efsFolder, nodeName string) (bool, error) { return false, nil } -func IsAWSE(hook shutdown.Hook) (bool, error) { - var dremioPIDOutput bytes.Buffer - if err := ddcio.Shell(hook, &dremioPIDOutput, "jps -v"); err != nil { - return false, fmt.Errorf("grepping from Dremio from jps -v failed %v with output %v", err, dremioPIDOutput.String()) - } - dremioPIDString := dremioPIDOutput.String() - return IsAWSEFromJPSOutput(dremioPIDString) -} - func IsAWSEExecutor(nodeName string) (bool, error) { // search EFS folder // Open the directory diff --git a/cmd/local/conf/autodetect/awse_test.go b/cmd/local/conf/autodetect/awse_test.go index 6526d329..29d76671 100644 --- a/cmd/local/conf/autodetect/awse_test.go +++ b/cmd/local/conf/autodetect/awse_test.go @@ -21,42 +21,6 @@ import ( "github.com/dremio/dremio-diagnostic-collector/v3/cmd/local/conf/autodetect" ) -func TestIsAWSEFromText(t *testing.T) { - //should return false when AwsDremioDaemon or DremioDaemon is not found in the text - jpsText := "12345 JavaProcess\n67890 AnotherProcess" - isAWSE, err := autodetect.IsAWSEFromJPSOutput(jpsText) - if err != 
nil { - t.Errorf("unexpected error %v", err) - } - if isAWSE { - t.Error("expected to not be AWSE but was detected as AWSE") - } - - //should return true when AwsDremioDaemon is found in the text - jpsText = "12345 AwsDremioDaemon\n67890 AnotherProcess" - isAWSE, err = autodetect.IsAWSEFromJPSOutput(jpsText) - if err != nil { - t.Errorf("unexpected error %v", err) - } - if !isAWSE { - t.Error("expected to be AWSE but was detected as not AWSE") - } - - // AWSE can show two DremioDaemon processes but one is the preview engine, this gives us indication of AWSE - //should return true when DremioDaemon and preview is found in the text - jpsText = `27059 Jps -Dapplication.home=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.362.b08-1.amzn2.0.1.x86_64 -Xms8m -31577 DremioDaemon -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/var/log/dremio/preview/server.gc -Ddremio.log.path=/var/log/dremio/preview -Ddremio.plugins.path=/opt/dremio/plugins -Xmx2048m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio/preview -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -Xloggc:/var/log/dremio/server-%t.gc -XX:+UseG1GC -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=2000 -XX:GCLogFileSize=50M -XX:+StartAttachListener -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -28091 DremioDaemon -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/var/log/dremio/server-%t.gc -Ddremio.log.path=/var/log/dremio -Ddremio.plugins.path=/opt/dremio/plugins -Xmx5491m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE 
-DMAPR_MAX_RA_STREAMS=400 -Xloggc:/var/log/dremio/server-%t.gc -XX:+UseG1GC -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=2000 -XX:GCLogFileSize=50M -XX:+StartAttachListener -XX:+AlwaysPreTouch -Xms5g -Xmx5g -XX:MaxDirectMemorySize=5g -Xloggc:/opt/dremio/data/gc.log -XX:NumberOfGCLogFiles=20 -XX:GCLogFileSize=100m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintAdaptiveSizePolicy -XX:+UseGCLogFileRotation -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt/dremio/data -XX:ErrorFile=/opt/dremio/data/hs_err_pid%p.log -XX:G1 -` - isAWSE, err = autodetect.IsAWSEFromJPSOutput(jpsText) - if err != nil { - t.Errorf("unexpected error %v", err) - } - if !isAWSE { - t.Error("expected to be AWSE but was detected as not AWSE") - } -} - func TestIsAWSEExecutorUsingDir(t *testing.T) { var ( testDir string diff --git a/cmd/local/conf/autodetect/gclog_finder.go b/cmd/local/conf/autodetect/gclog_finder.go index b95223e0..05cf5662 100644 --- a/cmd/local/conf/autodetect/gclog_finder.go +++ b/cmd/local/conf/autodetect/gclog_finder.go @@ -16,7 +16,6 @@ package autodetect import ( - "bufio" "bytes" "fmt" "path" @@ -24,99 +23,128 @@ import ( "github.com/dremio/dremio-diagnostic-collector/v3/cmd/local/ddcio" "github.com/dremio/dremio-diagnostic-collector/v3/pkg/shutdown" + "github.com/dremio/dremio-diagnostic-collector/v3/pkg/simplelog" ) -// findGCLogLocation retrieves the gc log location with a search string to greedily retrieve everything by prefix -func FindGCLogLocation(hook shutdown.Hook) (gcLogLoc string, err error) { +const jdk8GCLoggingFLag = "-Xloggc:" +const jdk9UnifiedGCLoggingFlag = "-Xlog:" - var jpsVerbose bytes.Buffer - err = ddcio.Shell(hook, &jpsVerbose, "jps -v") +// FindGCLogLocation retrieves the gc log location from ps eww output +func FindGCLogLocation(hook shutdown.Hook, pid int) (gcLogPattern string, gcLogLoc string, err error) { + var psEWW bytes.Buffer + + // remove the header with tail -n 1 + err = ddcio.Shell(hook, &psEWW, 
fmt.Sprintf("ps eww %v | tail -n 1", pid)) if err != nil { - return "", fmt.Errorf("unable to find gc logs due to error '%v'", err) + return "", "", fmt.Errorf("unable to find gc logs due to error '%v'", err) } - pid, err := GetDremioPID(hook) - if err != nil { - return "", fmt.Errorf("unable to find gc logs due to error '%v'", err) + + data := strings.TrimSpace(psEWW.String()) + lines := len(strings.Split(data, "\n")) + if lines == 0 { + return "", "", fmt.Errorf("empty ps eww %v output cannot find gc logs", pid) + } + if lines > 1 { + return "", "", fmt.Errorf("to many results in the ps eww %v output cannot find gc logs: '%v'", pid, data) } var startupFlags string - scanner := bufio.NewScanner(&jpsVerbose) - for scanner.Scan() { - line := scanner.Text() - tokens := strings.Split(line, " ") - if len(tokens) > 0 { - potentialPid := strings.TrimSpace(tokens[0]) - if potentialPid == fmt.Sprintf("%d", pid) { - startupFlags = strings.Join(tokens[1:], " ") - } - } + tokens := strings.Split(data, " ") + if len(tokens) > 0 { + startupFlags = strings.Join(tokens[1:], " ") } - logLocation, err := ParseGCLogFromFlags(startupFlags) + + if startupFlags == "" { + return "", "", fmt.Errorf("unable to find gc logs because there was no matching pid %v found in the jps -v output: '%v'", pid, psEWW) + } + logRegex, logLocation, err := ParseGCLogFromFlags(startupFlags) if err != nil { - return "", fmt.Errorf("unable to find gc logs due to error '%v'", err) + return "", "", fmt.Errorf("unable to find gc logs due to error '%v'", err) } - if logLocation != "" { - return logLocation, nil + if logLocation == "" { + simplelog.Warningf("autodetection of gc logs location failed as no %v or %v flag was found in the startup flags: '%v'", jdk8GCLoggingFLag, jdk9UnifiedGCLoggingFlag, startupFlags) + return "", "", nil } - return "", nil + simplelog.Infof("detected gc log directory at '%v'", logLocation) + if logRegex == "" { + simplelog.Warningf("autodetection of gc logs location failed we were 
unable to determine gc log regex: '%v'", startupFlags) + return "", "", nil + } + simplelog.Infof("detected gc log pattern at '%v'", logRegex) + return logRegex, logLocation, nil } // ParseGCLogFromFlags takes a given string with java startup flags and finds the gclog directive -func ParseGCLogFromFlags(startupFlagsStr string) (gcLogLocation string, err error) { - logDir, errorFromPost25 := ParseGCLogFromFlagsPost25(startupFlagsStr) +func ParseGCLogFromFlags(startupFlagsStr string) (logRegex string, gcLogLocation string, err error) { + logRegex, logDir, errorFromPost25 := ParseGCLogFromFlagsPost25(startupFlagsStr) if logDir == "" { - logDir, err := ParseGCLogFromFlagsPre25(startupFlagsStr) + logRegex, logDir, err := ParseGCLogFromFlagsPre25(startupFlagsStr) if err != nil { - return "", fmt.Errorf("uanble to parse gc flags due the following errors: '%v' and '%v'", errorFromPost25, err) + return "", "", fmt.Errorf("uanble to parse gc flags due the following errors: '%v' and '%v'", errorFromPost25, err) } - return logDir, nil + return logRegex, logDir, nil } - return logDir, nil + return logRegex, logDir, nil } // ParseGCLogFromFlags takes a given string with java startup flags and finds the gclog directive -func ParseGCLogFromFlagsPost25(startupFlagsStr string) (gcLogLocation string, err error) { +func ParseGCLogFromFlagsPost25(startupFlagsStr string) (logRegex string, gcLogLocation string, err error) { tokens := strings.Split(startupFlagsStr, " ") var found []int for i, token := range tokens { - if strings.HasPrefix(token, "-Xlog:") { + if strings.HasPrefix(token, jdk9UnifiedGCLoggingFlag) { found = append(found, i) } } if len(found) == 0 { - return "", nil + return "", "", nil } lastIndex := found[len(found)-1] last := tokens[lastIndex] - gcLogLocationTokens := strings.Split(last, "-Xlog:") + gcLogLocationTokens := strings.Split(last, jdk9UnifiedGCLoggingFlag) if len(gcLogLocationTokens) != 2 { - return "", fmt.Errorf("unexpected items in string '%v', expected only 
2 items but found %v", last, len(gcLogLocationTokens)) + return "", "", fmt.Errorf("unexpected items in string '%v', expected only 2 items but found %v", last, len(gcLogLocationTokens)) } tokens = strings.Split(gcLogLocationTokens[1], ":") for _, t := range tokens { if strings.HasPrefix(t, "file=") { - return path.Dir(strings.Split(t, "file=")[1]), nil + gcPath := strings.Split(t, "file=")[1] + gcLogDir := path.Dir(gcPath) + gcRegex := fmt.Sprintf("*%v*", path.Base(gcPath)) + // unified logging lets you add the timestamp, just doing a * here + gcRegex = strings.ReplaceAll(gcRegex, "%t", "*") + // unified logging lets you set the pid also just doing * + gcRegex = strings.ReplaceAll(gcRegex, "%p", "*") + return gcRegex, gcLogDir, nil } } - return "", fmt.Errorf("could not find an Xlog parameter with file= in the string %v", startupFlagsStr) + + return "", "", fmt.Errorf("could not find an %v parameter with file= in the string %v", jdk9UnifiedGCLoggingFlag, startupFlagsStr) } // ParseGCLogFromFlags takes a given string with java startup flags and finds the gclog directive -func ParseGCLogFromFlagsPre25(startupFlagsStr string) (gcLogLocation string, err error) { +func ParseGCLogFromFlagsPre25(startupFlagsStr string) (logRegex string, gcLogLocation string, err error) { tokens := strings.Split(startupFlagsStr, " ") var found []int for i, token := range tokens { - if strings.HasPrefix(token, "-Xloggc:") { + if strings.HasPrefix(token, jdk8GCLoggingFLag) { found = append(found, i) } } if len(found) == 0 { - return "", nil + return "", "", nil } lastIndex := found[len(found)-1] last := tokens[lastIndex] - gcLogLocationTokens := strings.Split(last, "-Xloggc:") + gcLogLocationTokens := strings.Split(last, jdk8GCLoggingFLag) if len(gcLogLocationTokens) != 2 { - return "", fmt.Errorf("unexpected items in string '%v', expected only 2 items but found %v", last, len(gcLogLocationTokens)) + return "", "", fmt.Errorf("unexpected items in string '%v', expected only 2 items but found 
%v", last, len(gcLogLocationTokens)) } - return path.Dir(gcLogLocationTokens[1]), nil + gcPath := gcLogLocationTokens[1] + // get the file arg + gcRegex := fmt.Sprintf("*%v*", path.Base(gcPath)) + // since jdk8 lets you add the timestamp, just doing a * here + gcRegex = strings.ReplaceAll(gcRegex, "%t", "*") + // since jdk8 lets you set the pid also just doing * + gcRegex = strings.ReplaceAll(gcRegex, "%p", "*") + return gcRegex, path.Dir(gcPath), nil } diff --git a/cmd/local/conf/autodetect/gclog_finder_test.go b/cmd/local/conf/autodetect/gclog_finder_test.go index 9ec7d446..922241cb 100644 --- a/cmd/local/conf/autodetect/gclog_finder_test.go +++ b/cmd/local/conf/autodetect/gclog_finder_test.go @@ -22,39 +22,44 @@ import ( func TestParseGCLogFromFlags_WhenJVMFlagsAreGiven(t *testing.T) { //Should parse the GC log location correctly" - processFlags := `1: -jvm_args: -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Ddremio.plugins.path=/opt/dremio/plugins -Xmx4096m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -XX:+UseG1GC -Ddremio.log.path=/opt/dremio/data/log -Xloggc:/opt/dremio/data/log/gc.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=4000k -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt/dremio/data -XX:+UseG1GC -XX:G1HeapRegionSize=32M -XX:MaxGCPauseMillis=500 -XX:InitiatingHeapOccupancyPercent=25 -XX:+PrintAdaptiveSizePolicy -XX:+PrintReferenceGC -XX:ErrorFile=/opt/dremio/data/hs_err_pid%p.log -Dzookeeper=zk-hs:2181 -Dservices.coordinator.enabled=false -Dservices.coordinator.master.enabled=false 
-Dservices.coordinator.master.embedded-zookeeper.enabled=false -Dservices.executor.enabled=true -Dservices.conduit.port=45679 -Dservices.node-tag=default -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -java_command: com.dremio.dac.daemon.DremioDaemon -java_class_path (initial): /opt/dremio/conf:/opt/dremio/jars/dremio-services-coordinator-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-hive-function-registry-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-ce-sabot-serializer-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-hive2-plugin-launcher-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-ee-services-credentials-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-ee-services-accesscontrol-common-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-ce-sabot-scheduler-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-dac-common-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-services-usersessions-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-ee-services-sysflight-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-protocol-20.0.0-202201050826310141-8cc7162b-proto.jar:/opt/dremio/jars/dremio-services-telemetry-impl-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-services-jobtelemetry-client-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-hive3-plugin-launcher-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-ce-services-cachemanager-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-ee-dac-tools-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-services-base-rpc-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-services-datastore-20.0.0-202201050826310141-8cc7162b-proto.jar:/opt/dremio/jars/dremio-sabot-logical-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-services-transientstore-20.0.0-202201050826310141-8cc71
62b.jar:/opt/dremio/jars/dremio-services-resourcescheduler-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-dac-daemon-20.0.0-202201050826310141-8cc7162b.jar:/opt/dremio/jars/dremio-ee-services-namespace-20.0.0-202201050826310141-8cc7162b-tests.jar:/opt/dremio/j -Launcher Type: SUN_STANDARD` - gcLogLocation, err := autodetect.ParseGCLogFromFlags(processFlags) + + processFlags := ` 519 ? Ssl 192:04 /usr/lib/jvm/bellsoft-java8-amd64/bin/java -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/opt/dremio/log/server.gc -Ddremio.log.path=/opt/dremio/log -Ddremio.plugins.path=/opt/dremio/plugins -Xmx4096m -XX:MaxDirectMemorySize=8192m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt/dremio/log -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -XX:+UseG1GC -Ddremio.log.path=/opt/dremio/log/ -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=4000k -XX:+UseGCLogFileRotation -Xloggc:/opt/dremio/log/gc.log -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -cp /opt/dremio/conf:/opt/dremio/jars/*:/opt/dremio/jars/ext/*:/opt/dremio/jars/3rdparty/*:/usr/lib/jvm/bellsoft-java8-amd64/lib/tools.jar com.dremio.dac.daemon.DremioDaemon` + gcRegex, gcLogLocation, err := autodetect.ParseGCLogFromFlags(processFlags) if err != nil { t.Errorf("expected no error but we have %v", err) } - expected := "/opt/dremio/data/log" + expected := "/opt/dremio/log" if gcLogLocation != expected { t.Errorf("expected %v but was %v", gcLogLocation, expected) } + expected = "*gc.log*" + if gcRegex != expected { + t.Errorf("expected %v but was %v", expected, gcRegex) + } } func TestParseGCLogFromFlagsWithExtraLogFileLine(t *testing.T) { 
//"Should parse the GC log location correctly", func() { - processFlags := `jvm_args: -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/var/log/dremio/server.gc -Ddremio.log.path=/var/log/dremio -Ddremio.plugins.path=/opt/dremio/plugins -Xmx6144m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -XX:+UseG1GC -Ddremio.log.path=/opt/dremio/data/log -Xloggc:/opt/dremio/data/log/gc.log -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=4000k -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt/dremio/data -XX:+UseG1GC -XX:G1HeapRegionSize=32M -XX:MaxGCPauseMillis=500 -XX:InitiatingHeapOccupancyPercent=25 -XX:+PrintAdaptiveSizePolicy -XX:+PrintReferenceGC -XX:ErrorFile=/opt/dremio/data/hs_err_pid%p.log -Dzookeeper=zk-hs:2181 -Dservices.coordinator.enabled=true -Dservices.coordinator.master.enabled=true -Dservices.coordinator.master.embedded-zookeeper.enabled=false -Dservices.executor.enabled=false -Dservices.conduit.port=45679 -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC` - gcLogLocation, err := autodetect.ParseGCLogFromFlags(processFlags) + processFlags := ` 519 ? 
Ssl 192:04 /usr/lib/jvm/bellsoft-java8-amd64/bin/java -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/opt/dremio/wrong/server.gc -Xloggc:/opt/dremio/log/server.gc -Ddremio.log.path=/opt/dremio/log -Ddremio.plugins.path=/opt/dremio/plugins -Xmx4096m -XX:MaxDirectMemorySize=8192m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt/dremio/log -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -XX:+UseG1GC -Ddremio.log.path=/opt/dremio/log/ -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=4000k -XX:+UseGCLogFileRotation -Xloggc:/opt/dremio/log/gc.log -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -cp /opt/dremio/conf:/opt/dremio/jars/*:/opt/dremio/jars/ext/*:/opt/dremio/jars/3rdparty/*:/usr/lib/jvm/bellsoft-java8-amd64/lib/tools.jar com.dremio.dac.daemon.DremioDaemon` + + gcRegex, gcLogLocation, err := autodetect.ParseGCLogFromFlags(processFlags) if err != nil { t.Errorf("expected no error but we have %v", err) } - expected := "/opt/dremio/data/log" + expected := "/opt/dremio/log" if gcLogLocation != expected { t.Errorf("expected %v but was %v", gcLogLocation, expected) } + expected = "*gc.log*" + if gcRegex != expected { + t.Errorf("expected %v but was %v", expected, gcRegex) + } } func TestParseGCLogFromFlagsWithExtraLogFileLineDremio25Plus(t *testing.T) { //"Should parse the GC log location correctly", func() { - processFlags := `jvm_args: -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xlog:gc*,classhisto*=trace:file=/opt/dremio/data/gclog/%%t-gc.log:uptime,time,tags,level:filecount=1,filesize=4M 
-Ddremio.log.path=/var/log/dremio -Ddremio.plugins.path=/opt/dremio/plugins -Xmx6144m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -XX:+UseG1GC -Ddremio.log.path=/opt/dremio/data/log -Xloggc:/opt/dremio/data/log/gc.log -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=4000k -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt/dremio/data -XX:+UseG1GC -XX:G1HeapRegionSize=32M -XX:MaxGCPauseMillis=500 -XX:InitiatingHeapOccupancyPercent=25 -XX:+PrintAdaptiveSizePolicy -XX:+PrintReferenceGC -XX:ErrorFile=/opt/dremio/data/hs_err_pid%p.log -Dzookeeper=zk-hs:2181 -Dservices.coordinator.enabled=true -Dservices.coordinator.master.enabled=true -Dservices.coordinator.master.embedded-zookeeper.enabled=false -Dservices.executor.enabled=false -Dservices.conduit.port=45679 -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -` - gcLogLocation, err := autodetect.ParseGCLogFromFlags(processFlags) + psOut := ` 1 ? 
Ssl 97:20 /opt/java/openjdk/bin/java -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED -XX:UseAVX=2 -Xlog:gc*::time,uptime,tags,level -Ddremio.plugins.path=/opt/dremio/plugins -Xmx2048m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -XX:+UseG1GC -Ddremio.log.path=/opt/dremio/data/logs -Xlog:gc*,classhisto*=trace:file=/opt/dremio/data/gclog/gc-%t.log:uptime,time,tags,level:filecount=1,filesize=4M -Dzookeeper=zk-hs:2181 -Dservices.coordinator.enabled=true -Dservices.coordinator.master.enabled=true -Dservices.coordinator.master.embedded-zookeeper.enabled=false -Dservices.executor.enabled=false -Dservices.conduit.port=45679 -cp /opt/dremio/conf:/opt/dremio/jars/*:/opt/dremio/jars/ext/*:/opt/dremio/jars/3rdparty/* com.dremio.dac.daemon.DremioDaemon DREMIO_PLUGINS_DIR=/opt/dremio/plugins KUBERNETES_SERVICE_PORT_HTTPS=443 KUBERNETES_SERVICE_PORT=443 DREMIO_LOG_DIR=/var/log/dremio JAVA_MAJOR_VERSION=11 DREMIO_IN_CONTAINER=1 HOSTNAME=dremio-master-0 LANGUAGE=en_US:en JAVA_HOME=/opt/java/openjdk AWS_CREDENTIAL_PROFILES_FILE=/opt/dremio/aws/credentials DREMIO_CLIENT_PORT_32010_TCP_PROTO=tcp MALLOC_ARENA_MAX=4 ZK_CS_PORT_2181_TCP_ADDR=192.168.8.30 DREMIO_GC_LOGS_ENABLED=yes DREMIO_CLASSPATH=/opt/dremio/conf:/opt/dremio/jars/*:/opt/dremio/jars/ext/*:/opt/dremio/jars/3rdparty/* DREMIO_MAX_HEAP_MEMORY_SIZE_MB=2048 DREMIO_CLIENT_PORT_9047_TCP_PORT=9047 PWD=/opt/dremio JAVA_VERSION_STRING=11.0.22 DREMIO_JAVA_SERVER_EXTRA_OPTS=-Ddremio.log.path=/opt/dremio/data/logs -Xlog:gc*,classhisto*=trace:file=/opt/dremio/data/gc-%t.log:uptime,time,tags,level:filecount=1,filesize=4M -Dzookeeper=zk-hs:2181 -Dservices.coordinator.enabled=true 
-Dservices.coordinator.master.enabled=true -Dservices.coordinator.master.embedded-zookeeper.enabled=false -Dservices.executor.enabled=false -Dservices.conduit.port=45679 DREMIO_MAX_DIRECT_MEMORY_SIZE_MB=2048 ZK_CS_PORT_2181_TCP_PROTO=tcp MALLOC_MMAP_MAX_=65536 DREMIO_CLIENT_PORT_32010_TCP_ADDR=192.168.8.30 DREMIO_CLIENT_PORT_31010_TCP_PROTO=tcp DREMIO_CONF_DIR=/opt/dremio/conf TZ=UTC ZK_CS_PORT=tcp://192.168.8.30:2181 DREMIO_ENV_SCRIPT=dremio-env DREMIO_CLIENT_PORT_31010_TCP_ADDR=192.168.8.30 HOME=/var/lib/dremio/dremio LANG=en_US.UTF-8 KUBERNETES_PORT_443_TCP=tcp://192.168.0.1:443 ZK_CS_PORT_2181_TCP_PORT=2181 DREMIO_CLIENT_PORT_9047_TCP_PROTO=tcp LOG_TO_CONSOLE=0 DREMIO_CLIENT_PORT=tcp://192.168.8.30:31010 DREMIO_CLIENT_SERVICE_HOST=192.168.19.122 DREMIO_HOME=/opt/dremio ZK_CS_SERVICE_PORT_CLIENT=2181 DREMIO_CLIENT_SERVICE_PORT_WEB=9047 ZK_CS_SERVICE_PORT=2181 DREMIO_CLIENT_PORT_31010_TCP=tcp://192.168.8.30:31010 DREMIO_CLIENT_SERVICE_PORT_CLIENT=31010 DREMIO_CLIENT_PORT_9047_TCP=tcp://192.168.8.30:9047 DREMIO_PID_DIR=/var/run/dremio DREMIO_CLIENT_SERVICE_PORT=31010 MALLOC_TRIM_THRESHOLD_=131072 DREMIO_GC_OPTS=-XX:+UseG1GC SHLVL=0 DREMIO_CLIENT_PORT_31010_TCP_PORT=31010 DREMIO_GC_LOG_TO_CONSOLE=yes KUBERNETES_PORT_443_TCP_PROTO=tcp is_cygwin=false MALLOC_MMAP_THRESHOLD_=131072 KUBERNETES_PORT_443_TCP_ADDR=192.168.0.1 KUBERNETES_SERVICE_HOST=192.168.0.1 LC_ALL=en_US.UTF-8 AWS_SHARED_CREDENTIALS_FILE=/opt/dremio/aws/credentials KUBERNETES_PORT=tcp://192.168.8.30:443 DREMIO_CLIENT_PORT_9047_TCP_ADDR=192.168.8.30 KUBERNETES_PORT_443_TCP_PORT=443 PATH=/opt/java/openjdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin MALLOC_TOP_PAD_=131072 DREMIO_JAVA_OPTS=-Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED -XX:UseAVX=2 -Xlog:gc*::time,uptime,tags,level 
-Ddremio.plugins.path=/opt/dremio/plugins -Xmx2048m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -XX:+UseG1GC -Ddremio.log.path=/opt/dremio/data/logs -Xlog:gc*,classhisto*=trace:file=/opt/dremio/data/gclog/gc-%t.log:uptime,time,tags,level:filecount=1,filesize=4M -Dzookeeper=zk-hs:2181 -Dservices.coordinator.enabled=true -Dservices.coordinator.master.enabled=true -Dservices.coordinator.master.embedded-zookeeper.enabled=false -Dservices.executor.enabled=false -Dservices.conduit.port=45679 DREMIO_CLIENT_PORT_32010_TCP=tcp://192.168.8.30:32010 ZK_CS_SERVICE_HOST=192.168.8.30 DREMIO_CLIENT_SERVICE_PORT_FLIGHT=32010 DREMIO_LOG_TO_CONSOLE=1 DREMIO_CLIENT_PORT_32010_TCP_PORT=32010 JAVA_VERSION=jdk-11.0.22+7 ZK_CS_PORT_2181_TCP=tcp://192.168.8.30:2181` + gcRegex, gcLogLocation, err := autodetect.ParseGCLogFromFlags(psOut) if err != nil { t.Errorf("expected no error but we have %v", err) } @@ -62,4 +67,9 @@ func TestParseGCLogFromFlagsWithExtraLogFileLineDremio25Plus(t *testing.T) { if gcLogLocation != expected { t.Errorf("expected %v but was %v", expected, gcLogLocation) } + + expected = "*gc-*.log*" + if gcRegex != expected { + t.Errorf("expected %v but was %v", expected, gcRegex) + } } diff --git a/cmd/local/conf/autodetect/pid.go b/cmd/local/conf/autodetect/pid.go index 11d62a73..0b3f1c35 100644 --- a/cmd/local/conf/autodetect/pid.go +++ b/cmd/local/conf/autodetect/pid.go @@ -16,7 +16,6 @@ package autodetect import ( - "bufio" "bytes" "fmt" "strconv" @@ -27,33 +26,40 @@ import ( "github.com/dremio/dremio-diagnostic-collector/v3/pkg/simplelog" ) -func GetDremioPIDFromText(jpsOutput string) (int, error) { - var procName string - var previewName string - procName = "DremioDaemon" - previewName = "preview" - var lines []string - scanner := bufio.NewScanner(strings.NewReader(jpsOutput)) - for 
scanner.Scan() { - line := scanner.Text() - lines = append(lines, line) - simplelog.Debugf("jps line: %v", line) - if strings.Contains(line, procName) && !strings.Contains(line, previewName) { - tokens := strings.Split(line, " ") - if len(tokens) == 0 { - return -1, fmt.Errorf("no pid for dremio found in text '%v'", line) - } - pidText := tokens[0] - return strconv.Atoi(pidText) +// GetDremioPIDFromText takes the output from +// "ps aux | grep DremioDaemon | grep -v grep | grep -v /etc/dremio/preview" +// and retrieves the pid +func GetDremioPIDFromText(psOutput string) (int, error) { + // should always trim trailing spaces + cleanedOutput := strings.TrimSpace(psOutput) + linesCount := len((strings.Split(cleanedOutput, "\n"))) + if linesCount > 1 { + return -1, fmt.Errorf("to many lines in the ps outout, should only be one line '%v'", cleanedOutput) + } + if linesCount == 0 { + return -1, fmt.Errorf("no lines in the ps output, should be one line '%v'", cleanedOutput) + } + + tokens := strings.Split(cleanedOutput, " ") + var cleaned []string + for _, t := range tokens { + if t == "" { + continue } + cleaned = append(cleaned, t) + } + if len(cleaned) < 2 { + return -1, fmt.Errorf("no pid for dremio found in text '%v'", cleanedOutput) } - return -1, fmt.Errorf("found no matching process named %v in text %v therefore cannot get the pid", procName, strings.Join(lines, ", ")) + pidText := cleaned[1] + return strconv.Atoi(pidText) } +// GetDremioPID calls ps aux and finds the DremioDaemon (filtering out the preview engine) func GetDremioPID(hook shutdown.Hook) (int, error) { - var jpsOutput bytes.Buffer - if err := ddcio.Shell(hook, &jpsOutput, "jps -v"); err != nil { - simplelog.Warningf("attempting to get full jps output failed: %v", err) + var psOutput bytes.Buffer + if err := ddcio.Shell(hook, &psOutput, "ps aux | grep DremioDaemon | grep -v grep | grep -v /etc/dremio/preview"); err != nil { + simplelog.Warningf("attempting to get full ps aux output failed: %v", err) } 
- return GetDremioPIDFromText(jpsOutput.String()) + return GetDremioPIDFromText(psOutput.String()) } diff --git a/cmd/local/conf/autodetect/pid_test.go b/cmd/local/conf/autodetect/pid_test.go index 431a4bec..e4e0d71d 100644 --- a/cmd/local/conf/autodetect/pid_test.go +++ b/cmd/local/conf/autodetect/pid_test.go @@ -20,31 +20,60 @@ import ( "github.com/dremio/dremio-diagnostic-collector/v3/cmd/local/conf/autodetect" ) +func TestGetDremioPIDFromTextHasNoText(t *testing.T) { + psOutput := "" + pid, err := autodetect.GetDremioPIDFromText(psOutput) + if err == nil || err.Error() != "no pid for dremio found in text ''" { + t.Errorf("Unexpected error: %v", err) + } + if pid != -1 { + t.Errorf("Unexpected value for pid. Got %v, expected -1", pid) + } +} + func TestGetDremioPIDFromText(t *testing.T) { - jpsOutput1 := "12345 JavaProcess\n67890 AnotherProcess" - pid1, err1 := autodetect.GetDremioPIDFromText(jpsOutput1) - if err1 == nil || err1.Error() != "found no matching process named DremioDaemon in text 12345 JavaProcess, 67890 AnotherProcess therefore cannot get the pid" { - t.Errorf("Unexpected error: %v", err1) + psOutput := `dremio 3139 6.5 20.7 8311440 3340972 ? 
Ssl 08:04 2:21 /usr/lib/jvm/java-1.8.0-openjdk/bin/java -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/var/log/dremio/server-%t.gc -Ddremio.log.path=/var/log/dremio -Ddremio.plugins.path=/opt/dremio/plugins -Xmx5491m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -Xloggc:/var/log/dremio/server-%t.gc -XX:+UseG1GC -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=2000 -XX:GCLogFileSize=50M -XX:+StartAttachListener -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -cp /opt/dremio/conf:/opt/dremio/jars/*:/opt/dremio/jars/ext/*:/opt/dremio/jars/3rdparty/*:/var/dremio_efs/thirdparty/*:/usr/lib/jvm/java-1.8.0-openjdk/lib/tools.jar com.dremio.dac.daemon.AwsDremioDaemon` + pid, err := autodetect.GetDremioPIDFromText(psOutput) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if pid != 3139 { + t.Errorf("Unexpected value for pid. Got %v, expected 3139", pid) } - if pid1 != -1 { - t.Errorf("Unexpected value for pid. Got %v, expected -1", pid1) +} + +func TestGetDremioPIDFromTextWithTrailingSpace(t *testing.T) { + psOutput := `dremio 3139 6.5 20.7 8311440 3340972 ? 
Ssl 08:04 2:21 /usr/lib/jvm/java-1.8.0-openjdk/bin/java -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/var/log/dremio/server-%t.gc -Ddremio.log.path=/var/log/dremio -Ddremio.plugins.path=/opt/dremio/plugins -Xmx5491m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -Xloggc:/var/log/dremio/server-%t.gc -XX:+UseG1GC -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=2000 -XX:GCLogFileSize=50M -XX:+StartAttachListener -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -cp /opt/dremio/conf:/opt/dremio/jars/*:/opt/dremio/jars/ext/*:/opt/dremio/jars/3rdparty/*:/var/dremio_efs/thirdparty/*:/usr/lib/jvm/java-1.8.0-openjdk/lib/tools.jar com.dremio.dac.daemon.AwsDremioDaemon +` + pid, err := autodetect.GetDremioPIDFromText(psOutput) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if pid != 3139 { + t.Errorf("Unexpected value for pid. Got %v, expected 3139", pid) } +} - jpsOutput2 := "12345 DremioDaemon\n67890 AnotherProcess" - pid2, err2 := autodetect.GetDremioPIDFromText(jpsOutput2) - if err2 != nil { - t.Errorf("Unexpected error: %v", err2) +func TestGetDremioPIDFromTextMatchesTwoRecords(t *testing.T) { + psOutput := `dremio 3139 6.5 20.7 8311440 3340972 ? 
Ssl 08:04 2:21 /usr/lib/jvm/java-1.8.0-openjdk/bin/java -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/var/log/dremio/server-%t.gc -Ddremio.log.path=/var/log/dremio -Ddremio.plugins.path=/opt/dremio/plugins -Xmx5491m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -Xloggc:/var/log/dremio/server-%t.gc -XX:+UseG1GC -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=2000 -XX:GCLogFileSize=50M -XX:+StartAttachListener -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -cp /opt/dremio/conf:/opt/dremio/jars/*:/opt/dremio/jars/ext/*:/opt/dremio/jars/3rdparty/*:/var/dremio_efs/thirdparty/*:/usr/lib/jvm/java-1.8.0-openjdk/lib/tools.jar com.dremio.dac.daemon.AwsDremioDaemon +dremio 3139 6.5 20.7 8311440 3340972 ? 
Ssl 08:04 2:21 /usr/lib/jvm/java-1.8.0-openjdk/bin/java -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/var/log/dremio/server-%t.gc -Ddremio.log.path=/var/log/dremio -Ddremio.plugins.path=/opt/dremio/plugins -Xmx5491m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -Xloggc:/var/log/dremio/server-%t.gc -XX:+UseG1GC -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=2000 -XX:GCLogFileSize=50M -XX:+StartAttachListener -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC -cp /opt/dremio/conf:/opt/dremio/jars/*:/opt/dremio/jars/ext/*:/opt/dremio/jars/3rdparty/*:/var/dremio_efs/thirdparty/*:/usr/lib/jvm/java-1.8.0-openjdk/lib/tools.jar com.dremio.dac.daemon.AwsDremioDaemon +` + pid, err := autodetect.GetDremioPIDFromText(psOutput) + if err == nil { + t.Error("expected error") } - if pid2 != 12345 { - t.Errorf("Unexpected value for pid. Got %v, expected 12345", pid2) + if pid != -1 { + t.Errorf("Unexpected value for pid. 
Got %v, expected -1", pid) } +} - jpsOutput3 := "1 DremioDaemon -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Ddremio.plugins.path=/opt/dremio/plugins -Xmx2048m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -XX:+UseG1GC -Ddremio.log.path=/opt/dremio/data/logs -Xloggc:/opt/dremio/data/logs/gc.log -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution -XX:+PrintGCCause -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=5M -Dzookeeper=zk-hs:2181 -Dservices.coordinator.enabled=true -Dservices.coordinator.master.enabled=true -Dservices.coordinator.master.embedded-zookeeper.enabled=false -Dservices.executor.enabled=false -Dservices.connduit.port=45679 -Ddremio.admin-only-mode=false -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC\214 Jps -Dapplication.home=/opt/java/openjdk -Xms8m" - pid3, err3 := autodetect.GetDremioPIDFromText(jpsOutput3) - if err3 != nil { - t.Errorf("Unexpected error: %v", err3) +func TestGetK8sPID(t *testing.T) { + psOutput := `dremio 1 0.3 2.1 5169980 2891424 ? 
Ssl Aug26 96:42 /opt/java/openjdk/bin/java -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED -XX:UseAVX=2 -Xlog:gc*::time,uptime,tags,level -Ddremio.plugins.path=/opt/dremio/plugins -Xmx2048m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -XX:+UseG1GC -Ddremio.log.path=/opt/dremio/data/logs -Xlog:gc*,classhisto*=trace:file=/opt/dremio/data/gc-%t.log:uptime,time,tags,level:filecount=1,filesize=4M -Dzookeeper=zk-hs:2181 -Dservices.coordinator.enabled=true -Dservices.coordinator.master.enabled=true -Dservices.coordinator.master.embedded-zookeeper.enabled=false -Dservices.executor.enabled=false -Dservices.conduit.port=45679 -cp /opt/dremio/conf:/opt/dremio/jars/*:/opt/dremio/jars/ext/*:/opt/dremio/jars/3rdparty/* com.dremio.dac.daemon.DremioDaemon` + pid, err := autodetect.GetDremioPIDFromText(psOutput) + if err != nil { + t.Errorf("unexpected error: %v", err) } - if pid3 != 1 { - t.Errorf("Unexpected value for pid. Got %v, expected 1", pid3) + if pid != 1 { + t.Errorf("Unexpected value for pid. 
Got %v, expected 1", pid) } } diff --git a/cmd/local/conf/conf.go b/cmd/local/conf/conf.go index c31e59da..9731315d 100644 --- a/cmd/local/conf/conf.go +++ b/cmd/local/conf/conf.go @@ -304,7 +304,6 @@ func ReadConf(hook shutdown.Hook, overrides map[string]string, ddcYamlLoc, colle c.dremioLogsNumDays = GetInt(confData, KeyDremioLogsNumDays) c.dremioQueriesJSONNumDays = GetInt(confData, KeyDremioQueriesJSONNumDays) - c.dremioGCFilePattern = GetString(confData, KeyDremioGCFilePattern) c.collectQueriesJSON = GetBool(confData, KeyCollectQueriesJSON) c.collectServerLogs = GetBool(confData, KeyCollectServerLogs) c.collectMetaRefreshLogs = GetBool(confData, KeyCollectMetaRefreshLog) @@ -349,17 +348,20 @@ func ReadConf(hook shutdown.Hook, overrides map[string]string, ddcYamlLoc, colle } dremioPIDIsValid := c.dremioPID > 0 if dremioPIDIsValid { - logDir, err := autodetect.FindGCLogLocation(hook) + gcLogPattern, logDir, err := autodetect.FindGCLogLocation(hook, c.dremioPID) if err != nil { msg := fmt.Sprintf("GC LOG DETECTION DISABLED: will rely on ddc.yaml configuration as ddc is unable to retrieve configuration from pid %v: %v", c.dremioPID, err) consoleprint.ErrorPrint(msg) simplelog.Errorf(msg) c.gcLogsDir = GetString(confData, KeyDremioGCLogsDir) + c.dremioGCFilePattern = GetString(confData, KeyDremioGCFilePattern) } else { c.gcLogsDir = logDir + c.dremioGCFilePattern = gcLogPattern } } else { c.gcLogsDir = GetString(confData, KeyDremioGCLogsDir) + c.dremioGCFilePattern = GetString(confData, KeyDremioGCFilePattern) } // captures that wont work if the dremioPID is invalid c.captureHeapDump = GetBool(confData, KeyCaptureHeapDump) && dremioPIDIsValid @@ -617,7 +619,8 @@ func GetConfiguredDremioValuesFromPID(hook shutdown.CancelHook, dremioPID int) ( func ReadPSEnv(hook shutdown.CancelHook, dremioPID int) (string, error) { var w bytes.Buffer - err := ddcio.Shell(hook, &w, fmt.Sprintf("ps eww %v | grep dremio | awk '{$1=$2=$3=$4=\"\"; print $0}'", dremioPID)) + // grep -v 
/etc/dremio/preview filters out the AWSE discount preview engine + err := ddcio.Shell(hook, &w, fmt.Sprintf("ps eww %v | grep dremio | grep -v /etc/dremio/preview | awk '{$1=$2=$3=$4=\"\"; print $0}'", dremioPID)) if err != nil { return "", err } diff --git a/cmd/local/logcollect/logcollect.go b/cmd/local/logcollect/logcollect.go index 3e54128a..0cfda26b 100644 --- a/cmd/local/logcollect/logcollect.go +++ b/cmd/local/logcollect/logcollect.go @@ -88,6 +88,7 @@ func (l *Collector) RunCollectGcLogs() error { if file.IsDir() { continue } + simplelog.Debugf("found file %v in gc log folder: '%v'", file.Name(), l.gcLogsDir) matched, err := filepath.Match(l.dremioGCFilePattern, file.Name()) if err != nil { errs = append(errs, fmt.Errorf("error matching file pattern %v with error '%v'", l.dremioGCFilePattern, err)) @@ -109,6 +110,8 @@ func (l *Collector) RunCollectGcLogs() error { continue } simplelog.Debugf("Copied file %s to %s", srcPath, destPath) + } else { + simplelog.Debugf("skipping file %v in gc log folder: '%v' did not match gc pattern: '%v'", file.Name(), l.gcLogsDir, l.dremioGCFilePattern) } } if len(errs) > 1 { diff --git a/integrationtest/kube/testdata/dremio.yaml b/integrationtest/kube/testdata/dremio.yaml index 43cc35fa..13057b73 100644 --- a/integrationtest/kube/testdata/dremio.yaml +++ b/integrationtest/kube/testdata/dremio.yaml @@ -644,7 +644,7 @@ spec: containers: - name: dremio-coordinator - image: dremio/dremio-oss:25.0 + image: dremio/dremio-oss:25.1 imagePullPolicy: IfNotPresent resources: requests: @@ -741,7 +741,7 @@ spec: containers: - name: dremio-executor - image: dremio/dremio-oss:25.0 + image: dremio/dremio-oss:25.1 imagePullPolicy: IfNotPresent resources: requests: @@ -792,7 +792,7 @@ spec: initContainers: - name: chown-data-directory - image: dremio/dremio-oss:25.0 + image: dremio/dremio-oss:25.1 imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -802,7 +802,7 @@ spec: command: ["chown"] args: ["dremio:dremio", "/opt/dremio/data"] 
- name: chown-cloudcache-directory - image: dremio/dremio-oss:25.0 + image: dremio/dremio-oss:25.1 imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -880,7 +880,7 @@ spec: containers: - name: dremio-master-coordinator - image: dremio/dremio-oss:25.0 + image: dremio/dremio-oss:25.1 imagePullPolicy: IfNotPresent resources: requests: @@ -946,7 +946,7 @@ spec: image: busybox command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo Waiting for Zookeeper to be ready.; sleep 2; done;"] - name: chown-data-directory - image: dremio/dremio-oss:25.0 + image: dremio/dremio-oss:25.1 imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -958,7 +958,7 @@ spec: - "dremio:dremio" - "/opt/dremio/data" - name: upgrade-task - image: dremio/dremio-oss:25.0 + image: dremio/dremio-oss:25.1 imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-master-volume diff --git a/pkg/jps/jps.go b/pkg/jps/jps.go index 45f5a311..35bee073 100644 --- a/pkg/jps/jps.go +++ b/pkg/jps/jps.go @@ -31,7 +31,7 @@ func CaptureFlagsFromPID(hook shutdown.CancelHook, pid int) (string, error) { return "", fmt.Errorf("failed getting flags: '%w', output was: '%v'", err, buf.String()) } scanner := bufio.NewScanner(&buf) - //adjust the max line size capacity as the jpv output can be large + //adjust the max line size capacity as the jps -v output can be large const maxCapacity = 512 * 1024 lineBuffer := make([]byte, maxCapacity) scanner.Buffer(lineBuffer, maxCapacity)