Skip to content

Commit

Permalink
updated gc logging logic (#261)
Browse files Browse the repository at this point in the history
  • Loading branch information
rsvihladremio authored Sep 13, 2024
1 parent 7098722 commit e96d765
Show file tree
Hide file tree
Showing 11 changed files with 198 additions and 167 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
# Changelog

## [3.2.5] - 2024-09-13

### Added

* autodetection of the gc log name from the logging parameter; this removes the need to set a gc log matching pattern
* enhanced logging during file iteration while searching for logs in the gc logging folder

## [3.2.4] - 2024-09-09

### Added

* added support for using older kubectl clients: since the kubectl cp interface is stable, we can check the client version to determine whether retries are supported and only add them if they are

## [3.2.3] - 2024-09-06
Expand Down Expand Up @@ -772,6 +781,7 @@ someone has added the PAT which is always available

- able to capture logs, configuration and diagnostic data from Dremio clusters deployed on Kubernetes and on-prem

[3.2.5]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.4...v3.2.5
[3.2.4]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.3...v3.2.4
[3.2.3]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.2...v3.2.3
[3.2.2]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.1...v3.2.2
Expand Down
22 changes: 0 additions & 22 deletions cmd/local/conf/autodetect/awse.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,13 @@
package autodetect

import (
"bytes"
"fmt"
"os"
"path/filepath"
"strings"

"github.com/dremio/dremio-diagnostic-collector/v3/cmd/local/ddcio"
"github.com/dremio/dremio-diagnostic-collector/v3/pkg/shutdown"
"github.com/dremio/dremio-diagnostic-collector/v3/pkg/simplelog"
)

// IsAWSEFromJPSOutput reports whether the supplied jps output looks like it
// was produced on an AWSE node. The error result is always nil.
func IsAWSEFromJPSOutput(jpsText string) (bool, error) {
	// AWSE can show a second DremioDaemon that runs the preview engine, so a
	// DremioDaemon entry together with "preview" anywhere in the text counts.
	previewDaemonSeen := strings.Contains(jpsText, "DremioDaemon") && strings.Contains(jpsText, "preview")
	// A process literally named AwsDremioDaemon is the other indicator.
	awsDaemonSeen := strings.Contains(jpsText, "AwsDremioDaemon")
	return previewDaemonSeen || awsDaemonSeen, nil
}

func IsAWSEExecutorUsingDir(efsFolder, nodeName string) (bool, error) {
dir, err := os.ReadDir(efsFolder)
if err != nil {
Expand All @@ -56,15 +43,6 @@ func IsAWSEExecutorUsingDir(efsFolder, nodeName string) (bool, error) {
return false, nil
}

// IsAWSE runs "jps -v" through ddcio.Shell and passes the captured output to
// IsAWSEFromJPSOutput. A failure of the shell command is returned as an error
// that also carries whatever output was captured.
func IsAWSE(hook shutdown.Hook) (bool, error) {
	var jpsOutput bytes.Buffer
	err := ddcio.Shell(hook, &jpsOutput, "jps -v")
	if err != nil {
		return false, fmt.Errorf("grepping from Dremio from jps -v failed %v with output %v", err, jpsOutput.String())
	}
	return IsAWSEFromJPSOutput(jpsOutput.String())
}

func IsAWSEExecutor(nodeName string) (bool, error) {
// search EFS folder
// Open the directory
Expand Down
36 changes: 0 additions & 36 deletions cmd/local/conf/autodetect/awse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,42 +21,6 @@ import (
"github.com/dremio/dremio-diagnostic-collector/v3/cmd/local/conf/autodetect"
)

// TestIsAWSEFromText checks autodetect.IsAWSEFromJPSOutput against three jps
// outputs: one without any Dremio daemon, one with AwsDremioDaemon, and one
// with two DremioDaemon processes where one is the preview engine.
func TestIsAWSEFromText(t *testing.T) {
	// AWSE can show two DremioDaemon processes but one is the preview engine, this gives us indication of AWSE
	previewJPS := `27059 Jps -Dapplication.home=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.362.b08-1.amzn2.0.1.x86_64 -Xms8m
31577 DremioDaemon -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/var/log/dremio/preview/server.gc -Ddremio.log.path=/var/log/dremio/preview -Ddremio.plugins.path=/opt/dremio/plugins -Xmx2048m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio/preview -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -Xloggc:/var/log/dremio/server-%t.gc -XX:+UseG1GC -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=2000 -XX:GCLogFileSize=50M -XX:+StartAttachListener -XX:+PrintClassHistogramBeforeFullGC -XX:+PrintClassHistogramAfterFullGC
28091 DremioDaemon -Djava.util.logging.config.class=org.slf4j.bridge.SLF4JBridgeHandler -Djava.library.path=/opt/dremio/lib -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/var/log/dremio/server-%t.gc -Ddremio.log.path=/var/log/dremio -Ddremio.plugins.path=/opt/dremio/plugins -Xmx5491m -XX:MaxDirectMemorySize=2048m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/dremio -Dio.netty.maxDirectMemory=0 -Dio.netty.tryReflectionSetAccessible=true -DMAPR_IMPALA_RA_THROTTLE -DMAPR_MAX_RA_STREAMS=400 -Xloggc:/var/log/dremio/server-%t.gc -XX:+UseG1GC -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=2000 -XX:GCLogFileSize=50M -XX:+StartAttachListener -XX:+AlwaysPreTouch -Xms5g -Xmx5g -XX:MaxDirectMemorySize=5g -Xloggc:/opt/dremio/data/gc.log -XX:NumberOfGCLogFiles=20 -XX:GCLogFileSize=100m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintAdaptiveSizePolicy -XX:+UseGCLogFileRotation -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt/dremio/data -XX:ErrorFile=/opt/dremio/data/hs_err_pid%p.log -XX:G1
`

	scenarios := []struct {
		jpsText  string
		wantAWSE bool
		failure  string
	}{
		// should return false when AwsDremioDaemon or DremioDaemon is not found in the text
		{
			jpsText:  "12345 JavaProcess\n67890 AnotherProcess",
			wantAWSE: false,
			failure:  "expected to not be AWSE but was detected as AWSE",
		},
		// should return true when AwsDremioDaemon is found in the text
		{
			jpsText:  "12345 AwsDremioDaemon\n67890 AnotherProcess",
			wantAWSE: true,
			failure:  "expected to be AWSE but was detected as not AWSE",
		},
		// should return true when DremioDaemon and preview is found in the text
		{
			jpsText:  previewJPS,
			wantAWSE: true,
			failure:  "expected to be AWSE but was detected as not AWSE",
		},
	}

	for _, sc := range scenarios {
		detected, err := autodetect.IsAWSEFromJPSOutput(sc.jpsText)
		if err != nil {
			t.Errorf("unexpected error %v", err)
		}
		if detected != sc.wantAWSE {
			t.Error(sc.failure)
		}
	}
}

func TestIsAWSEExecutorUsingDir(t *testing.T) {
var (
testDir string
Expand Down
114 changes: 71 additions & 43 deletions cmd/local/conf/autodetect/gclog_finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,107 +16,135 @@
package autodetect

import (
"bufio"
"bytes"
"fmt"
"path"
"strings"

"github.com/dremio/dremio-diagnostic-collector/v3/cmd/local/ddcio"
"github.com/dremio/dremio-diagnostic-collector/v3/pkg/shutdown"
"github.com/dremio/dremio-diagnostic-collector/v3/pkg/simplelog"
)

// NOTE(review): this span is a rendered diff with its +/- markers stripped, so
// superseded and replacement lines of FindGCLogLocation appear back to back
// (two signatures, paired return statements). The comments added below
// describe individual statements only.
// findGCLogLocation retrieves the gc log location with a search string to greedily retrieve everything by prefix
func FindGCLogLocation(hook shutdown.Hook) (gcLogLoc string, err error) {
// flag prefixes searched for in the start-up flags by the ParseGCLogFromFlags* helpers
const jdk8GCLoggingFLag = "-Xloggc:"
const jdk9UnifiedGCLoggingFlag = "-Xlog:"

var jpsVerbose bytes.Buffer
err = ddcio.Shell(hook, &jpsVerbose, "jps -v")
// FindGCLogLocation retrieves the gc log location from ps eww <pid> output
//
// It returns a file-name pattern and the directory of the gc log, both derived
// from the process start-up flags by ParseGCLogFromFlags. When either cannot
// be determined a warning is logged and both results are empty with a nil
// error; a failing shell command or parser is reported as an error.
func FindGCLogLocation(hook shutdown.Hook, pid int) (gcLogPattern string, gcLogLoc string, err error) {
var psEWW bytes.Buffer

// remove the header with tail -n 1
err = ddcio.Shell(hook, &psEWW, fmt.Sprintf("ps eww %v | tail -n 1", pid))
if err != nil {
return "", fmt.Errorf("unable to find gc logs due to error '%v'", err)
return "", "", fmt.Errorf("unable to find gc logs due to error '%v'", err)
}
pid, err := GetDremioPID(hook)
if err != nil {
return "", fmt.Errorf("unable to find gc logs due to error '%v'", err)

data := strings.TrimSpace(psEWW.String())
// NOTE(review): strings.Split never returns an empty slice for a non-empty
// separator, so lines is at least 1 and the lines == 0 branch cannot run;
// empty output is only caught further down by the startupFlags == "" check.
lines := len(strings.Split(data, "\n"))
if lines == 0 {
return "", "", fmt.Errorf("empty ps eww %v output cannot find gc logs", pid)
}
if lines > 1 {
return "", "", fmt.Errorf("to many results in the ps eww %v output cannot find gc logs: '%v'", pid, data)
}
var startupFlags string
scanner := bufio.NewScanner(&jpsVerbose)
for scanner.Scan() {
line := scanner.Text()
tokens := strings.Split(line, " ")
if len(tokens) > 0 {
potentialPid := strings.TrimSpace(tokens[0])
if potentialPid == fmt.Sprintf("%d", pid) {
startupFlags = strings.Join(tokens[1:], " ")
}
}
// drop the first space-separated field of the single output line and keep the
// rest as the start-up flags
// NOTE(review): what the first field holds depends on the ps output format - confirm
tokens := strings.Split(data, " ")
if len(tokens) > 0 {
startupFlags = strings.Join(tokens[1:], " ")
}
logLocation, err := ParseGCLogFromFlags(startupFlags)

// NOTE(review): the message below mentions "jps -v" although the command run
// above is "ps eww"; psEWW is passed as a bytes.Buffer value, so %v likely
// prints the struct rather than the captured text (psEWW.String() may be
// what was intended).
if startupFlags == "" {
return "", "", fmt.Errorf("unable to find gc logs because there was no matching pid %v found in the jps -v output: '%v'", pid, psEWW)
}
logRegex, logLocation, err := ParseGCLogFromFlags(startupFlags)
if err != nil {
return "", fmt.Errorf("unable to find gc logs due to error '%v'", err)
return "", "", fmt.Errorf("unable to find gc logs due to error '%v'", err)
}
if logLocation != "" {
return logLocation, nil
// no directory detected: log a warning and return empty results, not an error
if logLocation == "" {
simplelog.Warningf("autodetection of gc logs location failed as no %v or %v flag was found in the startup flags: '%v'", jdk8GCLoggingFLag, jdk9UnifiedGCLoggingFlag, startupFlags)
return "", "", nil
}
return "", nil
simplelog.Infof("detected gc log directory at '%v'", logLocation)
// a directory without a file-name pattern is also reported as "nothing detected"
if logRegex == "" {
simplelog.Warningf("autodetection of gc logs location failed we were unable to determine gc log regex: '%v'", startupFlags)
return "", "", nil
}
simplelog.Infof("detected gc log pattern at '%v'", logRegex)
return logRegex, logLocation, nil
}

// ParseGCLogFromFlags takes a given string with java startup flags and finds the gclog directive
//
// It first tries ParseGCLogFromFlagsPost25 (the "-Xlog:" form) and, when that
// yields no directory, falls back to ParseGCLogFromFlagsPre25 (the "-Xloggc:"
// form). An error is returned only when the fallback parser fails; the message
// then includes the error of the first parser as well.
//
// NOTE(review): this span is a rendered diff with its +/- markers stripped, so
// superseded and replacement lines (two signatures, paired returns) appear
// back to back.
func ParseGCLogFromFlags(startupFlagsStr string) (gcLogLocation string, err error) {
logDir, errorFromPost25 := ParseGCLogFromFlagsPost25(startupFlagsStr)
func ParseGCLogFromFlags(startupFlagsStr string) (logRegex string, gcLogLocation string, err error) {
logRegex, logDir, errorFromPost25 := ParseGCLogFromFlagsPost25(startupFlagsStr)
if logDir == "" {
// the := below declares new variables scoped to this if block (shadowing the
// outer ones); every path in the block returns, so the outer values are not
// needed afterwards
logDir, err := ParseGCLogFromFlagsPre25(startupFlagsStr)
logRegex, logDir, err := ParseGCLogFromFlagsPre25(startupFlagsStr)
if err != nil {
// NOTE(review): "uanble" in the message is a typo for "unable"
return "", fmt.Errorf("uanble to parse gc flags due the following errors: '%v' and '%v'", errorFromPost25, err)
return "", "", fmt.Errorf("uanble to parse gc flags due the following errors: '%v' and '%v'", errorFromPost25, err)
}
// NOTE(review): errorFromPost25 is dropped here when the fallback succeeds
return logDir, nil
return logRegex, logDir, nil
}
return logDir, nil
return logRegex, logDir, nil
}

// ParseGCLogFromFlagsPost25 takes a given string with java startup flags and
// finds the gc log file named by the file= option of the last "-Xlog:" flag.
//
// The flags are split on single spaces. When no token starts with "-Xlog:" all
// results are empty and the error is nil. When the last such token has no
// file= option, or contains the flag prefix more than once, an error is
// returned.
//
// NOTE(review): this span is a rendered diff with its +/- markers stripped, so
// superseded and replacement lines (two signatures, paired returns) appear
// back to back.
func ParseGCLogFromFlagsPost25(startupFlagsStr string) (gcLogLocation string, err error) {
func ParseGCLogFromFlagsPost25(startupFlagsStr string) (logRegex string, gcLogLocation string, err error) {
tokens := strings.Split(startupFlagsStr, " ")
// indexes of every token that starts with the flag prefix
var found []int
for i, token := range tokens {
if strings.HasPrefix(token, "-Xlog:") {
if strings.HasPrefix(token, jdk9UnifiedGCLoggingFlag) {
found = append(found, i)
}
}
if len(found) == 0 {
return "", nil
return "", "", nil
}
// only the last matching flag is considered
lastIndex := found[len(found)-1]
last := tokens[lastIndex]
// splitting on the prefix gives ["", "<rest>"] when the prefix occurs exactly once
gcLogLocationTokens := strings.Split(last, "-Xlog:")
gcLogLocationTokens := strings.Split(last, jdk9UnifiedGCLoggingFlag)
if len(gcLogLocationTokens) != 2 {
return "", fmt.Errorf("unexpected items in string '%v', expected only 2 items but found %v", last, len(gcLogLocationTokens))
return "", "", fmt.Errorf("unexpected items in string '%v', expected only 2 items but found %v", last, len(gcLogLocationTokens))
}
// the remainder is split on ":" and the first segment starting with file= wins
tokens = strings.Split(gcLogLocationTokens[1], ":")
for _, t := range tokens {
if strings.HasPrefix(t, "file=") {
return path.Dir(strings.Split(t, "file=")[1]), nil
gcPath := strings.Split(t, "file=")[1]
gcLogDir := path.Dir(gcPath)
// despite the name, gcRegex is a wildcard pattern of the form "*<file name>*"
// NOTE(review): presumably matched as a glob by the caller - confirm
gcRegex := fmt.Sprintf("*%v*", path.Base(gcPath))
// unified logging lets you add the timestamp, just doing a * here
gcRegex = strings.ReplaceAll(gcRegex, "%t", "*")
// unified logging lets you set the pid also just doing *
gcRegex = strings.ReplaceAll(gcRegex, "%p", "*")
return gcRegex, gcLogDir, nil
}
}
return "", fmt.Errorf("could not find an Xlog parameter with file= in the string %v", startupFlagsStr)

return "", "", fmt.Errorf("could not find an %v parameter with file= in the string %v", jdk9UnifiedGCLoggingFlag, startupFlagsStr)
}

// ParseGCLogFromFlagsPre25 takes a given string with java startup flags and
// finds the gc log file named by the last "-Xloggc:" flag.
//
// The flags are split on single spaces. When no token starts with "-Xloggc:"
// all results are empty and the error is nil. When the last such token
// contains the flag prefix more than once an error is returned.
//
// NOTE(review): this span is a rendered diff with its +/- markers stripped, so
// superseded and replacement lines (two signatures, paired returns) appear
// back to back.
func ParseGCLogFromFlagsPre25(startupFlagsStr string) (gcLogLocation string, err error) {
func ParseGCLogFromFlagsPre25(startupFlagsStr string) (logRegex string, gcLogLocation string, err error) {
tokens := strings.Split(startupFlagsStr, " ")
// indexes of every token that starts with the flag prefix
var found []int
for i, token := range tokens {
if strings.HasPrefix(token, "-Xloggc:") {
if strings.HasPrefix(token, jdk8GCLoggingFLag) {
found = append(found, i)
}
}
if len(found) == 0 {
return "", nil
return "", "", nil
}
// only the last matching flag is considered
lastIndex := found[len(found)-1]
last := tokens[lastIndex]
// splitting on the prefix gives ["", "<path>"] when the prefix occurs exactly once
gcLogLocationTokens := strings.Split(last, "-Xloggc:")
gcLogLocationTokens := strings.Split(last, jdk8GCLoggingFLag)
if len(gcLogLocationTokens) != 2 {
return "", fmt.Errorf("unexpected items in string '%v', expected only 2 items but found %v", last, len(gcLogLocationTokens))
return "", "", fmt.Errorf("unexpected items in string '%v', expected only 2 items but found %v", last, len(gcLogLocationTokens))
}
return path.Dir(gcLogLocationTokens[1]), nil
gcPath := gcLogLocationTokens[1]
// get the file arg
// despite the name, gcRegex is a wildcard pattern of the form "*<file name>*"
// NOTE(review): presumably matched as a glob by the caller - confirm
gcRegex := fmt.Sprintf("*%v*", path.Base(gcPath))
// since jdk8 lets you add the timestamp, just doing a * here
gcRegex = strings.ReplaceAll(gcRegex, "%t", "*")
// since jdk8 lets you set the pid also just doing *
gcRegex = strings.ReplaceAll(gcRegex, "%p", "*")
return gcRegex, path.Dir(gcPath), nil
}
Loading

0 comments on commit e96d765

Please sign in to comment.