Skip to content

Commit

Permalink
Move runtime checks definition to healthchecks/error.go.
Browse files Browse the repository at this point in the history
  • Loading branch information
franciscovalentecastro committed Apr 12, 2024
1 parent ba7220c commit 3f2b005
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 32 deletions.
44 changes: 12 additions & 32 deletions confgenerator/self_logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"strings"

"github.com/GoogleCloudPlatform/ops-agent/confgenerator/fluentbit"
"github.com/GoogleCloudPlatform/ops-agent/internal/healthchecks"
"github.com/GoogleCloudPlatform/ops-agent/internal/logs"
"github.com/GoogleCloudPlatform/ops-agent/internal/platform"
"github.com/GoogleCloudPlatform/ops-agent/internal/version"
Expand All @@ -32,35 +33,14 @@ var (
)

// Fluent Bit tag/match patterns and key names used by the self-log pipeline.
// NOTE(review): the *Key values look like log label keys; their use is outside
// this view — confirm against the callers.
const (
opsAgentLogsMatch string = "ops-agent-*"
fluentBitSelfLogsTag string = "ops-agent-fluent-bit"
healthLogsTag string = "ops-agent-health"
sourceLocationKey string = "logging.googleapis.com/sourceLocation"
agentVersionKey string = "agent.googleapis.com/health/agentVersion"
agentKindKey string = "agent.googleapis.com/health/agentKind"
schemaVersionKey string = "agent.googleapis.com/health/schemaVersion"
troubleshootFindInfoURL string = "https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/troubleshoot-find-info"
opsAgentLogsMatch string = "ops-agent-*"
fluentBitSelfLogsTag string = "ops-agent-fluent-bit"
healthLogsTag string = "ops-agent-health"
agentVersionKey string = "agent.googleapis.com/health/agentVersion"
agentKindKey string = "agent.googleapis.com/health/agentKind"
schemaVersionKey string = "agent.googleapis.com/health/schemaVersion"
)

// selfLogTranslationEntry describes one fluent-bit self-log pattern together
// with the health `code` and `message` set on records that match it.
type selfLogTranslationEntry struct {
// regexMatch is the regular expression tested against the record's `message` key.
regexMatch string
// message is the text that replaces `message` on matching records.
message string
// code is the health code written to the `code` key of matching records.
code string
}

// selfLogTranslationList holds the fluent-bit self-log patterns that are
// re-emitted as health logs, each with the code and message to report.
// The regexes use `\s` instead of literal spaces because they are interpolated
// into space-separated fluent-bit rule strings.
var selfLogTranslationList = []selfLogTranslationEntry{
{
regexMatch: `\[error\]\s\[lib\]\sbackend\sfailed`,
message: fmt.Sprintf("Ops Agent logging pipeline failed, Code: LogPipelineErr, Documentation: %s", troubleshootFindInfoURL),
code: "LogPipelineErr",
},
{
regexMatch: `\[error\]\s\[parser\]\scannot\sparse`,
message: fmt.Sprintf("Ops Agent failed to parse logs, Code: LogParseErr, Documentation: %s", troubleshootFindInfoURL),
code: "LogParseErr",
},
}

func fluentbitSelfLogsPath(p platform.Platform) string {
loggingModule := "logging-module.log"
if p.Type == platform.Windows {
Expand Down Expand Up @@ -160,15 +140,15 @@ func generateInputFluentBitSelfLogsComponents(ctx context.Context, logLevel stri
func generateFilterSelfLogsSamplingComponents(ctx context.Context) []fluentbit.Component {
out := make([]fluentbit.Component, 0)

for _, m := range selfLogTranslationList {
for _, m := range healthchecks.FluentBitSelfLogTranslationList {
// This filter samples specific fluent-bit logs by matching with regex and re-emits
// an `ops-agent-health` log.
out = append(out, fluentbit.Component{
Kind: "FILTER",
Config: map[string]string{
"Name": "rewrite_tag",
"Match": fluentBitSelfLogsTag,
"Rule": fmt.Sprintf(`message %s %s true`, m.regexMatch, healthLogsTag),
"Rule": fmt.Sprintf(`message %s %s true`, m.RegexMatch, healthLogsTag),
},
})
// This filter sets the appropriate health code to the previously sampled logs. The `code` is also
Expand All @@ -178,9 +158,9 @@ func generateFilterSelfLogsSamplingComponents(ctx context.Context) []fluentbit.C
OrderedConfig: [][2]string{
{"Name", "modify"},
{"Match", healthLogsTag},
{"Condition", fmt.Sprintf(`Key_value_matches message %s`, m.regexMatch)},
{"Set", fmt.Sprintf(`code %s`, m.code)},
{"Set", fmt.Sprintf(`message "%s"`, m.message)},
{"Condition", fmt.Sprintf(`Key_value_matches message %s`, m.RegexMatch)},
{"Set", fmt.Sprintf(`code %s`, m.Code)},
{"Set", fmt.Sprintf(`message "%s"`, m.Message)},
},
})
}
Expand Down
77 changes: 77 additions & 0 deletions internal/healthchecks/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const (
Generic = "GENERIC"
Port = "PORT"
Permission = "PERMISSION"
Runtime = "RUNTIME"
)

type HealthCheckError struct {
Expand Down Expand Up @@ -164,6 +165,38 @@ var (
ResourceLink: "https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/authorization",
IsFatal: true,
}
// LogPipelineErr is the fatal Runtime-class health-check error reported when
// the Ops Agent logging pipeline fails.
LogPipelineErr = HealthCheckError{
Code: "LogPipelineErr",
Class: Runtime,
Message: "Ops Agent logging pipeline failed",
Action: "Refer to provided documentation link.",
ResourceLink: "https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/troubleshoot-find-info",
IsFatal: true,
}
// LogParseErr is the fatal Runtime-class health-check error reported when
// the Ops Agent fails to parse logs.
LogParseErr = HealthCheckError{
Code: "LogParseErr",
Class: Runtime,
Message: "Ops Agent failed to parse logs",
Action: "Refer to provided documentation link.",
ResourceLink: "https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/troubleshoot-find-info",
IsFatal: true,
}
// LogPathErr is the fatal Runtime-class health-check error reported when a
// configured log path cannot be found or read.
LogPathErr = HealthCheckError{
Code: "LogPathNotFoundErr",
Class: Runtime,
Message: "Ops Agent log path not found or insufficient permissions",
Action: "Refer to provided documentation link.",
ResourceLink: "https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/troubleshoot-find-info",
IsFatal: true,
}
// LogWinEventLogErr is the fatal Runtime-class health-check error reported
// when Fluent Bit's in_winlog input cannot read a Windows Event Log channel.
// It has its own code and message so it is not reported as a parse error.
LogWinEventLogErr = HealthCheckError{
Code: "LogWinEventLogErr",
Class: Runtime,
Message: "Ops Agent failed to read Windows Event Log channel",
Action: "Refer to provided documentation link.",
ResourceLink: "https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/troubleshoot-find-info",
IsFatal: true,
}
HcFailureErr = HealthCheckError{
Code: "HcFailureErr",
Class: Generic,
Expand All @@ -173,3 +206,47 @@ var (
IsFatal: false,
}
)

// SelfLogTranslationEntry describes one self-log pattern together with the
// health code and message reported for log records that match it.
type SelfLogTranslationEntry struct {
// RegexMatch is the regular expression tested against the log `message`.
// It is interpolated into space-separated Fluent Bit rule strings, so
// whitespace should be written as `\s` rather than a literal space.
RegexMatch string
// Message is the text set as `message` on matching records.
Message string
// Code is the health code set as `code` on matching records.
Code string
}

// FluentBitSelfLogTranslationList maps Fluent Bit self-log patterns to the
// runtime health-check error reported for them.
//
// Each RegexMatch is interpolated into space-delimited Fluent Bit settings
// (the rewrite_tag `Rule` and the modify `Condition`), so a pattern must not
// contain literal spaces: whitespace is written as `\s`. Characters that are
// regex metacharacters in the log text, such as parentheses, are escaped so
// they match literally.
var FluentBitSelfLogTranslationList = []SelfLogTranslationEntry{
	{
		RegexMatch: `\[error\]\s\[lib\]\sbackend\sfailed`,
		Message:    singleErrorResultMessage(LogPipelineErr, "Runtime Check"),
		Code:       LogPipelineErr.Code,
	},
	{
		RegexMatch: `\[error\]\s\[parser\]\scannot\sparse`,
		Message:    singleErrorResultMessage(LogParseErr, "Runtime Check"),
		Code:       LogParseErr.Code,
	},
	{
		// Fluent Bit pads the level to five characters, hence "[ warn]".
		RegexMatch: `\[\swarn\].*\serror\sparsing\slog\smessage\swith\sparser.*`,
		Message:    singleErrorResultMessage(LogParseErr, "Runtime Check"),
		Code:       LogParseErr.Code,
	},
	{
		RegexMatch: `\[error\].*\sparsers\sreturned\san\serror.*`,
		Message:    singleErrorResultMessage(LogParseErr, "Runtime Check"),
		Code:       LogParseErr.Code,
	},
	{
		RegexMatch: `\[error\].*\sNo\ssuch\sfile\sor\sdirectory.*`,
		Message:    singleErrorResultMessage(LogPathErr, "Runtime Check"),
		Code:       LogPathErr.Code,
	},
	{
		RegexMatch: `\[error\].*read\serror,\scheck\spermissions:.*`,
		Message:    singleErrorResultMessage(LogPathErr, "Runtime Check"),
		Code:       LogPathErr.Code,
	},
	{
		// The trailing "(6)" is literal text in the log line, so the
		// parentheses are escaped rather than treated as a capture group.
		RegexMatch: `\[error\]\s\[in_winlog\]\scannot\sread\s'.*'\s\(6\)`,
		Message:    singleErrorResultMessage(LogWinEventLogErr, "Runtime Check"),
		Code:       LogWinEventLogErr.Code,
	},
}

0 comments on commit 3f2b005

Please sign in to comment.