Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: force HotReload after grace period instead of blocking indefinitely #1885

Merged
merged 1 commit into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1386,6 +1386,8 @@ spec:
flush:
format: int32
type: integer
forceHotReloadAfterGrace:
type: boolean
forwardOptions:
properties:
Require_ack_response:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2433,6 +2433,8 @@ spec:
flush:
format: int32
type: integer
forceHotReloadAfterGrace:
type: boolean
forwardOptions:
properties:
Require_ack_response:
Expand Down
2 changes: 2 additions & 0 deletions config/crd/bases/logging.banzaicloud.io_fluentbitagents.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1386,6 +1386,8 @@ spec:
flush:
format: int32
type: integer
forceHotReloadAfterGrace:
type: boolean
forwardOptions:
properties:
Require_ack_response:
Expand Down
2 changes: 2 additions & 0 deletions config/crd/bases/logging.banzaicloud.io_loggings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2433,6 +2433,8 @@ spec:
flush:
format: int32
type: integer
forceHotReloadAfterGrace:
type: boolean
forwardOptions:
properties:
Require_ack_response:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ metadata:
tenant: infra
spec:
loggingRef: infra
fluentd: {}
fluentd:
metrics: {}
controlNamespace: infra
---
apiVersion: logging.banzaicloud.io/v1beta1
Expand Down Expand Up @@ -49,8 +50,12 @@ metadata:
name: infra
spec:
loggingRef: infra
# Required so that a hot reload still happens even if tasks are pending in one of the queues.
# The reload is forced once the grace period (`grace`, 5 seconds by default) has elapsed.
forceHotReloadAfterGrace: true
inputTail:
storage.type: filesystem
storage.pause_on_chunks_overlimit: "off"
positiondb:
hostPath:
path: ""
Expand All @@ -59,9 +64,15 @@ spec:
path: ""
network:
connectTimeout: 2
keepaliveMaxRecycle: 20
metrics: {}
bufferStorage:
storage.max_chunks_up: 10
forwardOptions:
storage.total_limit_size: 50MB
image:
tag: 2.2.2-debug
tag: 3.1.10-debug
configHotReload: {}
---
apiVersion: logging.banzaicloud.io/v1beta1
kind: LoggingRoute
Expand Down
5 changes: 5 additions & 0 deletions docs/configuration/crds/v1beta1/fluentbit_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ Set the flush time in seconds.nanoseconds. The engine loop uses a Flush timeout

Default: 1

### forceHotReloadAfterGrace (bool, optional) {#fluentbitspec-forcehotreloadaftergrace}

HotReload pauses all inputs and waits until they finish. In certain situations this is unacceptable, for example when an output is down for an extended period. An undocumented option called "Hot_Reload.Ensure_Thread_Safety Off" can be set in the [SERVICE] section of the config to force the hot reload once the grace period has elapsed. Please note that this might result in a SIGSEGV, but in the worst case the kubelet will restart the container. See https://github.com/fluent/fluent-bit/pull/7509


### forwardOptions (*ForwardOptions, optional) {#fluentbitspec-forwardoptions}


Expand Down
3 changes: 3 additions & 0 deletions pkg/resources/fluentbit/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ var fluentBitConfigTemplate = `
[SERVICE]
Flush {{ .Flush }}
Grace {{ .Grace }}
{{- if .ForceHotReloadAfterGrace }}
Hot_Reload.Ensure_Thread_Safety off
{{- end }}
Daemon Off
Log_Level {{ .LogLevel }}
Parsers_File {{ .DefaultParsers }}
Expand Down
52 changes: 27 additions & 25 deletions pkg/resources/fluentbit/configsecret.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,23 +64,24 @@ type fluentBitConfig struct {
Port int32
Path string
}
Flush int32
Grace int32
LogLevel string
CoroStackSize int32
Output map[string]string
Input fluentbitInputConfig
Inputs []fluentbitInputConfigWithTenant
DisableKubernetesFilter bool
KubernetesFilter map[string]string
AwsFilter map[string]string
BufferStorage map[string]string
FilterModify []v1beta1.FilterModify
FluentForwardOutput *fluentForwardOutputConfig
SyslogNGOutput *syslogNGOutputConfig
DefaultParsers string
CustomParsers string
HealthCheck *v1beta1.HealthCheck
Flush int32
Grace int32
LogLevel string
CoroStackSize int32
Output map[string]string
ForceHotReloadAfterGrace bool
Input fluentbitInputConfig
Inputs []fluentbitInputConfigWithTenant
DisableKubernetesFilter bool
KubernetesFilter map[string]string
AwsFilter map[string]string
BufferStorage map[string]string
FilterModify []v1beta1.FilterModify
FluentForwardOutput *fluentForwardOutputConfig
SyslogNGOutput *syslogNGOutputConfig
DefaultParsers string
CustomParsers string
HealthCheck *v1beta1.HealthCheck
}

type fluentForwardOutputConfig struct {
Expand Down Expand Up @@ -213,14 +214,15 @@ func (r *Reconciler) configSecret() (runtime.Object, reconciler.DesiredState, er
}

input := fluentBitConfig{
Flush: r.fluentbitSpec.Flush,
Grace: r.fluentbitSpec.Grace,
LogLevel: r.fluentbitSpec.LogLevel,
CoroStackSize: r.fluentbitSpec.CoroStackSize,
Namespace: r.Logging.Spec.ControlNamespace,
DisableKubernetesFilter: disableKubernetesFilter,
FilterModify: r.fluentbitSpec.FilterModify,
HealthCheck: r.fluentbitSpec.HealthCheck,
Flush: r.fluentbitSpec.Flush,
Grace: r.fluentbitSpec.Grace,
ForceHotReloadAfterGrace: r.fluentbitSpec.ForceHotReloadAfterGrace,
LogLevel: r.fluentbitSpec.LogLevel,
CoroStackSize: r.fluentbitSpec.CoroStackSize,
Namespace: r.Logging.Spec.ControlNamespace,
DisableKubernetesFilter: disableKubernetesFilter,
FilterModify: r.fluentbitSpec.FilterModify,
HealthCheck: r.fluentbitSpec.HealthCheck,
}

input.DefaultParsers = fmt.Sprintf("%s/%s", StockConfigPath, "parsers.conf")
Expand Down
5 changes: 5 additions & 0 deletions pkg/sdk/logging/api/v1beta1/fluentbit_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ type FluentbitSpec struct {
Flush int32 `json:"flush,omitempty" plugin:"default:1"`
// Set the grace time in seconds as Integer value. The engine loop uses a Grace timeout to define wait time on exit.
Grace int32 `json:"grace,omitempty" plugin:"default:5"`
// HotReload pauses all inputs and waits until they finish. In certain situations this is unacceptable, for example when an output is down for an extended period.
// An undocumented option called "Hot_Reload.Ensure_Thread_Safety Off" can be set in the [SERVICE] section of the config to force the hot reload once the grace period has elapsed.
// Please note that this might result in a SIGSEGV, but in the worst case the kubelet will restart the container.
// See https://github.com/fluent/fluent-bit/pull/7509
ForceHotReloadAfterGrace bool `json:"forceHotReloadAfterGrace,omitempty"`
// Set the logging verbosity level. Allowed values are: error, warn, info, debug and trace. Values are accumulative, e.g: if 'debug' is set, it will include error, warning, info and debug. Note that trace mode is only available if Fluent Bit was built with the WITH_TRACE option enabled.
LogLevel string `json:"logLevel,omitempty" plugin:"default:info"`
// Set the coroutines stack size in bytes. The value must be greater than the page size of the running system. Don't set too small value (say 4096), or coroutine threads can overrun the stack buffer.
Expand Down
Loading