diff --git a/charts/logging-operator/charts/logging-operator-crds/templates/logging.banzaicloud.io_fluentbitagents.yaml b/charts/logging-operator/charts/logging-operator-crds/templates/logging.banzaicloud.io_fluentbitagents.yaml index 681931e29..208d3e5b4 100644 --- a/charts/logging-operator/charts/logging-operator-crds/templates/logging.banzaicloud.io_fluentbitagents.yaml +++ b/charts/logging-operator/charts/logging-operator-crds/templates/logging.banzaicloud.io_fluentbitagents.yaml @@ -1480,6 +1480,8 @@ spec: flush: format: int32 type: integer + forceHotReloadAfterGrace: + type: boolean forwardOptions: properties: Require_ack_response: diff --git a/charts/logging-operator/charts/logging-operator-crds/templates/logging.banzaicloud.io_loggings.yaml b/charts/logging-operator/charts/logging-operator-crds/templates/logging.banzaicloud.io_loggings.yaml index 21d8c7a87..7a330ae8a 100644 --- a/charts/logging-operator/charts/logging-operator-crds/templates/logging.banzaicloud.io_loggings.yaml +++ b/charts/logging-operator/charts/logging-operator-crds/templates/logging.banzaicloud.io_loggings.yaml @@ -2325,6 +2325,8 @@ spec: flush: format: int32 type: integer + forceHotReloadAfterGrace: + type: boolean forwardOptions: properties: Require_ack_response: diff --git a/charts/logging-operator/crds/logging.banzaicloud.io_fluentbitagents.yaml b/charts/logging-operator/crds/logging.banzaicloud.io_fluentbitagents.yaml index 1462165ed..b1698bb09 100644 --- a/charts/logging-operator/crds/logging.banzaicloud.io_fluentbitagents.yaml +++ b/charts/logging-operator/crds/logging.banzaicloud.io_fluentbitagents.yaml @@ -1477,6 +1477,8 @@ spec: flush: format: int32 type: integer + forceHotReloadAfterGrace: + type: boolean forwardOptions: properties: Require_ack_response: diff --git a/charts/logging-operator/crds/logging.banzaicloud.io_loggings.yaml b/charts/logging-operator/crds/logging.banzaicloud.io_loggings.yaml index 5fd69da87..244d5f326 100644 --- 
a/charts/logging-operator/crds/logging.banzaicloud.io_loggings.yaml +++ b/charts/logging-operator/crds/logging.banzaicloud.io_loggings.yaml @@ -2322,6 +2322,8 @@ spec: flush: format: int32 type: integer + forceHotReloadAfterGrace: + type: boolean forwardOptions: properties: Require_ack_response: diff --git a/config/crd/bases/logging.banzaicloud.io_fluentbitagents.yaml b/config/crd/bases/logging.banzaicloud.io_fluentbitagents.yaml index 1462165ed..b1698bb09 100644 --- a/config/crd/bases/logging.banzaicloud.io_fluentbitagents.yaml +++ b/config/crd/bases/logging.banzaicloud.io_fluentbitagents.yaml @@ -1477,6 +1477,8 @@ spec: flush: format: int32 type: integer + forceHotReloadAfterGrace: + type: boolean forwardOptions: properties: Require_ack_response: diff --git a/config/crd/bases/logging.banzaicloud.io_loggings.yaml b/config/crd/bases/logging.banzaicloud.io_loggings.yaml index 5fd69da87..244d5f326 100644 --- a/config/crd/bases/logging.banzaicloud.io_loggings.yaml +++ b/config/crd/bases/logging.banzaicloud.io_loggings.yaml @@ -2322,6 +2322,8 @@ spec: flush: format: int32 type: integer + forceHotReloadAfterGrace: + type: boolean forwardOptions: properties: Require_ack_response: diff --git a/config/samples/multitenant-routing/logging/tenant-infra-logging.yaml b/config/samples/multitenant-routing/logging/tenant-infra-logging.yaml index b58e6667b..bdbd6ddbd 100644 --- a/config/samples/multitenant-routing/logging/tenant-infra-logging.yaml +++ b/config/samples/multitenant-routing/logging/tenant-infra-logging.yaml @@ -11,7 +11,8 @@ metadata: tenant: infra spec: loggingRef: infra - fluentd: {} + fluentd: + metrics: {} controlNamespace: infra --- apiVersion: logging.banzaicloud.io/v1beta1 @@ -49,8 +50,12 @@ metadata: name: infra spec: loggingRef: infra + # this is required to reload even if there are pending tasks in one of the queues + # requires grace to be set, which is 5 by default + forceHotReloadAfterGrace: true inputTail: storage.type: filesystem + 
storage.pause_on_chunks_overlimit: "off" positiondb: hostPath: path: "" @@ -59,7 +64,15 @@ spec: path: "" network: connectTimeout: 2 + keepaliveMaxRecycle: 20 metrics: {} + bufferStorage: + storage.max_chunks_up: 10 + forwardOptions: + storage.total_limit_size: 50MB + image: + tag: 3.1.10-debug + configHotReload: {} --- apiVersion: logging.banzaicloud.io/v1beta1 kind: LoggingRoute diff --git a/docs/configuration/crds/v1beta1/fluentbit_types.md b/docs/configuration/crds/v1beta1/fluentbit_types.md index 623322e8b..12913ac11 100644 --- a/docs/configuration/crds/v1beta1/fluentbit_types.md +++ b/docs/configuration/crds/v1beta1/fluentbit_types.md @@ -128,6 +128,11 @@ Set the flush time in seconds.nanoseconds. The engine loop uses a Flush timeout Default: 1 +### forceHotReloadAfterGrace (bool, optional) {#fluentbitspec-forcehotreloadaftergrace} + +HotReload pauses all inputs and waits until they finish. In certain situations this is unacceptable, for example if an output is down for a longer time. An undocumented option called "Hot_Reload.Ensure_Thread_Safety Off" can be used at the [SERVICE] config to force hotreload after the grace period. Please note that forcing the reload might result in a SIGSEGV, but in the worst case the kubelet will restart the container.
See https://github.com/fluent/fluent-bit/pull/7509 + + ### forwardOptions (*ForwardOptions, optional) {#fluentbitspec-forwardoptions} diff --git a/pkg/resources/fluentbit/config.go b/pkg/resources/fluentbit/config.go index 8514141e7..15d02c5e2 100644 --- a/pkg/resources/fluentbit/config.go +++ b/pkg/resources/fluentbit/config.go @@ -26,6 +26,9 @@ var fluentBitConfigTemplate = ` [SERVICE] Flush {{ .Flush }} Grace {{ .Grace }} + {{- if .ForceHotReloadAfterGrace }} + Hot_Reload.Ensure_Thread_Safety off + {{- end }} Daemon Off Log_Level {{ .LogLevel }} Parsers_File {{ .DefaultParsers }} diff --git a/pkg/resources/fluentbit/configsecret.go b/pkg/resources/fluentbit/configsecret.go index 729a06fb8..ad5e418d8 100644 --- a/pkg/resources/fluentbit/configsecret.go +++ b/pkg/resources/fluentbit/configsecret.go @@ -64,24 +64,25 @@ type fluentBitConfig struct { Port int32 Path string } - Flush int32 - Grace int32 - LogLevel string - EnabledIPv6 bool - CoroStackSize int32 - Output map[string]string - Input fluentbitInputConfig - Inputs []fluentbitInputConfigWithTenant - DisableKubernetesFilter bool - KubernetesFilter map[string]string - AwsFilter map[string]string - BufferStorage map[string]string - FilterModify []v1beta1.FilterModify - FluentForwardOutput *fluentForwardOutputConfig - SyslogNGOutput *syslogNGOutputConfig - DefaultParsers string - CustomParsers string - HealthCheck *v1beta1.HealthCheck + Flush int32 + Grace int32 + LogLevel string + EnabledIPv6 bool + CoroStackSize int32 + Output map[string]string + ForceHotReloadAfterGrace bool + Input fluentbitInputConfig + Inputs []fluentbitInputConfigWithTenant + DisableKubernetesFilter bool + KubernetesFilter map[string]string + AwsFilter map[string]string + BufferStorage map[string]string + FilterModify []v1beta1.FilterModify + FluentForwardOutput *fluentForwardOutputConfig + SyslogNGOutput *syslogNGOutputConfig + DefaultParsers string + CustomParsers string + HealthCheck *v1beta1.HealthCheck } type 
fluentForwardOutputConfig struct { @@ -214,15 +215,16 @@ func (r *Reconciler) configSecret() (runtime.Object, reconciler.DesiredState, er } input := fluentBitConfig{ - Flush: r.fluentbitSpec.Flush, - Grace: r.fluentbitSpec.Grace, - LogLevel: r.fluentbitSpec.LogLevel, - EnabledIPv6: r.fluentbitSpec.EnabledIPv6, - CoroStackSize: r.fluentbitSpec.CoroStackSize, - Namespace: r.Logging.Spec.ControlNamespace, - DisableKubernetesFilter: disableKubernetesFilter, - FilterModify: r.fluentbitSpec.FilterModify, - HealthCheck: r.fluentbitSpec.HealthCheck, + Flush: r.fluentbitSpec.Flush, + Grace: r.fluentbitSpec.Grace, + ForceHotReloadAfterGrace: r.fluentbitSpec.ForceHotReloadAfterGrace, + LogLevel: r.fluentbitSpec.LogLevel, + EnabledIPv6: r.fluentbitSpec.EnabledIPv6, + CoroStackSize: r.fluentbitSpec.CoroStackSize, + Namespace: r.Logging.Spec.ControlNamespace, + DisableKubernetesFilter: disableKubernetesFilter, + FilterModify: r.fluentbitSpec.FilterModify, + HealthCheck: r.fluentbitSpec.HealthCheck, } input.DefaultParsers = fmt.Sprintf("%s/%s", StockConfigPath, "parsers.conf") diff --git a/pkg/sdk/logging/api/v1beta1/fluentbit_types.go b/pkg/sdk/logging/api/v1beta1/fluentbit_types.go index b672e180b..d931c4d40 100644 --- a/pkg/sdk/logging/api/v1beta1/fluentbit_types.go +++ b/pkg/sdk/logging/api/v1beta1/fluentbit_types.go @@ -77,6 +77,11 @@ type FluentbitSpec struct { Flush int32 `json:"flush,omitempty" plugin:"default:1"` // Set the grace time in seconds as Integer value. The engine loop uses a Grace timeout to define wait time on exit. Grace int32 `json:"grace,omitempty" plugin:"default:5"` + // HotReload pauses all inputs and waits until they finish. In certain situations this is unacceptable, for example if an output is down for a longer time. + // An undocumented option called "Hot_Reload.Ensure_Thread_Safety Off" can be used at the [SERVICE] config to force hotreload after the grace period. 
+ // Please note that forcing the reload might result in a SIGSEGV, but in the worst case the kubelet will restart the container. + // See https://github.com/fluent/fluent-bit/pull/7509 + ForceHotReloadAfterGrace bool `json:"forceHotReloadAfterGrace,omitempty"` // Set the logging verbosity level. Allowed values are: error, warn, info, debug and trace. Values are accumulative, e.g: if 'debug' is set, it will include error, warning, info and debug. Note that trace mode is only available if Fluent Bit was built with the WITH_TRACE option enabled. LogLevel string `json:"logLevel,omitempty" plugin:"default:info"` // Set the coroutines stack size in bytes. The value must be greater than the page size of the running system. Don't set too small value (say 4096), or coroutine threads can overrun the stack buffer.