Skip to content

Commit

Permalink
migrate to use memory-mlock flag
Browse files Browse the repository at this point in the history
Signed-off-by: Jiayin Mao <jiayin.mao@datadoghq.com>
  • Loading branch information
jmao-dd committed Jan 27, 2025
1 parent 532c601 commit 8024273
Show file tree
Hide file tree
Showing 18 changed files with 186 additions and 66 deletions.
4 changes: 2 additions & 2 deletions server/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,13 @@ type ServerConfig struct {

DowngradeCheckTime time.Duration

// ExperimentalMemoryMlock enables mlocking of etcd owned memory pages.
// MemoryMlock enables mlocking of etcd owned memory pages.
// The setting improves etcd tail latency in environments where:
// - memory pressure might lead to swapping pages to disk
// - disk latency might be unstable
// Currently all etcd memory gets mlocked, but in future the flag can
// be refined to mlock in-use area of bbolt only.
ExperimentalMemoryMlock bool `json:"experimental-memory-mlock"`
MemoryMlock bool `json:"experimental-memory-mlock"`

// ExperimentalTxnModeWriteWithSharedBuffer enable write transaction to use
// a shared buffer in its readonly check operations.
Expand Down
12 changes: 10 additions & 2 deletions server/embed/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -484,12 +484,16 @@ type Config struct {

ExperimentalDowngradeCheckTime time.Duration `json:"experimental-downgrade-check-time"`

// ExperimentalMemoryMlock enables mlocking of etcd owned memory pages.
// MemoryMlock enables mlocking of etcd owned memory pages.
// The setting improves etcd tail latency in environments where:
// - memory pressure might lead to swapping pages to disk
// - disk latency might be unstable
// Currently all etcd memory gets mlocked, but in future the flag can
// be refined to mlock in-use area of bbolt only.
MemoryMlock bool `json:"memory-mlock"`

// Deprecated in v3.6 and will be decommissioned in v3.7. Use MemoryMlock.
// TODO: Delete in v3.7
ExperimentalMemoryMlock bool `json:"experimental-memory-mlock"`

// ExperimentalTxnModeWriteWithSharedBuffer enables write transaction to use a shared buffer in its readonly check operations.
Expand Down Expand Up @@ -606,7 +610,9 @@ func NewConfig() *Config {
LogRotationConfigJSON: DefaultLogRotationConfig,
EnableGRPCGateway: true,

ExperimentalDowngradeCheckTime: DefaultDowngradeCheckTime,
ExperimentalDowngradeCheckTime: DefaultDowngradeCheckTime,
MemoryMlock: false,
// TODO: delete in v3.7
ExperimentalMemoryMlock: false,
ExperimentalStopGRPCServiceOnDefrag: false,
ExperimentalMaxLearners: membership.DefaultMaxLearners,
Expand Down Expand Up @@ -819,7 +825,9 @@ func (cfg *Config) AddFlags(fs *flag.FlagSet) {
fs.DurationVar(&cfg.WarningApplyDuration, "warning-apply-duration", cfg.WarningApplyDuration, "Time duration after which a warning is generated if watch progress takes more time.")
fs.DurationVar(&cfg.WarningUnaryRequestDuration, "warning-unary-request-duration", cfg.WarningUnaryRequestDuration, "Time duration after which a warning is generated if a unary request takes more time.")
fs.DurationVar(&cfg.ExperimentalWarningUnaryRequestDuration, "experimental-warning-unary-request-duration", cfg.ExperimentalWarningUnaryRequestDuration, "Time duration after which a warning is generated if a unary request takes more time. It's deprecated, and will be decommissioned in v3.7. Use --warning-unary-request-duration instead.")
// TODO: delete in v3.7
fs.BoolVar(&cfg.ExperimentalMemoryMlock, "experimental-memory-mlock", cfg.ExperimentalMemoryMlock, "Enable to enforce etcd pages (in particular bbolt) to stay in RAM.")
fs.BoolVar(&cfg.MemoryMlock, "memory-mlock", cfg.MemoryMlock, "Enable to enforce etcd pages (in particular bbolt) to stay in RAM.")
fs.BoolVar(&cfg.ExperimentalTxnModeWriteWithSharedBuffer, "experimental-txn-mode-write-with-shared-buffer", true, "Enable the write transaction to use a shared buffer in its readonly check operations.")
fs.BoolVar(&cfg.ExperimentalStopGRPCServiceOnDefrag, "experimental-stop-grpc-service-on-defrag", cfg.ExperimentalStopGRPCServiceOnDefrag, "Enable etcd gRPC service to stop serving client requests on defragmentation.")
// TODO: delete in v3.7
Expand Down
2 changes: 1 addition & 1 deletion server/embed/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {
DowngradeCheckTime: cfg.ExperimentalDowngradeCheckTime,
WarningApplyDuration: cfg.WarningApplyDuration,
WarningUnaryRequestDuration: cfg.WarningUnaryRequestDuration,
ExperimentalMemoryMlock: cfg.ExperimentalMemoryMlock,
MemoryMlock: cfg.MemoryMlock,
BootstrapDefragThresholdMegabytes: cfg.BootstrapDefragThresholdMegabytes,
ExperimentalMaxLearners: cfg.ExperimentalMaxLearners,
V2Deprecation: cfg.V2DeprecationEffective(),
Expand Down
5 changes: 5 additions & 0 deletions server/etcdmain/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ var (
"experimental-watch-progress-notify-interval": "--experimental-watch-progress-notify-interval is deprecated in v3.6 and will be decommissioned in v3.7. Use '--watch-progress-notify-interval' instead.",
"experimental-warning-apply-duration": "--experimental-warning-apply-duration is deprecated in v3.6 and will be decommissioned in v3.7. Use '--warning-apply-duration' instead.",
"experimental-bootstrap-defrag-threshold-megabytes": "--experimental-bootstrap-defrag-threshold-megabytes is deprecated in v3.6 and will be decommissioned in v3.7. Use '--bootstrap-defrag-threshold-megabytes' instead.",
"experimental-memory-mlock": "--experimental-memory-mlock is deprecated in v3.6 and will be decommissioned in v3.7. Use '--memory-mlock' instead.",
}
)

Expand Down Expand Up @@ -199,6 +200,10 @@ func (cfg *config) parse(arguments []string) error {
cfg.ec.BootstrapDefragThresholdMegabytes = cfg.ec.ExperimentalBootstrapDefragThresholdMegabytes
}

if cfg.ec.FlagsExplicitlySet["experimental-memory-mlock"] {
cfg.ec.MemoryMlock = cfg.ec.ExperimentalMemoryMlock
}

// `V2Deprecation` (--v2-deprecation) is deprecated and scheduled for removal in v3.8. The default value is enforced, ignoring user input.
cfg.ec.V2Deprecation = cconfig.V2DeprDefault

Expand Down
59 changes: 59 additions & 0 deletions server/etcdmain/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,65 @@ func TestBootstrapDefragThresholdMegabytesFlagMigration(t *testing.T) {
}
}

// TestMemoryMlockFlagMigration tests the migration from
// --experimental-memory-mlock to --memory-mlock.
// Every case is fed through generateCfgsFromFileAndCmdLine, which yields one
// config parsed from command-line arguments and one parsed from a config
// file; both must end up with the expected MemoryMlock value.
// TODO: delete in v3.7
func TestMemoryMlockFlagMigration(t *testing.T) {
	testCases := []struct {
		name                    string
		memoryMlock             bool
		experimentalMemoryMlock bool
		expectedMemoryMlock     bool
	}{
		{
			name:                "default",
			expectedMemoryMlock: false,
		},
		{
			name:                    "can set experimental flag",
			experimentalMemoryMlock: true,
			expectedMemoryMlock:     true,
		},
		{
			name:                "can set non experimental flag",
			memoryMlock:         true,
			expectedMemoryMlock: true,
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			cmdLineArgs := []string{}
			// omitempty keeps unset flags out of the generated config file, so
			// the file only mentions the flags the case actually sets.
			yc := struct {
				MemoryMlock             bool `json:"memory-mlock,omitempty"`
				ExperimentalMemoryMlock bool `json:"experimental-memory-mlock,omitempty"`
			}{}

			if tc.memoryMlock {
				cmdLineArgs = append(cmdLineArgs, "--memory-mlock")
				yc.MemoryMlock = tc.memoryMlock
			}

			if tc.experimentalMemoryMlock {
				cmdLineArgs = append(cmdLineArgs, "--experimental-memory-mlock")
				yc.ExperimentalMemoryMlock = tc.experimentalMemoryMlock
			}

			cfgFromCmdLine, errFromCmdLine, cfgFromFile, errFromFile := generateCfgsFromFileAndCmdLine(t, yc, cmdLineArgs)

			// Report each source separately and include the underlying error so
			// a failure points at the path (flags vs. file) that broke.
			if errFromCmdLine != nil {
				t.Fatalf("error parsing config from command line: %v", errFromCmdLine)
			}
			if errFromFile != nil {
				t.Fatalf("error parsing config from file: %v", errFromFile)
			}

			if cfgFromCmdLine.ec.MemoryMlock != tc.expectedMemoryMlock {
				t.Errorf("command line: expected MemoryMlock=%v, got %v", tc.expectedMemoryMlock, cfgFromCmdLine.ec.MemoryMlock)
			}
			if cfgFromFile.ec.MemoryMlock != tc.expectedMemoryMlock {
				t.Errorf("config file: expected MemoryMlock=%v, got %v", tc.expectedMemoryMlock, cfgFromFile.ec.MemoryMlock)
			}
		})
	}
}

// TODO delete in v3.7
func generateCfgsFromFileAndCmdLine(t *testing.T, yc any, cmdLineArgs []string) (*config, error, *config, error) {
b, err := yaml.Marshal(&yc)
Expand Down
4 changes: 3 additions & 1 deletion server/etcdmain/help.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ Member:
Maximum number of snapshot files to retain (0 is unlimited). Deprecated in v3.6 and will be decommissioned in v3.7.
--max-wals '` + strconv.Itoa(embed.DefaultMaxWALs) + `'
Maximum number of wal files to retain (0 is unlimited).
--memory-mlock
Enable to enforce etcd pages (in particular bbolt) to stay in RAM.
--quota-backend-bytes '0'
Raise alarms when backend size exceeds the given quota (0 defaults to low space quota).
--backend-bbolt-freelist-type 'map'
Expand Down Expand Up @@ -320,7 +322,7 @@ Experimental feature:
--experimental-enable-lease-checkpoint-persist 'false'
Enable persisting remainingTTL to prevent indefinite auto-renewal of long lived leases. Always enabled in v3.6. Should be used to ensure smooth upgrade from v3.5 clusters with this feature enabled. Requires experimental-enable-lease-checkpoint to be enabled.
--experimental-memory-mlock
Enable to enforce etcd pages (in particular bbolt) to stay in RAM.
Enable to enforce etcd pages (in particular bbolt) to stay in RAM. Deprecated in v3.6 and will be decommissioned in v3.7. Use '--memory-mlock' instead.
--experimental-snapshot-catchup-entries
Number of entries for a slow follower to catch up after compacting the raft storage entries.
--experimental-stop-grpc-service-on-defrag
Expand Down
2 changes: 1 addition & 1 deletion server/storage/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func newBackend(cfg config.ServerConfig, hooks backend.Hooks) backend.Backend {
// permit 10% excess over quota for disarm
bcfg.MmapSize = uint64(cfg.QuotaBackendBytes + cfg.QuotaBackendBytes/10)
}
bcfg.Mlock = cfg.ExperimentalMemoryMlock
bcfg.Mlock = cfg.MemoryMlock
bcfg.Hooks = hooks
return backend.New(bcfg)
}
Expand Down
1 change: 0 additions & 1 deletion tests/e2e/runtime_reconfiguration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ func addMemberAsLearnerAndPromote(ctx context.Context, t *testing.T, epc *e2e.Et

id, err := epc.StartNewProc(ctx, nil, t, true /* addAsLearner */)
require.NoError(t, err)
_, err = epc.Etcdctl(e2e.WithEndpoints(endpoints)).MemberPromote(ctx, id)

attempt := 0
for attempt < 3 {
Expand Down
1 change: 1 addition & 0 deletions tests/robustness/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ func toWatchEvent(event clientv3.Event) (watch model.WatchEvent) {
watch.PrevValue = &model.ValueRevision{
Value: model.ToValueOrHash(string(event.PrevKv.Value)),
ModRevision: event.PrevKv.ModRevision,
Version: event.PrevKv.Version,
}
}
watch.IsCreate = event.IsCreate()
Expand Down
11 changes: 9 additions & 2 deletions tests/robustness/model/describe.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ func describeGuaranteedTxn(txn *TxnRequest) string {
if txn.Conditions[0].Key != txn.OperationsOnSuccess[0].Put.Key || (len(txn.OperationsOnFailure) == 1 && txn.Conditions[0].Key != txn.OperationsOnFailure[0].Range.Start) {
return ""
}
if txn.Conditions[0].ExpectedVersion > 0 {
return ""
}
if txn.Conditions[0].ExpectedRevision == 0 {
return fmt.Sprintf("guaranteedCreate(%q, %s)", txn.Conditions[0].Key, describeValueOrHash(txn.OperationsOnSuccess[0].Put.Value))
}
Expand All @@ -106,8 +109,12 @@ func describeGuaranteedTxn(txn *TxnRequest) string {

func describeEtcdConditions(conds []EtcdCondition) string {
opsDescription := make([]string, len(conds))
for i := range conds {
opsDescription[i] = fmt.Sprintf("mod_rev(%s)==%d", conds[i].Key, conds[i].ExpectedRevision)
for i, cond := range conds {
if cond.ExpectedVersion > 0 {
opsDescription[i] = fmt.Sprintf("ver(%s)==%d", cond.Key, cond.ExpectedVersion)
} else {
opsDescription[i] = fmt.Sprintf("mod_rev(%s)==%d", cond.Key, cond.ExpectedRevision)
}
}
return strings.Join(opsDescription, " && ")
}
Expand Down
15 changes: 14 additions & 1 deletion tests/robustness/model/deterministic.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,13 @@ func (s EtcdState) Step(request EtcdRequest) (EtcdState, MaybeEtcdResponse) {
case Txn:
failure := false
for _, cond := range request.Txn.Conditions {
if val := newState.KeyValues[cond.Key]; val.ModRevision != cond.ExpectedRevision {
val := newState.KeyValues[cond.Key]
if cond.ExpectedVersion > 0 {
if val.Version != cond.ExpectedVersion {
failure = true
break
}
} else if val.ModRevision != cond.ExpectedRevision {
failure = true
break
}
Expand All @@ -149,9 +155,14 @@ func (s EtcdState) Step(request EtcdRequest) (EtcdState, MaybeEtcdResponse) {
if op.Put.LeaseID != 0 && !leaseExists {
break
}
ver := int64(1)
if val, exists := newState.KeyValues[op.Put.Key]; exists && val.Version > 0 {
ver = val.Version + 1
}
newState.KeyValues[op.Put.Key] = ValueRevision{
Value: op.Put.Value,
ModRevision: newState.Revision + 1,
Version: ver,
}
increaseRevision = true
newState = detachFromOldLease(newState, op.Put.Key)
Expand Down Expand Up @@ -326,6 +337,7 @@ type TxnRequest struct {
// EtcdCondition is one comparison in a TxnRequest's Conditions, checked
// against the model's stored value for Key.
// When ExpectedVersion is greater than zero, EtcdState.Step compares the
// key's Version with ExpectedVersion; otherwise it compares the key's
// ModRevision with ExpectedRevision.
type EtcdCondition struct {
// Key names the entry in the model's key-value state that is inspected.
Key string
// ExpectedRevision is the ModRevision the key must have; it is only
// consulted when ExpectedVersion is not greater than zero.
ExpectedRevision int64
// ExpectedVersion, when greater than zero, is the Version the key must have
// and takes precedence over ExpectedRevision.
ExpectedVersion int64
}

type EtcdOperation struct {
Expand Down Expand Up @@ -434,6 +446,7 @@ func (el EtcdLease) DeepCopy() EtcdLease {
// ValueRevision is the model's per-key record: the stored value plus the
// revision and version metadata that transaction conditions compare against.
type ValueRevision struct {
// Value is the content written by the put operation, as a ValueOrHash.
Value ValueOrHash
// ModRevision is set by a put in EtcdState.Step to the state's revision
// plus one.
ModRevision int64
// Version is set by a put in EtcdState.Step to 1 when the key is absent or
// its stored Version is not positive, and to the stored Version plus one
// otherwise.
Version int64
}

type ValueOrHash struct {
Expand Down
Loading

0 comments on commit 8024273

Please sign in to comment.