From ea5ae20c28083f522cfa9cf8dc8cea6461134cb2 Mon Sep 17 00:00:00 2001 From: Zachary Pinto Date: Tue, 7 Jan 2025 12:10:29 -0800 Subject: [PATCH] When the new instance operation APIs are used, we need to consider that an older version of helix could have been used to modify the legacy fields of HELIX_ENABLED, HELIX_DISABLED_TYPE, and HELIX_DISABLED_REASON. When this happens, we need to write those legacy fields to the USER source instance operation to preserve the history when another source sets the instance operation. When the other source re-enables the instance, we should restore the old legacy fields with what was previously set. --- .../helix/constants/InstanceConstants.java | 18 ---- .../apache/helix/manager/zk/ZKHelixAdmin.java | 7 +- .../apache/helix/model/InstanceConfig.java | 98 ++++++++++++++----- .../helix/model/TestInstanceConfig.java | 66 ++++++++++++- 4 files changed, 136 insertions(+), 53 deletions(-) diff --git a/helix-common/src/main/java/org/apache/helix/constants/InstanceConstants.java b/helix-common/src/main/java/org/apache/helix/constants/InstanceConstants.java index df6c37bb61..21d8b8a310 100644 --- a/helix-common/src/main/java/org/apache/helix/constants/InstanceConstants.java +++ b/helix-common/src/main/java/org/apache/helix/constants/InstanceConstants.java @@ -48,24 +48,6 @@ public enum InstanceOperationSource { public int getPriority() { return _priority; } - - /** - * Convert from InstanceDisabledType to InstanceOperationTrigger - * - * @param disabledType InstanceDisabledType - * @return InstanceOperationTrigger - */ - public static InstanceOperationSource instanceDisabledTypeToInstanceOperationSource( - InstanceDisabledType disabledType) { - switch (disabledType) { - case CLOUD_EVENT: - return InstanceOperationSource.AUTOMATION; - case USER_OPERATION: - return InstanceOperationSource.USER; - default: - return InstanceOperationSource.DEFAULT; - } - } } public enum InstanceOperation { diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java index b091b70ca8..74cc8ee857 100644 --- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java +++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java @@ -747,7 +747,7 @@ public boolean forceKillInstance(String clusterName, String instanceName, String InstanceConfig.InstanceOperation instanceOperationObj = new InstanceConfig.InstanceOperation.Builder() .setOperation(InstanceConstants.InstanceOperation.UNKNOWN).setReason(reason) - .setSource(operationSource != null ? operationSource : InstanceConstants.InstanceOperationSource.USER).build(); + .setSource(operationSource).build(); InstanceConfig instanceConfig = getInstanceConfig(clusterName, instanceName); instanceConfig.setInstanceOperation(instanceOperationObj); @@ -2365,10 +2365,7 @@ public ZNRecord update(ZNRecord currentData) { InstanceConfig config = new InstanceConfig(currentData); config.setInstanceOperation(new InstanceConfig.InstanceOperation.Builder().setOperation( enabled ? InstanceConstants.InstanceOperation.ENABLE - : InstanceConstants.InstanceOperation.DISABLE).setReason(reason).setSource( - disabledType != null - ? InstanceConstants.InstanceOperationSource.instanceDisabledTypeToInstanceOperationSource( - disabledType) : null).build()); + : InstanceConstants.InstanceOperation.DISABLE).setReason(reason).build()); return config.getRecord(); } }, AccessOption.PERSISTENT); diff --git a/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java b/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java index 7b5faddc34..755d489cb4 100644 --- a/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java +++ b/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java @@ -74,7 +74,7 @@ public static class InstanceOperation { private final Map _properties; private enum InstanceOperationProperties { - OPERATION, REASON, SOURCE, TIMESTAMP + OPERATION, REASON, SOURCE, TIMESTAMP, LEGACY_DISABLED_TYPE } private InstanceOperation(@Nullable Map properties) { @@ -102,7 +102,7 @@ public Builder setOperation(@Nullable InstanceConstants.InstanceOperation operat /** * Set the reason for this instance operation. - * @param reason + * @param reason the reason for this instance operation. */ public Builder setReason(String reason) { _properties.put(InstanceOperationProperties.REASON.name(), reason != null ? reason : ""); @@ -121,6 +121,23 @@ public Builder setSource(InstanceConstants.InstanceOperationSource source) { return this; } + /** + * Set the HELIX_DISABLED_TYPE which is a legacy field that must be stored, so we can write it + * back when we write back to the legacy fields. + * + * @param disabledType InstanceDisabledType + * @return + * @throws IllegalArgumentException + */ + private Builder setLegacyDisabledType(InstanceConstants.InstanceDisabledType disabledType) { + if (disabledType == null) { + return this; + } + _properties.put(InstanceOperationProperties.LEGACY_DISABLED_TYPE.name(), + disabledType.name()); + return this; + } + public InstanceOperation build() throws IllegalArgumentException { if (!_properties.containsKey(InstanceOperationProperties.OPERATION.name())) { throw new IllegalArgumentException( @@ -165,6 +182,12 @@ public InstanceConstants.InstanceOperationSource getSource() { InstanceConstants.InstanceOperationSource.USER.name())); } + private InstanceConstants.InstanceDisabledType getLegacyDisabledType() { + return InstanceConstants.InstanceDisabledType.valueOf( + _properties.getOrDefault(InstanceOperationProperties.LEGACY_DISABLED_TYPE.name(), + InstanceConstants.InstanceDisabledType.DEFAULT_INSTANCE_DISABLE_TYPE.name())); + } + /** * Get the timestamp (milliseconds from epoch) when this instance operation was triggered. * @@ -442,7 +465,8 @@ public void setInstanceDisabledReason(String disabledReason) { */ @Deprecated public void setInstanceDisabledType(InstanceConstants.InstanceDisabledType disabledType) { - if (getInstanceOperation().getOperation().equals(InstanceConstants.InstanceOperation.DISABLE)) { + if (getInstanceOperation().getOperation().equals(InstanceConstants.InstanceOperation.DISABLE) + && disabledType != InstanceConstants.InstanceDisabledType.DEFAULT_INSTANCE_DISABLE_TYPE) { _record.setSimpleField(InstanceConfigProperty.HELIX_DISABLED_TYPE.name(), disabledType.name()); } @@ -501,14 +525,7 @@ private List getInstanceOperations() { return _deserializedInstanceOperations; } - /** - * Set the instance operation for this instance. - * This method also sets the HELIX_ENABLED, HELIX_DISABLED_REASON, and HELIX_DISABLED_TYPE fields - * for backwards compatibility. - * - * @param operation the instance operation - */ - public void setInstanceOperation(InstanceOperation operation) { + private void setInstanceOperationHelper(InstanceOperation operation) { List deserializedInstanceOperations = getInstanceOperations(); if (operation.getSource() == InstanceConstants.InstanceOperationSource.ADMIN) { @@ -518,6 +535,7 @@ public void setInstanceOperation(InstanceOperation operation) { deserializedInstanceOperations.removeIf( instanceOperation -> instanceOperation.getSource() == operation.getSource()); } + if (operation.getOperation() == InstanceConstants.InstanceOperation.ENABLE) { // Insert the operation after the last ENABLE or at the beginning if there isn't ENABLE in the list. int insertIndex = 0; @@ -532,6 +550,7 @@ public void setInstanceOperation(InstanceOperation operation) { } else { deserializedInstanceOperations.add(operation); } + // Set the actual field in the ZnRecord _record.setListField(InstanceConfigProperty.HELIX_INSTANCE_OPERATIONS.name(), deserializedInstanceOperations.stream().map(instanceOperation -> { @@ -555,10 +574,8 @@ public void setInstanceOperation(InstanceOperation operation) { setInstanceEnabledHelper(false, operation.getTimestamp()); } - _record.setSimpleField(InstanceConfigProperty.HELIX_DISABLED_REASON.name(), - operation.getReason()); - _record.setSimpleField(InstanceConfigProperty.HELIX_DISABLED_TYPE.name(), - InstanceConstants.InstanceDisabledType.DEFAULT_INSTANCE_DISABLE_TYPE.name()); + setInstanceDisabledReason(operation.getReason()); + setInstanceDisabledType(operation.getLegacyDisabledType()); } else if (operation.getOperation() == InstanceConstants.InstanceOperation.ENABLE) { // If any of the other InstanceOperations are of type DISABLE, set that in the HELIX_ENABLED, // HELIX_DISABLED_REASON, and HELIX_DISABLED_TYPE fields. @@ -572,16 +589,35 @@ public void setInstanceOperation(InstanceOperation operation) { } if (latestDisableInstanceOperation != null) { - _record.setSimpleField(InstanceConfigProperty.HELIX_DISABLED_REASON.name(), - latestDisableInstanceOperation.getReason()); - _record.setSimpleField(InstanceConfigProperty.HELIX_DISABLED_TYPE.name(), - InstanceConstants.InstanceDisabledType.DEFAULT_INSTANCE_DISABLE_TYPE.name()); + setInstanceDisabledReason(latestDisableInstanceOperation.getReason()); + setInstanceDisabledType(latestDisableInstanceOperation.getLegacyDisabledType()); } else { setInstanceEnabledHelper(true, operation.getTimestamp()); } } } + /** + * Set the instance operation for this instance. + * This method also sets the HELIX_ENABLED, HELIX_DISABLED_REASON, and HELIX_DISABLED_TYPE fields + * for backwards compatibility. + * + * @param operation the instance operation + */ + public void setInstanceOperation(InstanceOperation operation) { + // TODO: This can be removed when HELIX_ENABLED is removed. + // This is used for backwards compatibility. getInstanceOperation will respect the old + // legacy field of HELIX_ENABLED which could have been set by older versions of Helix. + // If the HELIX_ENABLED is set by an older version of Helix, we need to sync that value to the + // new InstanceOperation field. + if (!getInstanceOperations().isEmpty()) { + setInstanceOperationHelper(getInstanceOperation()); + } + + // Set the instance operation passed in. + setInstanceOperationHelper(operation); + } + /** * Set the instance operation for this instance. Provide the InstanceOperation enum and the reason * and source will be set to default values. @@ -648,12 +684,19 @@ public InstanceOperation getInstanceOperation() { HELIX_ENABLED_DEFAULT_VALUE) && (InstanceConstants.INSTANCE_DISABLED_OVERRIDABLE_OPERATIONS.contains( activeInstanceOperation.getOperation()))) { - return new InstanceOperation.Builder().setOperation( - InstanceConstants.InstanceOperation.DISABLE).setReason(getInstanceDisabledReason()) - .setSource( - InstanceConstants.InstanceOperationSource.instanceDisabledTypeToInstanceOperationSource( - InstanceConstants.InstanceDisabledType.valueOf(getInstanceDisabledType()))) - .build(); + InstanceOperation.Builder instanceOperationBuilder = + new InstanceOperation.Builder().setOperation(InstanceConstants.InstanceOperation.DISABLE) + .setReason(getInstanceDisabledReason()); + + try { + instanceOperationBuilder.setLegacyDisabledType( + InstanceConstants.InstanceDisabledType.valueOf(getInstanceDisabledType())); + } catch (IllegalArgumentException e) { + _logger.error("Invalid instance disabled type for instance: " + _record.getId() + + ". Defaulting to DEFAULT_INSTANCE_DISABLE_TYPE."); + } + + return instanceOperationBuilder.build(); } // if instance operation is DISABLE, we override it to ENABLE if HELIX_ENABLED set to true @@ -662,8 +705,9 @@ public InstanceOperation getInstanceOperation() { // we always set HELIX_ENABLED to false // If instance is enabled by old version helix (not having instance operation), the instance config // will have HELIX_ENABLED set to true. In this case, we should override the instance operation to ENABLE - if ("true".equals(_record.getSimpleField(InstanceConfigProperty.HELIX_ENABLED.name()))) { - return new InstanceOperation.Builder().setOperation(InstanceConstants.InstanceOperation.ENABLE).build(); + if (_record.getBooleanField(InstanceConfigProperty.HELIX_ENABLED.name(), false)) { + return new InstanceOperation.Builder().setOperation( + InstanceConstants.InstanceOperation.ENABLE).build(); } } diff --git a/helix-core/src/test/java/org/apache/helix/model/TestInstanceConfig.java b/helix-core/src/test/java/org/apache/helix/model/TestInstanceConfig.java index f0081bec00..9e4a2f8738 100644 --- a/helix-core/src/test/java/org/apache/helix/model/TestInstanceConfig.java +++ b/helix-core/src/test/java/org/apache/helix/model/TestInstanceConfig.java @@ -81,14 +81,74 @@ public void testEnabledInstanceBackwardCompatibility() { InstanceConfig instanceConfig = new InstanceConfig.Builder().setInstanceOperation(InstanceConstants.InstanceOperation.DISABLE).build("id"); Assert.assertFalse(instanceConfig.getInstanceEnabled()); - // assume an old version client enabled the instance by setting HELIX_ENABLED to true + Assert.assertEquals(instanceConfig.getInstanceOperation().getOperation(), + InstanceConstants.InstanceOperation.DISABLE); + Assert.assertEquals(instanceConfig.getInstanceOperation().getSource(), + InstanceConstants.InstanceOperationSource.USER); + + // Assume an old version client enabled the instance by setting HELIX_ENABLED to true instanceConfig.getRecord().setSimpleField(InstanceConfig.InstanceConfigProperty.HELIX_ENABLED.name(), "true"); Assert.assertTrue(instanceConfig.getInstanceEnabled()); - instanceConfig.setInstanceOperation(InstanceConstants.InstanceOperation.ENABLE); + Assert.assertEquals(instanceConfig.getInstanceOperation().getOperation(), + InstanceConstants.InstanceOperation.ENABLE); + Assert.assertEquals(instanceConfig.getInstanceOperation().getSource(), + InstanceConstants.InstanceOperationSource.USER); + + // Automation source then disables the instance + instanceConfig.setInstanceOperation(new InstanceConfig.InstanceOperation.Builder().setSource( + InstanceConstants.InstanceOperationSource.AUTOMATION) + .setOperation(InstanceConstants.InstanceOperation.DISABLE).setReason("disableReason") + .build()); + Assert.assertFalse(instanceConfig.getInstanceEnabled()); + Assert.assertEquals(instanceConfig.getInstanceOperation().getOperation(), + InstanceConstants.InstanceOperation.DISABLE); + Assert.assertEquals(instanceConfig.getInstanceOperation().getSource(), + InstanceConstants.InstanceOperationSource.AUTOMATION); + Assert.assertEquals(instanceConfig.getInstanceOperation().getReason(), "disableReason"); + + // Automation source then enables the instance + instanceConfig.setInstanceOperation(new InstanceConfig.InstanceOperation.Builder().setSource( + InstanceConstants.InstanceOperationSource.AUTOMATION) + .setOperation(InstanceConstants.InstanceOperation.ENABLE).build()); + // This should be enabled because we write the currently set legacy HELIX_ENABLED field into the instance operation + // associated with its source before the AUTOMATION source's operation is set. + Assert.assertTrue(instanceConfig.getInstanceEnabled()); + Assert.assertEquals(instanceConfig.getInstanceOperation().getOperation(), + InstanceConstants.InstanceOperation.ENABLE); + Assert.assertEquals(instanceConfig.getInstanceOperation().getSource(), + InstanceConstants.InstanceOperationSource.AUTOMATION); - // disable the instance by setting HELIX_ENABLED to false + // an old version client disables the instance by setting HELIX_ENABLED to false instanceConfig.getRecord().setSimpleField(InstanceConfig.InstanceConfigProperty.HELIX_ENABLED.name(), "false"); Assert.assertFalse(instanceConfig.getInstanceEnabled()); + Assert.assertEquals(instanceConfig.getInstanceOperation().getOperation(), + InstanceConstants.InstanceOperation.DISABLE); + Assert.assertEquals(instanceConfig.getInstanceOperation().getSource(), + InstanceConstants.InstanceOperationSource.USER); + + // Enable the instance with automation source which should not override the HELIX_ENABLED field + // because the source that set HELIX_ENABLED is USER + instanceConfig.setInstanceOperation(new InstanceConfig.InstanceOperation.Builder().setSource( + InstanceConstants.InstanceOperationSource.AUTOMATION) + .setOperation(InstanceConstants.InstanceOperation.ENABLE).build()); + Assert.assertFalse(instanceConfig.getInstanceEnabled()); + Assert.assertEquals(instanceConfig.getInstanceOperation().getOperation(), + InstanceConstants.InstanceOperation.DISABLE); + Assert.assertEquals(instanceConfig.getInstanceOperation().getSource(), + InstanceConstants.InstanceOperationSource.USER); + + // Enable the instance with user source which should override the HELIX_ENABLED field because + // the source that set HELIX_ENABLED is USER + instanceConfig.setInstanceOperation(new InstanceConfig.InstanceOperation.Builder().setSource( + InstanceConstants.InstanceOperationSource.USER) + .setOperation(InstanceConstants.InstanceOperation.ENABLE).build()); + Assert.assertTrue(instanceConfig.getInstanceEnabled()); + Assert.assertEquals(instanceConfig.getInstanceOperation().getOperation(), + InstanceConstants.InstanceOperation.ENABLE); + Assert.assertEquals(instanceConfig.getInstanceOperation().getSource(), + InstanceConstants.InstanceOperationSource.USER); + + // TODO: Add tests to make sure the correct DISABLED_TYPE is set when the instance is disabled } @Test