Skip to content

Commit

Permalink
[PLAT-12584] Ensure old primary is always read-only during switchover
Browse files Browse the repository at this point in the history
Summary:
Currently, there is a small window where writes are possible on the old primary during
switchover. This diff fixes that by making sure its role is set to STANDBY before the drain begins.

Switchover to B on A -> B:

Old sequence:

1. Drain A -> B
2. Delete A -> B
3. Create B -> A

New sequence:

1. Create B -> A
2. Drain A -> B
3. Delete A -> B

The diff also has a minor change to use WITH (FORCE) while dropping dbs to avoid scenarios where a full copy is required and the target universe still has active conns open.

Test Plan:
1. Switchover, verify it works and that STANDBY role is set before draining.
2. During switchover, run a workload and verify that the workload sees the error for "DML ops not allowed" before the drain happens.

Reviewers: cwang, jmak

Reviewed By: cwang

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D32308
  • Loading branch information
iSignal committed Feb 13, 2024
1 parent 16ea7b7 commit 7bc0e0f
Show file tree
Hide file tree
Showing 14 changed files with 54 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ submodules/
!python/yugabyte/test_data/org_yb_pgsql_TestDropTableWithConcurrentTxn_testDmlTxnDrop_1pct_sample.log

managed/yba-installer/bin/yba-ctl
managed/.devspace
managed/yba-installer/yba-ctl
managed/devops/pex/pexEnv/
managed/src/main/java/com/yugabyte/yw/common/operator/io/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ public void run() {

// If this is a retry and keyspace to restore to already exists, drop it.
if (!isFirstTry() && taskParams().actionType == ActionType.RESTORE) {
createDeleteKeySpaceTask(taskParams().getKeyspace(), taskParams().backupType)
createDeleteKeySpaceTask(
taskParams().getKeyspace(), taskParams().backupType, false /*ysqlForce*/)
.setSubTaskGroupType(UserTaskDetails.SubTaskGroupType.ConfigureUniverse);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,8 @@ protected void addSubtasksForTablesNeedBootstrap(
// Delete hanging replication streams, otherwise deleting the database will fail.
createDeleteRemnantStreamsTask(targetUniverse.getUniverseUUID(), namespaceName);
// If the table type is YSQL, delete the database from the target universe before restore.
createDeleteKeySpaceTask(namespaceName, CommonTypes.TableType.PGSQL_TABLE_TYPE);
createDeleteKeySpaceTask(
namespaceName, CommonTypes.TableType.PGSQL_TABLE_TYPE, true /*ysqlForce*/);
}

// Wait for sometime to make sure the above drop database has reached all the nodes.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ protected void addSubtasksToUseNewXClusterConfig(
XClusterConfig currentXClusterConfig,
XClusterConfig newXClusterConfig,
boolean forceDeleteCurrentXClusterConfig) {

// Delete the main replication config.
createDeleteXClusterConfigSubtasks(
currentXClusterConfig,
Expand All @@ -153,7 +154,6 @@ protected void addSubtasksToUseNewXClusterConfig(

createPromoteSecondaryConfigToMainConfigTask(newXClusterConfig);

// Create the new xCluster config.
createXClusterConfigSetStatusTask(newXClusterConfig, XClusterConfigStatusType.Updating);

createXClusterConfigSetStatusForTablesTask(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ public void run() {
// Lock the target universe.
lockUniverseForUpdate(targetUniverse.getUniverseUUID(), targetUniverse.getVersion());

// Todo: After DB support, put the old primary in read-only mode.

createSetDrStatesTask(
currentXClusterConfig,
State.SwitchoverInProgress,
Expand All @@ -72,12 +70,36 @@ public void run() {
null /* keyspacePending */)
.setSubTaskGroupType(UserTaskDetails.SubTaskGroupType.ConfigureUniverse);

createXClusterConfigSetStatusTask(
switchoverXClusterConfig, XClusterConfigStatusType.Updating);

createXClusterConfigSetStatusForTablesTask(
switchoverXClusterConfig,
getTableIds(taskParams().getTableInfoList()),
XClusterTableConfig.Status.Updating);

// Create new primary -> old primary (this will set old primary's role to be STANDBY)
addSubtasksToCreateXClusterConfig(
switchoverXClusterConfig,
taskParams().getTableInfoList(),
taskParams().getMainTableIndexTablesMap(),
taskParams().getSourceTableIdsWithNoTableOnTargetUniverse(),
taskParams().getPitrParams());

createWaitForReplicationDrainTask(currentXClusterConfig);

addSubtasksToUseNewXClusterConfig(
createPromoteSecondaryConfigToMainConfigTask(switchoverXClusterConfig);

// Delete old primary -> new primary (this will set new primary's role to be ACTIVE)
createDeleteXClusterConfigSubtasks(
currentXClusterConfig,
switchoverXClusterConfig,
false /* forceDeleteCurrentXClusterConfig */);
false /* keepEntry */,
false /*forceDelete*/,
true /*deletePitrConfigs*/);

createXClusterConfigSetStatusTask(
switchoverXClusterConfig, XClusterConfigStatusType.Running)
.setSubTaskGroupType(UserTaskDetails.SubTaskGroupType.ConfigureUniverse);

createMarkUniverseUpdateSuccessTasks(targetUniverse.getUniverseUUID())
.setSubTaskGroupType(UserTaskDetails.SubTaskGroupType.ConfigureUniverse);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2278,13 +2278,15 @@ public SubTaskGroup createDeleteTablesFromUniverseTask(
* @param keyspaceName : name of the database/keyspace to delete.
* @param tableType : Type of the Table YSQL/ YCQL
*/
public SubTaskGroup createDeleteKeySpaceTask(String keyspaceName, TableType tableType) {
public SubTaskGroup createDeleteKeySpaceTask(
String keyspaceName, TableType tableType, boolean ysqlForce) {
SubTaskGroup subTaskGroup = createSubTaskGroup("DeleteKeyspace");
// Create required params for this subtask.
DeleteKeyspace.Params params = new DeleteKeyspace.Params();
params.setUniverseUUID(taskParams().getUniverseUUID());
params.setKeyspace(keyspaceName);
params.backupType = tableType;
params.ysqlForce = ysqlForce;
// Create the task.
DeleteKeyspace task = createTask(DeleteKeyspace.class);
// Initialize the task.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ protected DeleteKeyspace(BaseTaskDependencies baseTaskDependencies) {
super(baseTaskDependencies);
}

public static class Params extends BackupTableParams {}
public static class Params extends BackupTableParams {
public boolean ysqlForce;
}

@Override
protected DeleteKeyspace.Params taskParams() {
Expand All @@ -63,7 +65,9 @@ public void run() {
try {
// Build the query to run.
String query = String.format("DROP DATABASE IF EXISTS \"%s\"", keyspaceName);

if (taskParams().ysqlForce) {
query += " WITH (FORCE)";
}
// Execute the query.
log.info("Executing query {} to drop existing DB {}", query, keyspaceName);
ysqlQueryExecutor.runUserDbCommands(query, DB_NAME, universe);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public void run() {
}
xClusterConfig.update();
log.info(
"Universe role for universe {} was synced to {}",
"Current universe role for universe {} is {}",
universe.getUniverseUUID(),
currentXClusterRole);

Expand All @@ -111,6 +111,7 @@ public void run() {
"Failed to set the role for universe %s to %s on XClusterConfig(%s): %s",
universe.getUniverseUUID(), requestedRole, xClusterConfig, resp.errorMessage()));
}
log.info("Universe role for universe {} was set to {}", requestedRole);

if (HighAvailabilityConfig.get().isPresent()) {
universe.incrementVersion();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public void run() {
.map(streamInfo -> streamInfo.getStreamId().toStringUtf8())
.collect(Collectors.toList());
if (undrainedStreamIds.isEmpty()) {
log.info("All streams are drained");
log.info("All streams were drained in {} ms", stopwatch.elapsed().toMillis());
break;
}
subtaskElapsedTime = stopwatch.elapsed();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ public void run() {

// For txn xCluster set the target universe role to standby.
if (xClusterConfig.getType().equals(ConfigType.Txn) && xClusterConfig.isTargetActive()) {
log.info("Setting the role of universe {} to STANDBY", targetUniverse.getUniverseUUID());
client.changeXClusterRole(XClusterRole.STANDBY);
xClusterConfig.setTargetActive(false);
xClusterConfig.update();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,12 @@ public Map<String, Boolean> isBootstrapRequired(
Map<String, String> tableIdStreamIdMap;
if (xClusterConfig != null) {
tableIdStreamIdMap = xClusterConfig.getTableIdStreamIdMap(tableIds);
log.debug(
"Using existing stream id map with {} entries for {} tables " + "for isBootstrapRequired",
tableIdStreamIdMap.size(),
tableIds.size());
} else {
log.debug("No stream ids available for isBootstrapRequired");
tableIdStreamIdMap = new HashMap<>();
tableIds.forEach(tableId -> tableIdStreamIdMap.put(tableId, null));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ const getFormSubmitLabel = (
return 'Validate Table Selection';
}
if (bootstrapRequired) {
return 'Next: Configure Fully Copy';
return 'Next: Configure Full Copy';
}
return 'Enable Replication';
case FormStep.CONFIGURE_BOOTSTRAP:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export const DeleteConfigModal = ({
redirectUrl
}: DeleteConfigModalProps) => {
const [isSubmitting, setIsSubmitting] = useState<boolean>(false);
const [isForceDelete, setIsForceDelete] = useState<boolean>(true);
const [isForceDelete, setIsForceDelete] = useState<boolean>(false);
const [confirmationText, setConfirmationText] = useState<string>('');
const classes = useStyles();
const queryClient = useQueryClient();
Expand Down
2 changes: 1 addition & 1 deletion managed/ui/src/translations/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,7 @@
"matchingTableNote": "<bold>Note!</bold> Only matching databases on both universes are eligible for disaster recovery. Ensure the databases are created with the same name on both universes to enable Disaster Recovery for them."
},
"configureBootstrap": {
"infoText": "<paragraph>The selected databases and/or tables contain <bold>data that is not yet replicated</bold> for DR. To enable DR for these databases and/or tables, a fully copy will be required before aynchronous replication from the DR primary universe to the DR replica universe can be established.</paragraph><paragraph>Are you sure you want to perform a <bold>full copy</bold> and add these databases and/or tables?</paragraph>",
"infoText": "<paragraph>The selected databases and/or tables contain <bold>data that is not yet replicated</bold> for DR. To enable DR for these databases and/or tables, a full copy will be required before aynchronous replication from the DR primary universe to the DR replica universe can be established.</paragraph><paragraph>Are you sure you want to perform a <bold>full copy</bold> and add these databases and/or tables?</paragraph>",
"backupStorageConfig": {
"label": "Backup Storage Config"
},
Expand Down

0 comments on commit 7bc0e0f

Please sign in to comment.