Skip to content

Commit 2ec279a

Browse files
DimCitus and JelteF authored
Fix maintenance state related transitions. (#786)
* Fix maintenance state related transitions. We used to disallow starting maintenance on a node in some cases, but it seems that the user should be able to decide when they need to perform maintenance on their own nodes. After all, we don't stop Postgres when going to maintenance, so users may change their mind without impacting their service. A WARNING message is now displayed in some cases that were previously prevented. Also, the transition from WAIT_MAINTENANCE to MAINTENANCE was failing since we improved the Group State Machine for the primary node, which would go from JOIN_PRIMARY to PRIMARY without waiting for the other nodes to reach their assigned state of WAIT_MAINTENANCE. * Prevent WAIT_PRIMARY state when all secondaries are in maintenance. If number-sync-standbys is set to 1 or more, then we still allow all the secondary nodes to be put in maintenance mode, and we maintain the primary node in the PRIMARY state, in a way that writes are going to be blocked on the primary. * Refrain from DRAINING state when there is no candidate. When a primary is not healthy and there is no candidate node to fail over to, then assigning to the primary the DRAINING state is not helping. Also, the reason why we don't have a candidate at the moment might be that the other nodes are in MAINTENANCE and the operator is restarting Postgres to install some new configuration. * Consider wait_maintenance a maintenance state in the monitor * Allow disabling maintenance in prepare_maintenance state * In maintenance, set up Postgres as a standby node without a primary. The primary election might not be finished yet, and also if the operator is to restart the local instance, they probably don't want it to connect to any other node in the system during the maintenance window. * Allow the transition from prepare_maintenance to maintenance to happen early on a multiple standby system, as soon as an election is triggered. 
* Per review, allow prepare_maintenance -> catchingup transition. * Check for the right node to reach reported state Co-authored-by: Jelte Fennema <github-tech@jeltef.nl>
1 parent 329b554 commit 2ec279a

11 files changed

Lines changed: 475 additions & 253 deletions

src/bin/pg_autoctl/cli_enable_disable.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -553,15 +553,17 @@ cli_enable_maintenance(int argc, char **argv)
553553
exit(EXIT_CODE_QUIT);
554554
}
555555

556+
NodeState targetStates[] = { MAINTENANCE_STATE };
556557
if (!monitor_wait_until_node_reported_state(
557558
&(keeper.monitor),
558559
keeper.config.formation,
559560
keeper.config.groupId,
560561
keeper.state.current_node_id,
561562
keeper.config.pgSetup.pgKind,
562-
MAINTENANCE_STATE))
563+
targetStates,
564+
lengthof(targetStates)))
563565
{
564-
log_error("Failed to wait until a node reached the wait_primary state");
566+
log_error("Failed to wait until the node reached the maintenance state");
565567
exit(EXIT_CODE_MONITOR);
566568
}
567569
}
@@ -651,15 +653,18 @@ cli_disable_maintenance(int argc, char **argv)
651653
(void) pg_usleep(sleepTimeMs * 1000);
652654
}
653655

656+
NodeState targetStates[] = { SECONDARY_STATE, PRIMARY_STATE };
657+
654658
if (!monitor_wait_until_node_reported_state(
655659
&(keeper.monitor),
656660
keeper.config.formation,
657661
keeper.config.groupId,
658662
keeper.state.current_node_id,
659663
keeper.config.pgSetup.pgKind,
660-
SECONDARY_STATE))
664+
targetStates,
665+
lengthof(targetStates)))
661666
{
662-
log_error("Failed to wait until a node reached the secondary state");
667+
log_error("Failed to wait until a node reached the secondary or primary state");
663668
exit(EXIT_CODE_MONITOR);
664669
}
665670
}

src/bin/pg_autoctl/fsm.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ KeeperFSMTransition KeeperFSM[] = {
241241
*/
242242
{ PRIMARY_STATE, PREPARE_MAINTENANCE_STATE, COMMENT_PRIMARY_TO_PREPARE_MAINTENANCE, &fsm_stop_postgres_for_primary_maintenance },
243243
{ PREPARE_MAINTENANCE_STATE, MAINTENANCE_STATE, COMMENT_PRIMARY_TO_MAINTENANCE, &fsm_stop_postgres_and_setup_standby },
244-
244+
{ PRIMARY_STATE, MAINTENANCE_STATE, COMMENT_PRIMARY_TO_MAINTENANCE, &fsm_stop_postgres_for_primary_maintenance },
245245
/*
246246
* was demoted, need to be dead now.
247247
*/
@@ -342,6 +342,7 @@ KeeperFSMTransition KeeperFSM[] = {
342342
{ CATCHINGUP_STATE, WAIT_MAINTENANCE_STATE, COMMENT_SECONDARY_TO_WAIT_MAINTENANCE, NULL },
343343
{ WAIT_MAINTENANCE_STATE, MAINTENANCE_STATE, COMMENT_SECONDARY_TO_MAINTENANCE, &fsm_start_maintenance_on_standby },
344344
{ MAINTENANCE_STATE, CATCHINGUP_STATE, COMMENT_MAINTENANCE_TO_CATCHINGUP, &fsm_restart_standby },
345+
{ PREPARE_MAINTENANCE_STATE, CATCHINGUP_STATE, COMMENT_MAINTENANCE_TO_CATCHINGUP, &fsm_restart_standby },
345346

346347
/*
347348
* Applying new replication/cluster settings (per node replication quorum,
@@ -362,6 +363,9 @@ KeeperFSMTransition KeeperFSM[] = {
362363
*/
363364
{ SECONDARY_STATE, REPORT_LSN_STATE, COMMENT_SECONDARY_TO_REPORT_LSN, &fsm_report_lsn },
364365
{ CATCHINGUP_STATE, REPORT_LSN_STATE, COMMENT_SECONDARY_TO_REPORT_LSN, &fsm_report_lsn },
366+
{ MAINTENANCE_STATE, REPORT_LSN_STATE, COMMENT_SECONDARY_TO_REPORT_LSN, &fsm_report_lsn },
367+
{ PREPARE_MAINTENANCE_STATE, REPORT_LSN_STATE, COMMENT_SECONDARY_TO_REPORT_LSN, &fsm_report_lsn },
368+
365369
{ REPORT_LSN_STATE, PREP_PROMOTION_STATE, COMMENT_REPORT_LSN_TO_PREP_PROMOTION, &fsm_prepare_standby_for_promotion },
366370

367371
{ REPORT_LSN_STATE, FAST_FORWARD_STATE, COMMENT_REPORT_LSN_TO_FAST_FORWARD, &fsm_fast_forward },

src/bin/pg_autoctl/fsm_transition.c

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -720,7 +720,7 @@ fsm_stop_postgres_and_setup_standby(Keeper *keeper)
720720
PostgresSetup *pgSetup = &(postgres->postgresSetup);
721721
KeeperConfig *config = &(keeper->config);
722722

723-
NodeAddress *primaryNode = NULL;
723+
NodeAddress upstreamNode = { 0 };
724724

725725
if (!ensure_postgres_service_is_stopped(postgres))
726726
{
@@ -737,17 +737,9 @@ fsm_stop_postgres_and_setup_standby(Keeper *keeper)
737737
return false;
738738
}
739739

740-
/* get the primary node to follow */
741-
if (!keeper_get_primary(keeper, &(postgres->replicationSource.primaryNode)))
742-
{
743-
log_error("Failed to initialize standby for lack of a primary node, "
744-
"see above for details");
745-
return false;
746-
}
747-
748740
/* prepare a standby setup */
749741
if (!standby_init_replication_source(postgres,
750-
primaryNode,
742+
&upstreamNode,
751743
PG_AUTOCTL_REPLICA_USERNAME,
752744
config->replication_password,
753745
config->replication_slot_name,

src/bin/pg_autoctl/monitor.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,8 @@ typedef struct WaitUntilNodeStateNotificationContext
169169
int groupId;
170170
int64_t nodeId;
171171
NodeAddressHeaders *headers;
172-
NodeState targetState;
172+
NodeState *targetStates;
173+
int targetStatesLength;
173174
bool done;
174175
bool firstLoop;
175176
} WaitUntilNodeStateNotificationContext;
@@ -4092,11 +4093,15 @@ monitor_check_node_report_state(void *context, CurrentNodeState *nodeState)
40924093
NodeStateToString(nodeState->reportedState),
40934094
NodeStateToString(nodeState->goalState));
40944095

4095-
if (nodeState->goalState == ctx->targetState &&
4096-
nodeState->reportedState == ctx->targetState &&
4097-
!ctx->firstLoop)
4096+
for (int i = 0; i < ctx->targetStatesLength; i++)
40984097
{
4099-
ctx->done = true;
4098+
if (nodeState->goalState == ctx->targetStates[i] &&
4099+
nodeState->reportedState == ctx->targetStates[i] &&
4100+
nodeState->node.nodeId == ctx->nodeId &&
4101+
!ctx->firstLoop)
4102+
{
4103+
ctx->done = true;
4104+
}
41004105
}
41014106

41024107
if (ctx->firstLoop)
@@ -4120,7 +4125,8 @@ monitor_wait_until_node_reported_state(Monitor *monitor,
41204125
int groupId,
41214126
int64_t nodeId,
41224127
PgInstanceKind nodeKind,
4123-
NodeState targetState)
4128+
NodeState *targetStates,
4129+
int targetStatesLength)
41244130
{
41254131
PGconn *connection = monitor->notificationClient.connection;
41264132

@@ -4132,7 +4138,8 @@ monitor_wait_until_node_reported_state(Monitor *monitor,
41324138
groupId,
41334139
nodeId,
41344140
&headers,
4135-
targetState,
4141+
targetStates,
4142+
targetStatesLength,
41364143
false, /* done */
41374144
true /* firstLoop */
41384145
};

src/bin/pg_autoctl/monitor.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,8 @@ bool monitor_wait_until_node_reported_state(Monitor *monitor,
218218
int groupId,
219219
int64_t nodeId,
220220
PgInstanceKind nodeKind,
221-
NodeState targetState);
221+
NodeState *targetStates,
222+
int targetStatesLength);
222223
bool monitor_wait_for_state_change(Monitor *monitor,
223224
const char *formation,
224225
int groupId,

0 commit comments

Comments
 (0)