@@ -52,7 +52,10 @@ static int cli_drop_node_getopts(int argc, char **argv);
5252static void cli_drop_node (int argc , char * * argv );
5353static void cli_drop_monitor (int argc , char * * argv );
5454
55- static void cli_drop_node_from_monitor (KeeperConfig * config );
55+ static void cli_drop_node_from_monitor (KeeperConfig * config ,
56+ int64_t * nodeId ,
57+ int * groupId );
58+
5659static void cli_drop_local_node (KeeperConfig * config , bool dropAndDestroy );
5760static void cli_drop_local_monitor (MonitorConfig * mconfig , bool dropAndDestroy );
5861
@@ -62,6 +65,7 @@ static void cli_drop_node_files_and_directories(KeeperConfig *config);
6265static void stop_postgres_and_remove_pgdata_and_config (ConfigFilePaths * pathnames ,
6366 PostgresSetup * pgSetup );
6467
68+ static void cli_drop_node_from_monitor_and_wait (KeeperConfig * config );
6569
6670CommandLine drop_monitor_command =
6771 make_command ("monitor" ,
@@ -85,7 +89,8 @@ CommandLine drop_node_command =
8589 " --hostname drop the node with given hostname and pgport\n"
8690 " --pgport drop the node with given hostname and pgport\n"
8791 " --destroy also destroy Postgres database\n"
88- " --force force dropping the node from the monitor\n" ,
92+ " --force force dropping the node from the monitor\n"
93+ " --wait how many seconds to wait, default to 60 \n" ,
8994 cli_drop_node_getopts ,
9095 cli_drop_node );
9196
@@ -108,6 +113,7 @@ cli_drop_node_getopts(int argc, char **argv)
108113 { "hostname" , required_argument , NULL , 'n' },
109114 { "pgport" , required_argument , NULL , 'p' },
110115 { "formation" , required_argument , NULL , 'f' },
116+ { "wait" , required_argument , NULL , 'w' },
111117 { "name" , required_argument , NULL , 'a' },
112118 { "version" , no_argument , NULL , 'V' },
113119 { "verbose" , no_argument , NULL , 'v' },
@@ -118,6 +124,9 @@ cli_drop_node_getopts(int argc, char **argv)
118124
119125 optind = 0 ;
120126
127+ options .listen_notifications_timeout =
128+ PG_AUTOCTL_LISTEN_NOTIFICATIONS_TIMEOUT ;
129+
121130 while ((c = getopt_long (argc , argv , "D:dn:p:Vvqh" ,
122131 long_options , & option_index )) != -1 )
123132 {
@@ -191,6 +200,19 @@ cli_drop_node_getopts(int argc, char **argv)
191200 break ;
192201 }
193202
203+ case 'w' :
204+ {
205+ /* { "wait", required_argument, NULL, 'w' }, */
206+ if (!stringToInt (optarg , & options .listen_notifications_timeout ))
207+ {
208+ log_fatal ("--wait argument is not a valid timeout: \"%s\"" ,
209+ optarg );
210+ exit (EXIT_CODE_BAD_ARGS );
211+ }
212+ log_trace ("--wait %d" , options .listen_notifications_timeout );
213+ break ;
214+ }
215+
194216 case 'V' :
195217 {
196218 /* keeper_cli_print_version prints version and exits. */
@@ -395,7 +417,7 @@ cli_drop_node(int argc, char **argv)
395417 exit (EXIT_CODE_BAD_ARGS );
396418 }
397419
398- (void ) cli_drop_node_from_monitor (& config );
420+ (void ) cli_drop_node_from_monitor_and_wait (& config );
399421 }
400422}
401423
@@ -483,7 +505,7 @@ cli_drop_monitor(int argc, char **argv)
483505 * --name.
484506 */
485507static void
486- cli_drop_node_from_monitor (KeeperConfig * config )
508+ cli_drop_node_from_monitor (KeeperConfig * config , int64_t * nodeId , int * groupId )
487509{
488510 Monitor monitor = { 0 };
489511
@@ -498,7 +520,9 @@ cli_drop_node_from_monitor(KeeperConfig *config)
498520 if (!monitor_remove_by_nodename (& monitor ,
499521 (char * ) config -> formation ,
500522 (char * ) config -> name ,
501- dropForce ))
523+ dropForce ,
524+ nodeId ,
525+ groupId ))
502526 {
503527 /* errors have already been logged */
504528 exit (EXIT_CODE_MONITOR );
@@ -518,7 +542,9 @@ cli_drop_node_from_monitor(KeeperConfig *config)
518542 if (!monitor_remove_by_hostname (& monitor ,
519543 (char * ) config -> hostname ,
520544 pgport ,
521- dropForce ))
545+ dropForce ,
546+ nodeId ,
547+ groupId ))
522548 {
523549 /* errors have already been logged */
524550 exit (EXIT_CODE_MONITOR );
@@ -569,7 +595,10 @@ cli_drop_local_node(KeeperConfig *config, bool dropAndDestroy)
569595 /* first drop the node from the monitor */
570596 if (keeperState -> assigned_role != DROPPED_STATE )
571597 {
572- (void ) cli_drop_node_from_monitor (config );
598+ int64_t nodeId = -1 ;
599+ int groupId = -1 ;
600+
601+ (void ) cli_drop_node_from_monitor (config , & nodeId , & groupId );
573602 }
574603
575604 /*
@@ -851,3 +880,82 @@ stop_postgres_and_remove_pgdata_and_config(ConfigFilePaths *pathnames,
851880 exit (EXIT_CODE_BAD_CONFIG );
852881 }
853882}
883+
884+
885+ /*
886+ * cli_drop_node_from_monitor_and_wait waits until the node doesn't exist
887+ * anymore on the monitor, meaning it's been fully dropped now.
888+ */
889+ static void
890+ cli_drop_node_from_monitor_and_wait (KeeperConfig * config )
891+ {
892+ bool dropped = false;
893+ Monitor monitor = { 0 };
894+
895+ (void ) cli_monitor_init_from_option_or_config (& monitor , config );
896+
897+ /* call pgautofailover.remove_node() on the monitor */
898+ int64_t nodeId ;
899+ int groupId ;
900+
901+ (void ) cli_drop_node_from_monitor (config , & nodeId , & groupId );
902+
903+ /* if the timeout is zero, just don't wait at all */
904+ if (config -> listen_notifications_timeout == 0 )
905+ {
906+ return ;
907+ }
908+
909+ log_info ("Waiting until the node with id %lld in group %d has been "
910+ "dropped from the monitor, or for %ds, whichever comes first" ,
911+ (long long ) nodeId , groupId , config -> listen_notifications_timeout );
912+
913+ uint64_t start = time (NULL );
914+
915+ /* establish a connection for notifications if none present */
916+ (void ) pgsql_prepare_to_wait (& (monitor .notificationClient ));
917+
918+ while (!dropped )
919+ {
920+ NodeAddressArray nodesArray = { 0 };
921+
922+ bool groupStateHasChanged = false;
923+ int timeoutMs = PG_AUTOCTL_KEEPER_SLEEP_TIME * 1000 ;
924+
925+ uint64_t now = time (NULL );
926+
927+ if ((now - start ) > config -> listen_notifications_timeout )
928+ {
929+ log_error ("Failed to wait until the node has been dropped" );
930+ exit (EXIT_CODE_INTERNAL_ERROR );
931+ }
932+
933+ (void ) monitor_wait_for_state_change (& monitor ,
934+ config -> formation ,
935+ groupId ,
936+ nodeId ,
937+ timeoutMs ,
938+ & groupStateHasChanged );
939+
940+ if (!monitor_find_node_by_nodeid (& monitor ,
941+ config -> formation ,
942+ groupId ,
943+ nodeId ,
944+ & nodesArray ))
945+ {
946+ log_error ("Failed to query monitor to see if node id %lld "
947+ "has been dropped already" ,
948+ (long long ) nodeId );
949+ exit (EXIT_CODE_MONITOR );
950+ }
951+
952+ dropped = nodesArray .count == 0 ;
953+
954+ if (dropped )
955+ {
956+ log_info ("Node with id %lld in group %d has been successfully "
957+ "dropped from the monitor" ,
958+ (long long ) nodeId , groupId );
959+ }
960+ }
961+ }
0 commit comments