From a2bca1010d0d0c2dc68f229909a4a1fd27ad6ff6 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 18 Jan 2018 20:25:07 +1100 Subject: [PATCH 01/22] ctdb-recoverd: Drop unused nodemap argument from update_flags_on_all_nodes() An unused argument needlessly extends the length of function calls. A subsequent change will allow rec->nodemap to be used if necessary. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 6982fcb3e6c940d0047aac3b6bfbc9dfdc8d7214) --- ctdb/server/ctdb_recoverd.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 3f5d43c1e87..2aa9a14678b 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -425,9 +425,11 @@ static int set_recovery_mode(struct ctdb_context *ctdb, } /* - update flags on all active nodes + * Update flags on all connected nodes */ -static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap, uint32_t pnn, uint32_t flags) +static int update_flags_on_all_nodes(struct ctdb_context *ctdb, + uint32_t pnn, + uint32_t flags) { int ret; @@ -1125,7 +1127,9 @@ static int do_recovery(struct ctdb_recoverd *rec, continue; } - ret = update_flags_on_all_nodes(ctdb, nodemap, i, nodemap->nodes[i].flags); + ret = update_flags_on_all_nodes(ctdb, + i, + nodemap->nodes[i].flags); if (ret != 0) { if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) { DEBUG(DEBUG_WARNING, (__location__ "Unable to update flags on inactive node %d\n", i)); @@ -2610,14 +2614,20 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, nodemap->nodes[i].flags)); if (i == j) { DEBUG(DEBUG_ERR,("Use flags 0x%02x from remote node %d for cluster update of its own flags\n", remote_nodemaps[j]->nodes[i].flags, j)); - update_flags_on_all_nodes(ctdb, nodemap, nodemap->nodes[i].pnn, remote_nodemaps[j]->nodes[i].flags); + update_flags_on_all_nodes( + ctdb, + nodemap->nodes[i].pnn, + remote_nodemaps[j]->nodes[i].flags); ctdb_set_culprit(rec, nodemap->nodes[j].pnn); do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap); return; } else { DEBUG(DEBUG_ERR,("Use flags 0x%02x from local recmaster node for cluster update of node %d flags\n", nodemap->nodes[i].flags, i)); - update_flags_on_all_nodes(ctdb, nodemap, nodemap->nodes[i].pnn, nodemap->nodes[i].flags); + update_flags_on_all_nodes( + ctdb, + nodemap->nodes[i].pnn, + nodemap->nodes[i].flags); ctdb_set_culprit(rec, nodemap->nodes[j].pnn); do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap); -- 2.28.0 From be7d07536bcf827798ceacbe94dfa3a3be35409a Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 5 May 2020 23:45:15 +1000 Subject: [PATCH 02/22] ctdb-recoverd: Change update_flags_on_all_nodes() to take rec argument This makes fields such as recmaster and nodemap easily available if required. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit cb3a3147b7a3a29d7806733791e1fa6ba2e46680) --- ctdb/server/ctdb_recoverd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 2aa9a14678b..05dd933c3f8 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -427,10 +427,11 @@ static int set_recovery_mode(struct ctdb_context *ctdb, /* * Update flags on all connected nodes */ -static int update_flags_on_all_nodes(struct ctdb_context *ctdb, +static int update_flags_on_all_nodes(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t flags) { + struct ctdb_context *ctdb = rec->ctdb; int ret; ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), pnn, flags, ~flags); @@ -1127,7 +1128,7 @@ static int do_recovery(struct ctdb_recoverd *rec, continue; } - ret = update_flags_on_all_nodes(ctdb, + ret = update_flags_on_all_nodes(rec, i, nodemap->nodes[i].flags); if (ret != 0) { @@ -2615,7 +2616,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, if (i == j) { DEBUG(DEBUG_ERR,("Use flags 0x%02x from remote node %d for cluster update of its own flags\n", remote_nodemaps[j]->nodes[i].flags, j)); update_flags_on_all_nodes( - ctdb, + rec, nodemap->nodes[i].pnn, remote_nodemaps[j]->nodes[i].flags); ctdb_set_culprit(rec, nodemap->nodes[j].pnn); @@ -2625,7 +2626,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, } else { DEBUG(DEBUG_ERR,("Use flags 0x%02x from local recmaster node for cluster update of node %d flags\n", nodemap->nodes[i].flags, i)); update_flags_on_all_nodes( - ctdb, + rec, nodemap->nodes[i].pnn, nodemap->nodes[i].flags); ctdb_set_culprit(rec, nodemap->nodes[j].pnn); -- 2.28.0 From 56fec1d449b6f7a3926e6a548755cb817bc9ff68 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sat, 15 Jun 2019 07:19:26 +1000 Subject: [PATCH 03/22] ctdb-recoverd: Introduce some local variables to improve readability Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit f681c0e947741151f8fb95d88edddfd732166dc1) --- ctdb/server/ctdb_recoverd.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 05dd933c3f8..e525e048983 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -510,9 +510,11 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map_ol */ for (j=0; jnum; j++) { struct ctdb_node_map_old *remote_nodemap=NULL; + uint32_t local_flags = nodemap->nodes[j].flags; + uint32_t remote_flags; int ret; - if (nodemap->nodes[j].flags & NODE_FLAGS_DISCONNECTED) { + if (local_flags & NODE_FLAGS_DISCONNECTED) { continue; } if (nodemap->nodes[j].pnn == ctdb->pnn) { @@ -528,26 +530,35 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map_ol talloc_free(mem_ctx); return -1; } - if (nodemap->nodes[j].flags != remote_nodemap->nodes[j].flags) { + remote_flags = remote_nodemap->nodes[j].flags; + + if (local_flags != remote_flags) { /* We should tell our daemon about this so it updates its flags or else we will log the same message again in the next iteration of recovery. Since we are the recovery master we can just as well update the flags on all nodes. */ - ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, remote_nodemap->nodes[j].flags, ~remote_nodemap->nodes[j].flags); + ret = ctdb_ctrl_modflags(ctdb, + CONTROL_TIMEOUT(), + nodemap->nodes[j].pnn, + remote_flags, + ~remote_flags); if (ret != 0) { DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n")); return -1; } - /* Update our local copy of the flags in the recovery - daemon. - */ - DEBUG(DEBUG_NOTICE,("Remote node %u had flags 0x%x, local had 0x%x - updating local\n", - nodemap->nodes[j].pnn, remote_nodemap->nodes[j].flags, - nodemap->nodes[j].flags)); - nodemap->nodes[j].flags = remote_nodemap->nodes[j].flags; + /* + * Update the local copy of the flags in the + * recovery daemon. + */ + D_NOTICE("Remote node %u had flags 0x%x, " + "local had 0x%x - updating local\n", + nodemap->nodes[j].pnn, + remote_flags, + local_flags); + nodemap->nodes[j].flags = remote_flags; } talloc_free(remote_nodemap); } -- 2.28.0 From 721537d135204ac876c21dd23af195ee4dd1c32d Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sat, 15 Jun 2019 07:20:19 +1000 Subject: [PATCH 04/22] ctdb-recoverd: Use update_flags_on_all_nodes() This is clearer than using the MODFLAGS control directly. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 915d24ac12d27c21649d9e64d201d9df9d583129) --- ctdb/server/ctdb_recoverd.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index e525e048983..bbd375a897e 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -533,19 +533,13 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map_ol remote_flags = remote_nodemap->nodes[j].flags; if (local_flags != remote_flags) { - /* We should tell our daemon about this so it - updates its flags or else we will log the same - message again in the next iteration of recovery. - Since we are the recovery master we can just as - well update the flags on all nodes. - */ - ret = ctdb_ctrl_modflags(ctdb, - CONTROL_TIMEOUT(), - nodemap->nodes[j].pnn, - remote_flags, - ~remote_flags); + ret = update_flags_on_all_nodes(rec, + nodemap->nodes[j].pnn, + remote_flags); if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n")); + DBG_ERR( + "Unable to update flags on remote nodes\n"); + talloc_free(mem_ctx); return -1; } -- 2.28.0 From d36362569130d5005a73203dddf83e9b6203cad6 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 14 Jul 2020 14:43:04 +1000 Subject: [PATCH 05/22] ctdb-recoverd: Improve a call to update_flags_on_all_nodes() This should take a PNN, not an array index. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit b1e631ff929fd87392a80895d1c8d265d9df42dc) --- ctdb/server/ctdb_recoverd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index bbd375a897e..acb6a7f401a 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1134,7 +1134,7 @@ static int do_recovery(struct ctdb_recoverd *rec, } ret = update_flags_on_all_nodes(rec, - i, + nodemap->nodes[i].pnn, nodemap->nodes[i].flags); if (ret != 0) { if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) { -- 2.28.0 From 29696c3e27cb942ece2de1e2ce2518628ad3ebed Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 5 May 2020 23:37:57 +1000 Subject: [PATCH 06/22] ctdb-recoverd: Move ctdb_ctrl_modflags() to ctdb_recoverd.c This file is the only user of this function. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit a88c10c5a9afcf0a3dcadef07dd95af498bfa47a) --- ctdb/include/ctdb_client.h | 5 --- ctdb/server/ctdb_client.c | 65 ----------------------------------- ctdb/server/ctdb_recoverd.c | 68 +++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 70 deletions(-) diff --git a/ctdb/include/ctdb_client.h b/ctdb/include/ctdb_client.h index 198a8a38dbb..b89c4e49b2f 100644 --- a/ctdb/include/ctdb_client.h +++ b/ctdb/include/ctdb_client.h @@ -195,11 +195,6 @@ int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_iface_list_old **ifaces); -int ctdb_ctrl_modflags(struct ctdb_context *ctdb, - struct timeval timeout, - uint32_t destnode, - uint32_t set, uint32_t clear); - int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_tunable_list *tunables); diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c index 453e7b28477..5d1a30d03da 100644 --- a/ctdb/server/ctdb_client.c +++ b/ctdb/server/ctdb_client.c @@ -1243,71 +1243,6 @@ int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb, return 0; } -/* - set/clear the permanent disabled bit on a remote node - */ -int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, - uint32_t set, uint32_t clear) -{ - int ret; - TDB_DATA data; - struct ctdb_node_map_old *nodemap=NULL; - struct ctdb_node_flag_change c; - TALLOC_CTX *tmp_ctx = talloc_new(ctdb); - uint32_t recmaster; - uint32_t *nodes; - - - /* find the recovery master */ - ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n")); - talloc_free(tmp_ctx); - return ret; - } - - - /* read the node flags from the recmaster */ - ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode)); - talloc_free(tmp_ctx); - return -1; - } - if (destnode >= nodemap->num) { - DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode)); - talloc_free(tmp_ctx); - return -1; - } - - c.pnn = destnode; - c.old_flags = nodemap->nodes[destnode].flags; - c.new_flags = c.old_flags; - c.new_flags |= set; - c.new_flags &= ~clear; - - data.dsize = sizeof(c); - data.dptr = (unsigned char *)&c; - - /* send the flags update to all connected nodes */ - nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true); - - if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS, - nodes, 0, - timeout, false, data, - NULL, NULL, - NULL) != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n")); - - talloc_free(tmp_ctx); - return -1; - } - - talloc_free(tmp_ctx); - return 0; -} - - /* get all tunables */ diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index acb6a7f401a..02259382382 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -424,6 +424,74 @@ static int set_recovery_mode(struct ctdb_context *ctdb, return 0; } +/* + * Set/clear flags on a remote node + */ +static int ctdb_ctrl_modflags(struct ctdb_context *ctdb, + struct timeval timeout, + uint32_t destnode, + uint32_t set, + uint32_t clear) +{ + int ret; + TDB_DATA data; + struct ctdb_node_map_old *nodemap=NULL; + struct ctdb_node_flag_change c; + TALLOC_CTX *tmp_ctx = talloc_new(ctdb); + uint32_t recmaster; + uint32_t *nodes; + + + /* find the recovery master */ + ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster); + if (ret != 0) { + DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n")); + talloc_free(tmp_ctx); + return ret; + } + + + /* read the node flags from the recmaster */ + ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap); + if (ret != 0) { + DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode)); + talloc_free(tmp_ctx); + return -1; + } + if (destnode >= nodemap->num) { + DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode)); + talloc_free(tmp_ctx); + return -1; + } + + c.pnn = destnode; + c.old_flags = nodemap->nodes[destnode].flags; + c.new_flags = c.old_flags; + c.new_flags |= set; + c.new_flags &= ~clear; + + data.dsize = sizeof(c); + data.dptr = (unsigned char *)&c; + + /* send the flags update to all connected nodes */ + nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true); + + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS, + nodes, 0, + timeout, false, data, + NULL, NULL, + NULL) != 0) { + DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n")); + + talloc_free(tmp_ctx); + return -1; + } + + talloc_free(tmp_ctx); + return 0; +} + + /* * Update flags on all connected nodes */ -- 2.28.0 From 0a9d9f1ee8ce0e5e851facf98f7925d480880e4e Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 28 Sep 2018 10:46:17 +1000 Subject: [PATCH 07/22] ctdb-recoverd: Flatten update_flags_on_all_nodes() The logic currently in ctdb_ctrl_modflags() will be optimised so that it no longer matches the pattern for a control function. So, remove this function and squash its functionality into the only caller. Although there are some superficial changes, the behaviour is unchanged. Flattening the 2 functions produces some seriously weird logic for setting the new flags, to the point where using ctdb_ctrl_modflags() for this purpose now looks very strange. The weirdness will be cleaned up in a subsequent commit. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 0c6a7db3ba84b8355359b0a8c52690b234bb866d) --- ctdb/server/ctdb_recoverd.c | 69 ++++++++++++++----------------------- 1 file changed, 26 insertions(+), 43 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 02259382382..e24a4578a2d 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -425,22 +425,21 @@ static int set_recovery_mode(struct ctdb_context *ctdb, } /* - * Set/clear flags on a remote node + * Update flags on all connected nodes */ -static int ctdb_ctrl_modflags(struct ctdb_context *ctdb, - struct timeval timeout, - uint32_t destnode, - uint32_t set, - uint32_t clear) +static int update_flags_on_all_nodes(struct ctdb_recoverd *rec, + uint32_t pnn, + uint32_t flags) { - int ret; + struct ctdb_context *ctdb = rec->ctdb; + struct timeval timeout = CONTROL_TIMEOUT(); TDB_DATA data; struct ctdb_node_map_old *nodemap=NULL; struct ctdb_node_flag_change c; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); uint32_t recmaster; uint32_t *nodes; - + int ret; /* find the recovery master */ ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster); @@ -450,25 +449,24 @@ static int ctdb_ctrl_modflags(struct ctdb_context *ctdb, return ret; } - /* read the node flags from the recmaster */ ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap); if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode)); + DBG_ERR("Unable to get nodemap from node %u\n", recmaster); talloc_free(tmp_ctx); return -1; } - if (destnode >= nodemap->num) { - DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode)); + if (pnn >= nodemap->num) { + DBG_ERR("Nodemap from recmaster does not contain node %d\n", pnn); talloc_free(tmp_ctx); return -1; } - c.pnn = destnode; - c.old_flags = nodemap->nodes[destnode].flags; + c.pnn = pnn; + c.old_flags = nodemap->nodes[pnn].flags; c.new_flags = c.old_flags; - c.new_flags |= set; - c.new_flags &= ~clear; + c.new_flags |= flags; + c.new_flags &= flags; data.dsize = sizeof(c); data.dptr = (unsigned char *)&c; @@ -476,13 +474,18 @@ static int ctdb_ctrl_modflags(struct ctdb_context *ctdb, /* send the flags update to all connected nodes */ nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true); - if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS, - nodes, 0, - timeout, false, data, - NULL, NULL, - NULL) != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n")); - + ret = ctdb_client_async_control(ctdb, + CTDB_CONTROL_MODIFY_FLAGS, + nodes, + 0, + timeout, + false, + data, + NULL, + NULL, + NULL); + if (ret != 0) { + DBG_ERR("Unable to update flags on remote nodes\n"); talloc_free(tmp_ctx); return -1; } @@ -491,26 +494,6 @@ static int ctdb_ctrl_modflags(struct ctdb_context *ctdb, return 0; } - -/* - * Update flags on all connected nodes - */ -static int update_flags_on_all_nodes(struct ctdb_recoverd *rec, - uint32_t pnn, - uint32_t flags) -{ - struct ctdb_context *ctdb = rec->ctdb; - int ret; - - ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), pnn, flags, ~flags); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n")); - return -1; - } - - return 0; -} - /* called when ctdb_wait_timeout should finish */ -- 2.28.0 From c3a8b6ac0f6ce28a8831529a985cc175e517d408 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 5 May 2020 23:49:05 +1000 Subject: [PATCH 08/22] ctdb-recoverd: Do not retrieve nodemap from recovery master It is already in rec->nodemap. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 9475ab044161e687b9ced3a477746393565b49b1) --- ctdb/server/ctdb_recoverd.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index e24a4578a2d..71be10d81dc 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -437,27 +437,13 @@ static int update_flags_on_all_nodes(struct ctdb_recoverd *rec, struct ctdb_node_map_old *nodemap=NULL; struct ctdb_node_flag_change c; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); - uint32_t recmaster; uint32_t *nodes; int ret; - /* find the recovery master */ - ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n")); - talloc_free(tmp_ctx); - return ret; - } + nodemap = rec->nodemap; - /* read the node flags from the recmaster */ - ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap); - if (ret != 0) { - DBG_ERR("Unable to get nodemap from node %u\n", recmaster); - talloc_free(tmp_ctx); - return -1; - } if (pnn >= nodemap->num) { - DBG_ERR("Nodemap from recmaster does not contain node %d\n", pnn); + DBG_ERR("Nodemap does not contain node %d\n", pnn); talloc_free(tmp_ctx); return -1; } -- 2.28.0 From 3ff8cbba7dfbe53f73a48f4ea55880af4fbe9684 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 14 Jul 2020 14:22:15 +1000 Subject: [PATCH 09/22] ctdb-recoverd: Correctly find nodemap entry for pnn Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 3654e416770cc7521dcc3c15976daeba37023304) --- ctdb/server/ctdb_recoverd.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 71be10d81dc..be786adbc6c 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -438,18 +438,24 @@ static int update_flags_on_all_nodes(struct ctdb_recoverd *rec, struct ctdb_node_flag_change c; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); uint32_t *nodes; + uint32_t i; int ret; nodemap = rec->nodemap; - if (pnn >= nodemap->num) { + for (i = 0; i < nodemap->num; i++) { + if (pnn == nodemap->nodes[i].pnn) { + break; + } + } + if (i >= nodemap->num) { DBG_ERR("Nodemap does not contain node %d\n", pnn); talloc_free(tmp_ctx); return -1; } c.pnn = pnn; - c.old_flags = nodemap->nodes[pnn].flags; + c.old_flags = nodemap->nodes[i].flags; c.new_flags = c.old_flags; c.new_flags |= flags; c.new_flags &= flags; -- 2.28.0 From d30e648a623f94e31d963f471e250fa19dd1c4a4 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 14 Jul 2020 14:29:09 +1000 Subject: [PATCH 10/22] ctdb-recoverd: Simplify calculation of new flags Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Martin Schwenke Autobuild-Date(master): Fri Jul 24 06:03:23 UTC 2020 on sn-devel-184 (cherry picked from commit 5ce6133a75107abdcb9fcfd93bc7594812dc5055) --- ctdb/server/ctdb_recoverd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index be786adbc6c..3f72619127a 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -456,9 +456,7 @@ static int update_flags_on_all_nodes(struct ctdb_recoverd *rec, c.pnn = pnn; c.old_flags = nodemap->nodes[i].flags; - c.new_flags = c.old_flags; - c.new_flags |= flags; - c.new_flags &= flags; + c.new_flags = flags; data.dsize = sizeof(c); data.dptr = (unsigned char *)&c; -- 2.28.0 From 61ac80e4867646c6ff7d9817cea69af80423d032 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 18 Jan 2018 16:19:36 +1100 Subject: [PATCH 11/22] ctdb-recoverd: Basic cleanups for get_remote_nodemaps() Don't log an error on failure - let the caller can do this. Apart from this: fix up coding style and modernise the remaining error message. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 52f520d39cd92e1cf2413fd7e0dd362debd6f463) --- ctdb/server/ctdb_recoverd.c | 39 ++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 3f72619127a..41f029e6bca 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2223,38 +2223,45 @@ done: } -static void async_getnodemap_callback(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data) +static void async_getnodemap_callback(struct ctdb_context *ctdb, + uint32_t node_pnn, + int32_t res, + TDB_DATA outdata, + void *callback_data) { struct ctdb_node_map_old **remote_nodemaps = callback_data; if (node_pnn >= ctdb->num_nodes) { - DEBUG(DEBUG_ERR,(__location__ " pnn from invalid node\n")); + DBG_ERR("Invalid PNN\n"); return; } - remote_nodemaps[node_pnn] = (struct ctdb_node_map_old *)talloc_steal(remote_nodemaps, outdata.dptr); + remote_nodemaps[node_pnn] = (struct ctdb_node_map_old *)talloc_steal( + remote_nodemaps, outdata.dptr); } -static int get_remote_nodemaps(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, - struct ctdb_node_map_old *nodemap, - struct ctdb_node_map_old **remote_nodemaps) +static int get_remote_nodemaps(struct ctdb_context *ctdb, + TALLOC_CTX *mem_ctx, + struct ctdb_node_map_old *nodemap, + struct ctdb_node_map_old **remote_nodemaps) { uint32_t *nodes; + int ret; nodes = list_of_active_nodes(ctdb, nodemap, mem_ctx, true); - if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_NODEMAP, - nodes, 0, - CONTROL_TIMEOUT(), false, tdb_null, + + ret = ctdb_client_async_control(ctdb, + CTDB_CONTROL_GET_NODEMAP, + nodes, + 0, + CONTROL_TIMEOUT(), + false, + tdb_null, async_getnodemap_callback, NULL, - remote_nodemaps) != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to pull all remote nodemaps\n")); - - return -1; - } - - return 0; + remote_nodemaps); + return ret; } static bool validate_recovery_master(struct ctdb_recoverd *rec, -- 2.28.0 From 88e61d60dfdd9df88501d95c29ece229c2ad4bc7 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 17 Aug 2020 20:27:18 +1000 Subject: [PATCH 12/22] ctdb-recoverd: Fix a local memory leak The memory is allocated off the memory context used by the current iteration of main loop. It is freed when main loop completes the fix doesn't require backporting to stable branches. However, it is sloppy so it is worth fixing. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit d2d90f250214582d7124b8137aa2cf5032b2f285) --- ctdb/server/ctdb_recoverd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 41f029e6bca..5d6cfa2d009 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2261,6 +2261,7 @@ static int get_remote_nodemaps(struct ctdb_context *ctdb, async_getnodemap_callback, NULL, remote_nodemaps); + talloc_free(nodes); return ret; } -- 2.28.0 From 6d39e2c2cca944244d67638a11232f081cb153a3 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 18 Jan 2018 16:41:19 +1100 Subject: [PATCH 13/22] ctdb-recoverd: Change signature of get_remote_nodemaps() Change 1st argument to a rec context, since this will be needed later. Drop the nodemap argument and access it via rec->nodemap instead. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 3324dd272c7dafa92cd9c3fd0af8f50084bcdaaa) --- ctdb/server/ctdb_recoverd.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 5d6cfa2d009..b08950a17bc 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2241,15 +2241,15 @@ static void async_getnodemap_callback(struct ctdb_context *ctdb, } -static int get_remote_nodemaps(struct ctdb_context *ctdb, +static int get_remote_nodemaps(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx, - struct ctdb_node_map_old *nodemap, struct ctdb_node_map_old **remote_nodemaps) { + struct ctdb_context *ctdb = rec->ctdb; uint32_t *nodes; int ret; - nodes = list_of_active_nodes(ctdb, nodemap, mem_ctx, true); + nodes = list_of_active_nodes(ctdb, rec->nodemap, mem_ctx, true); ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_NODEMAP, @@ -2592,10 +2592,11 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, for(i=0; inum; i++) { remote_nodemaps[i] = NULL; } - if (get_remote_nodemaps(ctdb, mem_ctx, nodemap, remote_nodemaps) != 0) { - DEBUG(DEBUG_ERR,(__location__ " Failed to read remote nodemaps\n")); + ret = get_remote_nodemaps(rec, mem_ctx, remote_nodemaps); + if (ret != 0) { + DBG_ERR("Failed to read remote nodemaps\n"); return; - } + } /* verify that all other nodes have the same nodemap as we have */ -- 2.28.0 From f4b54ba9d91c1c354754dce413f4b3e2c87b6572 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 18 Jan 2018 16:31:39 +1100 Subject: [PATCH 14/22] ctdb-recoverd: Move memory allocation into get_remote_nodemaps() BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 2eaa0af6160588b6e3364b181d0976477d12b51b) --- ctdb/server/ctdb_recoverd.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index b08950a17bc..c6254ae3404 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2243,12 +2243,21 @@ static void async_getnodemap_callback(struct ctdb_context *ctdb, static int get_remote_nodemaps(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx, - struct ctdb_node_map_old **remote_nodemaps) + struct ctdb_node_map_old ***remote_nodemaps) { struct ctdb_context *ctdb = rec->ctdb; + struct ctdb_node_map_old **t; uint32_t *nodes; int ret; + t = talloc_zero_array(mem_ctx, + struct ctdb_node_map_old *, + rec->nodemap->num); + if (t == NULL) { + DBG_ERR("Memory allocation error\n"); + return -1; + } + nodes = list_of_active_nodes(ctdb, rec->nodemap, mem_ctx, true); ret = ctdb_client_async_control(ctdb, @@ -2260,9 +2269,16 @@ static int get_remote_nodemaps(struct ctdb_recoverd *rec, tdb_null, async_getnodemap_callback, NULL, - remote_nodemaps); + t); talloc_free(nodes); - return ret; + + if (ret != 0) { + talloc_free(t); + return ret; + } + + *remote_nodemaps = t; + return 0; } static bool validate_recovery_master(struct ctdb_recoverd *rec, @@ -2584,15 +2600,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, /* get the nodemap for all active remote nodes */ - remote_nodemaps = talloc_array(mem_ctx, struct ctdb_node_map_old *, nodemap->num); - if (remote_nodemaps == NULL) { - DEBUG(DEBUG_ERR, (__location__ " failed to allocate remote nodemap array\n")); - return; - } - for(i=0; inum; i++) { - remote_nodemaps[i] = NULL; - } - ret = get_remote_nodemaps(rec, mem_ctx, remote_nodemaps); + ret = get_remote_nodemaps(rec, mem_ctx, &remote_nodemaps); if (ret != 0) { DBG_ERR("Failed to read remote nodemaps\n"); return; -- 2.28.0 From 487cfd21236bb153b5f4e96146f6109d1c132653 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 18 Jan 2018 19:52:22 +1100 Subject: [PATCH 15/22] ctdb-recoverd: Add an intermediate state struct for nodemap fetching This will allow an error callback to be added. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit a079ee31690cf7110f46b41989ffcfb83b7626d6) --- ctdb/server/ctdb_recoverd.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index c6254ae3404..406f870afc8 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2223,13 +2223,19 @@ done: } +struct remote_nodemaps_state { + struct ctdb_node_map_old **remote_nodemaps; +}; + static void async_getnodemap_callback(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data) { - struct ctdb_node_map_old **remote_nodemaps = callback_data; + struct remote_nodemaps_state *state = + (struct remote_nodemaps_state *)callback_data; + struct ctdb_node_map_old **remote_nodemaps = state->remote_nodemaps; if (node_pnn >= ctdb->num_nodes) { DBG_ERR("Invalid PNN\n"); @@ -2248,6 +2254,7 @@ static int get_remote_nodemaps(struct ctdb_recoverd *rec, struct ctdb_context *ctdb = rec->ctdb; struct ctdb_node_map_old **t; uint32_t *nodes; + struct remote_nodemaps_state state; int ret; t = talloc_zero_array(mem_ctx, @@ -2260,6 +2267,8 @@ static int get_remote_nodemaps(struct ctdb_recoverd *rec, nodes = list_of_active_nodes(ctdb, rec->nodemap, mem_ctx, true); + state.remote_nodemaps = t; + ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_NODEMAP, nodes, @@ -2269,7 +2278,7 @@ static int get_remote_nodemaps(struct ctdb_recoverd *rec, tdb_null, async_getnodemap_callback, NULL, - t); + &state); talloc_free(nodes); if (ret != 0) { -- 2.28.0 From 934c700ef8d88b44299388507bccc4c2e2c1f60a Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 18 Jan 2018 19:58:15 +1100 Subject: [PATCH 16/22] ctdb-recoverd: Add fail callback to assign banning credits Also drop error handling in main_loop() that is replaced by this change. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 10ce0dbf1c11eaaab7b28b6bbd014235a36d1962) --- ctdb/server/ctdb_recoverd.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 406f870afc8..e50facfe992 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2225,6 +2225,7 @@ done: struct remote_nodemaps_state { struct ctdb_node_map_old **remote_nodemaps; + struct ctdb_recoverd *rec; }; static void async_getnodemap_callback(struct ctdb_context *ctdb, @@ -2247,6 +2248,20 @@ static void async_getnodemap_callback(struct ctdb_context *ctdb, } +static void async_getnodemap_error(struct ctdb_context *ctdb, + uint32_t node_pnn, + int32_t res, + TDB_DATA outdata, + void *callback_data) +{ + struct remote_nodemaps_state *state = + (struct remote_nodemaps_state *)callback_data; + struct ctdb_recoverd *rec = state->rec; + + DBG_ERR("Failed to retrieve nodemap from node %u\n", node_pnn); + ctdb_set_culprit(rec, node_pnn); +} + static int get_remote_nodemaps(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx, struct ctdb_node_map_old ***remote_nodemaps) @@ -2268,6 +2283,7 @@ static int get_remote_nodemaps(struct ctdb_recoverd *rec, nodes = list_of_active_nodes(ctdb, rec->nodemap, mem_ctx, true); state.remote_nodemaps = t; + state.rec = rec; ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_NODEMAP, @@ -2277,7 +2293,7 @@ static int get_remote_nodemaps(struct ctdb_recoverd *rec, false, tdb_null, async_getnodemap_callback, - NULL, + async_getnodemap_error, &state); talloc_free(nodes); @@ -2622,13 +2638,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, continue; } - if (remote_nodemaps[j] == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Did not get a remote nodemap for node %d, restarting monitoring\n", j)); - ctdb_set_culprit(rec, j); - - return; - } - /* if the nodes disagree on how many nodes there are then this is a good reason to try recovery */ -- 2.28.0 From 59296ea6deb0ef418986849a5b14e7ed6c2000f6 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 30 Jul 2020 11:57:51 +1000 Subject: [PATCH 17/22] ctdb-recoverd: Fix node_pnn check and assignment of nodemap into array This array is indexed by the same index as nodemap, not the PNN. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 368c83bfe3bbfff568d14f65e7b1ffa41d5349ac) --- ctdb/server/ctdb_recoverd.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index e50facfe992..b910aacce7c 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2237,13 +2237,21 @@ static void async_getnodemap_callback(struct ctdb_context *ctdb, struct remote_nodemaps_state *state = (struct remote_nodemaps_state *)callback_data; struct ctdb_node_map_old **remote_nodemaps = state->remote_nodemaps; + struct ctdb_node_map_old *nodemap = state->rec->nodemap; + size_t i; - if (node_pnn >= ctdb->num_nodes) { - DBG_ERR("Invalid PNN\n"); + for (i = 0; i < nodemap->num; i++) { + if (nodemap->nodes[i].pnn == node_pnn) { + break; + } + } + + if (i >= nodemap->num) { + DBG_ERR("Invalid PNN %"PRIu32"\n", node_pnn); return; } - remote_nodemaps[node_pnn] = (struct ctdb_node_map_old *)talloc_steal( + remote_nodemaps[i] = (struct ctdb_node_map_old *)talloc_steal( remote_nodemaps, outdata.dptr); } -- 2.28.0 From bb1a8406a854c7f875c1eec5fabc5b9b870722d5 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 18 Jan 2018 20:02:42 +1100 Subject: [PATCH 18/22] ctdb-recoverd: Change get_remote_nodemaps() to use connected nodes The plan here is to use the nodemaps retrieved by get_remote_nodes() in update_local_flags(). This will improve efficiency, since get_remote_nodes() fetches flags from nodes in parallel. It also means that get_remote_nodes() can be used exactly once early on in main_loop() to retrieve remote nodemaps. Retrieving nodemaps multiple times is unnecessary and racy - a single monitoring iteration should not fetch flags multiple times and compare them. This introduces a temporary behaviour change but it will be of no consequence when the above changes are made. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 762d1d8a9605f97973a2c1176de5d29fcc61d15a) --- ctdb/server/ctdb_recoverd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index b910aacce7c..0cdaab75878 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2288,7 +2288,7 @@ static int get_remote_nodemaps(struct ctdb_recoverd *rec, return -1; } - nodes = list_of_active_nodes(ctdb, rec->nodemap, mem_ctx, true); + nodes = list_of_connected_nodes(ctdb, rec->nodemap, mem_ctx, true); state.remote_nodemaps = t; state.rec = rec; @@ -2631,8 +2631,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, goto takeover_run_checks; } - /* get the nodemap for all active remote nodes - */ + /* Get the nodemaps for all connected remote nodes */ ret = get_remote_nodemaps(rec, mem_ctx, &remote_nodemaps); if (ret != 0) { DBG_ERR("Failed to read remote nodemaps\n"); -- 2.28.0 From 18eb828fc3d5f1c1b044c44f1ada59277400b4f2 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 14 Jun 2019 00:23:22 +1000 Subject: [PATCH 19/22] ctdb-recoverd: Do not fetch the nodemap from the recovery master The nodemap has already been fetched from the local node and is actually passed to this function. Care must be taken to avoid referencing the "remote" nodemap for the recovery master. It also isn't useful to do so, since it would be the same nodemap. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit d50919b0cb28f299c9b6985271b29d4f27c5f619) --- ctdb/server/ctdb_recoverd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 0cdaab75878..2b01c897f3d 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2288,7 +2288,7 @@ static int get_remote_nodemaps(struct ctdb_recoverd *rec, return -1; } - nodes = list_of_connected_nodes(ctdb, rec->nodemap, mem_ctx, true); + nodes = list_of_connected_nodes(ctdb, rec->nodemap, mem_ctx, false); state.remote_nodemaps = t; state.rec = rec; @@ -2641,6 +2641,9 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, /* verify that all other nodes have the same nodemap as we have */ for (j=0; jnum; j++) { + if (nodemap->nodes[j].pnn == ctdb->pnn) { + continue; + } if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } @@ -2677,6 +2680,9 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, * up-to-date information for all the nodes. */ for (j=0; jnum; j++) { + if (nodemap->nodes[j].pnn == ctdb->pnn) { + continue; + } if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } @@ -2684,6 +2690,9 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, } for (j=0; jnum; j++) { + if (nodemap->nodes[j].pnn == ctdb->pnn) { + continue; + } if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } -- 2.28.0 From a6f0369c716a2b70682482d359d67a330aba4f81 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 14 Jun 2019 03:51:01 +1000 Subject: [PATCH 20/22] ctdb-recoverd: Get remote nodemaps earlier update_local_flags() will be changed to use these nodemaps. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 910a0b3b747a987ba69b6a0b6256e964b7d85dfe) --- ctdb/server/ctdb_recoverd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 2b01c897f3d..c31d53aa1cb 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2555,6 +2555,13 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, } + /* Get the nodemaps for all connected remote nodes */ + ret = get_remote_nodemaps(rec, mem_ctx, &remote_nodemaps); + if (ret != 0) { + DBG_ERR("Failed to read remote nodemaps\n"); + return; + } + /* ensure our local copies of flags are right */ ret = update_local_flags(rec, nodemap); if (ret != 0) { @@ -2631,13 +2638,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, goto takeover_run_checks; } - /* Get the nodemaps for all connected remote nodes */ - ret = get_remote_nodemaps(rec, mem_ctx, &remote_nodemaps); - if (ret != 0) { - DBG_ERR("Failed to read remote nodemaps\n"); - return; - } - /* verify that all other nodes have the same nodemap as we have */ for (j=0; jnum; j++) { -- 2.28.0 From 539565dee97fc66c711d4fabbf313999aad19117 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 18 Jan 2018 20:35:55 +1100 Subject: [PATCH 21/22] ctdb-recoverd: Change update_local_flags() to use already retrieved nodemaps BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 702c7c4934e79a9161fdc59df70df30ae492d89f) --- ctdb/server/ctdb_recoverd.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index c31d53aa1cb..3c355e10565 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -537,18 +537,19 @@ static void ctdb_wait_election(struct ctdb_recoverd *rec) } /* - Update our local flags from all remote connected nodes. - This is only run when we are or we belive we are the recovery master + * Update local flags from all remote connected nodes and push out + * flags changes to all nodes. This is only run by the recovery + * master. */ -static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map_old *nodemap) +static int update_local_flags(struct ctdb_recoverd *rec, + struct ctdb_node_map_old *nodemap, + struct ctdb_node_map_old **remote_nodemaps) { unsigned int j; struct ctdb_context *ctdb = rec->ctdb; TALLOC_CTX *mem_ctx = talloc_new(ctdb); - /* get the nodemap for all active remote nodes and verify - they are the same as for this node - */ + /* Check flags from remote nodes */ for (j=0; jnum; j++) { struct ctdb_node_map_old *remote_nodemap=NULL; uint32_t local_flags = nodemap->nodes[j].flags; @@ -562,15 +563,7 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map_ol continue; } - ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, - mem_ctx, &remote_nodemap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from remote node %u\n", - nodemap->nodes[j].pnn)); - ctdb_set_culprit(rec, nodemap->nodes[j].pnn); - talloc_free(mem_ctx); - return -1; - } + remote_nodemap = remote_nodemaps[j]; remote_flags = remote_nodemap->nodes[j].flags; if (local_flags != remote_flags) { @@ -595,7 +588,6 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map_ol local_flags); nodemap->nodes[j].flags = remote_flags; } - talloc_free(remote_nodemap); } talloc_free(mem_ctx); return 0; @@ -2563,7 +2555,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, } /* ensure our local copies of flags are right */ - ret = update_local_flags(rec, nodemap); + ret = update_local_flags(rec, nodemap, remote_nodemaps); if (ret != 0) { DEBUG(DEBUG_ERR,("Unable to update local flags\n")); return; -- 2.28.0 From 42c23ce3caa20db90af3f7a2a0dad971d1943437 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 24 Jan 2018 10:21:37 +1100 Subject: [PATCH 22/22] ctdb-recoverd: Rename update_local_flags() -> update_flags() This also updates remote flags so the name is misleading. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 4aa8e72d60e92951b35190d2ffcfdb1bfb756609) --- ctdb/server/ctdb_recoverd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 3c355e10565..f825427e7a3 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -541,9 +541,9 @@ static void ctdb_wait_election(struct ctdb_recoverd *rec) * flags changes to all nodes. This is only run by the recovery * master. */ -static int update_local_flags(struct ctdb_recoverd *rec, - struct ctdb_node_map_old *nodemap, - struct ctdb_node_map_old **remote_nodemaps) +static int update_flags(struct ctdb_recoverd *rec, + struct ctdb_node_map_old *nodemap, + struct ctdb_node_map_old **remote_nodemaps) { unsigned int j; struct ctdb_context *ctdb = rec->ctdb; @@ -2554,10 +2554,10 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, return; } - /* ensure our local copies of flags are right */ - ret = update_local_flags(rec, nodemap, remote_nodemaps); + /* Ensure our local and remote flags are correct */ + ret = update_flags(rec, nodemap, remote_nodemaps); if (ret != 0) { - DEBUG(DEBUG_ERR,("Unable to update local flags\n")); + D_ERR("Unable to update flags\n"); return; } -- 2.28.0