From b2a196557f0f2b1b3d9d7c7d0e54d6a433116761 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 19 Jan 2018 14:55:21 +1100 Subject: [PATCH 1/2] ctdb-recoverd: Drop unnecessary code This has already been done in update_flags(). BUG: https://bugzilla.samba.org/show_bug.cgi?id=14513 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 3ab52b528673e08caa66f00e963528c591a84fe1) --- ctdb/server/ctdb_recoverd.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 7c53335604a..2aee9f056c7 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2667,20 +2667,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, } } - /* - * Update node flags obtained from each active node. This ensure we have - * up-to-date information for all the nodes. - */ - for (j=0; jnum; j++) { - if (nodemap->nodes[j].pnn == ctdb->pnn) { - continue; - } - if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { - continue; - } - nodemap->nodes[j].flags = remote_nodemaps[j]->nodes[j].flags; - } - for (j=0; jnum; j++) { if (nodemap->nodes[j].pnn == ctdb->pnn) { continue; -- 2.28.0 From 0557dcdeefe25e0e2ca7cd0c18fe2a7ede2968d9 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 16 Jan 2018 15:15:51 +1100 Subject: [PATCH 2/2] ctdb-recoverd: Drop unnecessary and broken code update_flags() has already updated the recovery master's canonical node map, based on the flags from each remote node, and pushed out these flags to all nodes. If i == j then the node map has already been updated from this remote node's flags, so simply drop this case. Although update_flags() has updated flags for all nodes, it did not update each node map in remote_nodemaps[] to reflect this. This means that remote_nodemaps[] may contain inconsistent flags for some nodes so it should not be used to check consistency when i != j. Further, a meaningful difference in flags can only really occur if update_flags() failed. In that case this code is never reached. These observations combine to imply that this whole loop should be dropped. This leaves potential sub-second inconsistencies due to out-of-band healthy/unhealthy flag changes pushed via CTDB_SRVID_PUSH_NODE_FLAGS. These updates could be dropped (takeover run asks each node for available IPs rather than making centralised decisions based on node flags) but for now they will be fixed in the next iteration of main_loop(). BUG: https://bugzilla.samba.org/show_bug.cgi?id=14513 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 4b01f54041dee469971f244e64064eed46de2ed5) --- ctdb/server/ctdb_recoverd.c | 47 ------------------------------------- 1 file changed, 47 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 2aee9f056c7..8501fe877ff 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2667,53 +2667,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, } } - for (j=0; jnum; j++) { - if (nodemap->nodes[j].pnn == ctdb->pnn) { - continue; - } - if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { - continue; - } - - /* verify the flags are consistent - */ - for (i=0; inum; i++) { - if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) { - continue; - } - - if (nodemap->nodes[i].flags != remote_nodemaps[j]->nodes[i].flags) { - DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different flags for node %u. It has 0x%02x vs our 0x%02x\n", - nodemap->nodes[j].pnn, - nodemap->nodes[i].pnn, - remote_nodemaps[j]->nodes[i].flags, - nodemap->nodes[i].flags)); - if (i == j) { - DEBUG(DEBUG_ERR,("Use flags 0x%02x from remote node %d for cluster update of its own flags\n", remote_nodemaps[j]->nodes[i].flags, j)); - update_flags_on_all_nodes( - rec, - nodemap->nodes[i].pnn, - remote_nodemaps[j]->nodes[i].flags); - ctdb_set_culprit(rec, nodemap->nodes[j].pnn); - do_recovery(rec, mem_ctx, pnn, nodemap, - vnnmap); - return; - } else { - DEBUG(DEBUG_ERR,("Use flags 0x%02x from local recmaster node for cluster update of node %d flags\n", nodemap->nodes[i].flags, i)); - update_flags_on_all_nodes( - rec, - nodemap->nodes[i].pnn, - nodemap->nodes[i].flags); - ctdb_set_culprit(rec, nodemap->nodes[j].pnn); - do_recovery(rec, mem_ctx, pnn, nodemap, - vnnmap); - return; - } - } - } - } - - /* count how many active nodes there are */ num_lmasters = 0; for (i=0; inum; i++) { -- 2.28.0