From 3ca9c5a9a4eedd0ff67a11682d11682e80b2c1ff Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 8 Aug 2016 07:09:38 +1000 Subject: [PATCH 1/7] ctdb-daemon: Try to release IP address even if interface is unknown The "releaseip" event in 10.interface will determine the interface and do the right thing. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 509491a868ed01bfc5a970bd36eea4b01130853a) --- ctdb/server/ctdb_takeover.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 6d182de..0d30f43 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -2126,9 +2126,6 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) ctdb_vnn_unassign_iface(ctdb, vnn); continue; } - if (!vnn->iface) { - continue; - } /* Don't allow multiple releases at once. Some code, * particularly ctdb_tickle_sentenced_connections() is -- 2.8.1 From 96e06465811380d4358727b194a502477603ffd2 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sat, 30 Jul 2016 11:12:19 +1000 Subject: [PATCH 2/7] ctdb-daemon: Do not update the VNN state on RELEASE_IP failure If RELEASE_IP fails then updating the VNN makes it inconsistent with reality. Instead, log the failure and move on to the next IP address. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit ca22373231918dab4e94cf1bab03253aadd61993) --- ctdb/server/ctdb_takeover.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 0d30f43..3040642 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -2147,9 +2147,21 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) ctdb_vnn_iface_string(vnn))); ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u", - ctdb_vnn_iface_string(vnn), - ctdb_addr_to_str(&vnn->public_address), - vnn->public_netmask_bits); + ctdb_vnn_iface_string(vnn), + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits); + /* releaseip timeouts are converted to success, so to + * detect failures just check if the IP address is + * still there... + */ + if (ctdb_sys_have_ip(&vnn->public_address)) { + DEBUG(DEBUG_ERR, + (__location__ + " IP address %s not released\n", + ctdb_addr_to_str(&vnn->public_address))); + vnn->update_in_flight = false; + continue; + } data.dptr = (uint8_t *)talloc_strdup( vnn, ctdb_addr_to_str(&vnn->public_address)); -- 2.8.1 From 672ae230dabebdeaf0d986983cba56170da0bad7 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 11 Aug 2016 13:41:12 +1000 Subject: [PATCH 3/7] ctdb-daemon: Do not copy address for RELEASE_IP message If there's an allocation failure then the implicit early return in CTDB_NO_MEMORY_VOID() means that no reply is sent to the control. ctdb_daemon_send_message() makes a copy of the data, so don't copy it here and remove an unnecessary chance of failure. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit e653c8bb4a7bd712351a4ead3997c61b22c46f8d) --- ctdb/server/ctdb_takeover.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 3040642..4e0b7f5 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -879,8 +879,7 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, /* send a message to all clients of this node telling them that the cluster has been reconfigured and they should release any sockets on this IP */ - data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr)); - CTDB_NO_MEMORY_VOID(ctdb, data.dptr); + data.dptr = (uint8_t *) ctdb_addr_to_str(state->addr); data.dsize = strlen((char *)data.dptr)+1; DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr)); @@ -2163,14 +2162,10 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) continue; } - data.dptr = (uint8_t *)talloc_strdup( - vnn, ctdb_addr_to_str(&vnn->public_address)); - if (data.dptr != NULL) { - data.dsize = strlen((char *)data.dptr) + 1; - ctdb_daemon_send_message(ctdb, ctdb->pnn, - CTDB_SRVID_RELEASE_IP, data); - talloc_free(data.dptr); - } + data.dptr = (uint8_t *)ctdb_addr_to_str(&vnn->public_address); + data.dsize = strlen((char *)data.dptr) + 1; + ctdb_daemon_send_message(ctdb, ctdb->pnn, + CTDB_SRVID_RELEASE_IP, data); ctdb_vnn_unassign_iface(ctdb, vnn); vnn->update_in_flight = false; -- 2.8.1 From 2f6fbab75735e4be2ada7b66fe2a8899f2445cdc Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 11 Aug 2016 13:57:43 +1000 Subject: [PATCH 4/7] ctdb-daemon: Factor out new function release_ip_post() This contains the cleanup that needs to be done after an IP address is released from an interface. state->vnn is set to the return value from release_ip_post(), which is either the original VNN, or NULL if it was deleted. This allows correct handling of the in-flight flag in the destructor for state. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 46c5136e4e4bd291cbb96395374c9b133f5d8ad8) --- ctdb/server/ctdb_takeover.c | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 4e0b7f5..f96cd0a 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -850,6 +850,32 @@ static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn) talloc_free(vnn); } +static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb, + struct ctdb_vnn *vnn, + ctdb_sock_addr *addr) +{ + TDB_DATA data; + + /* Send a message to all clients of this node telling them + * that the cluster has been reconfigured and they should + * close any connections on this IP address + */ + data.dptr = (uint8_t *)ctdb_addr_to_str(addr); + data.dsize = strlen((char *)data.dptr)+1; + DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr)); + ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data); + + ctdb_vnn_unassign_iface(ctdb, vnn); + + /* Process the IP if it has been marked for deletion */ + if (vnn->delete_pending) { + do_delete_ip(ctdb, vnn); + return NULL; + } + + return vnn; +} + /* called when releaseip event finishes */ @@ -858,7 +884,6 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, { struct takeover_callback_state *state = talloc_get_type(private_data, struct takeover_callback_state); - TDB_DATA data; if (status == -ETIME) { ctdb_ban_self(ctdb); @@ -876,23 +901,7 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, } } - /* send a message to all clients of this node telling them - that the cluster has been reconfigured and they should - release any sockets on this IP */ - data.dptr = (uint8_t *) ctdb_addr_to_str(state->addr); - data.dsize = strlen((char *)data.dptr)+1; - - DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr)); - - ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data); - - ctdb_vnn_unassign_iface(ctdb, state->vnn); - - /* Process the IP if it has been marked for deletion */ - if (state->vnn->delete_pending) { - do_delete_ip(ctdb, state->vnn); - state->vnn = NULL; - } + state->vnn = release_ip_post(ctdb, state->vnn, state->addr); /* the control succeeded */ ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL); -- 2.8.1 From 75a1bc0912e455f0b76ab5dcdbe74c279a7a3b8b Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 11 Aug 2016 14:07:44 +1000 Subject: [PATCH 5/7] ctdb-daemon: Use release_ip_post() when releasing all IP addresses This has the advantage of using common code. Also, if there was previously a failed attempt to release the IP address as part of a delete, then this will finish processing the delete. Extra care needs to be taken when a VNN is actually deleted. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit d2a91394f55a2e0152bf470dac2608618db13b1f) --- ctdb/server/ctdb_takeover.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index f96cd0a..b369d03 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -2121,15 +2121,17 @@ void ctdb_takeover_client_destructor_hook(struct ctdb_client *client) void ctdb_release_all_ips(struct ctdb_context *ctdb) { - struct ctdb_vnn *vnn; + struct ctdb_vnn *vnn, *next; int count = 0; - TDB_DATA data; if (ctdb->tunable.disable_ip_failover == 1) { return; } - for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { + for (vnn = ctdb->vnn; vnn != NULL; vnn = next) { + /* vnn can be freed below in release_ip_post() */ + next = vnn->next; + if (!ctdb_sys_have_ip(&vnn->public_address)) { ctdb_vnn_unassign_iface(ctdb, vnn); continue; @@ -2171,13 +2173,10 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) continue; } - data.dptr = (uint8_t *)ctdb_addr_to_str(&vnn->public_address); - data.dsize = strlen((char *)data.dptr) + 1; - ctdb_daemon_send_message(ctdb, ctdb->pnn, - CTDB_SRVID_RELEASE_IP, data); - - ctdb_vnn_unassign_iface(ctdb, vnn); - vnn->update_in_flight = false; + vnn = release_ip_post(ctdb, vnn, &vnn->public_address); + if (vnn != NULL) { + vnn->update_in_flight = false; + } count++; } -- 2.8.1 From 3dcbfaf4f012924a4a89e775c71674c9fd333e81 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 19 Aug 2016 16:30:46 +1000 Subject: [PATCH 6/7] ctdb-daemon: Rename takeover_callback_state -> release_ip_callback_state Many years ago takeover_callback_state was used for both IP takeover and release. Now it is only used when releasing an IP so rename it to improve clarity. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 976a50af6f045765c7bf1961e26efc3cba17f3ba) --- ctdb/server/ctdb_takeover.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index b369d03..48a65e8 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -433,12 +433,6 @@ static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb, return 0; } -struct takeover_callback_state { - struct ctdb_req_control_old *c; - ctdb_sock_addr *addr; - struct ctdb_vnn *vnn; -}; - struct ctdb_do_takeip_state { struct ctdb_req_control_old *c; struct ctdb_vnn *vnn; @@ -876,14 +870,20 @@ static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb, return vnn; } +struct release_ip_callback_state { + struct ctdb_req_control_old *c; + ctdb_sock_addr *addr; + struct ctdb_vnn *vnn; +}; + /* called when releaseip event finishes */ -static void release_ip_callback(struct ctdb_context *ctdb, int status, +static void release_ip_callback(struct ctdb_context *ctdb, int status, void *private_data) { - struct takeover_callback_state *state = - talloc_get_type(private_data, struct takeover_callback_state); + struct release_ip_callback_state *state = + talloc_get_type(private_data, struct release_ip_callback_state); if (status == -ETIME) { ctdb_ban_self(ctdb); @@ -908,7 +908,7 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, talloc_free(state); } -static int ctdb_releaseip_destructor(struct takeover_callback_state *state) +static int ctdb_releaseip_destructor(struct release_ip_callback_state *state) { if (state->vnn != NULL) { state->vnn->update_in_flight = false; @@ -925,7 +925,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, bool *async_reply) { int ret; - struct takeover_callback_state *state; + struct release_ip_callback_state *state; struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr; struct ctdb_vnn *vnn; char *iface; @@ -987,7 +987,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, iface, pip->pnn)); - state = talloc(ctdb, struct takeover_callback_state); + state = talloc(ctdb, struct release_ip_callback_state); if (state == NULL) { ctdb_set_error(ctdb, "Out of memory at %s:%d", __FILE__, __LINE__); -- 2.8.1 From 5a10bf72ef8e73519a38c6c5e179c16b2434c631 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 19 Aug 2016 16:38:50 +1000 Subject: [PATCH 7/7] ctdb-daemon: When releasing an IP, update PNN in callback When an error occurs so an IP address is not released then the PNN in the VNN is currently incorrectly updated. Instead, update the PNN in the callback when the release is successful. Also, explicitly update the PNN on redundant releases. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Sun Aug 21 22:45:33 CEST 2016 on sn-devel-144 (cherry picked from commit 6dc75c7d24325d2070eb7feab5399dbfda50da96) --- ctdb/server/ctdb_takeover.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 48a65e8..1f4d7ed 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -874,6 +874,7 @@ struct release_ip_callback_state { struct ctdb_req_control_old *c; ctdb_sock_addr *addr; struct ctdb_vnn *vnn; + uint32_t target_pnn; }; /* @@ -901,6 +902,7 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, } } + state->vnn->pnn = state->target_pnn; state->vnn = release_ip_post(ctdb, state->vnn, state->addr); /* the control succeeded */ @@ -937,16 +939,20 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, ctdb_addr_to_str(&pip->addr))); return 0; } - vnn->pnn = pip->pnn; /* stop any previous arps */ talloc_free(vnn->takeover_ctx); vnn->takeover_ctx = NULL; - /* Some ctdb tool commands (e.g. moveip) send - * lazy multicast to drop an IP from any node that isn't the - * intended new node. The following causes makes ctdbd ignore - * a release for any address it doesn't host. + /* RELEASE_IP controls are sent to all nodes that should not + * be hosting a particular IP. This serves 2 purposes. The + * first is to help resolve any inconsistencies. If a node + * does unexpectly host an IP then it will be released. The + * 2nd is to use a "redundant release" to tell non-takeover + * nodes where an IP is moving to. This is how "ctdb ip" can + * report the (likely) location of an IP by only asking the + * local node. Redundant releases need to update the PNN but + * are otherwise ignored. */ if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) { if (!ctdb_sys_have_ip(&pip->addr)) { @@ -954,6 +960,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, ctdb_addr_to_str(&pip->addr), vnn->public_netmask_bits, ctdb_vnn_iface_string(vnn))); + vnn->pnn = pip->pnn; ctdb_vnn_unassign_iface(ctdb, vnn); return 0; } @@ -962,6 +969,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n", ctdb_addr_to_str(&pip->addr), vnn->public_netmask_bits)); + vnn->pnn = pip->pnn; return 0; } } @@ -1005,6 +1013,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, return -1; } *state->addr = pip->addr; + state->target_pnn = pip->pnn; state->vnn = vnn; vnn->update_in_flight = true; -- 2.8.1