From 03215385f55b25afaacaac43c3764671386cdd02 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 4 Mar 2016 15:04:13 +1100 Subject: [PATCH 1/9] ctdb-takeover: Do not kill smbd processes on releasing IP CTDB already notifies Samba with RELEASE_IP message. Samba can take appropriate action based on that. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit b8272d835d6e5186568237cd8b7a2105884c0515) --- ctdb/server/ctdb_takeover.c | 42 ------------------------------------------ 1 file changed, 42 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index a613aa0..b32772f 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -815,44 +815,6 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, return 0; } -/* - kill any clients that are registered with a IP that is being released - */ -static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr) -{ - struct ctdb_client_ip *ip; - - DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n", - ctdb_addr_to_str(addr))); - - for (ip=ctdb->client_ip_list; ip; ip=ip->next) { - ctdb_sock_addr tmp_addr; - - tmp_addr = ip->addr; - DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", - ip->client_id, - ctdb_addr_to_str(&ip->addr))); - - if (ctdb_same_ip(&tmp_addr, addr)) { - struct ctdb_client *client = reqid_find(ctdb->idr, - ip->client_id, - struct ctdb_client); - DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", - ip->client_id, - ctdb_addr_to_str(&ip->addr), - client->pid)); - - if (client->pid != 0) { - DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n", - (unsigned)client->pid, - ctdb_addr_to_str(addr), - ip->client_id)); - kill(client->pid, SIGKILL); - } - } - } -} - static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn) { DLIST_REMOVE(ctdb->vnn, vnn); @@ -898,9 +860,6 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data); - /* kill clients that have registered with this IP */ - release_kill_clients(ctdb, state->addr); - ctdb_vnn_unassign_iface(ctdb, state->vnn); /* Process the IP if it has been marked for deletion */ @@ -2415,7 +2374,6 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) ctdb_vnn_iface_string(vnn), ctdb_addr_to_str(&vnn->public_address), vnn->public_netmask_bits); - release_kill_clients(ctdb, &vnn->public_address); ctdb_vnn_unassign_iface(ctdb, vnn); vnn->update_in_flight = false; count++; -- 2.8.1 From 47fdb5d85f68fe92cb2464876bcb1f8bdae5b336 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Sat, 5 Mar 2016 14:05:21 +1100 Subject: [PATCH 2/9] ctdb-takeover: Inform clients when dropping all IP addresses CTDB releases all IPs in following cases: starting up, shutting down, node gets banned, node does not come out of recovery for a long time. Always inform samba when CTDB releases IP addresses. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 00b9e76904bb1108e0f06d0dba4df89394d58252) --- ctdb/server/ctdb_takeover.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index b32772f..916f82c 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -2337,6 +2337,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) { struct ctdb_vnn *vnn; int count = 0; + TDB_DATA data; if (ctdb->tunable.disable_ip_failover == 1) { return; @@ -2374,6 +2375,16 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) ctdb_vnn_iface_string(vnn), ctdb_addr_to_str(&vnn->public_address), vnn->public_netmask_bits); + + data.dptr = (uint8_t *)talloc_strdup( + vnn, ctdb_addr_to_str(&vnn->public_address)); + if (data.dptr != NULL) { + data.dsize = strlen((char *)data.dptr) + 1; + ctdb_daemon_send_message(ctdb, ctdb->pnn, + CTDB_SRVID_RELEASE_IP, data); + talloc_free(data.dptr); + } + ctdb_vnn_unassign_iface(ctdb, vnn); vnn->update_in_flight = false; count++; -- 2.8.1 From 074a1c84a1927914ee489b7dcecb5a8992eb310c Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 8 Aug 2016 07:09:38 +1000 Subject: [PATCH 3/9] ctdb-daemon: Try to release IP address even if interface is unknown The "releaseip" event in 10.interface will determine the interface and do the right thing. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 509491a868ed01bfc5a970bd36eea4b01130853a) --- ctdb/server/ctdb_takeover.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 916f82c..e32dc1a 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -2348,9 +2348,6 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) ctdb_vnn_unassign_iface(ctdb, vnn); continue; } - if (!vnn->iface) { - continue; - } /* Don't allow multiple releases at once. Some code, * particularly ctdb_tickle_sentenced_connections() is -- 2.8.1 From 20005cb19b339a94b867435725f969aaa646c75b Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sat, 30 Jul 2016 11:12:19 +1000 Subject: [PATCH 4/9] ctdb-daemon: Do not update the VNN state on RELEASE_IP failure If RELEASE_IP fails then updating the VNN makes it inconsistent with reality. Instead, log the failure and move on to the next IP address. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit ca22373231918dab4e94cf1bab03253aadd61993) --- ctdb/server/ctdb_takeover.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index e32dc1a..e37eb7a 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -2369,9 +2369,21 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) ctdb_vnn_iface_string(vnn))); ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u", - ctdb_vnn_iface_string(vnn), - ctdb_addr_to_str(&vnn->public_address), - vnn->public_netmask_bits); + ctdb_vnn_iface_string(vnn), + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits); + /* releaseip timeouts are converted to success, so to + * detect failures just check if the IP address is + * still there... + */ + if (ctdb_sys_have_ip(&vnn->public_address)) { + DEBUG(DEBUG_ERR, + (__location__ + " IP address %s not released\n", + ctdb_addr_to_str(&vnn->public_address))); + vnn->update_in_flight = false; + continue; + } data.dptr = (uint8_t *)talloc_strdup( vnn, ctdb_addr_to_str(&vnn->public_address)); -- 2.8.1 From 25658495da5e104437fa5c393f8a7053b528fb04 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 11 Aug 2016 13:41:12 +1000 Subject: [PATCH 5/9] ctdb-daemon: Do not copy address for RELEASE_IP message If there's an allocation failure then the implicit early return in CTDB_NO_MEMORY_VOID() means that no reply is sent to the control. ctdb_daemon_send_message() makes a copy of the data, so don't copy it here and remove an unnecessary chance of failure. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit e653c8bb4a7bd712351a4ead3997c61b22c46f8d) --- ctdb/server/ctdb_takeover.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index e37eb7a..d9353df 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -852,8 +852,7 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, /* send a message to all clients of this node telling them that the cluster has been reconfigured and they should release any sockets on this IP */ - data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr)); - CTDB_NO_MEMORY_VOID(ctdb, data.dptr); + data.dptr = (uint8_t *) ctdb_addr_to_str(state->addr); data.dsize = strlen((char *)data.dptr)+1; DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr)); @@ -2385,14 +2384,10 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) continue; } - data.dptr = (uint8_t *)talloc_strdup( - vnn, ctdb_addr_to_str(&vnn->public_address)); - if (data.dptr != NULL) { - data.dsize = strlen((char *)data.dptr) + 1; - ctdb_daemon_send_message(ctdb, ctdb->pnn, - CTDB_SRVID_RELEASE_IP, data); - talloc_free(data.dptr); - } + data.dptr = (uint8_t *)ctdb_addr_to_str(&vnn->public_address); + data.dsize = strlen((char *)data.dptr) + 1; + ctdb_daemon_send_message(ctdb, ctdb->pnn, + CTDB_SRVID_RELEASE_IP, data); ctdb_vnn_unassign_iface(ctdb, vnn); vnn->update_in_flight = false; -- 2.8.1 From bc2abf6f8a188ad96ae35eb6c12eec03e242f1fb Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 11 Aug 2016 13:57:43 +1000 Subject: [PATCH 6/9] ctdb-daemon: Factor out new function release_ip_post() This contains the cleanup that needs to be done after an IP address is released from an interface. state->vnn is set to the return value from release_ip_post(), which is either the original VNN, or NULL if it was deleted. This allows correct handling of the in-flight flag in the destructor for state. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 46c5136e4e4bd291cbb96395374c9b133f5d8ad8) --- ctdb/server/ctdb_takeover.c | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index d9353df..5966e36 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -823,6 +823,32 @@ static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn) talloc_free(vnn); } +static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb, + struct ctdb_vnn *vnn, + ctdb_sock_addr *addr) +{ + TDB_DATA data; + + /* Send a message to all clients of this node telling them + * that the cluster has been reconfigured and they should + * close any connections on this IP address + */ + data.dptr = (uint8_t *)ctdb_addr_to_str(addr); + data.dsize = strlen((char *)data.dptr)+1; + DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr)); + ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data); + + ctdb_vnn_unassign_iface(ctdb, vnn); + + /* Process the IP if it has been marked for deletion */ + if (vnn->delete_pending) { + do_delete_ip(ctdb, vnn); + return NULL; + } + + return vnn; +} + /* called when releaseip event finishes */ @@ -831,7 +857,6 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, { struct takeover_callback_state *state = talloc_get_type(private_data, struct takeover_callback_state); - TDB_DATA data; if (status == -ETIME) { ctdb_ban_self(ctdb); @@ -849,23 +874,7 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, } } - /* send a message to all clients of this node telling them - that the cluster has been reconfigured and they should - release any sockets on this IP */ - data.dptr = (uint8_t *) ctdb_addr_to_str(state->addr); - data.dsize = strlen((char *)data.dptr)+1; - - DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr)); - - ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data); - - ctdb_vnn_unassign_iface(ctdb, state->vnn); - - /* Process the IP if it has been marked for deletion */ - if (state->vnn->delete_pending) { - do_delete_ip(ctdb, state->vnn); - state->vnn = NULL; - } + state->vnn = release_ip_post(ctdb, state->vnn, state->addr); /* the control succeeded */ ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL); -- 2.8.1 From 4c65f4049481079f3f3211ef99e09e6691d1097a Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 11 Aug 2016 14:07:44 +1000 Subject: [PATCH 7/9] ctdb-daemon: Use release_ip_post() when releasing all IP addresses This has the advantage of using common code. Also, if there was previously a failed attempt to release the IP address as part of a delete, then this will finish processing the delete. Extra care needs to be taken when a VNN is actually deleted. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit d2a91394f55a2e0152bf470dac2608618db13b1f) --- ctdb/server/ctdb_takeover.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 5966e36..8cf4405 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -2343,15 +2343,17 @@ void ctdb_takeover_client_destructor_hook(struct ctdb_client *client) void ctdb_release_all_ips(struct ctdb_context *ctdb) { - struct ctdb_vnn *vnn; + struct ctdb_vnn *vnn, *next; int count = 0; - TDB_DATA data; if (ctdb->tunable.disable_ip_failover == 1) { return; } - for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { + for (vnn = ctdb->vnn; vnn != NULL; vnn = next) { + /* vnn can be freed below in release_ip_post() */ + next = vnn->next; + if (!ctdb_sys_have_ip(&vnn->public_address)) { ctdb_vnn_unassign_iface(ctdb, vnn); continue; @@ -2393,13 +2395,10 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) continue; } - data.dptr = (uint8_t *)ctdb_addr_to_str(&vnn->public_address); - data.dsize = strlen((char *)data.dptr) + 1; - ctdb_daemon_send_message(ctdb, ctdb->pnn, - CTDB_SRVID_RELEASE_IP, data); - - ctdb_vnn_unassign_iface(ctdb, vnn); - vnn->update_in_flight = false; + vnn = release_ip_post(ctdb, vnn, &vnn->public_address); + if (vnn != NULL) { + vnn->update_in_flight = false; + } count++; } -- 2.8.1 From ffb5ef5a4dfb8a48d97d8766b77eda153c484cc7 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 19 Aug 2016 16:30:46 +1000 Subject: [PATCH 8/9] ctdb-daemon: Rename takeover_callback_state -> release_ip_callback_state Many years ago takeover_callback_state was used for both IP takeover and release. Now it is only used when releasing an IP so rename it to improve clarity. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 976a50af6f045765c7bf1961e26efc3cba17f3ba) --- ctdb/server/ctdb_takeover.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 8cf4405..98865f1 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -403,12 +403,6 @@ static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb, return 0; } -struct takeover_callback_state { - struct ctdb_req_control_old *c; - ctdb_sock_addr *addr; - struct ctdb_vnn *vnn; -}; - struct ctdb_do_takeip_state { struct ctdb_req_control_old *c; struct ctdb_vnn *vnn; @@ -849,14 +843,20 @@ static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb, return vnn; } +struct release_ip_callback_state { + struct ctdb_req_control_old *c; + ctdb_sock_addr *addr; + struct ctdb_vnn *vnn; +}; + /* called when releaseip event finishes */ -static void release_ip_callback(struct ctdb_context *ctdb, int status, +static void release_ip_callback(struct ctdb_context *ctdb, int status, void *private_data) { - struct takeover_callback_state *state = - talloc_get_type(private_data, struct takeover_callback_state); + struct release_ip_callback_state *state = + talloc_get_type(private_data, struct release_ip_callback_state); if (status == -ETIME) { ctdb_ban_self(ctdb); @@ -881,7 +881,7 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, talloc_free(state); } -static int ctdb_releaseip_destructor(struct takeover_callback_state *state) +static int ctdb_releaseip_destructor(struct release_ip_callback_state *state) { if (state->vnn != NULL) { state->vnn->update_in_flight = false; @@ -898,7 +898,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, bool *async_reply) { int ret; - struct takeover_callback_state *state; + struct release_ip_callback_state *state; struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr; struct ctdb_vnn *vnn; char *iface; @@ -960,7 +960,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, iface, pip->pnn)); - state = talloc(ctdb, struct takeover_callback_state); + state = talloc(ctdb, struct release_ip_callback_state); if (state == NULL) { ctdb_set_error(ctdb, "Out of memory at %s:%d", __FILE__, __LINE__); -- 2.8.1 From 4c9811f4889f5704d8b146905dd85937e09e83d9 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 19 Aug 2016 16:38:50 +1000 Subject: [PATCH 9/9] ctdb-daemon: When releasing an IP, update PNN in callback When an error occurs so an IP address is not released then the PNN in the VNN is currently incorrectly updated. Instead, update the PNN in the callback when the release is successful. Also, explicitly update the PNN on redundant releases. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12158 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Sun Aug 21 22:45:33 CEST 2016 on sn-devel-144 (cherry picked from commit 6dc75c7d24325d2070eb7feab5399dbfda50da96) --- ctdb/server/ctdb_takeover.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 98865f1..d998d27 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -847,6 +847,7 @@ struct release_ip_callback_state { struct ctdb_req_control_old *c; ctdb_sock_addr *addr; struct ctdb_vnn *vnn; + uint32_t target_pnn; }; /* @@ -874,6 +875,7 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, } } + state->vnn->pnn = state->target_pnn; state->vnn = release_ip_post(ctdb, state->vnn, state->addr); /* the control succeeded */ @@ -910,16 +912,20 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, ctdb_addr_to_str(&pip->addr))); return 0; } - vnn->pnn = pip->pnn; /* stop any previous arps */ talloc_free(vnn->takeover_ctx); vnn->takeover_ctx = NULL; - /* Some ctdb tool commands (e.g. moveip, rebalanceip) send - * lazy multicast to drop an IP from any node that isn't the - * intended new node. The following causes makes ctdbd ignore - * a release for any address it doesn't host. + /* RELEASE_IP controls are sent to all nodes that should not + * be hosting a particular IP. This serves 2 purposes. The + * first is to help resolve any inconsistencies. If a node + * does unexpectly host an IP then it will be released. The + * 2nd is to use a "redundant release" to tell non-takeover + * nodes where an IP is moving to. This is how "ctdb ip" can + * report the (likely) location of an IP by only asking the + * local node. Redundant releases need to update the PNN but + * are otherwise ignored. */ if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) { if (!ctdb_sys_have_ip(&pip->addr)) { @@ -927,6 +933,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, ctdb_addr_to_str(&pip->addr), vnn->public_netmask_bits, ctdb_vnn_iface_string(vnn))); + vnn->pnn = pip->pnn; ctdb_vnn_unassign_iface(ctdb, vnn); return 0; } @@ -935,6 +942,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n", ctdb_addr_to_str(&pip->addr), vnn->public_netmask_bits)); + vnn->pnn = pip->pnn; return 0; } } @@ -978,6 +986,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, return -1; } *state->addr = pip->addr; + state->target_pnn = pip->pnn; state->vnn = vnn; vnn->update_in_flight = true; -- 2.8.1