From d763076844923980c868fd85b8f61267d8222984 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 12 Nov 2019 12:04:22 +1100 Subject: [PATCH 01/11] ctdb-daemon: Rename ctdb_node private_data to transport_data This gives a casual reader a useful clue. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 53f8492caafa8556d0c2d3f272d08ce5ce098c25) --- ctdb/ib/ibw_ctdb.c | 11 ++++++++--- ctdb/ib/ibw_ctdb_init.c | 5 +++-- ctdb/include/ctdb_private.h | 2 +- ctdb/tcp/tcp_connect.c | 10 +++++----- ctdb/tcp/tcp_init.c | 8 ++++---- ctdb/tcp/tcp_io.c | 4 ++-- 6 files changed, 23 insertions(+), 17 deletions(-) diff --git a/ctdb/ib/ibw_ctdb.c b/ctdb/ib/ibw_ctdb.c index 458646faae0..53911240ff7 100644 --- a/ctdb/ib/ibw_ctdb.c +++ b/ctdb/ib/ibw_ctdb.c @@ -55,7 +55,8 @@ int ctdb_ibw_get_address(struct ctdb_context *ctdb, int ctdb_ibw_node_connect(struct ctdb_node *node) { - struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node); + struct ctdb_ibw_node *cn = talloc_get_type(node->transport_data, + struct ctdb_ibw_node); int rc; assert(cn!=NULL); @@ -118,7 +119,9 @@ int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn) case IBWC_CONNECTED: { /* after ibw_accept or ibw_connect */ struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node); if (node!=NULL) { /* after ibw_connect */ - struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node); + struct ctdb_ibw_node *cn = talloc_get_type( + node->transport_data, + struct ctdb_ibw_node); node->ctdb->upcalls->node_connected(node); ctdb_flush_cn_queue(cn); @@ -136,7 +139,9 @@ int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn) case IBWC_ERROR: { struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node); if (node!=NULL) { - struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node); + struct ctdb_ibw_node *cn = talloc_get_type( + 
node->transport_data, + struct ctdb_ibw_node); struct ibw_ctx *ictx = cn->conn->ctx; DEBUG(DEBUG_DEBUG, ("IBWC_ERROR, reconnecting...\n")); diff --git a/ctdb/ib/ibw_ctdb_init.c b/ctdb/ib/ibw_ctdb_init.c index 7e77ec08031..6fdb0d887cf 100644 --- a/ctdb/ib/ibw_ctdb_init.c +++ b/ctdb/ib/ibw_ctdb_init.c @@ -67,7 +67,7 @@ static int ctdb_ibw_add_node(struct ctdb_node *node) assert(cn!=NULL); cn->conn = ibw_conn_new(ictx, node); - node->private_data = (void *)cn; + node->transport_data = (void *)cn; return (cn->conn!=NULL ? 0 : -1); } @@ -153,7 +153,8 @@ int ctdb_flush_cn_queue(struct ctdb_ibw_node *cn) static int ctdb_ibw_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length) { - struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node); + struct ctdb_ibw_node *cn = talloc_get_type(node->transport_data, + struct ctdb_ibw_node); int rc; assert(length>=sizeof(uint32_t)); diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 0c66725d36c..61910d889e6 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -74,7 +74,7 @@ struct ctdb_node { struct ctdb_context *ctdb; ctdb_sock_addr address; const char *name; /* for debug messages */ - void *private_data; /* private to transport */ + void *transport_data; /* private to transport */ uint32_t pnn; uint32_t flags; diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index e0167740602..981dc05e6ed 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -43,7 +43,7 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node) { struct ctdb_tcp_node *tnode = talloc_get_type( - node->private_data, struct ctdb_tcp_node); + node->transport_data, struct ctdb_tcp_node); TALLOC_FREE(tnode->out_queue); TALLOC_FREE(tnode->connect_te); @@ -63,7 +63,7 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) { struct ctdb_node *node = talloc_get_type(private_data, struct ctdb_node); struct ctdb_tcp_node *tnode = talloc_get_type( - 
node->private_data, struct ctdb_tcp_node); + node->transport_data, struct ctdb_tcp_node); if (data == NULL) { node->ctdb->upcalls->node_dead(node); @@ -85,7 +85,7 @@ static void ctdb_node_connect_write(struct tevent_context *ev, { struct ctdb_node *node = talloc_get_type(private_data, struct ctdb_node); - struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, + struct ctdb_tcp_node *tnode = talloc_get_type(node->transport_data, struct ctdb_tcp_node); struct ctdb_context *ctdb = node->ctdb; int error = 0; @@ -167,7 +167,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, { struct ctdb_node *node = talloc_get_type(private_data, struct ctdb_node); - struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, + struct ctdb_tcp_node *tnode = talloc_get_type(node->transport_data, struct ctdb_tcp_node); struct ctdb_context *ctdb = node->ctdb; ctdb_sock_addr sock_in; @@ -300,7 +300,7 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, return; } - tnode = talloc_get_type_abort(node->private_data, + tnode = talloc_get_type_abort(node->transport_data, struct ctdb_tcp_node); if (tnode == NULL) { /* This can't happen - see ctdb_tcp_initialise() */ diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index dc92abd4e6c..24cbc93d0c0 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -58,7 +58,7 @@ static int ctdb_tcp_add_node(struct ctdb_node *node) tnode->out_fd = -1; tnode->ctdb = node->ctdb; - node->private_data = tnode; + node->transport_data = tnode; talloc_set_destructor(tnode, tnode_destructor); return 0; @@ -97,7 +97,7 @@ static int ctdb_tcp_connect_node(struct ctdb_node *node) { struct ctdb_context *ctdb = node->ctdb; struct ctdb_tcp_node *tnode = talloc_get_type( - node->private_data, struct ctdb_tcp_node); + node->transport_data, struct ctdb_tcp_node); /* startup connection to the other server - will happen on next event loop */ @@ -118,7 +118,7 @@ static int 
ctdb_tcp_connect_node(struct ctdb_node *node) static void ctdb_tcp_restart(struct ctdb_node *node) { struct ctdb_tcp_node *tnode = talloc_get_type( - node->private_data, struct ctdb_tcp_node); + node->transport_data, struct ctdb_tcp_node); DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); @@ -143,7 +143,7 @@ static void ctdb_tcp_shutdown(struct ctdb_context *ctdb) ctdb->private_data = NULL; for (i=0; i<ctdb->num_nodes; i++) { - TALLOC_FREE(ctdb->nodes[i]->private_data); + TALLOC_FREE(ctdb->nodes[i]->transport_data); } } diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c index 2d8ec0f7062..df9ca02b413 100644 --- a/ctdb/tcp/tcp_io.c +++ b/ctdb/tcp/tcp_io.c @@ -39,7 +39,7 @@ void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) { struct ctdb_node *node = talloc_get_type_abort(args, struct ctdb_node); struct ctdb_tcp_node *tnode = talloc_get_type_abort( - node->private_data, struct ctdb_tcp_node); + node->transport_data, struct ctdb_tcp_node); struct ctdb_req_header *hdr = (struct ctdb_req_header *)data; if (data == NULL) { @@ -86,7 +86,7 @@ failed: */ int ctdb_tcp_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length) { - struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, + struct ctdb_tcp_node *tnode = talloc_get_type(node->transport_data, struct ctdb_tcp_node); if (tnode->out_queue == NULL) { DBG_DEBUG("No outgoing connection, dropping packet\n"); -- 2.25.1 From d7abfca4e4078a7fcc2b659a9fa14cae67477564 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 12 Nov 2019 12:12:46 +1100 Subject: [PATCH 02/11] ctdb-daemon: Rename ctdb_context private_data to transport_data This gives a casual reader a useful clue. 
Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 750f3938e4fcd6743954db6b1132751a90ee6107) --- ctdb/ib/ibw_ctdb_init.c | 8 +++++--- ctdb/include/ctdb_private.h | 2 +- ctdb/tcp/tcp_connect.c | 7 ++++--- ctdb/tcp/tcp_init.c | 8 ++++---- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/ctdb/ib/ibw_ctdb_init.c b/ctdb/ib/ibw_ctdb_init.c index 6fdb0d887cf..f9d00c60605 100644 --- a/ctdb/ib/ibw_ctdb_init.c +++ b/ctdb/ib/ibw_ctdb_init.c @@ -40,7 +40,8 @@ static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog) { - struct ibw_ctx *ictx = talloc_get_type(ctdb->private_data, struct ibw_ctx); + struct ibw_ctx *ictx = talloc_get_type(ctdb->transport_data, + struct ibw_ctx); assert(ictx!=NULL); @@ -62,7 +63,8 @@ static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog) */ static int ctdb_ibw_add_node(struct ctdb_node *node) { - struct ibw_ctx *ictx = talloc_get_type(node->ctdb->private_data, struct ibw_ctx); + struct ibw_ctx *ictx = talloc_get_type(node->ctdb->transport_data, + struct ibw_ctx); struct ctdb_ibw_node *cn = talloc_zero(node, struct ctdb_ibw_node); assert(cn!=NULL); @@ -246,7 +248,7 @@ int ctdb_ibw_init(struct ctdb_context *ctdb) } ctdb->methods = &ctdb_ibw_methods; - ctdb->private_data = ictx; + ctdb->transport_data = ictx; DEBUG(DEBUG_DEBUG, ("ctdb_ibw_init succeeded.\n")); return 0; diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 61910d889e6..2c0658eabf3 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -286,7 +286,7 @@ struct ctdb_context { char *err_msg; const struct ctdb_methods *methods; /* transport methods */ const struct ctdb_upcalls *upcalls; /* transport upcalls */ - void *private_data; /* private to transport */ + void *transport_data; /* private to transport */ struct ctdb_db_context *db_list; struct srvid_context *srv; struct srvid_context *tunnels; diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 
981dc05e6ed..b49aa2047cb 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -277,7 +277,8 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, uint16_t flags, void *private_data) { struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); - struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp); + struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data, + struct ctdb_tcp); ctdb_sock_addr addr; socklen_t len; int fd; @@ -368,7 +369,7 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, */ static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb) { - struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, + struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data, struct ctdb_tcp); ctdb_sock_addr sock; int lock_fd, i; @@ -508,7 +509,7 @@ failed: */ int ctdb_tcp_listen(struct ctdb_context *ctdb) { - struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, + struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data, struct ctdb_tcp); ctdb_sock_addr sock; int sock_size; diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index 24cbc93d0c0..0f9423ad6fc 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -135,12 +135,12 @@ static void ctdb_tcp_restart(struct ctdb_node *node) */ static void ctdb_tcp_shutdown(struct ctdb_context *ctdb) { - struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, + struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data, struct ctdb_tcp); uint32_t i; talloc_free(ctcp); - ctdb->private_data = NULL; + ctdb->transport_data = NULL; for (i=0; i<ctdb->num_nodes; i++) { TALLOC_FREE(ctdb->nodes[i]->transport_data); @@ -191,7 +191,7 @@ static const struct ctdb_methods ctdb_tcp_methods = { static int tcp_ctcp_destructor(struct ctdb_tcp *ctcp) { - ctcp->ctdb->private_data = NULL; + ctcp->ctdb->transport_data = NULL; ctcp->ctdb->methods = NULL; return 0; @@ -209,7 +209,7 @@ int 
ctdb_tcp_init(struct ctdb_context *ctdb) ctcp->listen_fd = -1; ctcp->ctdb = ctdb; - ctdb->private_data = ctcp; + ctdb->transport_data = ctcp; ctdb->methods = &ctdb_tcp_methods; talloc_set_destructor(ctcp, tcp_ctcp_destructor); -- 2.25.1 From 470bde7996682b93fed347f2b6dd7831dd83463c Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 29 Feb 2020 12:26:19 +0100 Subject: [PATCH 03/11] ctdb-daemon: ensure restart() callback is called in half-connected state If NODE_FLAGS_DISCONNECTED is set the node can be in half-connected state. With this change we ensure to restart the transport for this case. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme Reviewed-by: Martin Schwenke (cherry picked from commit 6a4fa0785fc83561939fa41617d526eb96c1af89) --- ctdb/server/ctdb_server.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index ddff85b81c5..02a5883cdbd 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -301,6 +301,12 @@ done: */ void ctdb_node_dead(struct ctdb_node *node) { + if (node->ctdb->methods == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); + return; + } + + node->ctdb->methods->restart(node); if (node->flags & NODE_FLAGS_DISCONNECTED) { DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n", node->ctdb->name, node->name, @@ -315,13 +321,6 @@ void ctdb_node_dead(struct ctdb_node *node) DEBUG(DEBUG_ERR,("%s: node %s is dead: %u connected\n", node->ctdb->name, node->name, node->ctdb->num_connected)); ctdb_daemon_cancel_controls(node->ctdb, node); - - if (node->ctdb->methods == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); - return; - } - - node->ctdb->methods->restart(node); } /* -- 2.25.1 From c9c2e6a41f5649e680a4ed045f4dd73fb39ea006 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sun, 1 
Mar 2020 16:40:41 +1100 Subject: [PATCH 04/11] ctdb-daemon: more logical whitespace, debug modernisation BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Martin Schwenke Reviewed-by: Ralph Boehme (cherry picked from commit 15762a34559599cf908e30651a2d4c11560068ed) --- ctdb/server/ctdb_server.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index 02a5883cdbd..3fce791b27e 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -302,11 +302,11 @@ done: void ctdb_node_dead(struct ctdb_node *node) { if (node->ctdb->methods == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); + DBG_ERR("Can not restart transport while shutting down\n"); return; } - node->ctdb->methods->restart(node); + if (node->flags & NODE_FLAGS_DISCONNECTED) { DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n", node->ctdb->name, node->name, -- 2.25.1 From a1b3992831c12d885848892fe02b99d690077489 Mon Sep 17 00:00:00 2001 From: Noel Power Date: Sat, 29 Feb 2020 15:49:28 +0000 Subject: [PATCH 05/11] ctdb-tcp: move free of inbound queue to TCP restart Since commit 77deaadca8e8dbc3c92ea16893099c72f6dc874e, a nodeA which had previously accepted a connection from nodeB (where nodeB dies e.g. as a result of fencing) when nodeB attempts to connect again after restarting is always rejected with ctdb_listen_event: Incoming queue active, rejecting connection from w.x.y.z messages. Consolidate dead node handling in the TCP restart handling. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Noel Power Reviewed-by: Ralph Boehme Reviewed-by: Martin Schwenke (cherry picked from commit 0ff1b78fc2f0491f9e11131d0040bdaba8873770) --- ctdb/tcp/tcp_init.c | 2 +- ctdb/tcp/tcp_io.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index 0f9423ad6fc..a112b52fa0d 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -121,7 +121,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) node->transport_data, struct ctdb_tcp_node); DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); - + TALLOC_FREE(tnode->in_queue); ctdb_tcp_stop_connection(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c index df9ca02b413..bcb18fbf300 100644 --- a/ctdb/tcp/tcp_io.c +++ b/ctdb/tcp/tcp_io.c @@ -75,7 +75,6 @@ void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) return; failed: - TALLOC_FREE(tnode->in_queue); node->ctdb->upcalls->node_dead(node); TALLOC_FREE(data); -- 2.25.1 From b4f206a6495d39bda3dd67319485a61a246fa261 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 29 Feb 2020 12:13:12 +0100 Subject: [PATCH 06/11] ctdb-tcp: always call node_dead() upcall in ctdb_tcp_tnode_cb() ctdb_tcp_tnode_cb() is called when we receive data on the outgoing connection. This can happen when we get an EOF on the connection because the other side has closed. In this case data will be NULL. It would also be called if we received data from the peer. In this case data will not be NULL. The latter case is a fatal error though and we already call ctdb_tcp_stop_connection() for this case as well, which means even though the node is not fully connected anymore, by not calling the node_dead() upcall NODE_FLAGS_DISCONNECTED will not be set. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme Reviewed-by: Martin Schwenke (cherry picked from commit b83ef98c7466b2a81968555de83fb977bb6ca9f0) --- ctdb/tcp/tcp_connect.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index b49aa2047cb..a3e4a471774 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -65,9 +65,7 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) struct ctdb_tcp_node *tnode = talloc_get_type( node->transport_data, struct ctdb_tcp_node); - if (data == NULL) { - node->ctdb->upcalls->node_dead(node); - } + node->ctdb->upcalls->node_dead(node); ctdb_tcp_stop_connection(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, -- 2.25.1 From 543560194fe48adf4c72da6f9a68679a1b8e48ac Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 29 Feb 2020 12:28:20 +0100 Subject: [PATCH 07/11] ctdb-tcp: Remove redundant restart in ctdb_tcp_tnode_cb() The node dead upcall has already restarted the outgoing connection. There's no need to repeat it. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme Signed-off-by: Martin Schwenke (cherry picked from commit ea37ecdcd5960311f54a7a5510b88a654da23daa) --- ctdb/tcp/tcp_connect.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index a3e4a471774..6db6c6fb138 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -62,15 +62,9 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node) void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) { struct ctdb_node *node = talloc_get_type(private_data, struct ctdb_node); - struct ctdb_tcp_node *tnode = talloc_get_type( - node->transport_data, struct ctdb_tcp_node); node->ctdb->upcalls->node_dead(node); - ctdb_tcp_stop_connection(node); - tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, - timeval_current_ofs(3, 0), - ctdb_tcp_node_connect, node); TALLOC_FREE(data); } -- 2.25.1 From 375391efcc8a6ad3c34e93b6a28a1c5930cf46de Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Fri, 28 Feb 2020 11:36:00 +0100 Subject: [PATCH 08/11] ctdb-tcp: rename ctdb_tcp_stop_connection() to ctdb_tcp_stop_outgoing() No change in behaviour. This makes the code self-documenting. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme Reviewed-by: Martin Schwenke (cherry picked from commit 1e2a967ff41cc29c3a0d7f61a46937c68fdb90ba) --- ctdb/tcp/ctdb_tcp.h | 2 +- ctdb/tcp/tcp_connect.c | 12 ++++++------ ctdb/tcp/tcp_init.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h index daabad74297..095056e8544 100644 --- a/ctdb/tcp/ctdb_tcp.h +++ b/ctdb/tcp/ctdb_tcp.h @@ -48,7 +48,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, struct timeval t, void *private_data); void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args); void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data); -void ctdb_tcp_stop_connection(struct ctdb_node *node); +void ctdb_tcp_stop_outgoing(struct ctdb_node *node); #define CTDB_TCP_ALIGNMENT 8 diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 6db6c6fb138..cc0c7bd2e47 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -38,9 +38,9 @@ #include "ctdb_tcp.h" /* - stop any connecting (established or pending) to a node + stop any outgoing connection (established or pending) to a node */ -void ctdb_tcp_stop_connection(struct ctdb_node *node) +void ctdb_tcp_stop_outgoing(struct ctdb_node *node) { struct ctdb_tcp_node *tnode = talloc_get_type( node->transport_data, struct ctdb_tcp_node); @@ -90,7 +90,7 @@ static void ctdb_node_connect_write(struct tevent_context *ev, ret = getsockopt(tnode->out_fd, SOL_SOCKET, SO_ERROR, &error, &len); if (ret != 0 || error != 0) { - ctdb_tcp_stop_connection(node); + ctdb_tcp_stop_outgoing(node); tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, timeval_current_ofs(1, 0), ctdb_tcp_node_connect, node); @@ -128,7 +128,7 @@ static void ctdb_node_connect_write(struct tevent_context *ev, node->name); if (tnode->out_queue == NULL) { DBG_ERR("Failed to set up outgoing queue\n"); - ctdb_tcp_stop_connection(node); + 
ctdb_tcp_stop_outgoing(node); tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, timeval_current_ofs(1, 0), @@ -168,7 +168,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, ctdb_sock_addr sock_out; int ret; - ctdb_tcp_stop_connection(node); + ctdb_tcp_stop_outgoing(node); sock_out = node->address; @@ -252,7 +252,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, return; failed: - ctdb_tcp_stop_connection(node); + ctdb_tcp_stop_outgoing(node); tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, timeval_current_ofs(1, 0), diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index a112b52fa0d..b0da5ad4610 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -122,7 +122,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); TALLOC_FREE(tnode->in_queue); - ctdb_tcp_stop_connection(node); + ctdb_tcp_stop_outgoing(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, timeval_zero(), -- 2.25.1 From 9f2c5a5117aa00688f980ae3fae2d30d196ce150 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 29 Feb 2020 11:54:51 +0100 Subject: [PATCH 09/11] ctdb-tcp: add ctdb_tcp_stop_incoming() No change in behaviour. This makes the code self-documenting. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme Signed-off-by: Martin Schwenke (cherry picked from commit 2c73dbafba50b28e72a8ec7b4382fae42fca6d17) --- ctdb/tcp/ctdb_tcp.h | 1 + ctdb/tcp/tcp_connect.c | 10 ++++++++++ ctdb/tcp/tcp_init.c | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h index 095056e8544..cb8d66fa5dc 100644 --- a/ctdb/tcp/ctdb_tcp.h +++ b/ctdb/tcp/ctdb_tcp.h @@ -49,6 +49,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args); void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data); void ctdb_tcp_stop_outgoing(struct ctdb_node *node); +void ctdb_tcp_stop_incoming(struct ctdb_node *node); #define CTDB_TCP_ALIGNMENT 8 diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index cc0c7bd2e47..9c09f3f4e74 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -54,6 +54,16 @@ void ctdb_tcp_stop_outgoing(struct ctdb_node *node) } } +/* + stop incoming connection to a node + */ +void ctdb_tcp_stop_incoming(struct ctdb_node *node) +{ + struct ctdb_tcp_node *tnode = talloc_get_type( + node->transport_data, struct ctdb_tcp_node); + + TALLOC_FREE(tnode->in_queue); +} /* called when a complete packet has come in - should not happen on this socket diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index b0da5ad4610..fae1fa99195 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -121,7 +121,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) node->transport_data, struct ctdb_tcp_node); DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); - TALLOC_FREE(tnode->in_queue); + ctdb_tcp_stop_incoming(node); ctdb_tcp_stop_outgoing(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, -- 2.25.1 From 8b39879f8c042deb051c03493ef4ecc5a13633d3 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 6 
Mar 2020 15:59:32 +1100 Subject: [PATCH 10/11] ctdb-tcp: Factor out function ctdb_tcp_start_outgoing() BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Amitay Isaacs Signed-off-by: Martin Schwenke (cherry picked from commit 3c8747fe29486a4f95308b335a5e3ec1807f62cb) --- ctdb/tcp/tcp_connect.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 9c09f3f4e74..6065829a44e 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -164,11 +164,8 @@ static void ctdb_node_connect_write(struct tevent_context *ev, /* called when we should try and establish a tcp connection to a node */ -void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, - struct timeval t, void *private_data) +static void ctdb_tcp_start_outgoing(struct ctdb_node *node) { - struct ctdb_node *node = talloc_get_type(private_data, - struct ctdb_node); struct ctdb_tcp_node *tnode = talloc_get_type(node->transport_data, struct ctdb_tcp_node); struct ctdb_context *ctdb = node->ctdb; @@ -178,8 +175,6 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, ctdb_sock_addr sock_out; int ret; - ctdb_tcp_stop_outgoing(node); - sock_out = node->address; tnode->out_fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP); @@ -270,6 +265,18 @@ failed: node); } +void ctdb_tcp_node_connect(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, + void *private_data) +{ + struct ctdb_node *node = talloc_get_type_abort(private_data, + struct ctdb_node); + + ctdb_tcp_stop_outgoing(node); + ctdb_tcp_start_outgoing(node); +} + /* called when we get contacted by another node currently makes no attempt to check if the connection is really from a ctdb -- 2.25.1 From ccaaf6ba33ec6801c4a7e867d594b3f27c8f1e71 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 6 Mar 2020 16:11:23 +1100 Subject: [PATCH 11/11] ctdb-tcp: Do not stop outbound 
connection in ctdb_tcp_node_connect() The only place the outgoing connection needs to be stopped is when there is a timeout when waiting for the connection to become writable. Add a new function ctdb_tcp_node_connect_timeout() to handle this case. All of the other cases are attempts to establish a new outgoing connection (initial attempt, retry after an error or disconnect, ...) so drop stopping the connection in those cases. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Amitay Isaacs Signed-off-by: Martin Schwenke Autobuild-User(master): Martin Schwenke Autobuild-Date(master): Thu Mar 12 05:29:20 UTC 2020 on sn-devel-184 (cherry picked from commit 319c93f0c6a949545229b616dfbd4f51baf11171) --- ctdb/tcp/tcp_connect.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 6065829a44e..6ce3dc16a6d 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -161,6 +161,11 @@ static void ctdb_node_connect_write(struct tevent_context *ev, } +static void ctdb_tcp_node_connect_timeout(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, + void *private_data); + /* called when we should try and establish a tcp connection to a node */ @@ -251,7 +256,7 @@ static void ctdb_tcp_start_outgoing(struct ctdb_node *node) tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, timeval_current_ofs(1, 0), - ctdb_tcp_node_connect, + ctdb_tcp_node_connect_timeout, node); return; @@ -273,6 +278,17 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct ctdb_node *node = talloc_get_type_abort(private_data, struct ctdb_node); + ctdb_tcp_start_outgoing(node); +} + +static void ctdb_tcp_node_connect_timeout(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, + void *private_data) +{ + struct ctdb_node *node = talloc_get_type_abort(private_data, + struct ctdb_node); + ctdb_tcp_stop_outgoing(node); ctdb_tcp_start_outgoing(node); } -- 
2.25.1