The Samba-Bugzilla – Attachment 15862 Details for
Bug 14295
Starting ctdb node that was powered off hard before results in recovery loop
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch for 4.11 cherry-picked from master (after some preliminary cosmetic cherry-picks)
bug14295-v411.patch (text/plain), 30.02 KB, created by
Martin Schwenke
on 2020-03-14 02:54:39 UTC
(
hide
)
Description:
Patch for 4.11 cherry-picked from master (after some preliminary cosmetic cherry-picks)
Filename:
MIME Type:
Creator:
Martin Schwenke
Created:
2020-03-14 02:54:39 UTC
Size:
30.02 KB
patch
obsolete
>From f3692f8cc68f00b985655159e8235dd0a8448abc Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 12 Nov 2019 12:04:22 +1100 >Subject: [PATCH 01/11] ctdb-daemon: Rename ctdb_node private_data to > transport_data > >This gives a casual reader a useful clue. > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 53f8492caafa8556d0c2d3f272d08ce5ce098c25) >--- > ctdb/ib/ibw_ctdb.c | 11 ++++++++--- > ctdb/ib/ibw_ctdb_init.c | 5 +++-- > ctdb/include/ctdb_private.h | 2 +- > ctdb/tcp/tcp_connect.c | 10 +++++----- > ctdb/tcp/tcp_init.c | 8 ++++---- > ctdb/tcp/tcp_io.c | 4 ++-- > 6 files changed, 23 insertions(+), 17 deletions(-) > >diff --git a/ctdb/ib/ibw_ctdb.c b/ctdb/ib/ibw_ctdb.c >index 458646faae0..53911240ff7 100644 >--- a/ctdb/ib/ibw_ctdb.c >+++ b/ctdb/ib/ibw_ctdb.c >@@ -55,7 +55,8 @@ int ctdb_ibw_get_address(struct ctdb_context *ctdb, > > int ctdb_ibw_node_connect(struct ctdb_node *node) > { >- struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node); >+ struct ctdb_ibw_node *cn = talloc_get_type(node->transport_data, >+ struct ctdb_ibw_node); > int rc; > > assert(cn!=NULL); >@@ -118,7 +119,9 @@ int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn) > case IBWC_CONNECTED: { /* after ibw_accept or ibw_connect */ > struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node); > if (node!=NULL) { /* after ibw_connect */ >- struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node); >+ struct ctdb_ibw_node *cn = talloc_get_type( >+ node->transport_data, >+ struct ctdb_ibw_node); > > node->ctdb->upcalls->node_connected(node); > ctdb_flush_cn_queue(cn); >@@ -136,7 +139,9 @@ int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn) > case IBWC_ERROR: { > struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node); > if (node!=NULL) { >- struct 
ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node); >+ struct ctdb_ibw_node *cn = talloc_get_type( >+ node->transport_data, >+ struct ctdb_ibw_node); > struct ibw_ctx *ictx = cn->conn->ctx; > > DEBUG(DEBUG_DEBUG, ("IBWC_ERROR, reconnecting...\n")); >diff --git a/ctdb/ib/ibw_ctdb_init.c b/ctdb/ib/ibw_ctdb_init.c >index 7e77ec08031..6fdb0d887cf 100644 >--- a/ctdb/ib/ibw_ctdb_init.c >+++ b/ctdb/ib/ibw_ctdb_init.c >@@ -67,7 +67,7 @@ static int ctdb_ibw_add_node(struct ctdb_node *node) > > assert(cn!=NULL); > cn->conn = ibw_conn_new(ictx, node); >- node->private_data = (void *)cn; >+ node->transport_data = (void *)cn; > > return (cn->conn!=NULL ? 0 : -1); > } >@@ -153,7 +153,8 @@ int ctdb_flush_cn_queue(struct ctdb_ibw_node *cn) > > static int ctdb_ibw_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length) > { >- struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node); >+ struct ctdb_ibw_node *cn = talloc_get_type(node->transport_data, >+ struct ctdb_ibw_node); > int rc; > > assert(length>=sizeof(uint32_t)); >diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h >index 1f168dae2b8..e9e4d6a7f06 100644 >--- a/ctdb/include/ctdb_private.h >+++ b/ctdb/include/ctdb_private.h >@@ -74,7 +74,7 @@ struct ctdb_node { > struct ctdb_context *ctdb; > ctdb_sock_addr address; > const char *name; /* for debug messages */ >- void *private_data; /* private to transport */ >+ void *transport_data; /* private to transport */ > uint32_t pnn; > uint32_t flags; > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 04897f44249..2261d730d52 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -43,7 +43,7 @@ > void ctdb_tcp_stop_connection(struct ctdb_node *node) > { > struct ctdb_tcp_node *tnode = talloc_get_type( >- node->private_data, struct ctdb_tcp_node); >+ node->transport_data, struct ctdb_tcp_node); > > TALLOC_FREE(tnode->out_queue); > TALLOC_FREE(tnode->connect_te); >@@ 
-63,7 +63,7 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) > { > struct ctdb_node *node = talloc_get_type(private_data, struct ctdb_node); > struct ctdb_tcp_node *tnode = talloc_get_type( >- node->private_data, struct ctdb_tcp_node); >+ node->transport_data, struct ctdb_tcp_node); > > if (data == NULL) { > node->ctdb->upcalls->node_dead(node); >@@ -85,7 +85,7 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > { > struct ctdb_node *node = talloc_get_type(private_data, > struct ctdb_node); >- struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, >+ struct ctdb_tcp_node *tnode = talloc_get_type(node->transport_data, > struct ctdb_tcp_node); > struct ctdb_context *ctdb = node->ctdb; > int error = 0; >@@ -167,7 +167,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > { > struct ctdb_node *node = talloc_get_type(private_data, > struct ctdb_node); >- struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, >+ struct ctdb_tcp_node *tnode = talloc_get_type(node->transport_data, > struct ctdb_tcp_node); > struct ctdb_context *ctdb = node->ctdb; > ctdb_sock_addr sock_in; >@@ -300,7 +300,7 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, > return; > } > >- tnode = talloc_get_type_abort(node->private_data, >+ tnode = talloc_get_type_abort(node->transport_data, > struct ctdb_tcp_node); > if (tnode == NULL) { > /* This can't happen - see ctdb_tcp_initialise() */ >diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c >index 0eb9799ac4a..9ad13aabc74 100644 >--- a/ctdb/tcp/tcp_init.c >+++ b/ctdb/tcp/tcp_init.c >@@ -58,7 +58,7 @@ static int ctdb_tcp_add_node(struct ctdb_node *node) > tnode->out_fd = -1; > tnode->ctdb = node->ctdb; > >- node->private_data = tnode; >+ node->transport_data = tnode; > talloc_set_destructor(tnode, tnode_destructor); > > return 0; >@@ -97,7 +97,7 @@ static int ctdb_tcp_connect_node(struct ctdb_node *node) > { > struct 
ctdb_context *ctdb = node->ctdb; > struct ctdb_tcp_node *tnode = talloc_get_type( >- node->private_data, struct ctdb_tcp_node); >+ node->transport_data, struct ctdb_tcp_node); > > /* startup connection to the other server - will happen on > next event loop */ >@@ -118,7 +118,7 @@ static int ctdb_tcp_connect_node(struct ctdb_node *node) > static void ctdb_tcp_restart(struct ctdb_node *node) > { > struct ctdb_tcp_node *tnode = talloc_get_type( >- node->private_data, struct ctdb_tcp_node); >+ node->transport_data, struct ctdb_tcp_node); > > DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); > >@@ -143,7 +143,7 @@ static void ctdb_tcp_shutdown(struct ctdb_context *ctdb) > ctdb->private_data = NULL; > > for (i=0; i<ctdb->num_nodes; i++) { >- TALLOC_FREE(ctdb->nodes[i]->private_data); >+ TALLOC_FREE(ctdb->nodes[i]->transport_data); > } > } > >diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c >index 2d8ec0f7062..df9ca02b413 100644 >--- a/ctdb/tcp/tcp_io.c >+++ b/ctdb/tcp/tcp_io.c >@@ -39,7 +39,7 @@ void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) > { > struct ctdb_node *node = talloc_get_type_abort(args, struct ctdb_node); > struct ctdb_tcp_node *tnode = talloc_get_type_abort( >- node->private_data, struct ctdb_tcp_node); >+ node->transport_data, struct ctdb_tcp_node); > struct ctdb_req_header *hdr = (struct ctdb_req_header *)data; > > if (data == NULL) { >@@ -86,7 +86,7 @@ failed: > */ > int ctdb_tcp_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length) > { >- struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, >+ struct ctdb_tcp_node *tnode = talloc_get_type(node->transport_data, > struct ctdb_tcp_node); > if (tnode->out_queue == NULL) { > DBG_DEBUG("No outgoing connection, dropping packet\n"); >-- >2.25.1 > > >From 8c47a57a988d6ae38009394bc04fff588d606964 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 12 Nov 2019 12:12:46 +1100 >Subject: [PATCH 02/11] ctdb-daemon: Rename 
ctdb_context private_data to > transport_data > >This gives a casual reader a useful clue. > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 750f3938e4fcd6743954db6b1132751a90ee6107) >--- > ctdb/ib/ibw_ctdb_init.c | 8 +++++--- > ctdb/include/ctdb_private.h | 2 +- > ctdb/tcp/tcp_connect.c | 7 ++++--- > ctdb/tcp/tcp_init.c | 8 ++++---- > 4 files changed, 14 insertions(+), 11 deletions(-) > >diff --git a/ctdb/ib/ibw_ctdb_init.c b/ctdb/ib/ibw_ctdb_init.c >index 6fdb0d887cf..f9d00c60605 100644 >--- a/ctdb/ib/ibw_ctdb_init.c >+++ b/ctdb/ib/ibw_ctdb_init.c >@@ -40,7 +40,8 @@ > > static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog) > { >- struct ibw_ctx *ictx = talloc_get_type(ctdb->private_data, struct ibw_ctx); >+ struct ibw_ctx *ictx = talloc_get_type(ctdb->transport_data, >+ struct ibw_ctx); > > assert(ictx!=NULL); > >@@ -62,7 +63,8 @@ static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog) > */ > static int ctdb_ibw_add_node(struct ctdb_node *node) > { >- struct ibw_ctx *ictx = talloc_get_type(node->ctdb->private_data, struct ibw_ctx); >+ struct ibw_ctx *ictx = talloc_get_type(node->ctdb->transport_data, >+ struct ibw_ctx); > struct ctdb_ibw_node *cn = talloc_zero(node, struct ctdb_ibw_node); > > assert(cn!=NULL); >@@ -246,7 +248,7 @@ int ctdb_ibw_init(struct ctdb_context *ctdb) > } > > ctdb->methods = &ctdb_ibw_methods; >- ctdb->private_data = ictx; >+ ctdb->transport_data = ictx; > > DEBUG(DEBUG_DEBUG, ("ctdb_ibw_init succeeded.\n")); > return 0; >diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h >index e9e4d6a7f06..7f160c0c9db 100644 >--- a/ctdb/include/ctdb_private.h >+++ b/ctdb/include/ctdb_private.h >@@ -286,7 +286,7 @@ struct ctdb_context { > char *err_msg; > const struct ctdb_methods *methods; /* transport methods */ > const struct ctdb_upcalls *upcalls; /* transport upcalls */ >- void *private_data; /* private to transport */ >+ void 
*transport_data; /* private to transport */ > struct ctdb_db_context *db_list; > struct srvid_context *srv; > struct srvid_context *tunnels; >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 2261d730d52..3e9cf062184 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -277,7 +277,8 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, > uint16_t flags, void *private_data) > { > struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); >- struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp); >+ struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data, >+ struct ctdb_tcp); > ctdb_sock_addr addr; > socklen_t len; > int fd; >@@ -368,7 +369,7 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, > */ > static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb) > { >- struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, >+ struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data, > struct ctdb_tcp); > ctdb_sock_addr sock; > int lock_fd; >@@ -509,7 +510,7 @@ failed: > */ > int ctdb_tcp_listen(struct ctdb_context *ctdb) > { >- struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, >+ struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data, > struct ctdb_tcp); > ctdb_sock_addr sock; > int sock_size; >diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c >index 9ad13aabc74..5bd9e2f3a09 100644 >--- a/ctdb/tcp/tcp_init.c >+++ b/ctdb/tcp/tcp_init.c >@@ -135,12 +135,12 @@ static void ctdb_tcp_restart(struct ctdb_node *node) > */ > static void ctdb_tcp_shutdown(struct ctdb_context *ctdb) > { >- struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, >+ struct ctdb_tcp *ctcp = talloc_get_type(ctdb->transport_data, > struct ctdb_tcp); > uint32_t i; > > talloc_free(ctcp); >- ctdb->private_data = NULL; >+ ctdb->transport_data = NULL; > > for (i=0; i<ctdb->num_nodes; i++) { > 
TALLOC_FREE(ctdb->nodes[i]->transport_data); >@@ -191,7 +191,7 @@ static const struct ctdb_methods ctdb_tcp_methods = { > > static int tcp_ctcp_destructor(struct ctdb_tcp *ctcp) > { >- ctcp->ctdb->private_data = NULL; >+ ctcp->ctdb->transport_data = NULL; > ctcp->ctdb->methods = NULL; > > return 0; >@@ -209,7 +209,7 @@ int ctdb_tcp_init(struct ctdb_context *ctdb) > > ctcp->listen_fd = -1; > ctcp->ctdb = ctdb; >- ctdb->private_data = ctcp; >+ ctdb->transport_data = ctcp; > ctdb->methods = &ctdb_tcp_methods; > > talloc_set_destructor(ctcp, tcp_ctcp_destructor); >-- >2.25.1 > > >From dca87ea1693f63192b4c23ba97b847ce976a937b Mon Sep 17 00:00:00 2001 >From: Ralph Boehme <slow@samba.org> >Date: Sat, 29 Feb 2020 12:26:19 +0100 >Subject: [PATCH 03/11] ctdb-daemon: ensure restart() callback is called in > half-connected state > >If NODE_FLAGS_DISCONNECTED is set the node can be in half-connected state. With >this change we ensure to restart the transport for this case. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Ralph Boehme <slow@samba.org> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 6a4fa0785fc83561939fa41617d526eb96c1af89) >--- > ctdb/server/ctdb_server.c | 13 ++++++------- > 1 file changed, 6 insertions(+), 7 deletions(-) > >diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c >index 9724d1fe0a8..c5d92af0cfb 100644 >--- a/ctdb/server/ctdb_server.c >+++ b/ctdb/server/ctdb_server.c >@@ -301,6 +301,12 @@ done: > */ > void ctdb_node_dead(struct ctdb_node *node) > { >+ if (node->ctdb->methods == NULL) { >+ DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); >+ return; >+ } >+ >+ node->ctdb->methods->restart(node); > if (node->flags & NODE_FLAGS_DISCONNECTED) { > DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n", > node->ctdb->name, node->name, >@@ -315,13 +321,6 @@ void ctdb_node_dead(struct ctdb_node *node) > 
DEBUG(DEBUG_ERR,("%s: node %s is dead: %u connected\n", > node->ctdb->name, node->name, node->ctdb->num_connected)); > ctdb_daemon_cancel_controls(node->ctdb, node); >- >- if (node->ctdb->methods == NULL) { >- DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); >- return; >- } >- >- node->ctdb->methods->restart(node); > } > > /* >-- >2.25.1 > > >From 39add06579a648b47b4c51729f2b1a0e6362c40b Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Sun, 1 Mar 2020 16:40:41 +1100 >Subject: [PATCH 04/11] ctdb-daemon: more logical whitespace, debug > modernisation > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Ralph Boehme <slow@samba.org> >(cherry picked from commit 15762a34559599cf908e30651a2d4c11560068ed) >--- > ctdb/server/ctdb_server.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > >diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c >index c5d92af0cfb..4b4c2e9896f 100644 >--- a/ctdb/server/ctdb_server.c >+++ b/ctdb/server/ctdb_server.c >@@ -302,11 +302,11 @@ done: > void ctdb_node_dead(struct ctdb_node *node) > { > if (node->ctdb->methods == NULL) { >- DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); >+ DBG_ERR("Can not restart transport while shutting down\n"); > return; > } >- > node->ctdb->methods->restart(node); >+ > if (node->flags & NODE_FLAGS_DISCONNECTED) { > DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n", > node->ctdb->name, node->name, >-- >2.25.1 > > >From 710c95304dec39b37119c8462eb1e550e9b7cc8f Mon Sep 17 00:00:00 2001 >From: Noel Power <noel.power@suse.com> >Date: Sat, 29 Feb 2020 15:49:28 +0000 >Subject: [PATCH 05/11] ctdb-tcp: move free of inbound queue to TCP restart > >Since commit 77deaadca8e8dbc3c92ea16893099c72f6dc874e, a nodeA which >had previously accepted a connection from nodeB (where nodeB dies >e.g. 
as a result of fencing) when nodeB attempts to connect again >after restarting is always rejected with > > ctdb_listen_event: Incoming queue active, rejecting connection from w.x.y.z > >messages. > >Consolidate dead node handling in the TCP restart handling. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Noel Power <noel.power@suse.com> >Reviewed-by: Ralph Boehme <slow@samba.org> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 0ff1b78fc2f0491f9e11131d0040bdaba8873770) >--- > ctdb/tcp/tcp_init.c | 2 +- > ctdb/tcp/tcp_io.c | 1 - > 2 files changed, 1 insertion(+), 2 deletions(-) > >diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c >index 5bd9e2f3a09..7ea079d2303 100644 >--- a/ctdb/tcp/tcp_init.c >+++ b/ctdb/tcp/tcp_init.c >@@ -121,7 +121,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) > node->transport_data, struct ctdb_tcp_node); > > DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); >- >+ TALLOC_FREE(tnode->in_queue); > ctdb_tcp_stop_connection(node); > > tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, >diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c >index df9ca02b413..bcb18fbf300 100644 >--- a/ctdb/tcp/tcp_io.c >+++ b/ctdb/tcp/tcp_io.c >@@ -75,7 +75,6 @@ void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) > return; > > failed: >- TALLOC_FREE(tnode->in_queue); > node->ctdb->upcalls->node_dead(node); > > TALLOC_FREE(data); >-- >2.25.1 > > >From 3f6a4db993259e1e4e2effde33c4d4002f8a0e0f Mon Sep 17 00:00:00 2001 >From: Ralph Boehme <slow@samba.org> >Date: Sat, 29 Feb 2020 12:13:12 +0100 >Subject: [PATCH 06/11] ctdb-tcp: always call node_dead() upcall in > ctdb_tcp_tnode_cb() > >ctdb_tcp_tnode_cb() is called when we receive data on the outgoing connection. > >This can happen when we get an EOF on the connection because the other side has >closed. In this case data will be NULL. > >It would also be called if we received data from the peer.
In this case data >will not be NULL. > >The latter case is a fatal error though and we already call >ctdb_tcp_stop_connection() for this case as well, which means even though the >node is not fully connected anymore, by not calling the node_dead() upcall >NODE_FLAGS_DISCONNECTED will not be set. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Ralph Boehme <slow@samba.org> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit b83ef98c7466b2a81968555de83fb977bb6ca9f0) >--- > ctdb/tcp/tcp_connect.c | 4 +--- > 1 file changed, 1 insertion(+), 3 deletions(-) > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 3e9cf062184..758d4140d43 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -65,9 +65,7 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) > struct ctdb_tcp_node *tnode = talloc_get_type( > node->transport_data, struct ctdb_tcp_node); > >- if (data == NULL) { >- node->ctdb->upcalls->node_dead(node); >- } >+ node->ctdb->upcalls->node_dead(node); > > ctdb_tcp_stop_connection(node); > tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, >-- >2.25.1 > > >From 9d1350943261a7756af109394700293e7ac7b0ac Mon Sep 17 00:00:00 2001 >From: Ralph Boehme <slow@samba.org> >Date: Sat, 29 Feb 2020 12:28:20 +0100 >Subject: [PATCH 07/11] ctdb-tcp: Remove redundant restart in > ctdb_tcp_tnode_cb() > >The node dead upcall has already restarted the outgoing connection. >There's no need to repeat it. 
> >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Ralph Boehme <slow@samba.org> >Signed-off-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit ea37ecdcd5960311f54a7a5510b88a654da23daa) >--- > ctdb/tcp/tcp_connect.c | 6 ------ > 1 file changed, 6 deletions(-) > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 758d4140d43..60e43db0306 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -62,15 +62,9 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node) > void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) > { > struct ctdb_node *node = talloc_get_type(private_data, struct ctdb_node); >- struct ctdb_tcp_node *tnode = talloc_get_type( >- node->transport_data, struct ctdb_tcp_node); > > node->ctdb->upcalls->node_dead(node); > >- ctdb_tcp_stop_connection(node); >- tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, >- timeval_current_ofs(3, 0), >- ctdb_tcp_node_connect, node); > TALLOC_FREE(data); > } > >-- >2.25.1 > > >From e305725e2425f68b082c63fffacc87c2d9f165b4 Mon Sep 17 00:00:00 2001 >From: Ralph Boehme <slow@samba.org> >Date: Fri, 28 Feb 2020 11:36:00 +0100 >Subject: [PATCH 08/11] ctdb-tcp: rename ctdb_tcp_stop_connection() to > ctdb_tcp_stop_outgoing() > >No change in behaviour. This makes the code self-documenting. 
> >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Ralph Boehme <slow@samba.org> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 1e2a967ff41cc29c3a0d7f61a46937c68fdb90ba) >--- > ctdb/tcp/ctdb_tcp.h | 2 +- > ctdb/tcp/tcp_connect.c | 12 ++++++------ > ctdb/tcp/tcp_init.c | 2 +- > 3 files changed, 8 insertions(+), 8 deletions(-) > >diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h >index daabad74297..095056e8544 100644 >--- a/ctdb/tcp/ctdb_tcp.h >+++ b/ctdb/tcp/ctdb_tcp.h >@@ -48,7 +48,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > struct timeval t, void *private_data); > void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args); > void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data); >-void ctdb_tcp_stop_connection(struct ctdb_node *node); >+void ctdb_tcp_stop_outgoing(struct ctdb_node *node); > > #define CTDB_TCP_ALIGNMENT 8 > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 60e43db0306..2a314d6554b 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -38,9 +38,9 @@ > #include "ctdb_tcp.h" > > /* >- stop any connecting (established or pending) to a node >+ stop any outgoing connection (established or pending) to a node > */ >-void ctdb_tcp_stop_connection(struct ctdb_node *node) >+void ctdb_tcp_stop_outgoing(struct ctdb_node *node) > { > struct ctdb_tcp_node *tnode = talloc_get_type( > node->transport_data, struct ctdb_tcp_node); >@@ -90,7 +90,7 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > > ret = getsockopt(tnode->out_fd, SOL_SOCKET, SO_ERROR, &error, &len); > if (ret != 0 || error != 0) { >- ctdb_tcp_stop_connection(node); >+ ctdb_tcp_stop_outgoing(node); > tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, > timeval_current_ofs(1, 0), > ctdb_tcp_node_connect, node); >@@ -128,7 +128,7 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > node->name); > if 
(tnode->out_queue == NULL) { > DBG_ERR("Failed to set up outgoing queue\n"); >- ctdb_tcp_stop_connection(node); >+ ctdb_tcp_stop_outgoing(node); > tnode->connect_te = tevent_add_timer(ctdb->ev, > tnode, > timeval_current_ofs(1, 0), >@@ -168,7 +168,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > ctdb_sock_addr sock_out; > int ret; > >- ctdb_tcp_stop_connection(node); >+ ctdb_tcp_stop_outgoing(node); > > sock_out = node->address; > >@@ -252,7 +252,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > return; > > failed: >- ctdb_tcp_stop_connection(node); >+ ctdb_tcp_stop_outgoing(node); > tnode->connect_te = tevent_add_timer(ctdb->ev, > tnode, > timeval_current_ofs(1, 0), >diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c >index 7ea079d2303..83a718172ac 100644 >--- a/ctdb/tcp/tcp_init.c >+++ b/ctdb/tcp/tcp_init.c >@@ -122,7 +122,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) > > DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); > TALLOC_FREE(tnode->in_queue); >- ctdb_tcp_stop_connection(node); >+ ctdb_tcp_stop_outgoing(node); > > tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, > timeval_zero(), >-- >2.25.1 > > >From fbd3320f21d18eacb8c842a078d40148d154e4ed Mon Sep 17 00:00:00 2001 >From: Ralph Boehme <slow@samba.org> >Date: Sat, 29 Feb 2020 11:54:51 +0100 >Subject: [PATCH 09/11] ctdb-tcp: add ctdb_tcp_stop_incoming() > >No change in behaviour. This makes the code self-documenting. 
> >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Ralph Boehme <slow@samba.org> >Signed-off-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 2c73dbafba50b28e72a8ec7b4382fae42fca6d17) >--- > ctdb/tcp/ctdb_tcp.h | 1 + > ctdb/tcp/tcp_connect.c | 10 ++++++++++ > ctdb/tcp/tcp_init.c | 2 +- > 3 files changed, 12 insertions(+), 1 deletion(-) > >diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h >index 095056e8544..cb8d66fa5dc 100644 >--- a/ctdb/tcp/ctdb_tcp.h >+++ b/ctdb/tcp/ctdb_tcp.h >@@ -49,6 +49,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args); > void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data); > void ctdb_tcp_stop_outgoing(struct ctdb_node *node); >+void ctdb_tcp_stop_incoming(struct ctdb_node *node); > > #define CTDB_TCP_ALIGNMENT 8 > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 2a314d6554b..e8ac148baa1 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -54,6 +54,16 @@ void ctdb_tcp_stop_outgoing(struct ctdb_node *node) > } > } > >+/* >+ stop incoming connection to a node >+ */ >+void ctdb_tcp_stop_incoming(struct ctdb_node *node) >+{ >+ struct ctdb_tcp_node *tnode = talloc_get_type( >+ node->transport_data, struct ctdb_tcp_node); >+ >+ TALLOC_FREE(tnode->in_queue); >+} > > /* > called when a complete packet has come in - should not happen on this socket >diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c >index 83a718172ac..3c99c8b31f5 100644 >--- a/ctdb/tcp/tcp_init.c >+++ b/ctdb/tcp/tcp_init.c >@@ -121,7 +121,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) > node->transport_data, struct ctdb_tcp_node); > > DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); >- TALLOC_FREE(tnode->in_queue); >+ ctdb_tcp_stop_incoming(node); > ctdb_tcp_stop_outgoing(node); > > tnode->connect_te = tevent_add_timer(node->ctdb->ev, 
tnode, >-- >2.25.1 > > >From 1af80bb722457b454772e0e86a23897480f38230 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 6 Mar 2020 15:59:32 +1100 >Subject: [PATCH 10/11] ctdb-tcp: Factor out function ctdb_tcp_start_outgoing() > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Signed-off-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 3c8747fe29486a4f95308b335a5e3ec1807f62cb) >--- > ctdb/tcp/tcp_connect.c | 19 +++++++++++++------ > 1 file changed, 13 insertions(+), 6 deletions(-) > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index e8ac148baa1..a7bda43ebbf 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -164,11 +164,8 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > /* > called when we should try and establish a tcp connection to a node > */ >-void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, >- struct timeval t, void *private_data) >+static void ctdb_tcp_start_outgoing(struct ctdb_node *node) > { >- struct ctdb_node *node = talloc_get_type(private_data, >- struct ctdb_node); > struct ctdb_tcp_node *tnode = talloc_get_type(node->transport_data, > struct ctdb_tcp_node); > struct ctdb_context *ctdb = node->ctdb; >@@ -178,8 +175,6 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > ctdb_sock_addr sock_out; > int ret; > >- ctdb_tcp_stop_outgoing(node); >- > sock_out = node->address; > > tnode->out_fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP); >@@ -270,6 +265,18 @@ failed: > node); > } > >+void ctdb_tcp_node_connect(struct tevent_context *ev, >+ struct tevent_timer *te, >+ struct timeval t, >+ void *private_data) >+{ >+ struct ctdb_node *node = talloc_get_type_abort(private_data, >+ struct ctdb_node); >+ >+ ctdb_tcp_stop_outgoing(node); >+ ctdb_tcp_start_outgoing(node); >+} >+ > /* > called when we get contacted by another node > 
currently makes no attempt to check if the connection is really from a ctdb >-- >2.25.1 > > >From 783335f207d73ebe81ff514452c535b2906ea3c3 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 6 Mar 2020 16:11:23 +1100 >Subject: [PATCH 11/11] ctdb-tcp: Do not stop outbound connection in > ctdb_tcp_node_connect() > >The only place the outgoing connection needs to be stopped is when >there is a timeout when waiting for the connection to become writable. >Add a new function ctdb_tcp_node_connect_timeout() to handle this >case. > >All of the other cases are attempts to establish a new outgoing >connection (initial attempt, retry after an error or disconnect, ...) >so drop stopping the connection in those cases. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Signed-off-by: Martin Schwenke <martin@meltin.net> > >Autobuild-User(master): Martin Schwenke <martins@samba.org> >Autobuild-Date(master): Thu Mar 12 05:29:20 UTC 2020 on sn-devel-184 > >(cherry picked from commit 319c93f0c6a949545229b616dfbd4f51baf11171) >--- > ctdb/tcp/tcp_connect.c | 18 +++++++++++++++++- > 1 file changed, 17 insertions(+), 1 deletion(-) > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index a7bda43ebbf..d9bc52aa22f 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -161,6 +161,11 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > } > > >+static void ctdb_tcp_node_connect_timeout(struct tevent_context *ev, >+ struct tevent_timer *te, >+ struct timeval t, >+ void *private_data); >+ > /* > called when we should try and establish a tcp connection to a node > */ >@@ -251,7 +256,7 @@ static void ctdb_tcp_start_outgoing(struct ctdb_node *node) > tnode->connect_te = tevent_add_timer(ctdb->ev, > tnode, > timeval_current_ofs(1, 0), >- ctdb_tcp_node_connect, >+ ctdb_tcp_node_connect_timeout, > node); > > return; >@@ -273,6 +278,17 @@ void ctdb_tcp_node_connect(struct 
tevent_context *ev, > struct ctdb_node *node = talloc_get_type_abort(private_data, > struct ctdb_node); > >+ ctdb_tcp_start_outgoing(node); >+} >+ >+static void ctdb_tcp_node_connect_timeout(struct tevent_context *ev, >+ struct tevent_timer *te, >+ struct timeval t, >+ void *private_data) >+{ >+ struct ctdb_node *node = talloc_get_type_abort(private_data, >+ struct ctdb_node); >+ > ctdb_tcp_stop_outgoing(node); > ctdb_tcp_start_outgoing(node); > } >-- >2.25.1 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Flags:
slow
:
review+
Actions:
View
Attachments on
bug 14295
:
15823
|
15826
|
15827
|
15828
|
15829
|
15830
|
15855
|
15859
|
15860
| 15862 |
15869