The Samba-Bugzilla – Attachment 15827 Details for
Bug 14295
Starting ctdb node that was powered off hard before results in recovery loop
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
More complete patch
bug14295-master-v3.patch (text/plain), 8.27 KB, created by
Ralph Böhme
on 2020-02-28 11:15:36 UTC
(
hide
)
Description:
More complete patch
Filename:
MIME Type:
Creator:
Ralph Böhme
Created:
2020-02-28 11:15:36 UTC
Size:
8.27 KB
patch
obsolete
>From 0f433da363fd85d2d2faeeb17cca745c7b57755c Mon Sep 17 00:00:00 2001 >From: Ralph Boehme <slow@samba.org> >Date: Fri, 28 Feb 2020 11:36:00 +0100 >Subject: [PATCH 1/4] ctdb: rename ctdb_tcp_stop_connection() to > ctdb_tcp_stop_outgoing_connection() > >No change in behavour, just a function rename that prepares for adding a new >function ctdb_tcp_stop_connection() that will also tear down any incoming >connection. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Ralph Boehme <slow@samba.org> >--- > ctdb/tcp/ctdb_tcp.h | 2 +- > ctdb/tcp/tcp_connect.c | 14 +++++++------- > ctdb/tcp/tcp_init.c | 2 +- > 3 files changed, 9 insertions(+), 9 deletions(-) > >diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h >index daabad74297..5e11dab1156 100644 >--- a/ctdb/tcp/ctdb_tcp.h >+++ b/ctdb/tcp/ctdb_tcp.h >@@ -48,7 +48,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > struct timeval t, void *private_data); > void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args); > void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data); >-void ctdb_tcp_stop_connection(struct ctdb_node *node); >+void ctdb_tcp_stop_outgoing_connection(struct ctdb_node *node); > > #define CTDB_TCP_ALIGNMENT 8 > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 559442f14bf..da1b1df3b93 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -38,9 +38,9 @@ > #include "ctdb_tcp.h" > > /* >- stop any connecting (established or pending) to a node >+ stop any outgoing connection (established or pending) to a node > */ >-void ctdb_tcp_stop_connection(struct ctdb_node *node) >+void ctdb_tcp_stop_outgoing_connection(struct ctdb_node *node) > { > struct ctdb_tcp_node *tnode = talloc_get_type( > node->transport_data, struct ctdb_tcp_node); >@@ -69,7 +69,7 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) > node->ctdb->upcalls->node_dead(node); > } > >- ctdb_tcp_stop_connection(node); 
>+ ctdb_tcp_stop_outgoing_connection(node); > tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, > timeval_current_ofs(3, 0), > ctdb_tcp_node_connect, node); >@@ -97,7 +97,7 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > > ret = getsockopt(tnode->out_fd, SOL_SOCKET, SO_ERROR, &error, &len); > if (ret != 0 || error != 0) { >- ctdb_tcp_stop_connection(node); >+ ctdb_tcp_stop_outgoing_connection(node); > tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, > timeval_current_ofs(1, 0), > ctdb_tcp_node_connect, node); >@@ -134,7 +134,7 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > node->name); > if (tnode->out_queue == NULL) { > DBG_ERR("Failed to set up outgoing queue\n"); >- ctdb_tcp_stop_connection(node); >+ ctdb_tcp_stop_outgoing_connection(node); > tnode->connect_te = tevent_add_timer(ctdb->ev, > tnode, > timeval_current_ofs(1, 0), >@@ -174,7 +174,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > ctdb_sock_addr sock_out; > int ret; > >- ctdb_tcp_stop_connection(node); >+ ctdb_tcp_stop_outgoing_connection(node); > > sock_out = node->address; > >@@ -258,7 +258,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > return; > > failed: >- ctdb_tcp_stop_connection(node); >+ ctdb_tcp_stop_outgoing_connection(node); > tnode->connect_te = tevent_add_timer(ctdb->ev, > tnode, > timeval_current_ofs(1, 0), >diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c >index 559ad8691d0..967eb3ee494 100644 >--- a/ctdb/tcp/tcp_init.c >+++ b/ctdb/tcp/tcp_init.c >@@ -122,7 +122,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) > > DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); > >- ctdb_tcp_stop_connection(node); >+ ctdb_tcp_stop_outgoing_connection(node); > > tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, > timeval_zero(), >-- >2.24.1 > > >From 934e7f79a182568620ea88f480cb2e42b58c6f87 Mon Sep 17 00:00:00 2001 >From: 
Ralph Boehme <slow@samba.org> >Date: Fri, 28 Feb 2020 11:38:28 +0100 >Subject: [PATCH 2/4] ctdb: add ctdb_tcp_stop_connection() > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Ralph Boehme <slow@samba.org> >--- > ctdb/tcp/ctdb_tcp.h | 1 + > ctdb/tcp/tcp_connect.c | 11 +++++++++++ > 2 files changed, 12 insertions(+) > >diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h >index 5e11dab1156..6e0a9af9ab2 100644 >--- a/ctdb/tcp/ctdb_tcp.h >+++ b/ctdb/tcp/ctdb_tcp.h >@@ -48,6 +48,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > struct timeval t, void *private_data); > void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args); > void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data); >+void ctdb_tcp_stop_connection(struct ctdb_node *node); > void ctdb_tcp_stop_outgoing_connection(struct ctdb_node *node); > > #define CTDB_TCP_ALIGNMENT 8 >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index da1b1df3b93..7da5f6f6870 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -54,6 +54,17 @@ void ctdb_tcp_stop_outgoing_connection(struct ctdb_node *node) > } > } > >+/* >+ stop both incoming and outgoing connection (established or pending) to a node >+ */ >+void ctdb_tcp_stop_connection(struct ctdb_node *node) >+{ >+ struct ctdb_tcp_node *tnode = talloc_get_type( >+ node->transport_data, struct ctdb_tcp_node); >+ >+ ctdb_tcp_stop_outgoing_connection(node); >+ TALLOC_FREE(tnode->in_queue); >+} > > /* > called when a complete packet has come in - should not happen on this socket >-- >2.24.1 > > >From 7eb82b9b9c5ef5ef5291901271ffeed15fde1b8a Mon Sep 17 00:00:00 2001 >From: Ralph Boehme <slow@samba.org> >Date: Fri, 28 Feb 2020 11:39:07 +0100 >Subject: [PATCH 3/4] ctdb: use ctdb_tcp_stop_connection() in > ctdb_tcp_restart() > >This fixes a regression introduced by commit >d0baad257e511280ff3e5c7372c38c43df841070 as part of the fixes for bug 14175. 
> >The scenario that triggers this is: > >- hard power off of a node A > >- all other nodes in the cluster fail to free > struct ctdb_tcp_node.in_queue > >- restart node A and start ctdb > >- node A connects to other nodes but the other nodes > reject the incoming connection with > > Feb 21 13:47:13 somenode ctdbd[302424]: ctdb_listen_event: > Incoming queue active, rejecting connection from SOMEIP > >struct ctdb_tcp_node.in_queue is only ever freed in the fd readable handler >ctdb_tcp_read_cb(), but this never gets called as the TCP stacks on the nodes >don't notice the connection is dead. ctdb sets SO_KEEPALIVE on the socket, but >the default timeout for tcp_keepalive_time is 2 hours. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Ralph Boehme <slow@samba.org> >--- > ctdb/tcp/tcp_init.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > >diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c >index 967eb3ee494..559ad8691d0 100644 >--- a/ctdb/tcp/tcp_init.c >+++ b/ctdb/tcp/tcp_init.c >@@ -122,7 +122,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) > > DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); > >- ctdb_tcp_stop_outgoing_connection(node); >+ ctdb_tcp_stop_connection(node); > > tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, > timeval_zero(), >-- >2.24.1 > > >From c18d11b225976bdfc8781aa0dc9c39b612d91085 Mon Sep 17 00:00:00 2001 >From: Ralph Boehme <slow@samba.org> >Date: Fri, 28 Feb 2020 11:40:44 +0100 >Subject: [PATCH 4/4] ctdb: use ctdb_tcp_stop_connection() in > ctdb_tcp_tnode_cb() > >ctdb_tcp_tnode_cb() gets called when we receive data on the outgoing >connection. Instead of only tearing down the outgoing connection, we better tear >down outgoing *and* incoming connection to the node. 
> >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 > >Signed-off-by: Ralph Boehme <slow@samba.org> >--- > ctdb/tcp/tcp_connect.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 7da5f6f6870..f1fc745cbab 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -80,7 +80,7 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) > node->ctdb->upcalls->node_dead(node); > } > >- ctdb_tcp_stop_outgoing_connection(node); >+ ctdb_tcp_stop_connection(node); > tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, > timeval_current_ofs(3, 0), > ctdb_tcp_node_connect, node); >-- >2.24.1 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 14295
:
15823
|
15826
|
15827
|
15828
|
15829
|
15830
|
15855
|
15859
|
15860
|
15862
|
15869