The Samba-Bugzilla – Attachment 15412 Details for
Bug 14084
CTDB replies can be lost before nodes are bidirectionally connected
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch for 4.10
BZ14084-v4-10.patch (text/plain), 28.37 KB, created by
Martin Schwenke
on 2019-08-22 07:00:58 UTC
(
hide
)
Description:
Patch for 4.10
Filename:
MIME Type:
Creator:
Martin Schwenke
Created:
2019-08-22 07:00:58 UTC
Size:
28.37 KB
patch
obsolete
>From 0f7cd079de80715d7ba6084ae439ebf26e4854ef Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Sat, 22 Jun 2019 05:53:15 +1000 >Subject: [PATCH 1/8] ctdb-daemon: Replace function ctdb_ip_to_nodeid() with > ctdb_ip_to_pnn() > >Node ID is a poorly defined concept, indicating the slot in the node >map where the IP address was found. This signed value also ends up >compared to num_nodes, which is unsigned, producing unwanted warnings. > >Just return the PNN because this what both callers really want. > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 010c1d77cd7e192b1fff39b7b91fccbdbbf4a786) >--- > ctdb/include/ctdb_private.h | 3 ++- > ctdb/server/ctdb_daemon.c | 11 ++++------- > ctdb/server/ctdb_server.c | 14 ++++++-------- > ctdb/tcp/tcp_connect.c | 10 ++++++---- > 4 files changed, 18 insertions(+), 20 deletions(-) > >diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h >index ea00bb12128..b6d0d0c678a 100644 >--- a/ctdb/include/ctdb_private.h >+++ b/ctdb/include/ctdb_private.h >@@ -841,7 +841,8 @@ void ctdb_stop_recoverd(struct ctdb_context *ctdb); > > int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport); > >-int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const ctdb_sock_addr *nodeip); >+uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb, >+ const ctdb_sock_addr *nodeip); > > void ctdb_load_nodes_file(struct ctdb_context *ctdb); > >diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c >index aa0694548f8..95b5b6381de 100644 >--- a/ctdb/server/ctdb_daemon.c >+++ b/ctdb/server/ctdb_daemon.c >@@ -1251,21 +1251,18 @@ static void ctdb_initialise_vnn_map(struct ctdb_context *ctdb) > > static void ctdb_set_my_pnn(struct ctdb_context *ctdb) > { >- int nodeid; >- > if (ctdb->address == NULL) { > ctdb_fatal(ctdb, > "Can not determine PNN - node address is not set\n"); > } > >- nodeid = ctdb_ip_to_nodeid(ctdb, ctdb->address); >- if (nodeid == -1) { >+ ctdb->pnn = ctdb_ip_to_pnn(ctdb, ctdb->address); >+ if (ctdb->pnn == CTDB_UNKNOWN_PNN) { > ctdb_fatal(ctdb, >- "Can not determine PNN - node address not found in node list\n"); >+ "Can not determine PNN - unknown node address\n"); > } > >- ctdb->pnn = ctdb->nodes[nodeid]->pnn; >- DEBUG(DEBUG_NOTICE, ("PNN is %u\n", ctdb->pnn)); >+ D_NOTICE("PNN is %u\n", ctdb->pnn); > } > > /* >diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c >index c991b85d99b..6973a0b6056 100644 >--- a/ctdb/server/ctdb_server.c >+++ b/ctdb/server/ctdb_server.c >@@ -45,24 +45,22 @@ int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport) > return 0; > } > >-/* >- Check whether an ip is a valid node ip >- Returns the node id for this ip address or -1 >-*/ >-int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const ctdb_sock_addr *nodeip) >+/* Return the PNN for nodeip, CTDB_UNKNOWN_PNN if nodeip is invalid */ >+uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb, >+ const ctdb_sock_addr *nodeip) > { >- int nodeid; >+ unsigned int nodeid; > > for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) { > if (ctdb->nodes[nodeid]->flags & NODE_FLAGS_DELETED) { > continue; > } > if (ctdb_same_ip(&ctdb->nodes[nodeid]->address, nodeip)) { >- return nodeid; >+ return ctdb->nodes[nodeid]->pnn; > } > } > >- return -1; >+ return CTDB_UNKNOWN_PNN; > } > > /* Load a nodes list file into a nodes array */ >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 385547e0e78..ccd7c044fb2 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -244,7 +244,8 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, > struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp); > ctdb_sock_addr addr; > socklen_t len; >- int fd, nodeid; >+ int fd; >+ uint32_t pnn; > struct ctdb_incoming *in; > int one = 1; > int ret; >@@ -255,10 +256,11 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, > if (fd == -1) return; > smb_set_close_on_exec(fd); > >- nodeid = ctdb_ip_to_nodeid(ctdb, &addr); >+ pnn = ctdb_ip_to_pnn(ctdb, &addr); > >- if (nodeid == -1) { >- DEBUG(DEBUG_ERR, ("Refused connection from unknown node %s\n", ctdb_addr_to_str(&addr))); >+ if (pnn == CTDB_UNKNOWN_PNN) { >+ D_ERR("Refused connection from unknown node %s\n", >+ ctdb_addr_to_str(&addr)); > close(fd); > return; > } >-- >2.23.0.rc1 > > >From 0061cc28e730f33a889254581fb97a12d91ba355 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Thu, 8 Aug 2019 16:20:44 +1000 >Subject: [PATCH 2/8] ctdb-daemon: Add function ctdb_ip_to_node() > >This is the core logic from ctdb_ip_to_pnn(), so re-implement that >that function using ctdb_ip_to_node(). > >Something similar (ctdb_ip_to_nodeid()) was recently removed in commit >010c1d77cd7e192b1fff39b7b91fccbdbbf4a786 because it wasn't required. >Now there is a use case. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 3acb8e9d1c854b577d6be282257269df83055d31) >--- > ctdb/include/ctdb_private.h | 2 ++ > ctdb/server/ctdb_server.c | 24 +++++++++++++++++++----- > 2 files changed, 21 insertions(+), 5 deletions(-) > >diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h >index b6d0d0c678a..af7756f4522 100644 >--- a/ctdb/include/ctdb_private.h >+++ b/ctdb/include/ctdb_private.h >@@ -841,6 +841,8 @@ void ctdb_stop_recoverd(struct ctdb_context *ctdb); > > int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport); > >+struct ctdb_node *ctdb_ip_to_node(struct ctdb_context *ctdb, >+ const ctdb_sock_addr *nodeip); > uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb, > const ctdb_sock_addr *nodeip); > >diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c >index 6973a0b6056..ddff85b81c5 100644 >--- a/ctdb/server/ctdb_server.c >+++ b/ctdb/server/ctdb_server.c >@@ -45,9 +45,9 @@ int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport) > return 0; > } > >-/* Return the PNN for nodeip, CTDB_UNKNOWN_PNN if nodeip is invalid */ >-uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb, >- const ctdb_sock_addr *nodeip) >+/* Return the node structure for nodeip, NULL if nodeip is invalid */ >+struct ctdb_node *ctdb_ip_to_node(struct ctdb_context *ctdb, >+ const ctdb_sock_addr *nodeip) > { > unsigned int nodeid; > >@@ -56,11 +56,25 @@ uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb, > continue; > } > if (ctdb_same_ip(&ctdb->nodes[nodeid]->address, nodeip)) { >- return ctdb->nodes[nodeid]->pnn; >+ return ctdb->nodes[nodeid]; > } > } > >- return CTDB_UNKNOWN_PNN; >+ return NULL; >+} >+ >+/* Return the PNN for nodeip, CTDB_UNKNOWN_PNN if nodeip is invalid */ >+uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb, >+ const ctdb_sock_addr *nodeip) >+{ >+ struct ctdb_node *node; >+ >+ node = ctdb_ip_to_node(ctdb, nodeip); >+ if (node == NULL) { >+ return CTDB_UNKNOWN_PNN; >+ } >+ >+ return node->pnn; > } > > /* Load a nodes list file into a nodes array */ >-- >2.23.0.rc1 > > >From 0c83f561391703120f4ade1391ef3f9389a453a0 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 9 Aug 2019 15:06:34 +1000 >Subject: [PATCH 3/8] ctdb-tcp: Rename fd -> out_fd > >in_fd is coming soon. > >Fix coding style violations in the affected and adjacent lines. >Modernise some debug macros and make them more consistent (e.g. drop >logging of errno when strerror(errno) is already logged. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit c06620169fc178ea6db2631f03edf008285d8cf2) >--- > ctdb/tcp/ctdb_tcp.h | 2 +- > ctdb/tcp/tcp_connect.c | 97 +++++++++++++++++++++++++----------------- > ctdb/tcp/tcp_init.c | 22 ++++++---- > 3 files changed, 72 insertions(+), 49 deletions(-) > >diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h >index 0a998c94da4..acd343fb8f3 100644 >--- a/ctdb/tcp/ctdb_tcp.h >+++ b/ctdb/tcp/ctdb_tcp.h >@@ -39,7 +39,7 @@ struct ctdb_incoming { > state associated with one tcp node > */ > struct ctdb_tcp_node { >- int fd; >+ int out_fd; > struct ctdb_queue *out_queue; > struct tevent_fd *connect_fde; > struct tevent_timer *connect_te; >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index ccd7c044fb2..b30269772c0 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -50,9 +50,9 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node) > talloc_free(tnode->connect_fde); > tnode->connect_fde = NULL; > tnode->connect_te = NULL; >- if (tnode->fd != -1) { >- close(tnode->fd); >- tnode->fd = -1; >+ if (tnode->out_fd != -1) { >+ close(tnode->out_fd); >+ tnode->out_fd = -1; > } > } > >@@ -93,12 +93,13 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > int error = 0; > socklen_t len = sizeof(error); > int one = 1; >+ int ret; > > talloc_free(tnode->connect_te); > tnode->connect_te = NULL; > >- if (getsockopt(tnode->fd, SOL_SOCKET, SO_ERROR, &error, &len) != 0 || >- error != 0) { >+ ret = getsockopt(tnode->out_fd, SOL_SOCKET, SO_ERROR, &error, &len); >+ if (ret != 0 || error != 0) { > ctdb_tcp_stop_connection(node); > tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, > timeval_current_ofs(1, 0), >@@ -109,19 +110,28 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > talloc_free(tnode->connect_fde); > tnode->connect_fde = NULL; > >- if (setsockopt(tnode->fd,IPPROTO_TCP,TCP_NODELAY,(char *)&one,sizeof(one)) == -1) { >- DEBUG(DEBUG_WARNING, ("Failed to set TCP_NODELAY on fd - %s\n", >- strerror(errno))); >+ ret = setsockopt(tnode->out_fd, >+ IPPROTO_TCP, >+ TCP_NODELAY, >+ (char *)&one, >+ sizeof(one)); >+ if (ret == -1) { >+ DBG_WARNING("Failed to set TCP_NODELAY on fd - %s\n", >+ strerror(errno)); > } >- if (setsockopt(tnode->fd,SOL_SOCKET,SO_KEEPALIVE,(char *)&one,sizeof(one)) == -1) { >- DEBUG(DEBUG_WARNING, ("Failed to set KEEPALIVE on fd - %s\n", >- strerror(errno))); >+ ret = setsockopt(tnode->out_fd, >+ SOL_SOCKET, >+ SO_KEEPALIVE,(char *)&one, >+ sizeof(one)); >+ if (ret == -1) { >+ DBG_WARNING("Failed to set KEEPALIVE on fd - %s\n", >+ strerror(errno)); > } > >- ctdb_queue_set_fd(tnode->out_queue, tnode->fd); >+ ctdb_queue_set_fd(tnode->out_queue, tnode->out_fd); > > /* the queue subsystem now owns this fd */ >- tnode->fd = -1; >+ tnode->out_fd = -1; > > /* tell the ctdb layer we are connected */ > node->ctdb->upcalls->node_connected(node); >@@ -149,26 +159,24 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > > sock_out = node->address; > >- tnode->fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP); >- if (tnode->fd == -1) { >- DEBUG(DEBUG_ERR, (__location__ " Failed to create socket\n")); >+ tnode->out_fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP); >+ if (tnode->out_fd == -1) { >+ DBG_ERR("Failed to create socket\n"); > return; > } > >- ret = set_blocking(tnode->fd, false); >+ ret = set_blocking(tnode->out_fd, false); > if (ret != 0) { >- DEBUG(DEBUG_ERR, >- (__location__ >- " failed to set socket non-blocking (%s)\n", >- strerror(errno))); >- close(tnode->fd); >- tnode->fd = -1; >+ DBG_ERR("Failed to set socket non-blocking (%s)\n", >+ strerror(errno)); >+ close(tnode->out_fd); >+ tnode->out_fd = -1; > return; > } > >- set_close_on_exec(tnode->fd); >+ set_close_on_exec(tnode->out_fd); > >- DEBUG(DEBUG_DEBUG, (__location__ " Created TCP SOCKET FD:%d\n", tnode->fd)); >+ DBG_DEBUG("Created TCP SOCKET FD:%d\n", tnode->out_fd); > > /* Bind our side of the socketpair to the same address we use to listen > * on incoming CTDB traffic. >@@ -197,39 +205,48 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, > default: > DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n", > sock_in.sa.sa_family)); >- close(tnode->fd); >- tnode->fd = -1; >+ close(tnode->out_fd); >+ tnode->out_fd = -1; > return; > } > >- if (bind(tnode->fd, (struct sockaddr *)&sock_in, sockin_size) == -1) { >- DEBUG(DEBUG_ERR, (__location__ " Failed to bind socket %s(%d)\n", >- strerror(errno), errno)); >- close(tnode->fd); >- tnode->fd = -1; >+ ret = bind(tnode->out_fd, (struct sockaddr *)&sock_in, sockin_size); >+ if (ret == -1) { >+ DBG_ERR("Failed to bind socket (%s)\n", strerror(errno)); >+ close(tnode->out_fd); >+ tnode->out_fd = -1; > return; > } > >- if (connect(tnode->fd, (struct sockaddr *)&sock_out, sockout_size) != 0 && >- errno != EINPROGRESS) { >+ ret = connect(tnode->out_fd, >+ (struct sockaddr *)&sock_out, >+ sockout_size); >+ if (ret != 0 && errno != EINPROGRESS) { > ctdb_tcp_stop_connection(node); >- tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, >+ tnode->connect_te = tevent_add_timer(ctdb->ev, >+ tnode, > timeval_current_ofs(1, 0), >- ctdb_tcp_node_connect, node); >+ ctdb_tcp_node_connect, >+ node); > return; > } > > /* non-blocking connect - wait for write event */ >- tnode->connect_fde = tevent_add_fd(node->ctdb->ev, tnode, tnode->fd, >+ tnode->connect_fde = tevent_add_fd(node->ctdb->ev, >+ tnode, >+ tnode->out_fd, > TEVENT_FD_WRITE|TEVENT_FD_READ, >- ctdb_node_connect_write, node); >+ ctdb_node_connect_write, >+ node); > > /* don't give it long to connect - retry in one second. This ensures > that we find a node is up quickly (tcp normally backs off a syn reply > delay by quite a lot) */ >- tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, >+ tnode->connect_te = tevent_add_timer(ctdb->ev, >+ tnode, > timeval_current_ofs(1, 0), >- ctdb_tcp_node_connect, node); >+ ctdb_tcp_node_connect, >+ node); > } > > /* >diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c >index b6083666e18..b6115015af3 100644 >--- a/ctdb/tcp/tcp_init.c >+++ b/ctdb/tcp/tcp_init.c >@@ -38,16 +38,16 @@ static int tnode_destructor(struct ctdb_tcp_node *tnode) > { > // struct ctdb_node *node = talloc_find_parent_bytype(tnode, struct ctdb_node); > >- if (tnode->fd != -1) { >- close(tnode->fd); >- tnode->fd = -1; >+ if (tnode->out_fd != -1) { >+ close(tnode->out_fd); >+ tnode->out_fd = -1; > } > > return 0; > } > > /* >- initialise tcp portion of a ctdb node >+ initialise tcp portion of a ctdb node > */ > static int ctdb_tcp_add_node(struct ctdb_node *node) > { >@@ -55,13 +55,19 @@ static int ctdb_tcp_add_node(struct ctdb_node *node) > tnode = talloc_zero(node, struct ctdb_tcp_node); > CTDB_NO_MEMORY(node->ctdb, tnode); > >- tnode->fd = -1; >+ tnode->out_fd = -1; > node->private_data = tnode; > talloc_set_destructor(tnode, tnode_destructor); > >- tnode->out_queue = ctdb_queue_setup(node->ctdb, node, tnode->fd, CTDB_TCP_ALIGNMENT, >- ctdb_tcp_tnode_cb, node, "to-node-%s", node->name); >- >+ tnode->out_queue = ctdb_queue_setup(node->ctdb, >+ node, >+ tnode->out_fd, >+ CTDB_TCP_ALIGNMENT, >+ ctdb_tcp_tnode_cb, >+ node, >+ "to-node-%s", >+ node->name); >+ > return 0; > } > >-- >2.23.0.rc1 > > >From b96aec97030c0c6f12a8c40e2c3b25fea8bac636 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 9 Aug 2019 15:29:36 +1000 >Subject: [PATCH 4/8] ctdb-tcp: Move incoming fd and queue into struct > ctdb_tcp_node > >This makes it easy to track both incoming and outgoing connectivity >states. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit c68b6f96f26664459187ab2fbd56767fb31767e0) >--- > ctdb/tcp/ctdb_tcp.h | 14 ++++----- > ctdb/tcp/tcp_connect.c | 64 +++++++++++++++++++++++++++--------------- > ctdb/tcp/tcp_init.c | 8 ++++++ > ctdb/tcp/tcp_io.c | 9 ++++-- > 4 files changed, 61 insertions(+), 34 deletions(-) > >diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h >index acd343fb8f3..9a615fc6393 100644 >--- a/ctdb/tcp/ctdb_tcp.h >+++ b/ctdb/tcp/ctdb_tcp.h >@@ -26,23 +26,19 @@ struct ctdb_tcp { > int listen_fd; > }; > >-/* >- state associated with an incoming connection >-*/ >-struct ctdb_incoming { >- struct ctdb_context *ctdb; >- int fd; >- struct ctdb_queue *queue; >-}; >- > /* > state associated with one tcp node > */ > struct ctdb_tcp_node { > int out_fd; > struct ctdb_queue *out_queue; >+ > struct tevent_fd *connect_fde; > struct tevent_timer *connect_te; >+ >+ struct ctdb_context *ctdb; >+ int in_fd; >+ struct ctdb_queue *in_queue; > }; > > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index b30269772c0..1f2379cdb79 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -262,8 +262,8 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, > ctdb_sock_addr addr; > socklen_t len; > int fd; >- uint32_t pnn; >- struct ctdb_incoming *in; >+ struct ctdb_node *node; >+ struct ctdb_tcp_node *tnode; > int one = 1; > int ret; > >@@ -273,41 +273,61 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, > if (fd == -1) return; > smb_set_close_on_exec(fd); > >- pnn = ctdb_ip_to_pnn(ctdb, &addr); >- >- if (pnn == CTDB_UNKNOWN_PNN) { >+ node = ctdb_ip_to_node(ctdb, &addr); >+ if (node == NULL) { > D_ERR("Refused connection from unknown node %s\n", > ctdb_addr_to_str(&addr)); > close(fd); > return; > } > >- in = talloc_zero(ctcp, struct ctdb_incoming); >- in->fd = fd; >- in->ctdb = ctdb; >+ tnode = talloc_get_type_abort(node->private_data, >+ struct ctdb_tcp_node); >+ if (tnode == NULL) { >+ /* This can't happen - see ctdb_tcp_initialise() */ >+ DBG_ERR("INTERNAL ERROR setting up connection from node %s\n", >+ ctdb_addr_to_str(&addr)); >+ close(fd); >+ return; >+ } > >- ret = set_blocking(in->fd, false); >+ ret = set_blocking(fd, false); > if (ret != 0) { >- DEBUG(DEBUG_ERR, >- (__location__ >- " failed to set socket non-blocking (%s)\n", >- strerror(errno))); >- close(in->fd); >- in->fd = -1; >+ DBG_ERR("Failed to set socket non-blocking (%s)\n", >+ strerror(errno)); >+ close(fd); > return; > } > >- set_close_on_exec(in->fd); >+ set_close_on_exec(fd); > >- DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d to incoming ctdb connection\n", fd)); >+ DBG_DEBUG("Created SOCKET FD:%d to incoming ctdb connection\n", fd); > >- if (setsockopt(in->fd,SOL_SOCKET,SO_KEEPALIVE,(char *)&one,sizeof(one)) == -1) { >- DEBUG(DEBUG_WARNING, ("Failed to set KEEPALIVE on fd - %s\n", >- strerror(errno))); >+ ret = setsockopt(fd, >+ SOL_SOCKET, >+ SO_KEEPALIVE, >+ (char *)&one, >+ sizeof(one)); >+ if (ret == -1) { >+ DBG_WARNING("Failed to set KEEPALIVE on fd - %s\n", >+ strerror(errno)); >+ } >+ >+ tnode->in_queue = ctdb_queue_setup(ctdb, >+ tnode, >+ fd, >+ CTDB_TCP_ALIGNMENT, >+ ctdb_tcp_read_cb, >+ tnode, >+ "ctdbd-%s", >+ ctdb_addr_to_str(&addr)); >+ if (tnode->in_queue == NULL) { >+ DBG_ERR("Failed to set up incoming queue\n"); >+ close(fd); >+ return; > } > >- in->queue = ctdb_queue_setup(ctdb, in, in->fd, CTDB_TCP_ALIGNMENT, >- ctdb_tcp_read_cb, in, "ctdbd-%s", ctdb_addr_to_str(&addr)); >+ tnode->in_fd = fd; > } > > >diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c >index b6115015af3..ec45d645c83 100644 >--- a/ctdb/tcp/tcp_init.c >+++ b/ctdb/tcp/tcp_init.c >@@ -43,6 +43,11 @@ static int tnode_destructor(struct ctdb_tcp_node *tnode) > tnode->out_fd = -1; > } > >+ if (tnode->in_fd != -1) { >+ close(tnode->in_fd); >+ tnode->in_fd = -1; >+ } >+ > return 0; > } > >@@ -56,6 +61,9 @@ static int ctdb_tcp_add_node(struct ctdb_node *node) > CTDB_NO_MEMORY(node->ctdb, tnode); > > tnode->out_fd = -1; >+ tnode->in_fd = -1; >+ tnode->ctdb = node->ctdb; >+ > node->private_data = tnode; > talloc_set_destructor(tnode, tnode_destructor); > >diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c >index 0eb8e25eea3..be4558b16ea 100644 >--- a/ctdb/tcp/tcp_io.c >+++ b/ctdb/tcp/tcp_io.c >@@ -37,7 +37,8 @@ > */ > void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) > { >- struct ctdb_incoming *in = talloc_get_type(args, struct ctdb_incoming); >+ struct ctdb_tcp_node *tnode = talloc_get_type_abort( >+ args, struct ctdb_tcp_node); > struct ctdb_req_header *hdr = (struct ctdb_req_header *)data; > > if (data == NULL) { >@@ -69,11 +70,13 @@ void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) > } > > /* tell the ctdb layer above that we have a packet */ >- in->ctdb->upcalls->recv_pkt(in->ctdb, data, cnt); >+ tnode->ctdb->upcalls->recv_pkt(tnode->ctdb, data, cnt); > return; > > failed: >- TALLOC_FREE(in); >+ TALLOC_FREE(tnode->in_queue); >+ close(tnode->in_fd); >+ tnode->in_fd = -1; > TALLOC_FREE(data); > } > >-- >2.23.0.rc1 > > >From 88a01b2825749f57ec476220531f32d9c66bd029 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Thu, 15 Aug 2019 15:45:16 +1000 >Subject: [PATCH 5/8] ctdb-tcp: Use TALLOC_FREE() > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit d80d9edb4dc107b15a35a39e5c966a3eaed6453a) >--- > ctdb/tcp/tcp_connect.c | 6 ++---- > 1 file changed, 2 insertions(+), 4 deletions(-) > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 1f2379cdb79..f2d036fb5f7 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -46,10 +46,8 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node) > node->private_data, struct ctdb_tcp_node); > > ctdb_queue_set_fd(tnode->out_queue, -1); >- talloc_free(tnode->connect_te); >- talloc_free(tnode->connect_fde); >- tnode->connect_fde = NULL; >- tnode->connect_te = NULL; >+ TALLOC_FREE(tnode->connect_te); >+ TALLOC_FREE(tnode->connect_fde); > if (tnode->out_fd != -1) { > close(tnode->out_fd); > tnode->out_fd = -1; >-- >2.23.0.rc1 > > >From 9c6eba3dddfbb24262def8944bd634b2d56b512c Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Thu, 15 Aug 2019 15:57:31 +1000 >Subject: [PATCH 6/8] ctdb-tcp: Create outbound queue when the connection > becomes writable > >Since commit ddd97553f0a8bfaada178ec4a7460d76fa21f079 >ctdb_queue_send() doesn't queue a packet if the connection isn't yet >established (i.e. when fd == -1). So, don't bother creating the >outbound queue during initialisation but create it when the connection >becomes writable. > >Now the presence of the queue indicates that the outbound connection >is up. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 7f4854d9643a096a6d8a354fcd27b7c6ed24a75e) >--- > ctdb/tcp/tcp_connect.c | 23 ++++++++++++++++++++--- > ctdb/tcp/tcp_init.c | 9 --------- > ctdb/tcp/tcp_io.c | 5 +++++ > 3 files changed, 25 insertions(+), 12 deletions(-) > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index f2d036fb5f7..15ce73103b9 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -44,8 +44,8 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node) > { > struct ctdb_tcp_node *tnode = talloc_get_type( > node->private_data, struct ctdb_tcp_node); >- >- ctdb_queue_set_fd(tnode->out_queue, -1); >+ >+ TALLOC_FREE(tnode->out_queue); > TALLOC_FREE(tnode->connect_te); > TALLOC_FREE(tnode->connect_fde); > if (tnode->out_fd != -1) { >@@ -126,7 +126,24 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > strerror(errno)); > } > >- ctdb_queue_set_fd(tnode->out_queue, tnode->out_fd); >+ tnode->out_queue = ctdb_queue_setup(node->ctdb, >+ tnode, >+ tnode->out_fd, >+ CTDB_TCP_ALIGNMENT, >+ ctdb_tcp_tnode_cb, >+ node, >+ "to-node-%s", >+ node->name); >+ if (tnode->out_queue == NULL) { >+ DBG_ERR("Failed to set up outgoing queue\n"); >+ ctdb_tcp_stop_connection(node); >+ tnode->connect_te = tevent_add_timer(ctdb->ev, >+ tnode, >+ timeval_current_ofs(1, 0), >+ ctdb_tcp_node_connect, >+ node); >+ return; >+ } > > /* the queue subsystem now owns this fd */ > tnode->out_fd = -1; >diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c >index ec45d645c83..91d4e992c8f 100644 >--- a/ctdb/tcp/tcp_init.c >+++ b/ctdb/tcp/tcp_init.c >@@ -67,15 +67,6 @@ static int ctdb_tcp_add_node(struct ctdb_node *node) > node->private_data = tnode; > talloc_set_destructor(tnode, tnode_destructor); > >- tnode->out_queue = ctdb_queue_setup(node->ctdb, >- node, >- tnode->out_fd, >- CTDB_TCP_ALIGNMENT, >- ctdb_tcp_tnode_cb, >- node, >- "to-node-%s", >- node->name); >- > return 0; > } > >diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c >index be4558b16ea..e33ed44048e 100644 >--- a/ctdb/tcp/tcp_io.c >+++ b/ctdb/tcp/tcp_io.c >@@ -87,5 +87,10 @@ int ctdb_tcp_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length) > { > struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, > struct ctdb_tcp_node); >+ if (tnode->out_queue == NULL) { >+ DBG_DEBUG("No outgoing connection, dropping packet\n"); >+ return 0; >+ } >+ > return ctdb_queue_send(tnode->out_queue, data, length); > } >-- >2.23.0.rc1 > > >From 135b88e7c81bc8639ea3f1df16ec845ad30a0f29 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 9 Aug 2019 15:33:05 +1000 >Subject: [PATCH 7/8] ctdb-tcp: Only mark a node connected if both directions > are up > >Nodes are currently marked as up if the outgoing connection is >established. However, if the incoming connection is not yet >established then this node could send a request where the replying >node can not queue its reply. Wait until both directions are up >before marking a node as connected. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 8c98c10f242bc722beffc711e85c0e4f2e74cd57) >--- > ctdb/tcp/tcp_connect.c | 20 +++++++++++++++++--- > 1 file changed, 17 insertions(+), 3 deletions(-) > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index 15ce73103b9..e4d7f0dfd9c 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -148,8 +148,14 @@ static void ctdb_node_connect_write(struct tevent_context *ev, > /* the queue subsystem now owns this fd */ > tnode->out_fd = -1; > >- /* tell the ctdb layer we are connected */ >- node->ctdb->upcalls->node_connected(node); >+ /* >+ * Mark the node to which this connection has been established >+ * as connected, but only if the corresponding listening >+ * socket is also connected >+ */ >+ if (tnode->in_fd != -1) { >+ node->ctdb->upcalls->node_connected(node); >+ } > } > > >@@ -343,7 +349,15 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, > } > > tnode->in_fd = fd; >-} >+ >+ /* >+ * Mark the connecting node as connected, but only if the >+ * corresponding outbound connected is also up >+ */ >+ if (tnode->out_queue != NULL) { >+ node->ctdb->upcalls->node_connected(node); >+ } >+ } > > > /* >-- >2.23.0.rc1 > > >From 8afa8ab265318243d6414e9442b950d444bea310 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 13 Aug 2019 17:08:43 +1000 >Subject: [PATCH 8/8] ctdb-tcp: Mark node as disconnected if incoming > connection goes away > >To make it easy to pass the node data to the upcall, the private data >for ctdb_tcp_read_cb() needs to be changed from tnode to node. > >RN: Avoid marking a node as connected before it can receive packets >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> > >Autobuild-User(master): Martin Schwenke <martins@samba.org> >Autobuild-Date(master): Fri Aug 16 22:50:35 UTC 2019 on sn-devel-184 > >(cherry picked from commit 73c850eda4209b688a169aeeb20c453b738cbb35) >--- > ctdb/tcp/tcp_connect.c | 2 +- > ctdb/tcp/tcp_io.c | 5 ++++- > 2 files changed, 5 insertions(+), 2 deletions(-) > >diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c >index e4d7f0dfd9c..f02340c1789 100644 >--- a/ctdb/tcp/tcp_connect.c >+++ b/ctdb/tcp/tcp_connect.c >@@ -339,7 +339,7 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, > fd, > CTDB_TCP_ALIGNMENT, > ctdb_tcp_read_cb, >- tnode, >+ node, > "ctdbd-%s", > ctdb_addr_to_str(&addr)); > if (tnode->in_queue == NULL) { >diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c >index e33ed44048e..e8ebff887e1 100644 >--- a/ctdb/tcp/tcp_io.c >+++ b/ctdb/tcp/tcp_io.c >@@ -37,8 +37,9 @@ > */ > void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) > { >+ struct ctdb_node *node = talloc_get_type_abort(args, struct ctdb_node); > struct ctdb_tcp_node *tnode = talloc_get_type_abort( >- args, struct ctdb_tcp_node); >+ node->private_data, struct ctdb_tcp_node); > struct ctdb_req_header *hdr = (struct ctdb_req_header *)data; > > if (data == NULL) { >@@ -77,6 +78,8 @@ failed: > TALLOC_FREE(tnode->in_queue); > close(tnode->in_fd); > tnode->in_fd = -1; >+ node->ctdb->upcalls->node_dead(node); >+ > TALLOC_FREE(data); > } > >-- >2.23.0.rc1 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Flags:
amitay
:
review+
Actions:
View
Attachments on
bug 14084
:
15411
| 15412 |
15413