From c7c3806bbb083107c5f919c4db11dba2b0501ba5 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 23 Jul 2021 14:39:05 +1000 Subject: [PATCH 01/16] ctdb-build: Use pcap-config when available The build currently fails on AIX, which can't find the pcap headers because they're installed in a non-standard place. However, there is a pcap-config script available. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit a83e9ca696a37b00231ce40cca5a043beb9b5590) --- ctdb/wscript | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ctdb/wscript b/ctdb/wscript index c082c3b7a7d..8f6e2bb5203 100644 --- a/ctdb/wscript +++ b/ctdb/wscript @@ -204,6 +204,12 @@ def configure(conf): if sys.platform.startswith('linux'): conf.SET_TARGET_TYPE('pcap', 'EMPTY') else: + conf.find_program('pcap-config', var='PCAP_CONFIG') + if conf.env.PCAP_CONFIG: + conf.CHECK_CFG(path=conf.env.PCAP_CONFIG, + args="--cflags --libs", + package="", + uselib_store="PCAP") if not conf.CHECK_HEADERS('pcap.h'): Logs.error('Need libpcap') sys.exit(1) -- 2.39.2 From 194a63bf08c1d873d7d15158b8bea3ffc89cef3c Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 8 Aug 2022 11:26:54 +1000 Subject: [PATCH 02/16] ctdb-build: Add --enable-pcap configure option This forces the use of pcap for packet capture on Linux. It appears that using a raw socket for capture does not work with infiniband - pcap support for that to come. Don't (yet?) change the default capture method to pcap. On some platforms (e.g. my personal Intel NUC, running Debian testing), pcap is much less reliable than the raw socket. However, pcap seems fine on most other platforms.
Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit d1543d5c7889f3ac42f80fc5d1eddf54f9c5d0d6) --- ctdb/wscript | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ctdb/wscript b/ctdb/wscript index 8f6e2bb5203..88e42439f5a 100644 --- a/ctdb/wscript +++ b/ctdb/wscript @@ -98,6 +98,9 @@ def options(opt): opt.add_option('--enable-etcd-reclock', help=("Enable etcd recovery lock helper (default=no)"), action="store_true", dest='ctdb_etcd_reclock', default=False) + opt.add_option('--enable-pcap', + help=("Use pcap for packet capture (default=no)"), + action="store_true", dest='ctdb_pcap', default=False) opt.add_option('--with-libcephfs', help=("Directory under which libcephfs is installed"), @@ -201,7 +204,9 @@ def configure(conf): if not conf.CHECK_VARIABLE('ETIME', headers='errno.h'): conf.DEFINE('ETIME', 'ETIMEDOUT') - if sys.platform.startswith('linux'): + if Options.options.ctdb_pcap or not sys.platform.startswith('linux'): + conf.DEFINE('ENABLE_PCAP', 1) + if not conf.env.ENABLE_PCAP: conf.SET_TARGET_TYPE('pcap', 'EMPTY') else: conf.find_program('pcap-config', var='PCAP_CONFIG') -- 2.39.2 From 11388a2b32dae07b4d1b1697283329f7739bab40 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 8 Aug 2022 11:29:36 +1000 Subject: [PATCH 03/16] ctdb-common: Move a misplaced comment Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c522f4f6045b48bffe47a12a246f356e71fbeec0) --- ctdb/common/system_socket.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index bb513508353..13e346a8866 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -747,13 +747,6 @@ int ctdb_sys_send_tcp(const ctdb_sock_addr *dest, return 0; } -/* - * Packet capture - * - * If AF_PACKET is available then use a raw socket otherwise use pcap. 
- * wscript has checked to make sure that pcap is available if needed. - */ - static int tcp4_extract(const uint8_t *ip_pkt, size_t pktlen, struct sockaddr_in *src, @@ -864,6 +857,12 @@ static int tcp6_extract(const uint8_t *ip_pkt, return 0; } +/* + * Packet capture + * + * If AF_PACKET is available then use a raw socket otherwise use pcap. + * wscript has checked to make sure that pcap is available if needed. + */ #ifdef HAVE_AF_PACKET -- 2.39.2 From 9144524c5f635beb531936af5db619e8ae2b57dd Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 8 Aug 2022 11:30:15 +1000 Subject: [PATCH 04/16] ctdb-common: Do not use raw socket when ENABLE_PCAP is defined Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit ad445abebdea55f71b0c79eb31c0e6b0aee06763) --- ctdb/common/system_socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index 13e346a8866..ced2b17fc21 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -864,7 +864,7 @@ static int tcp6_extract(const uint8_t *ip_pkt, * wscript has checked to make sure that pcap is available if needed. 
*/ -#ifdef HAVE_AF_PACKET +#if defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) /* * This function is used to open a raw socket to capture from @@ -963,7 +963,7 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data, return ENOMSG; } -#else /* HAVE_AF_PACKET */ +#else /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */ #include <pcap.h> @@ -1043,4 +1043,4 @@ int ctdb_sys_read_tcp_packet(int s, return ENOMSG; } -#endif /* HAVE_AF_PACKET */ +#endif /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */ -- 2.39.2 From e37455dc74b701cec353b1ab3f812c8f5421e48b Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 8 Aug 2022 11:31:03 +1000 Subject: [PATCH 05/16] ctdb-common: Fix a warning in the pcap code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [173/416] Compiling ctdb/common/system_socket.c ../../common/system_socket.c: In function ‘ctdb_sys_read_tcp_packet’: ../../common/system_socket.c:1016:15: error: cast discards ‘const’ qualifier from pointer target type [-Werror=cast-qual] 1016 | eth = (struct ether_header *)buffer; | ^ Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 8b54587b1aed28aa2f3af7161a077aa9dd83894c) --- ctdb/common/system_socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index ced2b17fc21..71c60853ee3 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -998,7 +998,7 @@ int ctdb_sys_read_tcp_packet(int s, uint16_t *window) { int ret; - struct ether_header *eth; + const struct ether_header *eth; struct pcap_pkthdr pkthdr; const u_char *buffer; pcap_t *pt = (pcap_t *)private_data; @@ -1012,7 +1012,7 @@ int ctdb_sys_read_tcp_packet(int s, ZERO_STRUCTP(dst); /* Ethernet */ - eth = (struct ether_header *)buffer; + eth = (const struct ether_header *)buffer; /* we want either IPv4 or IPv6 */ if (eth->ether_type == htons(ETHERTYPE_IP)) { -- 2.39.2 From
4e15c197a180203914cbe356429bb8d38eebbaeb Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 9 Aug 2022 13:49:42 +1000 Subject: [PATCH 06/16] ctdb-common: Stop a pcap-related crash on error errbuf can't be NULL. Might as well use it. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 40380a8042dfc2efa6f8f06ed7ac86c3c20a343f) --- ctdb/common/system_socket.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index 71c60853ee3..43910e9b177 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -969,11 +969,14 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data, int ctdb_sys_open_capture_socket(const char *iface, void **private_data) { + char errbuf[PCAP_ERRBUF_SIZE]; pcap_t *pt; - pt=pcap_open_live(iface, 100, 0, 0, NULL); + pt = pcap_open_live(iface, 100, 0, 0, errbuf); if (pt == NULL) { - DBG_ERR("Failed to open capture device %s\n", iface); + DBG_ERR("Failed to open pcap capture device %s (%s)\n", + iface, + errbuf); return -1; } *((pcap_t **)private_data) = pt; -- 2.39.2 From bcacf49ceefbe7f92f2db25441260a1b413b7b4a Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 11 Aug 2022 09:00:25 +1000 Subject: [PATCH 07/16] ctdb-common: Use pcap_get_selectable_fd() This is preferred because it will fail for devices that do not support epoll_wait() and similar. 
Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 075414dc05455a5cd33a244efd51be60fc294e95) --- ctdb/common/system_socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index 43910e9b177..74fe1593a01 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -981,7 +981,7 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) } *((pcap_t **)private_data) = pt; - return pcap_fileno(pt); + return pcap_get_selectable_fd(pt); } int ctdb_sys_close_capture_socket(void *private_data) -- 2.39.2 From 8c4034434821d112e764a708d0e923549dc8dfcf Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 15 Aug 2022 14:30:09 +1000 Subject: [PATCH 08/16] ctdb-common: Improve/add debug Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 33a80c1d63fd2e6163ef6c704b2e714e71b01384) --- ctdb/common/system_socket.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index 74fe1593a01..e8ccd301f6c 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -880,7 +880,7 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) return -1; } - DBG_DEBUG("Created RAW SOCKET FD:%d for tcp tickle\n", s); + DBG_DEBUG("Opened raw socket for TCP tickle capture (fd=%d)\n", s); ret = set_blocking(s, false); if (ret != 0) { @@ -971,6 +971,7 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) { char errbuf[PCAP_ERRBUF_SIZE]; pcap_t *pt; + int fd; pt = pcap_open_live(iface, 100, 0, 0, errbuf); if (pt == NULL) { @@ -980,8 +981,11 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) return -1; } *((pcap_t **)private_data) = pt; + fd = pcap_get_selectable_fd(pt); - return pcap_get_selectable_fd(pt); + DBG_DEBUG("Opened pcap capture for TCP tickle capture 
(fd=%d)\n", fd); + + return fd; } int ctdb_sys_close_capture_socket(void *private_data) -- 2.39.2 From bc3a3757f24975c402658b1a841f54048c48f256 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 15 Aug 2022 09:41:09 +1000 Subject: [PATCH 09/16] ctdb-tools: Improve/add debug In particular, knowing the reason fetching the packet fails can help with debugging unsupported protocols in the pcap code. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 5dd964aa0297b6e9ab8e1d0ff9fa0565c97ea43e) --- ctdb/tools/ctdb_killtcp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ctdb/tools/ctdb_killtcp.c b/ctdb/tools/ctdb_killtcp.c index bab81092058..007422f42fc 100644 --- a/ctdb/tools/ctdb_killtcp.c +++ b/ctdb/tools/ctdb_killtcp.c @@ -169,17 +169,18 @@ static void reset_connections_capture_tcp_handler(struct tevent_context *ev, &conn.server, &conn.client, &ack_seq, &seq, &rst, &window); if (ret != 0) { - /* probably a non-tcp ACK packet */ + /* Not a TCP-ACK? Unexpected protocol? */ + DBG_DEBUG("Failed to parse packet, errno=%d\n", ret); return; } if (window == htons(1234) && (rst || seq == 0)) { /* Ignore packets that we sent! 
*/ - D_DEBUG("Ignoring packet: %s, " - "seq=%"PRIu32", ack_seq=%"PRIu32", " - "rst=%d, window=%"PRIu16"\n", - ctdb_connection_to_string(state, &conn, false), - seq, ack_seq, rst, ntohs(window)); + DBG_DEBUG("Ignoring sent packet: %s, " + "seq=%"PRIu32", ack_seq=%"PRIu32", " + "rst=%d, window=%"PRIu16"\n", + ctdb_connection_to_string(state, &conn, false), + seq, ack_seq, rst, ntohs(window)); return; } -- 2.39.2 From c53c2f16c342475d2d40bb05ca9a2073ff5bee58 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 15 Aug 2022 09:43:58 +1000 Subject: [PATCH 10/16] ctdb-common: Add packet type detection to pcap-based capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current code will almost certainly generate ENOMSG for non-ethernet packets, even for ethernet packets when the "any" interface is used. pcap_datalink(3PCAP) says: Do NOT assume that the packets for a given capture or ``savefile`` will have any given link-layer header type, such as DLT_EN10MB for Ethernet. For example, the "any" device on Linux will have a link-layer header type of DLT_LINUX_SLL or DLT_LINUX_SLL2 even if all devices on the sys‐ tem at the time the "any" device is opened have some other data link type, such as DLT_EN10MB for Ethernet. So, pcap_datalink() must be used. Detect pcap packet types that are supported (currently only ethernet) in the open code. There is no use continuing if the read code can't parse packets. The pattern of using switch statements supports future addition of other packet types. 
Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 3bf20300ac5962e71069be3998ef7f0502045d24) --- ctdb/common/system_socket.c | 67 +++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index e8ccd301f6c..51f5d030906 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -971,6 +971,8 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) { char errbuf[PCAP_ERRBUF_SIZE]; pcap_t *pt; + int pcap_packet_type; + const char *t; int fd; pt = pcap_open_live(iface, 100, 0, 0, errbuf); @@ -981,9 +983,22 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) return -1; } *((pcap_t **)private_data) = pt; - fd = pcap_get_selectable_fd(pt); - DBG_DEBUG("Opened pcap capture for TCP tickle capture (fd=%d)\n", fd); + pcap_packet_type = pcap_datalink(pt); + switch (pcap_packet_type) { + case DLT_EN10MB: + t = "DLT_EN10MB"; + break; + default: + DBG_ERR("Unknown pcap packet type %d\n", pcap_packet_type); + pcap_close(pt); + return -1; + } + + fd = pcap_get_selectable_fd(pt); + DBG_DEBUG("Opened pcap capture for TCP tickle (type=%s, fd=%d)\n", + t, + fd); return fd; } @@ -1005,10 +1020,12 @@ int ctdb_sys_read_tcp_packet(int s, uint16_t *window) { int ret; - const struct ether_header *eth; struct pcap_pkthdr pkthdr; const u_char *buffer; pcap_t *pt = (pcap_t *)private_data; + int pcap_packet_type; + uint16_t ether_type; + size_t ll_hdr_len; buffer=pcap_next(pt, &pkthdr); if (buffer==NULL) { @@ -1018,36 +1035,50 @@ int ctdb_sys_read_tcp_packet(int s, ZERO_STRUCTP(src); ZERO_STRUCTP(dst); - /* Ethernet */ - eth = (const struct ether_header *)buffer; + pcap_packet_type = pcap_datalink(pt); + switch (pcap_packet_type) { + case DLT_EN10MB: { + const struct ether_header *eth = + (const struct ether_header *)buffer; + ether_type = ntohs(eth->ether_type); + ll_hdr_len = sizeof(struct 
ether_header); + break; + } + default: + DBG_DEBUG("Unknown pcap packet type %d\n", pcap_packet_type); + return EPROTONOSUPPORT; + } - /* we want either IPv4 or IPv6 */ - if (eth->ether_type == htons(ETHERTYPE_IP)) { - ret = tcp4_extract(buffer + sizeof(struct ether_header), - (size_t)(pkthdr.caplen - - sizeof(struct ether_header)), + switch (ether_type) { + case ETHERTYPE_IP: + ret = tcp4_extract(buffer + ll_hdr_len, + (size_t)pkthdr.caplen - ll_hdr_len, &src->ip, &dst->ip, ack_seq, seq, rst, window); - return ret; - - } else if (eth->ether_type == htons(ETHERTYPE_IP6)) { - ret = tcp6_extract(buffer + sizeof(struct ether_header), - (size_t)(pkthdr.caplen - - sizeof(struct ether_header)), + break; + case ETHERTYPE_IP6: + ret = tcp6_extract(buffer + ll_hdr_len, + (size_t)pkthdr.caplen - ll_hdr_len, &src->ip6, &dst->ip6, ack_seq, seq, rst, window); - return ret; + break; + case ETHERTYPE_ARP: + /* Silently ignore ARP packets */ + return EPROTO; + default: + DBG_DEBUG("Unknown ether type %"PRIu16"\n", ether_type); + return EPROTO; } - return ENOMSG; + return ret; } #endif /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */ -- 2.39.2 From e8254da0d20f6ab3bd0931774d227a183c4199e1 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 15 Aug 2022 10:51:47 +1000 Subject: [PATCH 11/16] ctdb-common: Support "any" interface for pcap-based capture This uses Linux cooked capture link-layer headers. See: https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL.html https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html The header type needs to be checked to ensure the protocol type (i.e. ether type, for the protocols we might be interested in) is meaningful. The size of the header needs to be known so it can be skipped, allowing the IP header to be found and parsed. It would be possible to define support for DLT_LINUX_SLL2 if it is missing. However, if a platform is missing support in the header file then it is almost certainly missing in the run-time library too. 
Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit e5541a7e0220a88d59d574d501626b0598050c52) --- ctdb/common/system_socket.c | 50 ++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index 51f5d030906..9f7ca07e098 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -967,12 +967,18 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data, #include <pcap.h> +/* + * Assume this exists if pcap.h exists - it has been around for a + * while + */ +#include <pcap/sll.h> + int ctdb_sys_open_capture_socket(const char *iface, void **private_data) { char errbuf[PCAP_ERRBUF_SIZE]; pcap_t *pt; int pcap_packet_type; - const char *t; + const char *t = NULL; int fd; pt = pcap_open_live(iface, 100, 0, 0, errbuf); @@ -989,6 +995,14 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) case DLT_EN10MB: t = "DLT_EN10MB"; break; + case DLT_LINUX_SLL: + t = "DLT_LINUX_SLL"; + break; +#ifdef DLT_LINUX_SLL2 + case DLT_LINUX_SLL2: + t = "DLT_LINUX_SLL2"; + break; +#endif /* DLT_LINUX_SLL2 */ default: DBG_ERR("Unknown pcap packet type %d\n", pcap_packet_type); pcap_close(pt); @@ -1044,6 +1058,40 @@ int ctdb_sys_read_tcp_packet(int s, ll_hdr_len = sizeof(struct ether_header); break; } + case DLT_LINUX_SLL: { + const struct sll_header *sll = + (const struct sll_header *)buffer; + uint16_t arphrd_type = ntohs(sll->sll_hatype); + switch (arphrd_type) { + case ARPHRD_ETHER: + break; + default: + DBG_DEBUG("SLL: Unknown arphrd_type %"PRIu16"\n", + arphrd_type); + return EPROTONOSUPPORT; + } + ether_type = ntohs(sll->sll_protocol); + ll_hdr_len = SLL_HDR_LEN; + break; + } +#ifdef DLT_LINUX_SLL2 + case DLT_LINUX_SLL2: { + const struct sll2_header *sll2 = + (const struct sll2_header *)buffer; + uint16_t arphrd_type = ntohs(sll2->sll2_hatype); + switch (arphrd_type) { + case ARPHRD_ETHER: + break; + default: + DBG_DEBUG("SLL2: Unknown
arphrd_type %"PRIu16"\n", + arphrd_type); + return EPROTONOSUPPORT; + } + ether_type = ntohs(sll2->sll2_protocol); + ll_hdr_len = SLL2_HDR_LEN; + break; + } +#endif /* DLT_LINUX_SLL2 */ default: DBG_DEBUG("Unknown pcap packet type %d\n", pcap_packet_type); return EPROTONOSUPPORT; -- 2.39.2 From ed12afeab223a2d9459bbc433647c087535d0fa7 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 15 Aug 2022 10:52:27 +1000 Subject: [PATCH 12/16] ctdb-common: Support IB in pcap-based capture Add simple support for IPoIB via DLT_LINUX_SLL and DLT_LINUX_SLL2. This seems to work, even when an IB interface is specified. If this is later found to be insufficient, support for DLT_IPOIB can be implemented. See https://www.tcpdump.org/linktypes.html for a starting point. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 9f7d69a05b6114efe18bf4c86ca8de7789e9a96d) --- ctdb/common/system_socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index 9f7ca07e098..06dc558eb22 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -1064,6 +1064,7 @@ int ctdb_sys_read_tcp_packet(int s, uint16_t arphrd_type = ntohs(sll->sll_hatype); switch (arphrd_type) { case ARPHRD_ETHER: + case ARPHRD_INFINIBAND: break; default: DBG_DEBUG("SLL: Unknown arphrd_type %"PRIu16"\n", @@ -1081,6 +1082,7 @@ int ctdb_sys_read_tcp_packet(int s, uint16_t arphrd_type = ntohs(sll2->sll2_hatype); switch (arphrd_type) { case ARPHRD_ETHER: + case ARPHRD_INFINIBAND: break; default: DBG_DEBUG("SLL2: Unknown arphrd_type %"PRIu16"\n", -- 2.39.2 From 843e2ddec48316c80a72d999c25008ca5d23b5e4 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 6 Sep 2022 11:59:11 +1000 Subject: [PATCH 13/16] ctdb-scripts: Add debugging variable CTDB_KILLTCP_DEBUGLEVEL To debug ctdb_killtcp failures, add CTDB_KILLTCP_DEBUGLEVEL=DEBUG to script.options. 
Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Tue Sep 20 11:42:16 UTC 2022 on sn-devel-184 (cherry picked from commit d9dda4b7af284ecbee4d04a89bd16fc0098e2931) --- ctdb/config/functions | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ctdb/config/functions b/ctdb/config/functions index 82ed0957aa0..725993ca12f 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -452,8 +452,14 @@ kill_tcp_connections () return fi + if [ -n "$CTDB_KILLTCP_DEBUGLEVEL" ]; then + _debuglevel="$CTDB_KILLTCP_DEBUGLEVEL" + else + _debuglevel="$CTDB_DEBUGLEVEL" + fi echo "$_connections" | \ - "${CTDB_HELPER_BINDIR}/ctdb_killtcp" "$_iface" || { + CTDB_DEBUGLEVEL="$_debuglevel" \ + "${CTDB_HELPER_BINDIR}/ctdb_killtcp" "$_iface" || { echo "Failed to kill TCP connections" return } -- 2.39.2 From ee01d06a8e46f82cdebf92eb068d1f2fe0dd3136 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 15 Aug 2023 10:43:57 +1000 Subject: [PATCH 14/16] ctdb-common: Improve error handling Factor out a failure label, which will get more use in subsequent commits, and only set private_data when success is certain. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15451 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit d87041d8968e91db9d257445321b85693303f95e) --- ctdb/common/system_socket.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index 06dc558eb22..3a7b6eb41de 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -988,7 +988,6 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) errbuf); return -1; } - *((pcap_t **)private_data) = pt; pcap_packet_type = pcap_datalink(pt); switch (pcap_packet_type) { @@ -1005,8 +1004,7 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) #endif /* DLT_LINUX_SLL2 */ default: DBG_ERR("Unknown pcap packet type %d\n", pcap_packet_type); - pcap_close(pt); - return -1; + goto fail; } fd = pcap_get_selectable_fd(pt); @@ -1014,7 +1012,12 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) t, fd); + *((pcap_t **)private_data) = pt; return fd; + +fail: + pcap_close(pt); + return -1; } int ctdb_sys_close_capture_socket(void *private_data) -- 2.39.2 From bad110f152cb07ec8e543d6b34483aeaa4b39eaa Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 15 Aug 2023 10:57:59 +1000 Subject: [PATCH 15/16] ctdb-common: Replace pcap_open_live() by lower level calls A subsequent commit will insert an additional call before pcap_activate(). This sequence of calls is taken from the source for pcap_open_live(), so there should be no change in behaviour. Given the defaults set by pcap_create_common(), it would be possible to omit the calls to pcap_set_promisc() and pcap_set_timeout(). However, those defaults don't seem to be well documented, so continue to explicitly set everything that was set before. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15451 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit ffc2ae616d8fab7528fbdfd8c6b94c5b9a0e3a7c) --- ctdb/common/system_socket.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index 3a7b6eb41de..16463af5a33 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -980,14 +980,38 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) int pcap_packet_type; const char *t = NULL; int fd; + int ret; - pt = pcap_open_live(iface, 100, 0, 0, errbuf); + pt = pcap_create(iface, errbuf); if (pt == NULL) { DBG_ERR("Failed to open pcap capture device %s (%s)\n", iface, errbuf); return -1; } + /* + * pcap isn't very clear about defaults... + */ + ret = pcap_set_snaplen(pt, 100); + if (ret < 0) { + DBG_ERR("Failed to set snaplen for pcap capture\n"); + goto fail; + } + ret = pcap_set_promisc(pt, 0); + if (ret < 0) { + DBG_ERR("Failed to unset promiscuous mode for pcap capture\n"); + goto fail; + } + ret = pcap_set_timeout(pt, 0); + if (ret < 0) { + DBG_ERR("Failed to set timeout for pcap capture\n"); + goto fail; + } + ret = pcap_activate(pt); + if (ret < 0) { + DBG_ERR("Failed to activate pcap capture\n"); + goto fail; + } pcap_packet_type = pcap_datalink(pt); switch (pcap_packet_type) { -- 2.39.2 From 42e2af82728488ba35d5da84581311eed132eb8d Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 15 Aug 2023 12:34:20 +1000 Subject: [PATCH 16/16] ctdb-common: Set immediate mode for pcap capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a problem where ctdb_killtcp (almost always) fails to capture packets with --enable-pcap and libpcap ≥ 1.9.1. 
The problem is due to a gradual change in libpcap semantics when using pcap_get_selectable_fd(3PCAP) to get a file descriptor and then using that file descriptor in non-blocking mode. pcap_set_immediate_mode(3PCAP) says: pcap_set_immediate_mode() sets whether immediate mode should be set on a capture handle when the handle is activated. In immediate mode, packets are always delivered as soon as they arrive, with no buffering. and On Linux, with previous releases of libpcap, capture devices are always in immediate mode; however, in 1.5.0 and later, they are, by default, not in immediate mode, so if pcap_set_immediate_mode() is available, it should be used. However, it wasn't until libpcap commit 2ade7676101366983bd4f86bc039ffd25da8c126 (before libpcap 1.9.1) that it became a requirement to use pcap_set_immediate_mode(), even with a timeout of 0. More explanation in this libpcap issue comment: https://github.com/the-tcpdump-group/libpcap/issues/860#issuecomment-541204548 Do a configure check for pcap_set_immediate_mode() even though it has existed for 10 years. It is easy enough. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15451 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Tue Aug 15 10:53:52 UTC 2023 on atb-devel-224 (cherry picked from commit dc7b48c404337891b5105df4d6751cf549a533eb) --- ctdb/common/system_socket.c | 7 +++++++ ctdb/wscript | 1 + 2 files changed, 8 insertions(+) diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c index 16463af5a33..273b9c3400e 100644 --- a/ctdb/common/system_socket.c +++ b/ctdb/common/system_socket.c @@ -1007,6 +1007,13 @@ int ctdb_sys_open_capture_socket(const char *iface, void **private_data) DBG_ERR("Failed to set timeout for pcap capture\n"); goto fail; } +#ifdef HAVE_PCAP_SET_IMMEDIATE_MODE + ret = pcap_set_immediate_mode(pt, 1); + if (ret < 0) { + DBG_ERR("Failed to set immediate mode for pcap capture\n"); + goto fail; + } +#endif ret = pcap_activate(pt); if (ret < 0) { DBG_ERR("Failed to activate pcap capture\n"); diff --git a/ctdb/wscript b/ctdb/wscript index 88e42439f5a..a7b04541014 100644 --- a/ctdb/wscript +++ b/ctdb/wscript @@ -221,6 +221,7 @@ def configure(conf): if not conf.CHECK_FUNCS_IN('pcap_open_live', 'pcap', headers='pcap.h'): Logs.error('Need libpcap') sys.exit(1) + conf.CHECK_FUNCS_IN('pcap_set_immediate_mode', 'pcap', headers='pcap.h') if not conf.CHECK_FUNCS_IN('backtrace backtrace_symbols', 'execinfo', checklibc=True, headers='execinfo.h'): -- 2.39.2