From 98cda73a28d2b8685ecfd565e36d72acaf12f282 Mon Sep 17 00:00:00 2001 From: Andrew Bartlett Date: Mon, 17 Oct 2022 16:08:42 +1300 Subject: [PATCH 1/6] lib/tsocket: Add tests for loop on EAGAIN BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202 Pair-Programmed-With: Stefan Metzmacher Signed-off-by: Andrew Bartlett Signed-off-by: Stefan Metzmacher (cherry picked from commit f0fb8b9508346aed50528216fd959a9b1a941409) --- lib/tsocket/tests/socketpair_tcp.c | 89 +++++ lib/tsocket/tests/socketpair_tcp.h | 29 ++ lib/tsocket/tests/test_tstream.c | 517 +++++++++++++++++++++++++++++ lib/tsocket/wscript_build | 5 + selftest/knownfail.d/tstream | 6 + selftest/tests.py | 3 + 6 files changed, 649 insertions(+) create mode 100644 lib/tsocket/tests/socketpair_tcp.c create mode 100644 lib/tsocket/tests/socketpair_tcp.h create mode 100644 lib/tsocket/tests/test_tstream.c create mode 100644 selftest/knownfail.d/tstream diff --git a/lib/tsocket/tests/socketpair_tcp.c b/lib/tsocket/tests/socketpair_tcp.c new file mode 100644 index 00000000000..251b8bc0212 --- /dev/null +++ b/lib/tsocket/tests/socketpair_tcp.c @@ -0,0 +1,89 @@ +/* + Unix SMB/CIFS implementation. + Samba utility functions + Copyright (C) Andrew Tridgell 1992-1998 + Copyright (C) Tim Potter 2000-2001 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "includes.h" +#include "system/network.h" +#include "socketpair_tcp.h" + +/******************************************************************* +this is like socketpair but uses tcp. It is used by the Samba +regression test code +The function guarantees that nobody else can attach to the socket, +or if they do that this function fails and the socket gets closed +returns 0 on success, -1 on failure +the resulting file descriptors are symmetrical + ******************************************************************/ +int socketpair_tcp(int fd[2]) +{ + int listener; + struct sockaddr_in sock; + struct sockaddr_in sock2; + socklen_t socklen = sizeof(sock); + int connect_done = 0; + + fd[0] = fd[1] = listener = -1; + + memset(&sock, 0, sizeof(sock)); + + if ((listener = socket(PF_INET, SOCK_STREAM, 0)) == -1) goto failed; + + memset(&sock2, 0, sizeof(sock2)); +#ifdef HAVE_SOCK_SIN_LEN + sock2.sin_len = sizeof(sock2); +#endif + sock2.sin_family = PF_INET; + + if (bind(listener, (struct sockaddr *)&sock2, sizeof(sock2)) != 0) goto failed; + + if (listen(listener, 1) != 0) goto failed; + + if (getsockname(listener, (struct sockaddr *)&sock, &socklen) != 0) goto failed; + + if ((fd[1] = socket(PF_INET, SOCK_STREAM, 0)) == -1) goto failed; + + set_blocking(fd[1], 0); + + sock.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + if (connect(fd[1], (struct sockaddr *)&sock, socklen) == -1) { + if (errno != EINPROGRESS) goto failed; + } else { + connect_done = 1; + } + + if ((fd[0] = accept(listener, (struct sockaddr *)&sock, &socklen)) == -1) goto failed; + + if (connect_done == 0) { + if (connect(fd[1], (struct sockaddr *)&sock, socklen) != 0 + && errno != EISCONN) goto failed; + } + close(listener); + + set_blocking(fd[1], 1); + + /* all OK! 
*/ + return 0; + + failed: + if (fd[0] != -1) close(fd[0]); + if (fd[1] != -1) close(fd[1]); + if (listener != -1) close(listener); + return -1; +} diff --git a/lib/tsocket/tests/socketpair_tcp.h b/lib/tsocket/tests/socketpair_tcp.h new file mode 100644 index 00000000000..dbee4efec70 --- /dev/null +++ b/lib/tsocket/tests/socketpair_tcp.h @@ -0,0 +1,29 @@ +/* + Unix SMB/CIFS implementation. + Samba utility functions + Copyright (C) Andrew Tridgell 1992-1998 + Copyright (C) Tim Potter 2000-2001 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/******************************************************************* +this is like socketpair but uses tcp. It is used by the Samba +regression test code +The function guarantees that nobody else can attach to the socket, +or if they do that this function fails and the socket gets closed +returns 0 on success, -1 on failure +the resulting file descriptors are symmetrical + ******************************************************************/ +int socketpair_tcp(int fd[2]); diff --git a/lib/tsocket/tests/test_tstream.c b/lib/tsocket/tests/test_tstream.c new file mode 100644 index 00000000000..a920e671cda --- /dev/null +++ b/lib/tsocket/tests/test_tstream.c @@ -0,0 +1,517 @@ +/* + * Unix SMB/CIFS implementation. 
+ * + * Copyright (C) 2022 Andrew Bartlett + * Copyright (C) 2021 Andreas Schneider + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include "includes.h" +#include "system/network.h" +#include "socketpair_tcp.h" +#include "tsocket.h" + +enum socket_pair_selector { + SOCKET_SERVER = 0, + SOCKET_CLIENT = 1, +}; + +struct socket_pair { + struct tevent_context *ev; + int socket_server; + int socket_client; + + /* for tstream tests */ + int rc; + int sys_errno; + int expected_errno; + struct timeval endtime; + size_t max_loops; + size_t num_loops; +}; + +/* If this is too large, we get EPIPE rather than EAGAIN */ +static const uint8_t TEST_STRING[128] = { 0 }; + +static int sigpipe_setup(void **state) +{ + BlockSignals(true, SIGPIPE); + return 0; +} + +static int setup_socketpair_tcp_context(void **state) +{ + int fd[2]; + struct socket_pair *sp = talloc_zero(NULL, struct socket_pair); + assert_non_null(sp); + + /* Set up a socketpair over TCP to test with */ + assert_return_code(socketpair_tcp(fd), errno); + + sp->socket_server = fd[SOCKET_SERVER]; + sp->socket_client = fd[SOCKET_CLIENT]; + + sp->ev = tevent_context_init(sp); + assert_non_null(sp->ev); + + *state = sp; + return 0; +} + +static int setup_socketpair_context(void **state) +{ + int fd[2]; + struct socket_pair *sp = talloc_zero(NULL, struct socket_pair); + assert_non_null(sp); + 
+ /* Set up a socketpair over TCP to test with */ + assert_return_code(socketpair(AF_UNIX, SOCK_STREAM, 0, fd), errno); + + sp->socket_server = fd[SOCKET_SERVER]; + sp->socket_client = fd[SOCKET_CLIENT]; + + sp->ev = tevent_context_init(sp); + assert_non_null(sp->ev); + + *state = sp; + return 0; +} + +static int teardown_socketpair_context(void **state) +{ + struct socket_pair *sp = *state; + struct socket_pair sp_save = *sp; + + TALLOC_FREE(sp); + + /* + * Close these after the TALLOC_FREE() to allow clean shutdown + * of epoll() in tstream + */ + if (sp_save.socket_client != -1) { + close(sp_save.socket_client); + } + if (sp_save.socket_server != -1) { + close(sp_save.socket_server); + } + return 0; +} + + +/* Test socket behaviour */ +static void test_simple_socketpair(void **state) { + + struct socket_pair *sp = *state; + + char buf[sizeof(TEST_STRING)]; + + assert_int_equal(write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)), + sizeof(TEST_STRING)); + assert_int_equal(read(sp->socket_client, buf, sizeof(buf)), + sizeof(buf)); + + +} + +/* Test socket behaviour */ +static void test_read_client_after_close_server_socket(void **state) { + + struct socket_pair *sp = *state; + int rc; + char buf[sizeof(TEST_STRING)]; + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_return_code(rc, errno); + assert_int_equal(rc, sizeof(TEST_STRING)); + + assert_return_code(close(sp->socket_server), 0); + + rc = read(sp->socket_client, buf, sizeof(buf)); + + assert_return_code(rc, errno); + assert_int_equal(rc, sizeof(buf)); +} + +static void test_write_server_after_close_client_socket(void **state) { + + struct socket_pair *sp = *state; + int rc; + + assert_return_code(close(sp->socket_client), 0); + sp->socket_client = -1; + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_return_code(rc, errno); + assert_int_equal(rc, sizeof(TEST_STRING)); +} + +static void test_fill_socket(int sock) +{ + size_t num_busy = 0; + int 
rc; + + while (true) { + rc = write(sock, TEST_STRING, sizeof(TEST_STRING)); + if (rc == -1 && errno == EAGAIN) { + /* + * This makes sure we write until we get a whole second + * only with EAGAIN every 50 ms (20 times) + * + * Otherwise the tests are not reliable... + */ + num_busy++; + if (num_busy > 20) { + break; + } + smb_msleep(50); + continue; + } + /* try again next time */ + num_busy = 0; + } + + assert_int_equal(rc, -1); + assert_int_equal(errno, EAGAIN); +} + +static void test_big_write_server(void **state) { + + struct socket_pair *sp = *state; + int rc; + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_return_code(rc, errno); + assert_int_equal(rc, sizeof(TEST_STRING)); + + rc = set_blocking(sp->socket_server, 0); + assert_return_code(rc, errno); + + test_fill_socket(sp->socket_server); +} + +static void test_big_write_server_close_write(void **state) { + + struct socket_pair *sp = *state; + int rc; + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_return_code(rc, errno); + assert_int_equal(rc, sizeof(TEST_STRING)); + + rc = set_blocking(sp->socket_server, 0); + assert_return_code(rc, errno); + + test_fill_socket(sp->socket_server); + + assert_return_code(close(sp->socket_client), 0); + sp->socket_client = -1; + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_int_equal(errno, ECONNRESET); + +} + +static void test_big_write_server_shutdown_wr_write(void **state) { + + struct socket_pair *sp = *state; + int rc; + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_return_code(rc, errno); + assert_int_equal(rc, sizeof(TEST_STRING)); + + rc = set_blocking(sp->socket_server, 0); + assert_return_code(rc, errno); + + test_fill_socket(sp->socket_server); + + assert_return_code(shutdown(sp->socket_client, SHUT_WR), 0); + sp->socket_client = -1; + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_int_equal(rc, -1); + 
assert_int_equal(errno, EAGAIN); +} + +static void test_big_write_server_shutdown_rd_write(void **state) { + + struct socket_pair *sp = *state; + int rc; + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_return_code(rc, errno); + assert_int_equal(rc, sizeof(TEST_STRING)); + + rc = set_blocking(sp->socket_server, 0); + assert_return_code(rc, errno); + + test_fill_socket(sp->socket_server); + + assert_return_code(shutdown(sp->socket_client, SHUT_RD), 0); + sp->socket_client = -1; + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_int_equal(rc, -1); + assert_int_equal(errno, EAGAIN); +} + +static void test_call_writev_done(struct tevent_req *subreq) +{ + struct socket_pair *sp = + tevent_req_callback_data(subreq, + struct socket_pair); + int rc; + + rc = tstream_writev_recv(subreq, &sp->sys_errno); + TALLOC_FREE(subreq); + + sp->rc = rc; +} + +static void test_tstream_server_spin_client_shutdown(struct socket_pair *sp) +{ + int rc; + + rc = shutdown(sp->socket_client, SHUT_WR); + assert_return_code(rc, errno); + /* + * It should only take a few additional loop to realise that this socket is + * in CLOSE_WAIT + */ + sp->max_loops = sp->num_loops + 2; + sp->expected_errno = ECONNRESET; +} + +static void test_tstream_server_spin_client_write(struct socket_pair *sp) +{ + int rc; + int timeout = 5000; + + sp->endtime = timeval_current_ofs_msec(timeout); + + rc = write(sp->socket_client, TEST_STRING, sizeof(TEST_STRING)); + assert_return_code(rc, errno); + sp->expected_errno = ETIMEDOUT; +} + +static void test_tstream_server_spin_client_tcp_user_timeout(struct socket_pair *sp) +{ + int rc; + int timeout = 5000; + + rc = setsockopt(sp->socket_server, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout, sizeof(timeout)); + assert_return_code(rc, errno); + + rc = write(sp->socket_client, TEST_STRING, sizeof(TEST_STRING)); + assert_return_code(rc, errno); + sp->expected_errno = ETIMEDOUT; + sp->max_loops = 15; +} + +static void 
test_tstream_server_spin_client_both_timer(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval current_time, + void *private_data) +{ + struct socket_pair *sp = + talloc_get_type_abort(private_data, + struct socket_pair); + + test_tstream_server_spin_client_shutdown(sp); +} + +static void test_tstream_server_spin_client_both(struct socket_pair *sp) +{ + struct tevent_timer *te = NULL; + struct timeval endtime; + + test_tstream_server_spin_client_write(sp); + + endtime = timeval_current_ofs_msec(2500); + + te = tevent_add_timer(sp->ev, + sp, + endtime, + test_tstream_server_spin_client_both_timer, + sp); + assert_non_null(te); + sp->expected_errno = ENXIO; +} + +static void test_tstream_server_spin(struct socket_pair *sp, + void (*client_fn)(struct socket_pair *sp)) +{ + struct tstream_context *stream = NULL; + struct tevent_req *req = NULL; + struct iovec iov; + int rc; + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_return_code(rc, errno); + assert_int_equal(rc, sizeof(TEST_STRING)); + + rc = set_blocking(sp->socket_server, 0); + assert_return_code(rc, errno); + + test_fill_socket(sp->socket_server); + + /* + * by default we don't expect more then 2 loop iterations + * for a timeout of 5 seconds. 
+ */ + sp->max_loops = 10; + + client_fn(sp); + + rc = write(sp->socket_server, TEST_STRING, sizeof(TEST_STRING)); + assert_int_equal(rc, -1); + assert_int_equal(errno, EAGAIN); + + /* OK, so we now know the socket is in CLOSE_WAIT */ + + rc = tstream_bsd_existing_socket(sp->ev, sp->socket_server, &stream); + assert_return_code(rc, errno); + sp->socket_server = -1; + + iov.iov_base = discard_const_p(char, TEST_STRING); + iov.iov_len = sizeof(TEST_STRING); + + req = tstream_writev_send(stream, sp->ev, stream, &iov, 1); + assert_non_null(req); + if (!timeval_is_zero(&sp->endtime)) { + assert_true(tevent_req_set_endtime(req, sp->ev, sp->endtime)); + } + tevent_req_set_callback(req, test_call_writev_done, sp); + + while (tevent_req_is_in_progress(req)) { + if (sp->num_loops >= sp->max_loops) { + assert_int_not_equal(sp->num_loops, sp->max_loops); + assert_int_equal(sp->num_loops, sp->max_loops); + } + sp->num_loops += 1; + + rc = tevent_loop_once(sp->ev); + assert_int_equal(rc, 0); + } + + assert_int_equal(sp->rc, -1); + assert_int_equal(sp->sys_errno, sp->expected_errno); + return; +} + +/* + * We need two names to run this with the two different setup + * routines + */ +static void test_tstream_disconnected_tcp_client_spin(void **state) +{ + struct socket_pair *sp = *state; + test_tstream_server_spin(sp, test_tstream_server_spin_client_shutdown); +} + +static void test_tstream_disconnected_unix_client_spin(void **state) +{ + struct socket_pair *sp = *state; + test_tstream_server_spin(sp, test_tstream_server_spin_client_shutdown); +} + +static void test_tstream_more_tcp_client_spin(void **state) +{ + struct socket_pair *sp = *state; + test_tstream_server_spin(sp, test_tstream_server_spin_client_write); +} + +static void test_tstream_more_unix_client_spin(void **state) +{ + struct socket_pair *sp = *state; + test_tstream_server_spin(sp, test_tstream_server_spin_client_write); +} + +static void test_tstream_more_disconnect_tcp_client_spin(void **state) +{ + struct 
socket_pair *sp = *state; + test_tstream_server_spin(sp, test_tstream_server_spin_client_both); +} + +static void test_tstream_more_disconnect_unix_client_spin(void **state) +{ + struct socket_pair *sp = *state; + test_tstream_server_spin(sp, test_tstream_server_spin_client_both); +} + +static void test_tstream_more_tcp_user_timeout_spin(void **state) +{ + struct socket_pair *sp = *state; + if (socket_wrapper_enabled()) { + skip(); + } + test_tstream_server_spin(sp, test_tstream_server_spin_client_tcp_user_timeout); +} + +int main(void) { + const struct CMUnitTest tests[] = { + cmocka_unit_test_setup_teardown(test_simple_socketpair, + setup_socketpair_tcp_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_read_client_after_close_server_socket, + setup_socketpair_tcp_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_write_server_after_close_client_socket, + setup_socketpair_tcp_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_big_write_server, + setup_socketpair_tcp_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_big_write_server_close_write, + setup_socketpair_tcp_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_big_write_server_shutdown_wr_write, + setup_socketpair_tcp_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_big_write_server_shutdown_rd_write, + setup_socketpair_tcp_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_tstream_disconnected_tcp_client_spin, + setup_socketpair_tcp_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_tstream_disconnected_unix_client_spin, + setup_socketpair_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_tstream_more_tcp_client_spin, + setup_socketpair_tcp_context, + teardown_socketpair_context), + 
cmocka_unit_test_setup_teardown(test_tstream_more_unix_client_spin, + setup_socketpair_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_tstream_more_disconnect_tcp_client_spin, + setup_socketpair_tcp_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_tstream_more_disconnect_unix_client_spin, + setup_socketpair_context, + teardown_socketpair_context), + cmocka_unit_test_setup_teardown(test_tstream_more_tcp_user_timeout_spin, + setup_socketpair_tcp_context, + teardown_socketpair_context), + }; + + cmocka_set_message_output(CM_OUTPUT_SUBUNIT); + + return cmocka_run_group_tests(tests, sigpipe_setup, NULL); +} diff --git a/lib/tsocket/wscript_build b/lib/tsocket/wscript_build index 420daff8174..f419e71d830 100644 --- a/lib/tsocket/wscript_build +++ b/lib/tsocket/wscript_build @@ -7,3 +7,8 @@ bld.SAMBA_SUBSYSTEM('LIBTSOCKET', public_headers='tsocket.h tsocket_internal.h', ) +bld.SAMBA_BINARY('test_tstream', + source='tests/test_tstream.c tests/socketpair_tcp.c', + deps='cmocka replace LIBTSOCKET', + local_include=False, + for_selftest=True) diff --git a/selftest/knownfail.d/tstream b/selftest/knownfail.d/tstream new file mode 100644 index 00000000000..4d67b2c8b61 --- /dev/null +++ b/selftest/knownfail.d/tstream @@ -0,0 +1,6 @@ +^samba.unittests.tsocket_tstream.test_tstream_disconnected_tcp_client_spin +^samba.unittests.tsocket_tstream.test_tstream_disconnected_unix_client_spin +^samba.unittests.tsocket_tstream.test_tstream_more_tcp_client_spin +^samba.unittests.tsocket_tstream.test_tstream_more_unix_client_spin +^samba.unittests.tsocket_tstream.test_tstream_more_disconnect_tcp_client_spin +^samba.unittests.tsocket_tstream.test_tstream_more_disconnect_unix_client_spin diff --git a/selftest/tests.py b/selftest/tests.py index 10648b19155..a1290044b8a 100644 --- a/selftest/tests.py +++ b/selftest/tests.py @@ -411,3 +411,6 @@ plantestsuite("samba.unittests.test_oLschema2ldif", "none", [os.path.join(bindir(), 
"default/source4/utils/oLschema2ldif/test_oLschema2ldif")]) plantestsuite("samba.unittests.mdsparser_es", "none", [os.path.join(bindir(), "default/source3/test_mdsparser_es")] + [configuration]) +plantestsuite("samba.unittests.tsocket_tstream", "none", + [os.path.join(bindir(), "default/lib/tsocket/test_tstream")], + environ={'SOCKET_WRAPPER_DIR': ''}) -- 2.25.1 From 1d5becfcc0733bd84221cd24672c69c5abe75458 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 13 Oct 2022 10:39:59 +0200 Subject: [PATCH 2/6] lib/tsocket: split out tsocket_bsd_error() from tsocket_bsd_pending() This will be used on its own soon. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202 Signed-off-by: Stefan Metzmacher Reviewed-by: Ralph Boehme (cherry picked from commit 9950efd83e1a4b5e711f1d36fefa8a5d5e8b2410) --- lib/tsocket/tsocket_bsd.c | 42 +++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/lib/tsocket/tsocket_bsd.c b/lib/tsocket/tsocket_bsd.c index 1627051f3ec..52ec3d164ef 100644 --- a/lib/tsocket/tsocket_bsd.c +++ b/lib/tsocket/tsocket_bsd.c @@ -170,11 +170,31 @@ static ssize_t tsocket_bsd_netlink_pending(int fd) } #endif +static ssize_t tsocket_bsd_error(int fd) +{ + int ret, error = 0; + socklen_t len = sizeof(error); + + /* + * if no data is available check if the socket is in error state. For + * dgram sockets it's the way to return ICMP error messages of + * connected sockets to the caller. + */ + ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len); + if (ret == -1) { + return ret; + } + if (error != 0) { + errno = error; + return -1; + } + return 0; +} + static ssize_t tsocket_bsd_pending(int fd) { - int ret, error; + int ret; int value = 0; - socklen_t len; ret = ioctl(fd, FIONREAD, &value); if (ret == -1) { @@ -191,23 +211,7 @@ static ssize_t tsocket_bsd_pending(int fd) return value; } - error = 0; - len = sizeof(error); - - /* - * if no data is available check if the socket is in error state. 
For - * dgram sockets it's the way to return ICMP error messages of - * connected sockets to the caller. - */ - ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len); - if (ret == -1) { - return ret; - } - if (error != 0) { - errno = error; - return -1; - } - return 0; + return tsocket_bsd_error(fd); } static const struct tsocket_address_ops tsocket_address_bsd_ops; -- 2.25.1 From 0062757ce5c8bce11dafa9b626cdf5400fcae0bd Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 13 Oct 2022 14:46:14 +0200 Subject: [PATCH 3/6] lib/tsocket: check for errors indicated by poll() before getsockopt(fd, SOL_SOCKET, SO_ERROR) This also returns an error if we got TCP_FIN from the peer, which is only reported by an explicit POLLRDHUP check. Also on FreeBSD getsockopt(fd, SOL_SOCKET, SO_ERROR) fetches and resets the error, so a 2nd call no longer returns an error. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202 Signed-off-by: Stefan Metzmacher Reviewed-by: Ralph Boehme (cherry picked from commit 29a65da63d730ecead1e7d4a81a76dd1c8c179ea) --- lib/tsocket/tsocket_bsd.c | 80 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/lib/tsocket/tsocket_bsd.c b/lib/tsocket/tsocket_bsd.c index 52ec3d164ef..c05705303d6 100644 --- a/lib/tsocket/tsocket_bsd.c +++ b/lib/tsocket/tsocket_bsd.c @@ -24,8 +24,10 @@ #include "replace.h" #include "system/filesys.h" #include "system/network.h" +#include "system/select.h" #include "tsocket.h" #include "tsocket_internal.h" +#include "lib/util/select.h" #include "lib/util/iov_buf.h" #include "lib/util/blocking.h" #include "lib/util/util_net.h" @@ -170,7 +172,42 @@ static ssize_t tsocket_bsd_netlink_pending(int fd) } #endif -static ssize_t tsocket_bsd_error(int fd) +static int tsocket_bsd_poll_error(int fd) +{ + struct pollfd pfd = { + .fd = fd, +#ifdef POLLRDHUP + .events = POLLRDHUP, /* POLLERR and POLLHUP are not needed */ +#endif + }; + int ret; + + errno = 0; + ret = sys_poll_intr(&pfd, 1, 
0); + if (ret == 0) { + return 0; + } + if (ret != 1) { + return POLLNVAL; + } + + if (pfd.revents & POLLERR) { + return POLLERR; + } + if (pfd.revents & POLLHUP) { + return POLLHUP; + } +#ifdef POLLRDHUP + if (pfd.revents & POLLRDHUP) { + return POLLRDHUP; + } +#endif + + /* should never be reached! */ + return POLLNVAL; +} + +static int tsocket_bsd_sock_error(int fd) { int ret, error = 0; socklen_t len = sizeof(error); @@ -191,6 +228,47 @@ static ssize_t tsocket_bsd_error(int fd) return 0; } +static int tsocket_bsd_error(int fd) +{ + int ret; + int poll_error = 0; + + poll_error = tsocket_bsd_poll_error(fd); + if (poll_error == 0) { + return 0; + } + +#ifdef POLLRDHUP + if (poll_error == POLLRDHUP) { + errno = ECONNRESET; + return -1; + } +#endif + + if (poll_error == POLLHUP) { + errno = EPIPE; + return -1; + } + + /* + * POLLERR and POLLNVAL fallback to + * getsockopt(fd, SOL_SOCKET, SO_ERROR) + * and force EPIPE as fallback. + */ + + errno = 0; + ret = tsocket_bsd_sock_error(fd); + if (ret == 0) { + errno = EPIPE; + } + + if (errno == 0) { + errno = EPIPE; + } + + return -1; +} + static ssize_t tsocket_bsd_pending(int fd) { int ret; -- 2.25.1 From 808a0f0ba43c6ccd7cddaa9b962005210502a924 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 13 Oct 2022 16:23:03 +0200 Subject: [PATCH 4/6] lib/tsocket: remember the first error as tstream_bsd->error If we found that the connection is broken, there's no point in trying to use it anymore, so just return the first error we detected. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202 Signed-off-by: Stefan Metzmacher Reviewed-by: Ralph Boehme (cherry picked from commit 4c7e2b9b60de5d02bb3f69effe7eddbf466a6155) --- lib/tsocket/tsocket_bsd.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/lib/tsocket/tsocket_bsd.c b/lib/tsocket/tsocket_bsd.c index c05705303d6..454173930a9 100644 --- a/lib/tsocket/tsocket_bsd.c +++ b/lib/tsocket/tsocket_bsd.c @@ -1640,6 +1640,7 @@ int _tdgram_unix_socket(const struct tsocket_address *local, struct tstream_bsd { int fd; + int error; void *event_ptr; struct tevent_fd *fde; @@ -1817,7 +1818,19 @@ static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream) return -1; } + if (bsds->error != 0) { + errno = bsds->error; + return -1; + } + ret = tsocket_bsd_pending(bsds->fd); + if (ret == -1) { + /* + * remember the error and don't + * allow further requests + */ + bsds->error = errno; + } return ret; } @@ -1925,9 +1938,15 @@ static void tstream_bsd_readv_handler(void *private_data) int _count; bool ok, retry; + if (bsds->error != 0) { + tevent_req_error(req, bsds->error); + return; + } + ret = readv(bsds->fd, state->vector, state->count); if (ret == 0) { /* propagate end of file */ + bsds->error = EPIPE; tevent_req_error(req, EPIPE); return; } @@ -1936,6 +1955,13 @@ static void tstream_bsd_readv_handler(void *private_data) /* retry later */ return; } + if (err != 0) { + /* + * remember the error and don't + * allow further requests + */ + bsds->error = err; + } if (tevent_req_error(req, err)) { return; } @@ -2068,9 +2094,15 @@ static void tstream_bsd_writev_handler(void *private_data) int _count; bool ok, retry; + if (bsds->error != 0) { + tevent_req_error(req, bsds->error); + return; + } + ret = writev(bsds->fd, state->vector, state->count); if (ret == 0) { /* propagate end of file */ + bsds->error = EPIPE; tevent_req_error(req, EPIPE); return; } @@ -2079,6 +2111,13 @@ static void tstream_bsd_writev_handler(void 
*private_data) /* retry later */ return; } + if (err != 0) { + /* + * remember the error and don't + * allow further requests + */ + bsds->error = err; + } if (tevent_req_error(req, err)) { return; } -- 2.25.1 From 10e06eace186a1d69e8b834e1cd551a48109da9d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 12 Oct 2022 17:26:16 +0200 Subject: [PATCH 5/6] lib/tsocket: avoid endless cpu-spinning in tstream_bsd_fde_handler() There were some reports that strace output an LDAP server socket is in CLOSE_WAIT state, returning EAGAIN for writev over and over (after a call to epoll() each time). In the tstream_bsd code the problem happens when we have a pending writev_send, while there's no readv_send pending. In that case we still ask for TEVENT_FD_READ in order to notice connection errors early, so we try to call writev even if the socket doesn't report TEVENT_FD_WRITE. And there are situations where we do that over and over again. It happens like this with a Linux kernel: tcp_fin() has this: struct tcp_sock *tp = tcp_sk(sk); inet_csk_schedule_ack(sk); sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(sk, SOCK_DONE); switch (sk->sk_state) { case TCP_SYN_RECV: case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); inet_csk_enter_pingpong_mode(sk); break; It means RCV_SHUTDOWN gets set as well as TCP_CLOSE_WAIT, but sk->sk_err is not changed to indicate an error. tcp_sendmsg_locked has this: ... err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; while (msg_data_left(msg)) { int copy = 0; skb = tcp_write_queue_tail(sk); if (skb) copy = size_goal - skb->len; if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_space; ... 
wait_for_space: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); if (copied) tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, size_goal); err = sk_stream_wait_memory(sk, &timeo); if (err != 0) goto do_error; It means if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) doesn't hit as we only have RCV_SHUTDOWN and sk_stream_wait_memory returns -EAGAIN. tcp_poll has this: if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; So we'll get EPOLLIN | EPOLLRDNORM | EPOLLRDHUP triggering TEVENT_FD_READ and writev/sendmsg keeps getting EAGAIN. So we need to always clear TEVENT_FD_READ if we don't have readable handler in order to avoid burning cpu. But we turn it on again after a timeout of 1 second in order to monitor the error state of the connection. And now that our tsocket_bsd_error() helper checks for POLLRDHUP, we can check if the socket is in an error state before calling the writable handler when TEVENT_FD_READ was reported. Only on error we'll call the writable handler, which will pick the error without calling writev(). 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202 Signed-off-by: Stefan Metzmacher Reviewed-by: Ralph Boehme [abartlet@samba.org adapted from commit e232ba946f00aac39d67197d9939bc923814479c to add lib/util/time.h header] --- lib/tsocket/tsocket_bsd.c | 122 +++++++++++++++++++++++++++++++++-- selftest/knownfail.d/tstream | 6 -- 2 files changed, 117 insertions(+), 11 deletions(-) delete mode 100644 selftest/knownfail.d/tstream diff --git a/lib/tsocket/tsocket_bsd.c b/lib/tsocket/tsocket_bsd.c index 454173930a9..7ea843b9a04 100644 --- a/lib/tsocket/tsocket_bsd.c +++ b/lib/tsocket/tsocket_bsd.c @@ -31,6 +31,7 @@ #include "lib/util/iov_buf.h" #include "lib/util/blocking.h" #include "lib/util/util_net.h" +#include "lib/util/time.h" static int tsocket_bsd_error_from_errno(int ret, int sys_errno, @@ -1650,6 +1651,9 @@ struct tstream_bsd { void (*readable_handler)(void *private_data); void *writeable_private; void (*writeable_handler)(void *private_data); + + struct tevent_context *error_ctx; + struct tevent_timer *error_timer; }; bool tstream_bsd_optimize_readv(struct tstream_context *stream, @@ -1671,6 +1675,28 @@ bool tstream_bsd_optimize_readv(struct tstream_context *stream, return old; } +static void tstream_bsd_error_timer(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval current_time, + void *private_data) +{ + struct tstream_bsd *bsds = + talloc_get_type(private_data, + struct tstream_bsd); + + TALLOC_FREE(bsds->error_timer); + + /* + * Turn on TEVENT_FD_READABLE() again + * if we have a writeable_handler that + * wants to monitor the connection + * for errors. 
+ */ + if (bsds->writeable_handler != NULL) { + TEVENT_FD_READABLE(bsds->fde); + } +} + static void tstream_bsd_fde_handler(struct tevent_context *ev, struct tevent_fd *fde, uint16_t flags, @@ -1685,11 +1711,74 @@ static void tstream_bsd_fde_handler(struct tevent_context *ev, } if (flags & TEVENT_FD_READ) { if (!bsds->readable_handler) { - if (bsds->writeable_handler) { + struct timeval recheck_time; + + /* + * In order to avoid cpu-spinning + * we no longer want to get TEVENT_FD_READ + */ + TEVENT_FD_NOT_READABLE(bsds->fde); + + if (!bsds->writeable_handler) { + return; + } + + /* + * If we have a writeable handler we + * want that to report connection errors + * early. + * + * So we check if the socket is in an + * error state. + */ + if (bsds->error == 0) { + int ret = tsocket_bsd_error(bsds->fd); + + if (ret == -1) { + bsds->error = errno; + } + } + + if (bsds->error != 0) { + /* + * Let the writeable handler report the error + */ + bsds->writeable_handler(bsds->writeable_private); + return; + } + + /* + * Here we called TEVENT_FD_NOT_READABLE() without + * calling into the writeable handler. + * + * So we may have to wait for the kernels tcp stack + * to report TEVENT_FD_WRITE in order to let + * make progress and turn on TEVENT_FD_READABLE() + * again. + * + * As a fallback we use a timer that turns on + * TEVENT_FD_READABLE() again after a timeout of + * 1 second. 
+ */ + + if (bsds->error_timer != NULL) { + return; + } + + recheck_time = timeval_current_ofs(1, 0); + bsds->error_timer = tevent_add_timer(bsds->error_ctx, + bsds, + recheck_time, + tstream_bsd_error_timer, + bsds); + if (bsds->error_timer == NULL) { + bsds->error = ENOMEM; + /* + * Let the writeable handler report the error + */ bsds->writeable_handler(bsds->writeable_private); return; } - TEVENT_FD_NOT_READABLE(bsds->fde); return; } bsds->readable_handler(bsds->readable_private); @@ -1744,6 +1833,8 @@ static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds, TEVENT_FD_READABLE(bsds->fde); } + TALLOC_FREE(bsds->error_timer); + bsds->readable_handler = handler; bsds->readable_private = private_data; @@ -1766,7 +1857,8 @@ static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds, bsds->writeable_handler = NULL; bsds->writeable_private = NULL; TEVENT_FD_NOT_WRITEABLE(bsds->fde); - + TALLOC_FREE(bsds->error_timer); + bsds->error_ctx = NULL; return 0; } @@ -1778,6 +1870,8 @@ static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds, } bsds->event_ptr = NULL; TALLOC_FREE(bsds->fde); + TALLOC_FREE(bsds->error_timer); + bsds->error_ctx = NULL; } if (tevent_fd_get_flags(bsds->fde) == 0) { @@ -1803,6 +1897,7 @@ static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds, bsds->writeable_handler = handler; bsds->writeable_private = private_data; + bsds->error_ctx = ev; return 0; } @@ -2108,7 +2203,14 @@ static void tstream_bsd_writev_handler(void *private_data) } err = tsocket_bsd_error_from_errno(ret, errno, &retry); if (retry) { - /* retry later */ + /* + * retry later... 
+ * + * make sure we also wait readable again + * in order to notice errors early + */ + TEVENT_FD_READABLE(bsds->fde); + TALLOC_FREE(bsds->error_timer); return; } if (err != 0) { @@ -2134,7 +2236,13 @@ static void tstream_bsd_writev_handler(void *private_data) } if (state->count > 0) { - /* we have more to read */ + /* + * we have more to write + * + * make sure we also wait readable again + * in order to notice errors early + */ + TEVENT_FD_READABLE(bsds->fde); return; } @@ -2182,6 +2290,8 @@ static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx, goto post; } + TALLOC_FREE(bsds->error_timer); + bsds->error_ctx = NULL; TALLOC_FREE(bsds->fde); ret = close(bsds->fd); bsds->fd = -1; @@ -2224,6 +2334,8 @@ static const struct tstream_context_ops tstream_bsd_ops = { static int tstream_bsd_destructor(struct tstream_bsd *bsds) { + TALLOC_FREE(bsds->error_timer); + bsds->error_ctx = NULL; TALLOC_FREE(bsds->fde); if (bsds->fd != -1) { close(bsds->fd); diff --git a/selftest/knownfail.d/tstream b/selftest/knownfail.d/tstream deleted file mode 100644 index 4d67b2c8b61..00000000000 --- a/selftest/knownfail.d/tstream +++ /dev/null @@ -1,6 +0,0 @@ -^samba.unittests.tsocket_tstream.test_tstream_disconnected_tcp_client_spin -^samba.unittests.tsocket_tstream.test_tstream_disconnected_unix_client_spin -^samba.unittests.tsocket_tstream.test_tstream_more_tcp_client_spin -^samba.unittests.tsocket_tstream.test_tstream_more_unix_client_spin -^samba.unittests.tsocket_tstream.test_tstream_more_disconnect_tcp_client_spin -^samba.unittests.tsocket_tstream.test_tstream_more_disconnect_unix_client_spin -- 2.25.1 From 7dc77a537530879f90b7f5dad46d97cd574ddf9b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 13 Oct 2022 10:17:25 +0200 Subject: [PATCH 6/6] s4:ldap_server: let ldapsrv_call_writev_start use conn_idle_time to limit the time If the client is not able to receive the results within connections idle time, then we should treat it as dead. 
Its value is 15 minutes (900 s) by default. In order to limit that further an admin can use 'socket options' and set TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL and/or TCP_USER_TIMEOUT to useful values. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202 Signed-off-by: Stefan Metzmacher Reviewed-by: Ralph Boehme Autobuild-User(master): Stefan Metzmacher Autobuild-Date(master): Wed Oct 19 17:13:39 UTC 2022 on sn-devel-184 (cherry picked from commit eb2f3526032803f34c88ef1619a832a741f71910) --- source4/ldap_server/ldap_server.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source4/ldap_server/ldap_server.c b/source4/ldap_server/ldap_server.c index fd2309b7ce3..2d612fc7af0 100644 --- a/source4/ldap_server/ldap_server.c +++ b/source4/ldap_server/ldap_server.c @@ -702,6 +702,7 @@ static void ldapsrv_call_writev_start(struct ldapsrv_call *call) struct ldapsrv_connection *conn = call->conn; struct ldapsrv_reply *reply = NULL; struct tevent_req *subreq = NULL; + struct timeval endtime; size_t length = 0; size_t i; @@ -786,6 +787,10 @@ static void ldapsrv_call_writev_start(struct ldapsrv_call *call) ldapsrv_terminate_connection(conn, "stream_writev_queue_send failed"); return; } + endtime = timeval_current_ofs(conn->limits.conn_idle_time, 0); + tevent_req_set_endtime(subreq, + conn->connection->event.ctx, + endtime); tevent_req_set_callback(subreq, ldapsrv_call_writev_done, call); } -- 2.25.1