From c5b1f91834aedf855995632dabd72c4d17896a1a Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 24 Aug 2018 14:44:12 +1000 Subject: [PATCH 1/6] ctdb-common: Add support for sock daemon to notify of successful startup The daemon writes 0 into the specified file descriptor when it is up and listening. This can be used to avoid loops in clients that attempt to connect until they succeed. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit dc6040c121c65d5551c686f3f1be2891795f48aa) --- ctdb/common/sock_daemon.c | 26 ++++++++++++++++++++++++++ ctdb/common/sock_daemon.h | 10 ++++++++++ 2 files changed, 36 insertions(+) diff --git a/ctdb/common/sock_daemon.c b/ctdb/common/sock_daemon.c index 7554cd6da07..03d3ac1f1ec 100644 --- a/ctdb/common/sock_daemon.c +++ b/ctdb/common/sock_daemon.c @@ -31,6 +31,7 @@ #include "lib/util/dlinklist.h" #include "lib/util/tevent_unix.h" #include "lib/util/become_daemon.h" +#include "lib/util/sys_rw.h" #include "common/logging.h" #include "common/reqid.h" @@ -71,6 +72,7 @@ struct sock_daemon_context { struct pidfile_context *pid_ctx; struct sock_socket *socket_list; + int startup_fd; }; /* @@ -483,6 +485,7 @@ int sock_daemon_setup(TALLOC_CTX *mem_ctx, const char *daemon_name, sockd->funcs = funcs; sockd->private_data = private_data; + sockd->startup_fd = -1; ret = logging_init(sockd, logging, debug_level, daemon_name); if (ret != 0) { @@ -514,6 +517,11 @@ int sock_daemon_add_unix(struct sock_daemon_context *sockd, return 0; } +void sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd) +{ + sockd->startup_fd = fd; +} + /* * Run socket daemon */ @@ -543,6 +551,7 @@ static void sock_daemon_run_socket_fail(struct tevent_req *subreq); static void sock_daemon_run_watch_pid(struct tevent_req *subreq); static void sock_daemon_run_wait(struct tevent_req *req); static void sock_daemon_run_wait_done(struct tevent_req *subreq); +static void sock_daemon_startup_notify(struct sock_daemon_context *sockd); struct tevent_req *sock_daemon_run_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, @@ -669,6 +678,8 @@ static void sock_daemon_run_started(struct tevent_req *subreq) return; } sock_daemon_run_wait(req); + + sock_daemon_startup_notify(sockd); } static void sock_daemon_run_startup_done(struct tevent_req *subreq) @@ -696,6 +707,8 @@ static void sock_daemon_run_startup_done(struct tevent_req *subreq) return; } sock_daemon_run_wait(req); + + sock_daemon_startup_notify(sockd); } static void sock_daemon_run_signal_handler(struct tevent_context *ev, @@ -961,6 +974,19 @@ static void sock_daemon_run_wait_done(struct tevent_req *subreq) sock_daemon_run_shutdown(req); } +static void sock_daemon_startup_notify(struct sock_daemon_context *sockd) +{ + if (sockd->startup_fd != -1) { + unsigned int zero = 0; + ssize_t num; + + num = sys_write(sockd->startup_fd, &zero, sizeof(zero)); + if (num != sizeof(zero)) { + D_WARNING("Failed to write zero to pipe FD\n"); + } + } +} + bool sock_daemon_run_recv(struct tevent_req *req, int *perr) { int ret; diff --git a/ctdb/common/sock_daemon.h b/ctdb/common/sock_daemon.h index a071833c2f3..a28f8c6f39c 100644 --- a/ctdb/common/sock_daemon.h +++ b/ctdb/common/sock_daemon.h @@ -207,6 +207,16 @@ int sock_daemon_add_unix(struct sock_daemon_context *sockd, struct sock_socket_funcs *funcs, void *private_data); +/** + * @brief Set file descriptor for indicating startup success + * + * On successful completion, 0 (unsigned int) will be written to the fd. + * + * @param[in] sockd Socket daemon context + * @param[in] fd File descriptor + */ +void sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd); + /** * @brief Async computation start to run a socket daemon * -- 2.18.0 From a1f8671eed9a0bbf4ce2b6af371533fe303ec595 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 24 Aug 2018 14:52:29 +1000 Subject: [PATCH 2/6] ctdb-event: Add support to eventd for the startup notification FD BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 11ee92d1bfd73c509d90e7a7386af60a4e1a7fca) --- ctdb/server/ctdb_eventd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ctdb/server/ctdb_eventd.c b/ctdb/server/ctdb_eventd.c index feeac074890..f79ee9990d1 100644 --- a/ctdb/server/ctdb_eventd.c +++ b/ctdb/server/ctdb_eventd.c @@ -952,8 +952,10 @@ static struct { const char *pidfile; const char *socket; int pid; + int startup_fd; } options = { .debug_level = "ERR", + .startup_fd = -1, }; struct poptOption cmdline_options[] = { @@ -972,6 +974,8 @@ struct poptOption cmdline_options[] = { "eventd pid file", "FILE" }, { "socket", 's', POPT_ARG_STRING, &options.socket, 0, "eventd socket path", "FILE" }, + { "startup-fd", 'S', POPT_ARG_INT, &options.startup_fd, 0, + "file descriptor to notify of successful start", "FD" }, POPT_TABLEEND }; @@ -1068,6 +1072,10 @@ int main(int argc, const char **argv) goto fail; } + if (options.startup_fd != -1) { + sock_daemon_set_startup_fd(sockd, options.startup_fd); + } + ret = sock_daemon_run(ev, sockd, options.pidfile, false, false, options.pid); if (ret == EINTR) { -- 2.18.0 From fb020914ddd113b222e9989491259b5e86c6dc97 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 27 Aug 2018 15:28:47 +1000 Subject: [PATCH 3/6] ctdb-daemon: Improve error handling consistency Other errors free argv, so do it here too. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit e357b62fe556609750bdb8d27cf48dfb85c62ec8) --- ctdb/server/eventscript.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 41807ffdcb0..74f132c0e80 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -219,6 +219,7 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) if (pid == -1) { close(fd[0]); close(fd[1]); + talloc_free(argv); return -1; } -- 2.18.0 From 61b2e26ff84ad1ecf45072926fe69e83f7262104 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 27 Aug 2018 14:44:24 +1000 Subject: [PATCH 4/6] ctdb-daemon: Open eventd pipe earlier The pipe will soon be needed earlier, so initialise it earlier. Ensure the file descriptors are closed on error. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c446ae5e1382d5e32c33ce92243daf6b4338e15a) --- ctdb/server/eventscript.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 74f132c0e80..02924a7f471 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -175,8 +175,15 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) return -1; } + ret = pipe(fd); + if (ret != 0) { + return -1; + } + argv = talloc_array(ectx, const char *, 14); if (argv == NULL) { + close(fd[0]); + close(fd[1]); return -1; } @@ -201,6 +208,8 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) argv[13] = NULL; if (argv[6] == NULL) { + close(fd[0]); + close(fd[1]); talloc_free(argv); return -1; } @@ -210,11 +219,6 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10])); - ret = pipe(fd); - if (ret != 0) { - return -1; - } - pid = ctdb_fork(ctdb); if (pid == -1) { close(fd[0]); -- 2.18.0 From 05c584a2c6ae14176a8ff0c5cd49141f33659efb Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 27 Aug 2018 14:47:38 +1000 Subject: [PATCH 5/6] ctdb-daemon: Wait for eventd to be ready before connecting The current method of retrying the connection to eventd means that messages get logged for each failure. Instead, pass a pipe file descriptor to eventd and wait for it to write 0 to the pipe to indicate that it is ready to accept client connections. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 62ec1ab1470206d6a2cf300f30ca0b4a39413a38) Signed-off-by: Martin Schwenke --- ctdb/server/eventscript.c | 110 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 3 deletions(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 02924a7f471..c6a430236c0 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -142,6 +142,100 @@ static bool eventd_context_init(TALLOC_CTX *mem_ctx, return true; } +struct eventd_startup_state { + bool done; + int ret; + int fd; +}; + +static void eventd_startup_timeout_handler(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, + void *private_data) +{ + struct eventd_startup_state *state = + (struct eventd_startup_state *) private_data; + + state->done = true; + state->ret = ETIMEDOUT; +} + +static void eventd_startup_handler(struct tevent_context *ev, + struct tevent_fd *fde, uint16_t flags, + void *private_data) +{ + struct eventd_startup_state *state = + (struct eventd_startup_state *)private_data; + unsigned int data; + ssize_t num_read; + + num_read = sys_read(state->fd, &data, sizeof(data)); + if (num_read == sizeof(data)) { + if (data == 0) { + state->ret = 0; + } else { + state->ret = EIO; + } + } else if (num_read == 0) { + state->ret = EPIPE; + } else if (num_read == -1) { + state->ret = errno; + } else { + state->ret = EINVAL; + } + + state->done = true; +} + + +static int wait_for_daemon_startup(struct tevent_context *ev, + int fd) +{ + TALLOC_CTX *mem_ctx; + struct tevent_timer *timer; + struct tevent_fd *fde; + struct eventd_startup_state state = { + .done = false, + .ret = 0, + .fd = fd, + }; + + mem_ctx = talloc_new(ev); + if (mem_ctx == NULL) { + return ENOMEM; + } + + timer = tevent_add_timer(ev, + mem_ctx, + tevent_timeval_current_ofs(10, 0), + eventd_startup_timeout_handler, + &state); + if (timer == NULL) { + talloc_free(mem_ctx); + return ENOMEM; + } + + fde = tevent_add_fd(ev, + mem_ctx, + fd, + TEVENT_FD_READ, + eventd_startup_handler, + &state); + if (fde == NULL) { + talloc_free(mem_ctx); + return ENOMEM; + } + + while (! state.done) { + tevent_loop_once(ev); + } + + talloc_free(mem_ctx); + + return state.ret; +} + + /* * Start and stop event daemon */ @@ -180,7 +274,7 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) return -1; } - argv = talloc_array(ectx, const char *, 14); + argv = talloc_array(ectx, const char *, 16); if (argv == NULL) { close(fd[0]); close(fd[1]); @@ -205,9 +299,11 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) argv[11] = "-D"; argv[12] = ectx->debug_hung_script; } - argv[13] = NULL; + argv[13] = "-S"; + argv[14] = talloc_asprintf(argv, "%d", fd[1]); + argv[15] = NULL; - if (argv[6] == NULL) { + if (argv[6] == NULL || argv[14] == NULL) { close(fd[0]); close(fd[1]); talloc_free(argv); @@ -239,6 +335,14 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) talloc_free(argv); close(fd[1]); + ret = wait_for_daemon_startup(ctdb->ev, fd[0]); + if (ret != 0) { + ctdb_kill(ctdb, pid, SIGKILL); + close(fd[0]); + D_ERR("Failed to initialize event daemon (%d)\n", ret); + return -1; + } + ectx->eventd_fde = tevent_add_fd(ctdb->ev, ectx, fd[0], TEVENT_FD_READ, eventd_dead_handler, ectx); -- 2.18.0 From 480653a14faf2bfc52456e1a9039d8aa56be2f3e Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 27 Aug 2018 14:53:37 +1000 Subject: [PATCH 6/6] ctdb-daemon: Do not retry connection to eventd Confirmation is now received from eventd that it is accepting connections, so this is no longer needed. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit b430a1ace69bcef3336907557ab5bf04271c1110) --- ctdb/server/eventscript.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index c6a430236c0..211f099d4f5 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -251,7 +251,7 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) const char **argv; int fd[2]; pid_t pid; - int ret, i; + int ret; bool status; if (ctdb->ectx == NULL) { @@ -355,17 +355,9 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) tevent_fd_set_auto_close(ectx->eventd_fde); ectx->eventd_pid = pid; - /* Wait to connect to eventd */ - for (i=0; i<10; i++) { - status = eventd_client_connect(ectx); - if (status) { - break; - } - sleep(1); - } - + status = eventd_client_connect(ectx); if (! status) { - DEBUG(DEBUG_ERR, ("Failed to initialize event daemon\n")); + DEBUG(DEBUG_ERR, ("Failed to connect to event daemon\n")); ctdb_stop_eventd(ctdb); return -1; } -- 2.18.0