From a4245a72da3bab051281059e366ae6ceb41d522b Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 24 Aug 2018 14:44:12 +1000 Subject: [PATCH 1/6] ctdb-common: Add support for sock daemon to notify of successful startup The daemon writes 0 into the specified file descriptor when it is up and listening. This can be used to avoid loops in clients that attempt to connect until they succeed. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit dc6040c121c65d5551c686f3f1be2891795f48aa) --- ctdb/common/sock_daemon.c | 26 ++++++++++++++++++++++++++ ctdb/common/sock_daemon.h | 10 ++++++++++ 2 files changed, 36 insertions(+) diff --git a/ctdb/common/sock_daemon.c b/ctdb/common/sock_daemon.c index 3c17519ff89..90f6bce2fd3 100644 --- a/ctdb/common/sock_daemon.c +++ b/ctdb/common/sock_daemon.c @@ -31,6 +31,7 @@ #include "lib/util/dlinklist.h" #include "lib/util/tevent_unix.h" #include "lib/util/become_daemon.h" +#include "lib/util/sys_rw.h" #include "common/logging.h" #include "common/reqid.h" @@ -72,6 +73,7 @@ struct sock_daemon_context { struct pidfile_context *pid_ctx; struct sock_socket *socket_list; + int startup_fd; }; /* @@ -489,6 +491,7 @@ int sock_daemon_setup(TALLOC_CTX *mem_ctx, const char *daemon_name, sockd->funcs = funcs; sockd->private_data = private_data; + sockd->startup_fd = -1; ret = logging_init(sockd, logging, debug_level, daemon_name); if (ret != 0) { @@ -520,6 +523,11 @@ int sock_daemon_add_unix(struct sock_daemon_context *sockd, return 0; } +void sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd) +{ + sockd->startup_fd = fd; +} + /* * Run socket daemon */ @@ -549,6 +557,7 @@ static void sock_daemon_run_socket_fail(struct tevent_req *subreq); static void sock_daemon_run_watch_pid(struct tevent_req *subreq); static void sock_daemon_run_wait(struct tevent_req *req); static void sock_daemon_run_wait_done(struct tevent_req *subreq); +static void sock_daemon_startup_notify(struct sock_daemon_context *sockd); struct tevent_req *sock_daemon_run_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, @@ -675,6 +684,8 @@ static void sock_daemon_run_started(struct tevent_req *subreq) return; } sock_daemon_run_wait(req); + + sock_daemon_startup_notify(sockd); } static void sock_daemon_run_startup_done(struct tevent_req *subreq) @@ -702,6 +713,8 @@ static void sock_daemon_run_startup_done(struct tevent_req *subreq) return; } sock_daemon_run_wait(req); + + sock_daemon_startup_notify(sockd); } static void sock_daemon_run_signal_handler(struct tevent_context *ev, @@ -967,6 +980,19 @@ static void sock_daemon_run_wait_done(struct tevent_req *subreq) sock_daemon_run_shutdown(req); } +static void sock_daemon_startup_notify(struct sock_daemon_context *sockd) +{ + if (sockd->startup_fd != -1) { + unsigned int zero = 0; + ssize_t num; + + num = sys_write(sockd->startup_fd, &zero, sizeof(zero)); + if (num != sizeof(zero)) { + D_WARNING("Failed to write zero to pipe FD\n"); + } + } +} + bool sock_daemon_run_recv(struct tevent_req *req, int *perr) { int ret; diff --git a/ctdb/common/sock_daemon.h b/ctdb/common/sock_daemon.h index 705c4fab359..972245a9650 100644 --- a/ctdb/common/sock_daemon.h +++ b/ctdb/common/sock_daemon.h @@ -209,6 +209,16 @@ int sock_daemon_add_unix(struct sock_daemon_context *sockd, struct sock_socket_funcs *funcs, void *private_data); +/** + * @brief Set file descriptor for indicating startup success + * + * On successful completion, 0 (unsigned int) will be written to the fd. + * + * @param[in] sockd Socket daemon context + * @param[in] fd File descriptor + */ +void sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd); + /** * @brief Async computation start to run a socket daemon * -- 2.18.0 From 0ea766324e40dd0c5b824d5b8f103a865fac77de Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 24 Aug 2018 14:52:29 +1000 Subject: [PATCH 2/6] ctdb-event: Add support to eventd for the startup notification FD BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 11ee92d1bfd73c509d90e7a7386af60a4e1a7fca) --- ctdb/event/event_daemon.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ctdb/event/event_daemon.c b/ctdb/event/event_daemon.c index 63c4dad2866..c1c6852cca5 100644 --- a/ctdb/event/event_daemon.c +++ b/ctdb/event/event_daemon.c @@ -218,14 +218,18 @@ static bool event_client_recv(struct tevent_req *req, int *perr) static struct { int pid; + int startup_fd; } options = { .pid = -1, + .startup_fd = -1, }; struct poptOption cmdline_options[] = { POPT_AUTOHELP { "pid", 'P', POPT_ARG_INT, &options.pid, 0, "pid to wait for", "PID" }, + { "startup-fd", 'S', POPT_ARG_INT, &options.startup_fd, 0, + "file descriptor to notify of successful start", "FD" }, POPT_TABLEEND }; @@ -338,6 +342,10 @@ int main(int argc, const char **argv) goto fail; } + if (options.startup_fd != -1) { + sock_daemon_set_startup_fd(e_state->sockd, options.startup_fd); + } + ret = sock_daemon_run(e_state->ev, e_state->sockd, e_state->pidfile, -- 2.18.0 From c6678f9a17882526adfe6b9f21ade83bc4a7d730 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 27 Aug 2018 15:28:47 +1000 Subject: [PATCH 3/6] ctdb-daemon: Improve error handling consistency Other errors free argv, so do it here too. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit e357b62fe556609750bdb8d27cf48dfb85c62ec8) --- ctdb/server/eventscript.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 4ef3b80706a..403e10432b2 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -170,6 +170,7 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) if (pid == -1) { close(fd[0]); close(fd[1]); + talloc_free(argv); return -1; } -- 2.18.0 From 16e98ece557903a34f5318be84f92ba5519c7c49 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 27 Aug 2018 14:44:24 +1000 Subject: [PATCH 4/6] ctdb-daemon: Open eventd pipe earlier The pipe will soon be needed earlier, so initialise it earlier. Ensure the file descriptors are closed on error. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c446ae5e1382d5e32c33ce92243daf6b4338e15a) --- ctdb/server/eventscript.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 403e10432b2..cf4f7f6860b 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -143,8 +143,15 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) return -1; } + ret = pipe(fd); + if (ret != 0) { + return -1; + } + argv = talloc_array(ectx, const char *, 4); if (argv == NULL) { + close(fd[0]); + close(fd[1]); return -1; } @@ -154,6 +161,8 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) argv[3] = NULL; if (argv[2] == NULL) { + close(fd[0]); + close(fd[1]); talloc_free(argv); return -1; } @@ -161,11 +170,6 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) DEBUG(DEBUG_NOTICE, ("Starting event daemon %s %s %s\n", argv[0], argv[1], argv[2])); - ret = pipe(fd); - if (ret != 0) { - return -1; - } - pid = ctdb_fork(ctdb); if (pid == -1) { close(fd[0]); -- 2.18.0 From c03d5bc35dd6ec1096a3668e71f54500e97c81ce Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 27 Aug 2018 14:47:38 +1000 Subject: [PATCH 5/6] ctdb-daemon: Wait for eventd to be ready before connecting The current method of retrying the connection to eventd means that messages get logged for each failure. Instead, pass a pipe file descriptor to eventd and wait for it to write 0 to the pipe to indicate that it is ready to accept client connections. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 62ec1ab1470206d6a2cf300f30ca0b4a39413a38) --- ctdb/server/eventscript.c | 110 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 3 deletions(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index cf4f7f6860b..30916a07ed5 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -110,6 +110,100 @@ static bool eventd_context_init(TALLOC_CTX *mem_ctx, return true; } +struct eventd_startup_state { + bool done; + int ret; + int fd; +}; + +static void eventd_startup_timeout_handler(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, + void *private_data) +{ + struct eventd_startup_state *state = + (struct eventd_startup_state *) private_data; + + state->done = true; + state->ret = ETIMEDOUT; +} + +static void eventd_startup_handler(struct tevent_context *ev, + struct tevent_fd *fde, uint16_t flags, + void *private_data) +{ + struct eventd_startup_state *state = + (struct eventd_startup_state *)private_data; + unsigned int data; + ssize_t num_read; + + num_read = sys_read(state->fd, &data, sizeof(data)); + if (num_read == sizeof(data)) { + if (data == 0) { + state->ret = 0; + } else { + state->ret = EIO; + } + } else if (num_read == 0) { + state->ret = EPIPE; + } else if (num_read == -1) { + state->ret = errno; + } else { + state->ret = EINVAL; + } + + state->done = true; +} + + +static int wait_for_daemon_startup(struct tevent_context *ev, + int fd) +{ + TALLOC_CTX *mem_ctx; + struct tevent_timer *timer; + struct tevent_fd *fde; + struct eventd_startup_state state = { + .done = false, + .ret = 0, + .fd = fd, + }; + + mem_ctx = talloc_new(ev); + if (mem_ctx == NULL) { + return ENOMEM; + } + + timer = tevent_add_timer(ev, + mem_ctx, + tevent_timeval_current_ofs(10, 0), + eventd_startup_timeout_handler, + &state); + if (timer == NULL) { + talloc_free(mem_ctx); + return ENOMEM; + } + + fde = tevent_add_fd(ev, + mem_ctx, + fd, + TEVENT_FD_READ, + eventd_startup_handler, + &state); + if (fde == NULL) { + talloc_free(mem_ctx); + return ENOMEM; + } + + while (! state.done) { + tevent_loop_once(ev); + } + + talloc_free(mem_ctx); + + return state.ret; +} + + /* * Start and stop event daemon */ @@ -148,7 +242,7 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) return -1; } - argv = talloc_array(ectx, const char *, 4); + argv = talloc_array(ectx, const char *, 6); if (argv == NULL) { close(fd[0]); close(fd[1]); @@ -158,9 +252,11 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) argv[0] = ectx->path; argv[1] = "-P"; argv[2] = talloc_asprintf(argv, "%d", ctdb->ctdbd_pid); - argv[3] = NULL; + argv[3] = "-S"; + argv[4] = talloc_asprintf(argv, "%d", fd[1]); + argv[5] = NULL; - if (argv[2] == NULL) { + if (argv[2] == NULL || argv[4] == NULL) { close(fd[0]); close(fd[1]); talloc_free(argv); @@ -190,6 +286,14 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) talloc_free(argv); close(fd[1]); + ret = wait_for_daemon_startup(ctdb->ev, fd[0]); + if (ret != 0) { + ctdb_kill(ctdb, pid, SIGKILL); + close(fd[0]); + D_ERR("Failed to initialize event daemon (%d)\n", ret); + return -1; + } + ectx->eventd_fde = tevent_add_fd(ctdb->ev, ectx, fd[0], TEVENT_FD_READ, eventd_dead_handler, ectx); -- 2.18.0 From 717dd5794c197da5259bd3508533119e32442730 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 27 Aug 2018 14:53:37 +1000 Subject: [PATCH 6/6] ctdb-daemon: Do not retry connection to eventd Confirmation is now received from eventd that it is accepting connections, so this is no longer needed. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit b430a1ace69bcef3336907557ab5bf04271c1110) --- ctdb/server/eventscript.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 30916a07ed5..8c87a3db27a 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -219,7 +219,7 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) const char **argv; int fd[2]; pid_t pid; - int ret, i; + int ret; bool status; if (ctdb->ectx == NULL) { @@ -306,17 +306,9 @@ int ctdb_start_eventd(struct ctdb_context *ctdb) tevent_fd_set_auto_close(ectx->eventd_fde); ectx->eventd_pid = pid; - /* Wait to connect to eventd */ - for (i=0; i<10; i++) { - status = eventd_client_connect(ectx); - if (status) { - break; - } - sleep(1); - } - + status = eventd_client_connect(ectx); if (! status) { - DEBUG(DEBUG_ERR, ("Failed to initialize event daemon\n")); + DEBUG(DEBUG_ERR, ("Failed to connect to event daemon\n")); ctdb_stop_eventd(ctdb); return -1; } -- 2.18.0