From fbe296b4daf28e522d0a9497135d91abfed13ffb Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 10 Oct 2018 13:35:00 +1100 Subject: [PATCH 1/5] ctdb-daemon: Return early when refusing to run an event script BUG: https://bugzilla.samba.org/show_bug.cgi?id=13659 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit a3d12252fa8e0a7e900b819dec30bdb9da458254) --- ctdb/server/eventscript.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 267f38a075f..31f267c5c8e 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -616,6 +616,7 @@ int ctdb_event_script_run(struct ctdb_context *ctdb, DEBUG(DEBUG_ERR, ("Refusing to run event '%s' while in recovery\n", ctdb_eventscript_call_names[event])); + return -1; } state = talloc_zero(mem_ctx, struct ctdb_event_script_run_state); -- 2.19.1 From 1e72316f1858b0e2547fe6562c2291c979678928 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 11 Oct 2018 11:26:06 +1100 Subject: [PATCH 2/5] ctdb-daemon: Exit if eventd goes away ctdbd enters a broken state if eventd goes away. A clean shutdown is not possible because that involves running events. Restarting eventd is possible but this might mask a serious problem and it is possible that eventd might keep on disappearing. Just exit. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13659 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c9e1603a5d0c1a216439d4a2b0e7cdc05181e898) --- ctdb/server/eventscript.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 31f267c5c8e..11a87063122 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -324,13 +324,8 @@ static void eventd_dead_handler(struct tevent_context *ev, struct tevent_fd *fde, uint16_t flags, void *private_data) { - struct eventd_context *ectx = talloc_get_type_abort( - private_data, struct eventd_context); - - DEBUG(DEBUG_ERR, ("Eventd went away\n")); - - TALLOC_FREE(ectx->eventd_fde); - ectx->eventd_pid = -1; + D_ERR("Eventd went away - exiting\n"); + exit(1); } void ctdb_stop_eventd(struct ctdb_context *ctdb) -- 2.19.1 From 1ba9f6ad57058c9ad2948ee24775b3c19ccb1075 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Wed, 10 Oct 2018 18:16:33 +1100 Subject: [PATCH 3/5] ctdb-common: Set close-on-exec for startup fd The startup_fd should not be propagated to the child processes created from a daemon. It should only be used in the daemon code to return the status of the startup. Another use of startup_fd is to notify the parent if the daemon process has exited. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13659 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 80549927bc1741a4b8af8b8e830de4d37fa0c4a8) --- ctdb/common/sock_daemon.c | 8 +++++++- ctdb/common/sock_daemon.h | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ctdb/common/sock_daemon.c b/ctdb/common/sock_daemon.c index 90f6bce2fd3..e5e16f8af12 100644 --- a/ctdb/common/sock_daemon.c +++ b/ctdb/common/sock_daemon.c @@ -523,9 +523,15 @@ int sock_daemon_add_unix(struct sock_daemon_context *sockd, return 0; } -void sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd) +bool sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd) { + if (! set_close_on_exec(fd)) { + D_ERR("Failed to set close-on-exec on startup fd\n"); + return false; + } + sockd->startup_fd = fd; + return true; } /* diff --git a/ctdb/common/sock_daemon.h b/ctdb/common/sock_daemon.h index 972245a9650..7aa9d5dfd34 100644 --- a/ctdb/common/sock_daemon.h +++ b/ctdb/common/sock_daemon.h @@ -216,8 +216,9 @@ int sock_daemon_add_unix(struct sock_daemon_context *sockd, * * @param[in] sockd Socket daemon context * @param[in] fd File descriptor + * @return true on success, false on error */ -void sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd); +bool sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd); /** * @brief Async computation start to run a socket daemon -- 2.19.1 From bc520e63d5cd6a5e6f3f4b4da54378bd5ca0293d Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Wed, 10 Oct 2018 18:19:32 +1100 Subject: [PATCH 4/5] ctdb-event: Check the return status of sock_daemon_set_startup_fd BUG: https://bugzilla.samba.org/show_bug.cgi?id=13659 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit a1909603808b994b7822b697494e39e8da4aaa66) --- ctdb/event/event_daemon.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ctdb/event/event_daemon.c b/ctdb/event/event_daemon.c index c1c6852cca5..3cab3306b02 100644 --- a/ctdb/event/event_daemon.c +++ b/ctdb/event/event_daemon.c @@ -244,6 +244,7 @@ int main(int argc, const char **argv) const char *t; int interactive = 0; int opt, ret; + bool ok; pc = poptGetContext(argv[0], argc, @@ -343,7 +344,11 @@ int main(int argc, const char **argv) } if (options.startup_fd != -1) { - sock_daemon_set_startup_fd(e_state->sockd, options.startup_fd); + ok = sock_daemon_set_startup_fd(e_state->sockd, + options.startup_fd); + if (!ok) { + goto fail; + } } ret = sock_daemon_run(e_state->ev, -- 2.19.1 From 7cc732a8d467041028158483d596d76c381ff709 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 17 Oct 2018 17:19:06 +1100 Subject: [PATCH 5/5] ctdb-daemon: Fix valgrind hit in event code ==25741== Syscall param write(buf) points to uninitialised byte(s) ==25741== at 0x4939291: write (write.c:27) ==25741== by 0x4868285: sys_write (sys_rw.c:68) ==25741== by 0x13915D: sock_queue_trigger (sock_io.c:316) ==25741== by 0x4DE6478: tevent_common_invoke_immediate_handler (in /usr/lib/x86_64-linux-gnu/libtevent.so.0.9.37) ==25741== by 0x4DE64A2: tevent_common_loop_immediate (in /usr/lib/x86_64-linux-gnu/libtevent.so.0.9.37) ==25741== by 0x4DEBE5A: ??? (in /usr/lib/x86_64-linux-gnu/libtevent.so.0.9.37) ==25741== by 0x4DEA2D6: ??? (in /usr/lib/x86_64-linux-gnu/libtevent.so.0.9.37) ==25741== by 0x4DE57E3: _tevent_loop_once (in /usr/lib/x86_64-linux-gnu/libtevent.so.0.9.37) ==25741== by 0x15D1BA: ctdb_event_script_args (eventscript.c:821) ==25741== by 0x13B437: ctdb_start_daemon (ctdb_daemon.c:1315) ==25741== by 0x110642: main (ctdbd.c:393) ==25741== Address 0x57888a4 is 100 bytes inside a block of size 144 alloc'd ==25741== at 0x48357BF: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==25741== by 0x4B9B7C0: talloc_named_const (in /usr/lib/x86_64-linux-gnu/libtalloc.so.2.1.14) ==25741== by 0x15CCC6: eventd_client_write (eventscript.c:430) ==25741== by 0x15CCC6: eventd_client_run (eventscript.c:556) ==25741== by 0x15CCC6: ctdb_event_script_run (eventscript.c:649) ==25741== by 0x15D198: ctdb_event_script_args (eventscript.c:812) ==25741== by 0x13B437: ctdb_start_daemon (ctdb_daemon.c:1315) ==25741== by 0x110642: main (ctdbd.c:393) ==25741== BUG: https://bugzilla.samba.org/show_bug.cgi?id=13659 Pair-programmed-with: Amitay Isaacs Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Mon Oct 22 09:27:15 CEST 2018 on sn-devel-144 (cherry picked from commit fbea9d36996f248ba2b077f12ad16c199b853134) --- ctdb/server/eventscript.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 11a87063122..801e8a85e66 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -394,7 +394,7 @@ static int eventd_client_write(struct eventd_context *ectx, void *private_data), void *private_data) { - struct ctdb_event_header header; + struct ctdb_event_header header = { 0 }; struct eventd_client_state *state; int ret; -- 2.19.1