From ead491c25a3825f3729bb3ec8d290e3ad97809b2 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 20 Jul 2015 16:37:58 +1000 Subject: [PATCH 1/3] ctdb-tool: Correctly print timed out event scripts output The timed out error is ignored for certain events (start_recovery, recoverd, takeip, releaseip). If these events time out, then the debug hung script outputs the following: 3 scripts were executed last releaseip cycle 00.ctdb Status:OK Duration:4.381 Thu Jul 16 23:45:24 2015 01.reclock Status:OK Duration:13.422 Thu Jul 16 23:45:28 2015 10.external Status:DISABLED 10.interface Status:OK Duration:-1437083142.208 Thu Jul 16 23:45:42 2015 The endtime for timed out scripts is not set. Since the status is not returned as -ETIME for some events, ctdb scriptstatus prints -ve duration. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11431 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 71b89b2b7a9768de437347e6678370b2682da892) --- ctdb/tools/ctdb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index c386d35..6911dff 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -1453,6 +1453,14 @@ static int control_one_scriptstatus(struct ctdb_context *ctdb, for (i=0; i<script_status->num_scripts; i++) { const char *status = NULL; + /* The ETIME status is ignored for certain events. + * In that case the status is 0, but endtime is not set. 
+ */ + if (script_status->scripts[i].status == 0 && + timeval_is_zero(&script_status->scripts[i].finished)) { + script_status->scripts[i].status = -ETIME; + } + switch (script_status->scripts[i].status) { case -ETIME: status = "TIMEDOUT"; -- 2.5.0 From c83f44a8cfb36c0154cf99785e714ad715a30edb Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 13 Nov 2014 11:02:26 +1100 Subject: [PATCH 2/3] ctdb-daemon: Improve error handling for running event scripts BUG: https://bugzilla.samba.org/show_bug.cgi?id=11431 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke Autobuild-User(master): Martin Schwenke Autobuild-Date(master): Fri Nov 14 03:06:12 CET 2014 on sn-devel-104 (cherry picked from commit d04bfc6ec6ad7a4749ebfee2284253c4a91a81aa) --- ctdb/server/ctdb_event_helper.c | 48 ++++++++++++++++++++++++++--------------- ctdb/server/eventscript.c | 10 +++++++-- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/ctdb/server/ctdb_event_helper.c b/ctdb/server/ctdb_event_helper.c index 9ff763c..f14e336 100644 --- a/ctdb/server/ctdb_event_helper.c +++ b/ctdb/server/ctdb_event_helper.c @@ -67,7 +67,7 @@ int main(int argc, char *argv[]) { int log_fd, write_fd; pid_t pid; - int status, output; + int status, output, ret; progname = argv[0]; @@ -99,33 +99,47 @@ int main(int argc, char *argv[]) pid = fork(); if (pid < 0) { + int save_errno = errno; fprintf(stderr, "Failed to fork - %s\n", strerror(errno)); - exit(errno); + sys_write(write_fd, &save_errno, sizeof(save_errno)); + exit(1); } if (pid == 0) { - int save_errno; - - execv(argv[3], &argv[3]); - if (errno == EACCES) { - save_errno = check_executable(argv[3]); - } else { - save_errno = errno; + ret = check_executable(argv[3]); + if (ret != 0) { + _exit(ret); + } + ret = execv(argv[3], &argv[3]); + if (ret != 0) { + int save_errno = errno; fprintf(stderr, "Error executing '%s' - %s\n", - argv[3], strerror(errno)); + argv[3], strerror(save_errno)); } - _exit(save_errno); + /* This should never happen */ + 
+ _exit(ENOEXEC); } - waitpid(pid, &status, 0); + ret = waitpid(pid, &status, 0); + if (ret == -1) { + output = -errno; + fprintf(stderr, "waitpid() failed - %s\n", strerror(errno)); + sys_write(write_fd, &output, sizeof(output)); + exit(1); + } if (WIFEXITED(status)) { - output = WEXITSTATUS(status); - if (output == ENOENT || output == ENOEXEC) { - output = -output; - } + output = -WEXITSTATUS(status); + sys_write(write_fd, &output, sizeof(output)); + exit(0); + } + if (WIFSIGNALED(status)) { + output = -EINTR; + fprintf(stderr, "Process terminated with signal - %d\n", + WTERMSIG(status)); sys_write(write_fd, &output, sizeof(output)); - exit(output); + exit(0); } + fprintf(stderr, "waitpid() status=%d\n", status); exit(1); } diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index e3131b3..1f16580 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -379,6 +379,8 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event r = sys_read(state->fd[0], &current->status, sizeof(current->status)); if (r < 0) { current->status = -errno; + } else if (r == 0) { + current->status = -EINTR; } else if (r != sizeof(current->status)) { current->status = -EIO; } @@ -396,8 +398,12 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event /* Aborted or finished all scripts? We're done. 
*/ if (status != 0 || state->current+1 == state->scripts->num_scripts) { - DEBUG(DEBUG_INFO,(__location__ " Eventscript %s %s finished with state %d\n", - ctdb_eventscript_call_names[state->call], state->options, status)); + if (status != 0) { + DEBUG(DEBUG_INFO, + ("Eventscript %s %s finished with state %d\n", + ctdb_eventscript_call_names[state->call], + state->options, status)); + } ctdb->event_script_timeouts = 0; talloc_free(state); -- 2.5.0 From b0a13d44679581bb871b65718627e0376f2cdd5b Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 21 Jul 2015 16:37:04 +1000 Subject: [PATCH 3/3] ctdb-daemon: Correctly process the exit code from failed eventscripts BUG: https://bugzilla.samba.org/show_bug.cgi?id=11431 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke Autobuild-User(master): Martin Schwenke Autobuild-Date(master): Wed Jul 22 15:03:53 CEST 2015 on sn-devel-104 (cherry picked from commit 00ec3c477eba50206801b451ae4eb64c12aba5db) --- ctdb/server/ctdb_event_helper.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_event_helper.c b/ctdb/server/ctdb_event_helper.c index f14e336..a1b5318 100644 --- a/ctdb/server/ctdb_event_helper.c +++ b/ctdb/server/ctdb_event_helper.c @@ -128,7 +128,11 @@ int main(int argc, char *argv[]) exit(1); } if (WIFEXITED(status)) { - output = -WEXITSTATUS(status); + output = WEXITSTATUS(status); + /* Only errors should be returned as -ve values */ + if (output == ENOENT || output == ENOEXEC) { + output = -output; + } sys_write(write_fd, &output, sizeof(output)); exit(0); } -- 2.5.0