From 78e1f8ebc82f82869427d551ecc9afd90e138863 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Fri, 7 May 2021 17:36:58 +0200 Subject: [PATCH 1/6] ctdb: fix typos Bug: https://bugzilla.samba.org/show_bug.cgi?id=14475 Signed-off-by: Volker Lendecke Reviewed-by: Ralph Boehme (cherry picked from commit f188c9d732e4b9b3d37c4cb09608aba747845997) --- ctdb/tests/src/run_event_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/tests/src/run_event_test.c b/ctdb/tests/src/run_event_test.c index cfe5f161d1d..3afad557ae9 100644 --- a/ctdb/tests/src/run_event_test.c +++ b/ctdb/tests/src/run_event_test.c @@ -86,7 +86,7 @@ static void do_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev, timeout, false); if (req == NULL) { - fprintf(stderr, "run_proc_send() failed\n"); + fprintf(stderr, "run_event_send() failed\n"); return; } @@ -94,7 +94,7 @@ static void do_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev, status = run_event_recv(req, &ret, mem_ctx, &script_list); if (! status) { - fprintf(stderr, "run_proc_recv() failed, ret=%d\n", ret); + fprintf(stderr, "run_event_recv() failed, ret=%d\n", ret); return; } -- 2.25.1 From 27bcab785e0a4bee9a74264e381ef32f3dc62b25 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 18 May 2021 08:01:06 +0200 Subject: [PATCH 2/6] ctdb: Call run_event_recv() in a callback function Triggers a different code path in run_event_* and aligns it more what the ctdb eventd really does. Bug: https://bugzilla.samba.org/show_bug.cgi?id=14475 Signed-off-by: Volker Lendecke Reviewed-by: Ralph Boehme (cherry picked from commit 9398d4b912387be8cde0c2ca30734eca7d547d19) --- ctdb/tests/src/run_event_test.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/ctdb/tests/src/run_event_test.c b/ctdb/tests/src/run_event_test.c index 3afad557ae9..398ab2a0df0 100644 --- a/ctdb/tests/src/run_event_test.c +++ b/ctdb/tests/src/run_event_test.c @@ -52,6 +52,19 @@ static char *compact_args(const char **argv, int argc, int from) return arg_str; } +static void run_done(struct tevent_req *req) +{ + struct run_event_script_list **script_list = + tevent_req_callback_data_void(req); + bool status; + int ret; + + status = run_event_recv(req, &ret, NULL, script_list); + if (!status) { + fprintf(stderr, "run_event_recv() failed, ret=%d\n", ret); + } +} + static void do_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct run_event_context *run_ctx, int argc, const char **argv) @@ -61,8 +74,7 @@ static void do_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct run_event_script_list *script_list = NULL; char *arg_str; unsigned int i; - int ret, t; - bool status; + int t; if (argc < 5) { usage(argv[0]); @@ -90,13 +102,9 @@ static void do_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev, return; } - tevent_req_poll(req, ev); + tevent_req_set_callback(req, run_done, &script_list); - status = run_event_recv(req, &ret, mem_ctx, &script_list); - if (! status) { - fprintf(stderr, "run_event_recv() failed, ret=%d\n", ret); - return; - } + tevent_req_poll(req, ev); if (script_list == NULL || script_list->num_scripts == 0) { printf("No event scripts found\n"); -- 2.25.1 From ded1cd0e1fdf1418d7668f77bc48699481b235c8 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 18 May 2021 08:18:25 +0200 Subject: [PATCH 3/6] ctdb: Introduce a helper variable in run_event_test.c Bug: https://bugzilla.samba.org/show_bug.cgi?id=14475 Signed-off-by: Volker Lendecke Reviewed-by: Ralph Boehme (cherry picked from commit 07ab9b7a71d59f3ff2b9dee662632315062213ab) --- ctdb/tests/src/run_event_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/tests/src/run_event_test.c b/ctdb/tests/src/run_event_test.c index 398ab2a0df0..61c4ecc30f9 100644 --- a/ctdb/tests/src/run_event_test.c +++ b/ctdb/tests/src/run_event_test.c @@ -114,8 +114,8 @@ static void do_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev, printf("Event %s completed with result=%d\n", argv[4], script_list->summary); for (i=0; inum_scripts; i++) { - printf("%s result=%d\n", script_list->script[i].name, - script_list->script[i].summary); + struct run_event_script *s = &script_list->script[i]; + printf("%s result=%d\n", s->name, s->summary); } } -- 2.25.1 From 3c959e2009a07de14479c23f5ea42fd43a98a5d7 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 18 May 2021 08:23:05 +0200 Subject: [PATCH 4/6] ctdb: Wait for SIGCHLD if script timed out Bug: https://bugzilla.samba.org/show_bug.cgi?id=14475 Signed-off-by: Volker Lendecke Reviewed-by: Ralph Boehme (cherry picked from commit 19290f10c7d39e055847eb45affd9e229a116b18) --- ctdb/tests/src/run_event_test.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ctdb/tests/src/run_event_test.c b/ctdb/tests/src/run_event_test.c index 61c4ecc30f9..08e8b95e13d 100644 --- a/ctdb/tests/src/run_event_test.c +++ b/ctdb/tests/src/run_event_test.c @@ -75,6 +75,7 @@ static void do_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev, char *arg_str; unsigned int i; int t; + bool wait_for_signal = false; if (argc < 5) { usage(argv[0]); @@ -116,7 +117,28 @@ static void do_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev, for (i=0; inum_scripts; i++) { struct run_event_script *s = &script_list->script[i]; printf("%s result=%d\n", s->name, s->summary); + + if (s->summary == -ETIMEDOUT) { + wait_for_signal = true; + } + } + + TALLOC_FREE(script_list); + TALLOC_FREE(req); + + if (!wait_for_signal) { + return; } + + req = tevent_wakeup_send( + ev, ev, tevent_timeval_current_ofs(1, 0)); + if (req == NULL) { + fprintf(stderr, "Could not wait for signal\n"); + return; + } + + tevent_req_poll(req, ev); + TALLOC_FREE(req); } static void do_list(TALLOC_CTX *mem_ctx, struct tevent_context *ev, -- 2.25.1 From 20c0ddbe64d7a6a6705101f30281e537cc69d99d Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 18 May 2021 08:28:16 +0200 Subject: [PATCH 5/6] ctdb: Introduce output before and after the 10-second timeout This will lead to a crash in run_event_test.c soon Bug: https://bugzilla.samba.org/show_bug.cgi?id=14475 Signed-off-by: Volker Lendecke Reviewed-by: Ralph Boehme (cherry picked from commit f320d1a7ab0f81eefdb28b36bfe346eacb8980de) --- ctdb/tests/UNIT/cunit/run_event_001.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ctdb/tests/UNIT/cunit/run_event_001.sh b/ctdb/tests/UNIT/cunit/run_event_001.sh index 50051bfaab2..4df3b4bdad6 100755 --- a/ctdb/tests/UNIT/cunit/run_event_001.sh +++ b/ctdb/tests/UNIT/cunit/run_event_001.sh @@ -113,7 +113,9 @@ unit_test run_event_test "$scriptdir" run 10 monitor cat > "$scriptdir/22.bar.script" < Date: Tue, 18 May 2021 08:32:45 +0200 Subject: [PATCH 6/6] ctdb: Fix a crash in run_proc_signal_handler() If a script times out the caller can talloc_free() the script_list output of run_event_recv, which talloc_free's proc->output from run_proc.c as well. If the script generates further output after the timeout and then exits after a while, the SIGCHLD handler in the eventd tries to read into proc->output, which was already free'ed. Fix this by not doing just a talloc_steal but a talloc_move. This way proc_read_handler() called from run_proc_signal_handler() does not try to realloc the stale reference to proc->output but gets a NULL reference. I don't really know how to do a knownfail in ctdb, so this commit actually activates catching the signal by waiting long enough for 22.bar to exit and generate the SIGCHLD. Bug: https://bugzilla.samba.org/show_bug.cgi?id=14475 Signed-off-by: Volker Lendecke Reviewed-by: Ralph Boehme (cherry picked from commit adef87a621b17baf746d12f991c60a8a3ffcfcd3) --- ctdb/common/run_proc.c | 6 +++--- ctdb/tests/src/run_event_test.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ctdb/common/run_proc.c b/ctdb/common/run_proc.c index 0c3c1de72fe..d55af6c3a1e 100644 --- a/ctdb/common/run_proc.c +++ b/ctdb/common/run_proc.c @@ -426,7 +426,7 @@ static void run_proc_done(struct tevent_req *req) state->result = state->proc->result; if (state->proc->output != NULL) { - state->output = talloc_steal(state, state->proc->output); + state->output = talloc_move(state, &state->proc->output); } talloc_steal(state, state->proc); @@ -464,7 +464,7 @@ static void run_proc_timedout(struct tevent_req *subreq) state->result.err = ETIMEDOUT; if (state->proc->output != NULL) { - state->output = talloc_steal(state, state->proc->output); + state->output = talloc_move(state, &state->proc->output); } state->pid = state->proc->pid; @@ -495,7 +495,7 @@ bool run_proc_recv(struct tevent_req *req, int *perr, } if (output != NULL) { - *output = talloc_steal(mem_ctx, state->output); + *output = talloc_move(mem_ctx, &state->output); } return true; diff --git a/ctdb/tests/src/run_event_test.c b/ctdb/tests/src/run_event_test.c index 08e8b95e13d..94548647014 100644 --- a/ctdb/tests/src/run_event_test.c +++ b/ctdb/tests/src/run_event_test.c @@ -131,7 +131,7 @@ static void do_run(TALLOC_CTX *mem_ctx, struct tevent_context *ev, } req = tevent_wakeup_send( - ev, ev, tevent_timeval_current_ofs(1, 0)); + ev, ev, tevent_timeval_current_ofs(10, 0)); if (req == NULL) { fprintf(stderr, "Could not wait for signal\n"); return; -- 2.25.1