The Samba-Bugzilla – Attachment 13532 Details for
Bug 13006
Fork handling under messaging load broken
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
git-am full backport for 4.6.x - includes test.
bug-13006-4.6.patch (text/plain), 7.82 KB, created by
Jeremy Allison
on 2017-09-01 22:56:29 UTC
(
hide
)
Description:
git-am full backport for 4.6.x - includes test.
Filename:
MIME Type:
Creator:
Jeremy Allison
Created:
2017-09-01 22:56:29 UTC
Size:
7.82 KB
patch
obsolete
>From d25f27a53d8382ee6b3fe4a009a755f5c4050777 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Mon, 28 Aug 2017 16:38:19 +0200 >Subject: [PATCH 1/2] pthreadpool: Fix fork behaviour > >glibc's pthread_cond_wait(&c, &m) increments m.__data.__nusers, making >pthread_mutex_destroy return EBUSY. Thus we can't allow any thread waiting for >a job across a fork. Also, the state of the condvar itself is unclear across a >fork. Right now to me it looks like an initialized but unused condvar can be >used in the child. Busy worker threads don't cause any trouble here, they don't >hold mutexes or condvars. Also, they can't reach the condvar because _prepare >holds all mutexes. > >Bug: https://bugzilla.samba.org/show_bug.cgi?id=13006 >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Jeremy Allison <jra@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >(cherry picked from commit ff98e3fb666b57b56a1427aa1196948ceebdec66) >--- > source3/lib/pthreadpool/pthreadpool.c | 67 +++++++++++++++++++++++++++++++++-- > 1 file changed, 65 insertions(+), 2 deletions(-) > >diff --git a/source3/lib/pthreadpool/pthreadpool.c b/source3/lib/pthreadpool/pthreadpool.c >index f97cdcc6c15..23885aa6d11 100644 >--- a/source3/lib/pthreadpool/pthreadpool.c >+++ b/source3/lib/pthreadpool/pthreadpool.c >@@ -89,6 +89,13 @@ struct pthreadpool { > * Number of idle threads > */ > int num_idle; >+ >+ /* >+ * Condition variable indicating that we should quickly go >+ * away making way for fork() without anybody waiting on >+ * pool->condvar. >+ */ >+ pthread_cond_t *prefork_cond; > }; > > static pthread_mutex_t pthreadpools_mutex = PTHREAD_MUTEX_INITIALIZER; >@@ -148,6 +155,7 @@ int pthreadpool_init(unsigned max_threads, struct pthreadpool **presult, > pool->num_threads = 0; > pool->max_threads = max_threads; > pool->num_idle = 0; >+ pool->prefork_cond = NULL; > > ret = pthread_mutex_lock(&pthreadpools_mutex); > if (ret != 0) { >@@ -169,6 +177,47 @@ int pthreadpool_init(unsigned max_threads, struct pthreadpool **presult, > return 0; > } > >+static void pthreadpool_prepare_pool(struct pthreadpool *pool) >+{ >+ pthread_cond_t prefork_cond = PTHREAD_COND_INITIALIZER; >+ int ret; >+ >+ ret = pthread_mutex_lock(&pool->mutex); >+ assert(ret == 0); >+ >+ while (pool->num_idle != 0) { >+ /* >+ * Exit all idle threads, which are all blocked in >+ * pool->condvar. In the child we can destroy the >+ * pool, which would result in undefined behaviour in >+ * the pthread_cond_destroy(pool->condvar). glibc just >+ * blocks here. >+ */ >+ pool->prefork_cond = &prefork_cond; >+ >+ ret = pthread_cond_signal(&pool->condvar); >+ assert(ret == 0); >+ >+ ret = pthread_cond_wait(&prefork_cond, &pool->mutex); >+ assert(ret == 0); >+ >+ pool->prefork_cond = NULL; >+ } >+ >+ ret = pthread_cond_destroy(&prefork_cond); >+ assert(ret == 0); >+ >+ /* >+ * Probably it's well-defined somewhere: What happens to >+ * condvars after a fork? The rationale of pthread_atfork only >+ * writes about mutexes. So better be safe than sorry and >+ * destroy/reinit pool->condvar across a fork. >+ */ >+ >+ ret = pthread_cond_destroy(&pool->condvar); >+ assert(ret == 0); >+} >+ > static void pthreadpool_prepare(void) > { > int ret; >@@ -180,8 +229,7 @@ static void pthreadpool_prepare(void) > pool = pthreadpools; > > while (pool != NULL) { >- ret = pthread_mutex_lock(&pool->mutex); >- assert(ret == 0); >+ pthreadpool_prepare_pool(pool); > pool = pool->next; > } > } >@@ -194,6 +242,8 @@ static void pthreadpool_parent(void) > for (pool = DLIST_TAIL(pthreadpools); > pool != NULL; > pool = DLIST_PREV(pool)) { >+ ret = pthread_cond_init(&pool->condvar, NULL); >+ assert(ret == 0); > ret = pthread_mutex_unlock(&pool->mutex); > assert(ret == 0); > } >@@ -216,6 +266,8 @@ static void pthreadpool_child(void) > pool->head = 0; > pool->num_jobs = 0; > >+ ret = pthread_cond_init(&pool->condvar, NULL); >+ assert(ret == 0); > ret = pthread_mutex_unlock(&pool->mutex); > assert(ret == 0); > } >@@ -406,6 +458,17 @@ static void *pthreadpool_server(void *arg) > &pool->condvar, &pool->mutex, &ts); > pool->num_idle -= 1; > >+ if (pool->prefork_cond != NULL) { >+ /* >+ * Me must allow fork() to continue >+ * without anybody waiting on >+ * &pool->condvar. >+ */ >+ pthread_cond_signal(pool->prefork_cond); >+ pthreadpool_server_exit(pool); >+ return NULL; >+ } >+ > if (res == ETIMEDOUT) { > > if (pool->num_jobs == 0) { >-- >2.14.1.581.gf28d330327-goog > > >From e325fceee51ab007fc73c61f9a3236db21cb9f16 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Fri, 1 Sep 2017 15:55:00 -0700 >Subject: [PATCH 2/2] pthreadpool: Test fork with an active thread > >Bug: https://bugzilla.samba.org/show_bug.cgi?id=13006 >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Jeremy Allison <jra@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> > >Autobuild-User(master): Volker Lendecke <vl@samba.org> >Autobuild-Date(master): Thu Aug 31 21:34:57 CEST 2017 on sn-devel-144 > >(cherry picked from commit 981e674a7472017274c9b169c776d5c5e8bd1469) >--- > source3/lib/pthreadpool/tests.c | 114 ++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 114 insertions(+) > >diff --git a/source3/lib/pthreadpool/tests.c b/source3/lib/pthreadpool/tests.c >index c4d2e6a1382..999118286eb 100644 >--- a/source3/lib/pthreadpool/tests.c >+++ b/source3/lib/pthreadpool/tests.c >@@ -7,6 +7,7 @@ > #include <unistd.h> > #include <sys/types.h> > #include <sys/wait.h> >+#include <signal.h> > #include "pthreadpool_pipe.h" > #include "pthreadpool_tevent.h" > >@@ -192,6 +193,113 @@ static int test_fork(void) > return 0; > } > >+static void busyfork_job(void *private_data) >+{ >+ return; >+} >+ >+static int test_busyfork(void) >+{ >+ struct pthreadpool_pipe *p; >+ int fds[2]; >+ struct pollfd pfd; >+ pid_t child, waitret; >+ int ret, jobnum, wstatus; >+ >+ ret = pipe(fds); >+ if (ret == -1) { >+ perror("pipe failed"); >+ return -1; >+ } >+ >+ ret = pthreadpool_pipe_init(1, &p); >+ if (ret != 0) { >+ fprintf(stderr, "pthreadpool_pipe_init failed: %s\n", >+ strerror(ret)); >+ return -1; >+ } >+ >+ ret = pthreadpool_pipe_add_job(p, 1, busyfork_job, NULL); >+ if (ret != 0) { >+ fprintf(stderr, "pthreadpool_add_job failed: %s\n", >+ strerror(ret)); >+ return -1; >+ } >+ >+ ret = pthreadpool_pipe_finished_jobs(p, &jobnum, 1); >+ if (ret != 1) { >+ fprintf(stderr, "pthreadpool_pipe_finished_jobs failed\n"); >+ return -1; >+ } >+ >+ poll(NULL, 0, 200); >+ >+ child = fork(); >+ if (child < 0) { >+ perror("fork failed"); >+ return -1; >+ } >+ >+ if (child == 0) { >+ ret = pthreadpool_pipe_destroy(p); >+ if (ret != 0) { >+ fprintf(stderr, "pthreadpool_pipe_destroy failed: " >+ "%s\n", strerror(ret)); >+ exit(1); >+ } >+ exit(0); >+ } >+ >+ ret = close(fds[1]); >+ if (ret == -1) { >+ perror("close failed"); >+ return -1; >+ } >+ >+ pfd = (struct pollfd) { .fd = fds[0], .events = POLLIN }; >+ >+ ret = poll(&pfd, 1, 5000); >+ if (ret == -1) { >+ perror("poll failed"); >+ return -1; >+ } >+ if (ret == 0) { >+ fprintf(stderr, "Child did not exit for 5 seconds\n"); >+ /* >+ * The child might hang forever in >+ * pthread_cond_destroy for example. Be kind to the >+ * system and kill it. >+ */ >+ kill(child, SIGTERM); >+ return -1; >+ } >+ if (ret != 1) { >+ fprintf(stderr, "poll returned %d -- huh??\n", ret); >+ return -1; >+ } >+ >+ poll(NULL, 0, 200); >+ >+ waitret = waitpid(child, &wstatus, WNOHANG); >+ if (waitret != child) { >+ fprintf(stderr, "waitpid returned %d\n", (int)waitret); >+ return -1; >+ } >+ >+ if (!WIFEXITED(wstatus)) { >+ fprintf(stderr, "child did not properly exit\n"); >+ return -1; >+ } >+ >+ ret = WEXITSTATUS(wstatus); >+ if (ret != 0) { >+ fprintf(stderr, "child returned %d\n", ret); >+ return -1; >+ } >+ >+ return 0; >+} >+ > static void test_tevent_wait(void *private_data) > { > int *timeout = private_data; >@@ -301,6 +409,12 @@ int main(void) > return 1; > } > >+ ret = test_busyfork(); >+ if (ret != 0) { >+ fprintf(stderr, "test_busyfork failed\n"); >+ return 1; >+ } >+ > printf("success\n"); > return 0; > } >-- >2.14.1.581.gf28d330327-goog >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Flags:
vl
:
review+
Actions:
View
Attachments on
bug 13006
:
13530
|
13531
| 13532 |
13533
|
13551