From cc2568e02e369e83f9965f48da5ba2bbde9f98e6 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Fri, 9 Feb 2018 10:27:55 +0100 Subject: [PATCH 1/3] winbind: Improve child selection This improves the situation when a client request blocks a winbind child. This might be a slow samlogon or lookupnames to a domain that's far away. With random selection of the child for new request coming in we could end up with a long queue when other, non-blocked children could serve those new requests. Choose the shortest queue. This is an immediate and simple fix. Step two will be to have a per-domain and not a per-child queue. Right now we're pre-selecting the check-out queue at Fry's randomly without looking at the queue length. With this change we're picking the shortest queue. The better change will be what Fry's really does: One central queue and red/green lights on the busy/free checkout counters. Bug: https://bugzilla.samba.org/show_bug.cgi?id=13290 Signed-off-by: Volker Lendecke Reviewed-by: Andreas Schneider Autobuild-User(master): Volker Lendecke Autobuild-Date(master): Mon Feb 12 19:51:35 CET 2018 on sn-devel-144 (cherry picked from commit b4384b7f0ecf3b47dd60acaf77636b679e3adc05) --- source3/winbindd/winbindd_dual.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/source3/winbindd/winbindd_dual.c b/source3/winbindd/winbindd_dual.c index 33f1393e082..993166d82d9 100644 --- a/source3/winbindd/winbindd_dual.c +++ b/source3/winbindd/winbindd_dual.c @@ -248,33 +248,31 @@ static void wb_child_request_cleanup(struct tevent_req *req, DLIST_REMOVE(winbindd_children, state->child); } -static bool winbindd_child_busy(struct winbindd_child *child) -{ - return tevent_queue_length(child->queue) > 0; -} - -static struct winbindd_child *find_idle_child(struct winbindd_domain *domain) +struct winbindd_child *choose_domain_child(struct winbindd_domain *domain) { + struct winbindd_child *shortest = &domain->children[0]; + struct winbindd_child *current; int i; for (i=0; ichildren[i])) { - return &domain->children[i]; - } - } + size_t shortest_len, current_len; - return NULL; -} + current = &domain->children[i]; + current_len = tevent_queue_length(current->queue); -struct winbindd_child *choose_domain_child(struct winbindd_domain *domain) -{ - struct winbindd_child *result; + if (current_len == 0) { + /* idle child */ + return current; + } - result = find_idle_child(domain); - if (result != NULL) { - return result; + shortest_len = tevent_queue_length(shortest->queue); + + if (current_len < shortest_len) { + shortest = current; + } } - return &domain->children[rand() % lp_winbind_max_domain_connections()]; + + return shortest; } struct dcerpc_binding_handle *dom_child_handle(struct winbindd_domain *domain) -- 2.13.6 From 4cd6b8423a0a0a3d082f27b22e8b7f173f9be8c1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 16 Feb 2018 15:02:42 +0100 Subject: [PATCH 2/3] winbind: use tevent_queue_wait_send/recv in wb_child_request_*() We need a way to keep the child->queue blocked without relying on the current 'req' (wb_child_request_state). The next commit will make use of this. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13290 Signed-off-by: Stefan Metzmacher Reviewed-by: Volker Lendecke (cherry picked from commit d29dda141e08af42c535e8718226f95c45aadab8) --- source3/winbindd/winbindd_dual.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/source3/winbindd/winbindd_dual.c b/source3/winbindd/winbindd_dual.c index 993166d82d9..aae47e4d173 100644 --- a/source3/winbindd/winbindd_dual.c +++ b/source3/winbindd/winbindd_dual.c @@ -119,6 +119,7 @@ static NTSTATUS child_write_response(int sock, struct winbindd_response *wrsp) struct wb_child_request_state { struct tevent_context *ev; + struct tevent_req *queue_subreq; struct tevent_req *subreq; struct winbindd_child *child; struct winbindd_request *request; @@ -127,8 +128,7 @@ struct wb_child_request_state { static bool fork_domain_child(struct winbindd_child *child); -static void wb_child_request_trigger(struct tevent_req *req, - void *private_data); +static void wb_child_request_waited(struct tevent_req *subreq); static void wb_child_request_done(struct tevent_req *subreq); static void wb_child_request_cleanup(struct tevent_req *req, @@ -141,6 +141,7 @@ struct tevent_req *wb_child_request_send(TALLOC_CTX *mem_ctx, { struct tevent_req *req; struct wb_child_request_state *state; + struct tevent_req *subreq; req = tevent_req_create(mem_ctx, &state, struct wb_child_request_state); @@ -152,23 +153,36 @@ struct tevent_req *wb_child_request_send(TALLOC_CTX *mem_ctx, state->child = child; state->request = request; - if (!tevent_queue_add(child->queue, ev, req, - wb_child_request_trigger, NULL)) { - tevent_req_oom(req); + subreq = tevent_queue_wait_send(state, ev, child->queue); + if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); } + tevent_req_set_callback(subreq, wb_child_request_waited, req); + state->queue_subreq = subreq; tevent_req_set_cleanup_fn(req, wb_child_request_cleanup); return req; } -static void wb_child_request_trigger(struct tevent_req *req, - void *private_data) +static void wb_child_request_waited(struct tevent_req *subreq) { + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); struct wb_child_request_state *state = tevent_req_data( req, struct wb_child_request_state); - struct tevent_req *subreq; + bool ok; + + ok = tevent_queue_wait_recv(subreq); + if (!ok) { + tevent_req_oom(req); + return; + } + /* + * We need to keep state->queue_subreq + * in order to block the queue. + */ + subreq = NULL; if ((state->child->sock == -1) && (!fork_domain_child(state->child))) { tevent_req_error(req, errno); @@ -231,6 +245,7 @@ static void wb_child_request_cleanup(struct tevent_req *req, } TALLOC_FREE(state->subreq); + TALLOC_FREE(state->queue_subreq); if (req_state == TEVENT_REQ_DONE) { /* transmitted request and got response */ -- 2.13.6 From fd175aa5f51059f2fef29e3080cb1d79dbea5269 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 16 Feb 2018 15:05:57 +0100 Subject: [PATCH 3/3] winbind: protect a pending wb_child_request against a talloc_free() If the (winbind) client gave up we call TALLOC_FREE(state->mem_ctx) in remove_client(). This triggers a recursive talloc_free() for all in flight requests. In order to maintain the winbindd parent-child protocol, we need to keep the orphaned wb_simple_trans request until the parent got the response from the child. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13290 Signed-off-by: Stefan Metzmacher Reviewed-by: Volker Lendecke (cherry picked from commit 43af57d8728883c5ddbe169e1483181246fb68a8) --- source3/winbindd/winbindd_dual.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/source3/winbindd/winbindd_dual.c b/source3/winbindd/winbindd_dual.c index aae47e4d173..a5d8eeed465 100644 --- a/source3/winbindd/winbindd_dual.c +++ b/source3/winbindd/winbindd_dual.c @@ -130,6 +130,7 @@ static bool fork_domain_child(struct winbindd_child *child); static void wb_child_request_waited(struct tevent_req *subreq); static void wb_child_request_done(struct tevent_req *subreq); +static void wb_child_request_orphaned(struct tevent_req *subreq); static void wb_child_request_cleanup(struct tevent_req *req, enum tevent_req_state req_state); @@ -220,6 +221,12 @@ static void wb_child_request_done(struct tevent_req *subreq) tevent_req_done(req); } +static void wb_child_request_orphaned(struct tevent_req *subreq) +{ + DBG_WARNING("cleanup orphaned subreq[%p]\n", subreq); + TALLOC_FREE(subreq); +} + int wb_child_request_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx, struct winbindd_response **presponse, int *err) { @@ -244,6 +251,28 @@ static void wb_child_request_cleanup(struct tevent_req *req, return; } + if (req_state == TEVENT_REQ_RECEIVED) { + struct tevent_req *subreq = NULL; + + /* + * Our caller gave up, but we need to keep + * the low level request (wb_simple_trans) + * in order to maintain the parent child protocol. + * + * We also need to keep the child queue blocked + * until we got the response from the child. + */ + + subreq = talloc_move(state->child->queue, &state->subreq); + talloc_move(subreq, &state->queue_subreq); + tevent_req_set_callback(subreq, + wb_child_request_orphaned, + NULL); + + DBG_WARNING("keep orphaned subreq[%p]\n", subreq); + return; + } + TALLOC_FREE(state->subreq); TALLOC_FREE(state->queue_subreq); -- 2.13.6