Attachment 5580 Details for Bug 7312 – Patches by Tridge that fix this bug.

[patch] Patches by Tridge that fix this bug.

bug-7312.patches (text/plain), 9.33 KB, created by Michael Adam on 2010-03-31 09:54:19 UTC

(hide)

Description:

Filename:

MIME Type:

Creator: Michael Adam

Created: 2010-03-31 09:54:19 UTC

Size: 9.33 KB

patch

obsolete

>From 9920e7b8089398578959f5bee53539c19dc7bde5 Mon Sep 17 00:00:00 2001
>From: Andrew Tridgell <tridge@samba.org>
>Date: Fri, 5 Feb 2010 19:14:45 -0800
>Subject: [PATCH 1/4] s3-events: make the old timed events compatible with tevent
>
>tevent ensures that a timed event is only called once. The old events
>code relied on the called handler removing the event itself. If the
>handler removed the event after calling a function which invoked the
>event loop then the timed event could loop forever.
>
>This change makes the two timed event systems more compatible, by
>allowing the handler to free the te if it wants to, but ensuring it is
>off the linked list of events before the handler is called, and
>ensuring it is freed even if the handler doesn't free it.
>(cherry picked from commit 5dbf175c75bd6139f3238f36665000641f7f7f79)
>---
> source3/lib/events.c |   21 +++++++++++++++++++--
> 1 files changed, 19 insertions(+), 2 deletions(-)
>
>diff --git a/source3/lib/events.c b/source3/lib/events.c
>index 7a06ad0..75aa250 100644
>--- a/source3/lib/events.c
>+++ b/source3/lib/events.c
>@@ -105,12 +105,29 @@ bool run_events(struct tevent_context *ev,
> 
> 	if ((ev->timer_events != NULL)
> 	    && (timeval_compare(&now, &ev->timer_events->next_event) >= 0)) {
>+		/* this older events system did not auto-free timed
>+		   events on running them, and had a race condition
>+		   where the event could be called twice if the
>+		   talloc_free of the te happened after the callback
>+		   made a call which invoked the event loop. To avoid
>+		   this while still allowing old code which frees the
>+		   te, we need to create a temporary context which
>+		   will be used to ensure the te is freed. We also
>+		   remove the te from the timed event list before we
>+		   call the handler, to ensure we can't loop */
>+
>+		struct tevent_timer *te = ev->timer_events;
>+		TALLOC_CTX *tmp_ctx = talloc_new(ev);
> 
> 		DEBUG(10, ("Running timed event \"%s\" %p\n",
> 			   ev->timer_events->handler_name, ev->timer_events));
> 
>-		ev->timer_events->handler(ev, ev->timer_events, now,
>-					  ev->timer_events->private_data);
>+		DLIST_REMOVE(ev->timer_events, te);
>+		talloc_steal(tmp_ctx, te);
>+
>+		te->handler(ev, te, now, te->private_data);
>+
>+		talloc_free(tmp_ctx);
> 		return true;
> 	}
> 
>-- 
>1.6.3.3
>
>
>From c6d9e832ca09fc0c7bc2d31333caf751fcd1803a Mon Sep 17 00:00:00 2001
>From: Andrew Tridgell <tridge@samba.org>
>Date: Fri, 5 Feb 2010 20:59:43 -0800
>Subject: [PATCH 2/4] s3-brlock: add a minimim retry time for pending blocking locks
>
>When we are waiting on a pending byte range lock, another smbd might
>exit uncleanly, and therefore not notify us of the removal of the
>lock, and thus not trigger the lock to be retried.
>
>We coped with this up to now by adding a message_send_all() in the
>SIGCHLD and cluster reconfigure handlers to send a MSG_SMB_UNLOCK to
>all smbd processes. That would generate O(N^2) work when a large
>number of clients disconnected at once (such as on a network outage),
>which could leave the whole system unusable for a very long time (many
>minutes, or even longer).
>
>By adding a minimum re-check time for pending byte range locks we
>avoid this problem by ensuring that pending locks are retried at a
>more regular interval.
>(cherry picked from commit 5b398edbee672392f2cea260ab17445ecca927d7)
>---
> source3/smbd/blocking.c |   20 ++++++++++++++++++++
> 1 files changed, 20 insertions(+), 0 deletions(-)
>
>diff --git a/source3/smbd/blocking.c b/source3/smbd/blocking.c
>index e33c0b6..3f49421 100644
>--- a/source3/smbd/blocking.c
>+++ b/source3/smbd/blocking.c
>@@ -72,6 +72,7 @@ static bool recalc_brl_timeout(void)
> {
> 	struct blocking_lock_record *blr;
> 	struct timeval next_timeout;
>+	int max_brl_timeout = lp_parm_int(-1, "brl", "recalctime", 5);
> 
> 	TALLOC_FREE(brl_timeout);
> 
>@@ -100,6 +101,25 @@ static bool recalc_brl_timeout(void)
> 		return True;
> 	}
> 
>+	/* 
>+	 to account for unclean shutdowns by clients we need a
>+	 maximum timeout that we use for checking pending locks. If
>+	 we have any pending locks at all, then check if the pending
>+	 lock can continue at least every brl:recalctime seconds
>+	 (default 5 seconds).
>+
>+	 This saves us needing to do a message_send_all() in the
>+	 SIGCHLD handler in the parent daemon. That
>+	 message_send_all() caused O(n^2) work to be done when IP
>+	 failovers happened in clustered Samba, which could make the
>+	 entire system unusable for many minutes.
>+	*/
>+
>+	if (max_brl_timeout > 0) {
>+		struct timeval min_to = timeval_current_ofs(max_brl_timeout, 0);
>+		next_timeout = timeval_min(&next_timeout, &min_to);             
>+	}
>+
> 	if (DEBUGLVL(10)) {
> 		struct timeval cur, from_now;
> 
>-- 
>1.6.3.3
>
>
>From 92b4f744b847d9ac65063f5c674afbf8b4df0df9 Mon Sep 17 00:00:00 2001
>From: Andrew Tridgell <tridge@samba.org>
>Date: Fri, 5 Feb 2010 21:02:24 -0800
>Subject: [PATCH 3/4] s3-brlock: we don't need these MSG_SMB_UNLOCK calls now
>
>These have been replaced with the min timeout in blocking.c
>(cherry picked from commit 74267d652485cdcb711f734f0d80da0fb1495867)
>---
> source3/lib/ctdbd_conn.c |    8 --------
> source3/smbd/server.c    |    2 --
> 2 files changed, 0 insertions(+), 10 deletions(-)
>
>diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c
>index 84bba3b..8ddb12a 100644
>--- a/source3/lib/ctdbd_conn.c
>+++ b/source3/lib/ctdbd_conn.c
>@@ -542,15 +542,7 @@ static NTSTATUS ctdb_handle_message(uint8_t *buf, size_t length,
> 		messaging_send(conn->msg_ctx, procid_self(),
> 			       MSG_SMB_BRL_VALIDATE, &data_blob_null);
> 
>-		/*
>-		 * it's possible that we have just rejoined the cluster after
>-		 * an outage. In that case our pending locks could have been
>-		 * removed from the lockdb, so retry them once more
>-		 */
>-		message_send_all(conn->msg_ctx, MSG_SMB_UNLOCK, NULL, 0, NULL);
>-
> 		TALLOC_FREE(buf);
>-
> 		return NT_STATUS_OK;
> 	}
> 
>diff --git a/source3/smbd/server.c b/source3/smbd/server.c
>index f719961..56c7e2f 100644
>--- a/source3/smbd/server.c
>+++ b/source3/smbd/server.c
>@@ -230,8 +230,6 @@ static void remove_child_pid(pid_t pid, bool unclean_shutdown)
> 		DEBUG(3,(__location__ " Unclean shutdown of pid %u\n", (unsigned int)pid));
> 		messaging_send_buf(smbd_messaging_context(), procid_self(), 
> 				   MSG_SMB_BRL_VALIDATE, NULL, 0);
>-		message_send_all(smbd_messaging_context(), 
>-				 MSG_SMB_UNLOCK, NULL, 0, NULL);
> 	}
> 
> 	for (child = children; child != NULL; child = child->next) {
>-- 
>1.6.3.3
>
>
>From 55846f5a71b3b575029d99aedab2dbe754dc1a10 Mon Sep 17 00:00:00 2001
>From: Andrew Tridgell <tridge@samba.org>
>Date: Fri, 5 Feb 2010 21:08:56 -0800
>Subject: [PATCH 4/4] s3-smbd: add a rate limited cleanup of brl, connections and locking db
>
>On unclean shutdown we can end up with stale entries in the brlock,
>connections and locking db. Previously we would do the cleanup on
>every unclean exit, but that can cause smbd to be completely
>unavailable for several minutes when a large number of child smbd
>processes exit.
>
>This adds a rate limited cleanup of the databases, with the default
>that cleanup happens at most every 20s
>(cherry picked from commit dd498d2eecf124a03b6117ddab892a1112f9e9db)
>---
> source3/smbd/server.c |   48 ++++++++++++++++++++++++++++++++++++++++++------
> 1 files changed, 42 insertions(+), 6 deletions(-)
>
>diff --git a/source3/smbd/server.c b/source3/smbd/server.c
>index 56c7e2f..0c59135 100644
>--- a/source3/smbd/server.c
>+++ b/source3/smbd/server.c
>@@ -219,17 +219,53 @@ static void add_child_pid(pid_t pid)
> 	num_children += 1;
> }
> 
>+/*
>+  at most every smbd:cleanuptime seconds (default 20), we scan the BRL
>+  and locking database for entries to cleanup. As a side effect this
>+  also cleans up dead entries in the connections database (due to the
>+  traversal in message_send_all()
>+
>+  Using a timer for this prevents a flood of traversals when a large
>+  number of clients disconnect at the same time (perhaps due to a
>+  network outage).  
>+*/
>+
>+static void cleanup_timeout_fn(struct event_context *event_ctx,
>+				struct timed_event *te,
>+				struct timeval now,
>+				void *private_data)
>+{
>+	struct timed_event **cleanup_te = (struct timed_event **)private_data;
>+
>+	DEBUG(1,("Cleaning up brl and lock database after unclean shutdown\n"));
>+	message_send_all(smbd_messaging_context(), MSG_SMB_UNLOCK, NULL, 0, NULL);
>+	messaging_send_buf(smbd_messaging_context(), procid_self(), 
>+				MSG_SMB_BRL_VALIDATE, NULL, 0);
>+	/* mark the cleanup as having been done */
>+	(*cleanup_te) = NULL;
>+}
>+
> static void remove_child_pid(pid_t pid, bool unclean_shutdown)
> {
> 	struct child_pid *child;
>+	static struct timed_event *cleanup_te;
> 
> 	if (unclean_shutdown) {
>-		/* a child terminated uncleanly so tickle all processes to see 
>-		   if they can grab any of the pending locks
>-		*/
>-		DEBUG(3,(__location__ " Unclean shutdown of pid %u\n", (unsigned int)pid));
>-		messaging_send_buf(smbd_messaging_context(), procid_self(), 
>-				   MSG_SMB_BRL_VALIDATE, NULL, 0);
>+		/* a child terminated uncleanly so tickle all
>+		   processes to see if they can grab any of the
>+		   pending locks
>+                */
>+		DEBUG(3,(__location__ " Unclean shutdown of pid %u\n", 
>+			(unsigned int)pid));
>+		if (!cleanup_te) {
>+			/* call the cleanup timer, but not too often */
>+			int cleanup_time = lp_parm_int(-1, "smbd", "cleanuptime", 20);
>+			cleanup_te = event_add_timed(smbd_event_context(), NULL,
>+						timeval_current_ofs(cleanup_time, 0),
>+						cleanup_timeout_fn, 
>+						&cleanup_te);
>+			DEBUG(1,("Scheduled cleanup of brl and lock database after unclean shutdown\n"));
>+		}
> 	}
> 
> 	for (child = children; child != NULL; child = child->next) {
>-- 
>1.6.3.3
>

Flags:

metze: review+

Actions: View

Attachments on bug 7312: 5580