From 7dd7b3e35304a9fc435e4474cda85a97023a2a56 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 6 Oct 2012 13:23:05 +0200 Subject: [PATCH 1/3] tdb: Make robust against shrinking tdbs When probing for a size change (eg. just before tdb_expand, tdb_check, tdb_rescue) we call tdb_oob(tdb, tdb->map_size, 1, 1). Unfortunately this does nothing if the tdb has actually shrunk, which as Volker demonstrated, can actually happen if a "longlived" parent crashes. So move the map/update size/remap before the limit check. Signed-off-by: Rusty Russell Signed-off-by: Jeremy Allison --- lib/tdb/common/io.c | 32 ++++++++++++++++++++------------ 1 files changed, 20 insertions(+), 12 deletions(-) diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c index 3131f4f..25968bf 100644 --- a/lib/tdb/common/io.c +++ b/lib/tdb/common/io.c @@ -63,16 +63,6 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, return -1; } - if (st.st_size < (size_t)off + len) { - if (!probe) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond eof at %u\n", - (int)(off + len), (int)st.st_size)); - } - return -1; - } - /* Beware >4G files! */ if ((tdb_off_t)st.st_size != st.st_size) { /* Ensure ecode is set for log fn. */ @@ -82,13 +72,31 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, return -1; } - /* Unmap, update size, remap */ + /* Unmap, update size, remap. We do this unconditionally, to handle + * the unusual case where the db is truncated. + * + * This can happen to a child using tdb_reopen_all(true) on a + * TDB_CLEAR_IF_FIRST tdb whose parent crashes: the next + * opener will truncate the database. */ if (tdb_munmap(tdb) == -1) { tdb->ecode = TDB_ERR_IO; return -1; } tdb->map_size = st.st_size; - return tdb_mmap(tdb); + if (tdb_mmap(tdb) != 0) { + return - 1; + } + + if (st.st_size < (size_t)off + len) { + if (!probe) { + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond eof at %u\n", + (int)(off + len), (int)st.st_size)); + } + return -1; + } + return 0; } /* write a lump of data at a specified offset */ -- 1.7.7.3 From 85718dd5ac9f035b7fb3af5b29c11e1f1ec97534 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 2 Oct 2012 15:26:14 +0200 Subject: [PATCH 2/3] tdb: Make tdb robust against improper CLEAR_IF_FIRST restart When winbind is restarted, there is a potential crash in tdb. Following situation: We are in a cluster with ctdb. A winbind child hangs in a request to the DC. Cluster monitoring decides the node has a problem. Cluster monitoring decides to kill ctdbd. winbind child still hangs in a RPC request. winbind parent figures that ctdb is dead and immediately commits suicide. winbind parent is restarted by cluster management, overwriting gencache.tdb with CLEAR_IF_FIRST. The CLEAR_IF_FIRST logic as implemented now will not see that a child still has the tdb open, only the parent holds the ACTIVE_LOCK due to performance reasons. During the CLEAR_IF_FIRST logic is done, there is a very small window where we ftruncate(tfd, 0) the file and re-write a proper header without a lock. When during this small window the winbind child comes back, wanting to store something into gencache.tdb, that winbind child will crash with a SIGBUS. Sounds unlikely? See: [2012/09/29 07:02:31.871607, 0] lib/util.c:1183(smb_panic) PANIC (pid 1814517): internal error [2012/09/29 07:02:31.877596, 0] lib/util.c:1287(log_stack_trace) BACKTRACE: 35 stack frames: #0 winbindd(log_stack_trace+0x1a) [0x7feb7d4ca18a] #1 winbindd(smb_panic+0x2b) [0x7feb7d4ca25b] #2 winbindd(+0x1a3cc4) [0x7feb7d4bacc4] #3 /lib64/libc.so.6(+0x32900) [0x7feb7a929900] #4 /lib64/libc.so.6(memcpy+0x35) [0x7feb7a97f355] #5 /usr/lib64/libtdb.so.1(+0x6e76) [0x7feb7b0b0e76] #6 /usr/lib64/libtdb.so.1(+0x3d37) [0x7feb7b0add37] #7 /usr/lib64/libtdb.so.1(+0x863d) [0x7feb7b0b263d] #8 /usr/lib64/libtdb.so.1(+0x8700) [0x7feb7b0b2700] #9 /usr/lib64/libtdb.so.1(+0x2505) [0x7feb7b0ac505] #10 /usr/lib64/libtdb.so.1(+0x25b7) [0x7feb7b0ac5b7] #11 /usr/lib64/libtdb.so.1(tdb_fetch+0x13) [0x7feb7b0ac633] #12 winbindd(gencache_set_data_blob+0x259) [0x7feb7d4d8449] #13 winbindd(gencache_set+0x53) [0x7feb7d4d85b3] #14 winbindd(gencache_del+0x5e) [0x7feb7d4d879e] #15 winbindd(saf_delete+0x93) [0x7feb7d54b693] #16 winbindd(+0xe507e) [0x7feb7d3fc07e] #17 winbindd(+0xe85e5) [0x7feb7d3ff5e5] #18 winbindd(+0xe65be) [0x7feb7d3fd5be] #19 winbindd(+0xe7562) [0x7feb7d3fe562] #20 winbindd(init_dc_connection+0x2e) [0x7feb7d3fe5be] #21 winbindd(+0xe75d9) [0x7feb7d3fe5d9] #22 winbindd(cm_connect_netlogon+0x58) [0x7feb7d3fe658] #23 winbindd(_wbint_PingDc+0x61) [0x7feb7d410991] #24 winbindd(+0x103175) [0x7feb7d41a175] #25 winbindd(winbindd_dual_ndrcmd+0xb7) [0x7feb7d4107d7] #26 winbindd(+0xf8609) [0x7feb7d40f609] #27 winbindd(+0xf9075) [0x7feb7d410075] #28 winbindd(tevent_common_loop_immediate+0xe8) [0x7feb7d4db198] #29 winbindd(run_events_poll+0x3c) [0x7feb7d4d93fc] #30 winbindd(+0x1c2b52) [0x7feb7d4d9b52] #31 winbindd(_tevent_loop_once+0x90) [0x7feb7d4d9f60] #32 winbindd(main+0x7b3) [0x7feb7d3e7aa3] #33 /lib64/libc.so.6(__libc_start_main+0xfd) [0x7feb7a915cdd] #34 winbindd(+0xce2a9) [0x7feb7d3e52a9] This is in a winbind child, logfiles surrounding indicate the parent was restarted. This patch takes all chain locks around the CLEAR_IF_FIRST introduced tdb_new_database. Signed-off-by: Jeremy Allison --- lib/tdb/common/open.c | 32 ++++++++++++++++++++++++++++---- 1 files changed, 28 insertions(+), 4 deletions(-) diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c index 8836f84..d9f76f0 100644 --- a/lib/tdb/common/open.c +++ b/lib/tdb/common/open.c @@ -313,12 +313,36 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td if ((tdb_flags & TDB_CLEAR_IF_FIRST) && (!tdb->read_only) && (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) { - open_flags |= O_CREAT; - if (ftruncate(tdb->fd, 0) == -1) { + int ret; + ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, + TDB_LOCK_WAIT); + if (ret == -1) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " - "failed to truncate %s: %s\n", + "tdb_brlock failed for %s: %s\n", name, strerror(errno))); - goto fail; /* errno set by ftruncate */ + goto fail; + } + ret = tdb_new_database(tdb, hash_size); + if (ret == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " + "tdb_new_database failed for %s: %s\n", + name, strerror(errno))); + tdb_unlockall(tdb); + goto fail; + } + ret = tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0); + if (ret == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " + "tdb_unlockall failed for %s: %s\n", + name, strerror(errno))); + goto fail; + } + ret = lseek(tdb->fd, 0, SEEK_SET); + if (ret == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " + "lseek failed for %s: %s\n", + name, strerror(errno))); + goto fail; } } -- 1.7.7.3 From 0f11a7bd894e030ad86125ce06c681d11ec3394d Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 2 Oct 2012 15:44:41 +0200 Subject: [PATCH 3/3] s3: Add two tests a CLEAR_IF_FIRST crash Autobuild-User(master): Volker Lendecke Autobuild-Date(master): Sat Oct 6 17:16:39 CEST 2012 on sn-devel-104 Signed-off-by: Jeremy Allison --- source3/torture/torture.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 56 insertions(+), 0 deletions(-) diff --git a/source3/torture/torture.c b/source3/torture/torture.c index 5254847..0cca680 100644 --- a/source3/torture/torture.c +++ b/source3/torture/torture.c @@ -8882,6 +8882,60 @@ static bool run_local_remove_duplicate_addrs2(int dummy) return true; } +static bool run_local_tdb_opener(int dummy) +{ + TDB_CONTEXT *t; + unsigned v = 0; + + while (1) { + t = tdb_open("test.tdb", 1000, TDB_CLEAR_IF_FIRST, + O_RDWR|O_CREAT, 0755); + if (t == NULL) { + perror("tdb_open failed"); + return false; + } + tdb_close(t); + + v += 1; + printf("\r%u", v); + } + return true; +} + +static bool run_local_tdb_writer(int dummy) +{ + TDB_CONTEXT *t; + unsigned v = 0; + TDB_DATA val; + + t = tdb_open("test.tdb", 1000, 0, O_RDWR|O_CREAT, 0755); + if (t == 0) { + perror("tdb_open failed"); + return 1; + } + + val.dptr = (uint8_t *)&v; + val.dsize = sizeof(v); + + while (1) { + TDB_DATA data; + int ret; + + ret = tdb_store(t, val, val, 0); + if (ret != 0) { + printf("%s\n", tdb_errorstr(t)); + } + v += 1; + printf("\r%u", v); + + data = tdb_fetch(t, val); + if (data.dptr != NULL) { + SAFE_FREE(data.dptr); + } + } + return true; +} + static double create_procs(bool (*fn)(int), bool *result) { int i, status; @@ -9094,6 +9148,8 @@ static struct { { "LOCAL-hex_encode_buf", run_local_hex_encode_buf, 0}, { "LOCAL-IDMAP-TDB-COMMON", run_idmap_tdb_common_test, 0}, { "LOCAL-remove_duplicate_addrs2", run_local_remove_duplicate_addrs2, 0}, + { "local-tdb-opener", run_local_tdb_opener, 0 }, + { "local-tdb-writer", run_local_tdb_writer, 0 }, {NULL, NULL, 0}}; -- 1.7.7.3