The Samba-Bugzilla – Attachment 9964 Details for
Bug 10525
tdb mutex patches need integration.
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch on top of master to be reviewed and signed off
look-mutex-02.txt (text/plain), 159.53 KB, created by
Stefan Metzmacher
on 2014-05-22 06:28:56 UTC
(
hide
)
Description:
Patch on top of master to be reviewed and signed off
Filename:
MIME Type:
Creator:
Stefan Metzmacher
Created:
2014-05-22 06:28:56 UTC
Size:
159.53 KB
patch
obsolete
>From 2b8e198b31228070460e1511b6341c713dab1596 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Thu, 21 Feb 2013 16:34:32 +0100 >Subject: [PATCH 01/25] tdb/tools: add -l option to tdbbackup > >This opens the tdb with TDB_NOLOCK. > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/tdb/man/tdbbackup.8.xml | 12 ++++++++++++ > lib/tdb/tools/tdbbackup.c | 18 +++++++++++++----- > 2 files changed, 25 insertions(+), 5 deletions(-) > >diff --git a/lib/tdb/man/tdbbackup.8.xml b/lib/tdb/man/tdbbackup.8.xml >index f24202e..30a658d 100644 >--- a/lib/tdb/man/tdbbackup.8.xml >+++ b/lib/tdb/man/tdbbackup.8.xml >@@ -22,6 +22,7 @@ > <arg choice="opt">-s suffix</arg> > <arg choice="opt">-v</arg> > <arg choice="opt">-h</arg> >+ <arg choice="opt">-l</arg> > </cmdsynopsis> > </refsynopsisdiv> > >@@ -68,6 +69,17 @@ > </para></listitem> > </varlistentry> > >+ <varlistentry> >+ <term>-l</term> >+ <listitem><para> >+ This option disables any locking, by passing TDB_NOLOCK >+ to tdb_open_ex(). Only use this for database files which >+ are not used by any other process! And also only if it is otherwise not >+ possible to open the database, e.g. databases which were created with >+ mutex locking. 
>+ </para></listitem> >+ </varlistentry> >+ > </variablelist> > </refsect1> > >diff --git a/lib/tdb/tools/tdbbackup.c b/lib/tdb/tools/tdbbackup.c >index 276a281..eb33e25 100644 >--- a/lib/tdb/tools/tdbbackup.c >+++ b/lib/tdb/tools/tdbbackup.c >@@ -104,7 +104,8 @@ static int test_fn(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void *state) > only doing the backup if its OK > this function is also used for restore > */ >-static int backup_tdb(const char *old_name, const char *new_name, int hash_size) >+static int backup_tdb(const char *old_name, const char *new_name, >+ int hash_size, int nolock) > { > TDB_CONTEXT *tdb; > TDB_CONTEXT *tdb_new; >@@ -122,7 +123,8 @@ static int backup_tdb(const char *old_name, const char *new_name, int hash_size) > } > > /* open the old tdb */ >- tdb = tdb_open_ex(old_name, 0, 0, >+ tdb = tdb_open_ex(old_name, 0, >+ TDB_DEFAULT | (nolock ? TDB_NOLOCK : 0), > O_RDWR, 0, &log_ctx, NULL); > if (!tdb) { > printf("Failed to open %s\n", old_name); >@@ -249,7 +251,7 @@ static int verify_tdb(const char *fname, const char *bak_name) > /* count is < 0 means an error */ > if (count < 0) { > printf("restoring %s\n", fname); >- return backup_tdb(bak_name, fname, 0); >+ return backup_tdb(bak_name, fname, 0, 0); > } > > printf("%s : %d records\n", fname, count); >@@ -279,6 +281,7 @@ static void usage(void) > printf(" -s suffix set the backup suffix\n"); > printf(" -v verify mode (restore if corrupt)\n"); > printf(" -n hashsize set the new hash size for the backup\n"); >+ printf(" -l open without locking to back up mutex dbs\n"); > } > > int main(int argc, char *argv[]) >@@ -288,11 +291,12 @@ static void usage(void) > int c; > int verify = 0; > int hashsize = 0; >+ int nolock = 0; > const char *suffix = ".bak"; > > log_ctx.log_fn = tdb_log; > >- while ((c = getopt(argc, argv, "vhs:n:")) != -1) { >+ while ((c = getopt(argc, argv, "vhs:n:l")) != -1) { > switch (c) { > case 'h': > usage(); >@@ -306,6 +310,9 @@ static void usage(void) > case 'n': > 
hashsize = atoi(optarg); > break; >+ case 'l': >+ nolock = 1; >+ break; > } > } > >@@ -329,7 +336,8 @@ static void usage(void) > } > } else { > if (file_newer(fname, bak_name) && >- backup_tdb(fname, bak_name, hashsize) != 0) { >+ backup_tdb(fname, bak_name, hashsize, >+ nolock) != 0) { > ret = 1; > } > } >-- >1.7.9.5 > > >From 89198f0fa37a617bbdc354f0a88524bd46cf9153 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Thu, 21 Feb 2013 16:34:32 +0100 >Subject: [PATCH 02/25] tdb/tools: add -l option to tdbtool > >This opens the tdb with TDB_NOLOCK. > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/tdb/man/tdbtool.8.xml | 21 +++++++++++++++++++++ > lib/tdb/tools/tdbtool.c | 18 ++++++++++++++++-- > 2 files changed, 37 insertions(+), 2 deletions(-) > >diff --git a/lib/tdb/man/tdbtool.8.xml b/lib/tdb/man/tdbtool.8.xml >index ddca04c..cedc7eb 100644 >--- a/lib/tdb/man/tdbtool.8.xml >+++ b/lib/tdb/man/tdbtool.8.xml >@@ -24,6 +24,7 @@ > > <cmdsynopsis> > <command>tdbtool</command> >+ <arg choice="opt">-l</arg> > <arg choice="plain"> > <replaceable>TDBFILE</replaceable> > </arg> >@@ -48,6 +49,26 @@ > > </refsect1> > >+<refsect1> >+ <title>OPTIONS</title> >+ >+ <variablelist> >+ >+ <varlistentry> >+ <term>-l</term> >+ <listitem><para> >+ This option disables any locking, by passing TDB_NOLOCK >+ to tdb_open_ex(). Only use this for database files which >+ are not used by any other process! And also only if it is otherwise not >+ possible to open the database, e.g. databases which were created with >+ mutex locking. 
>+ </para></listitem> >+ </varlistentry> >+ >+ </variablelist> >+</refsect1> >+ >+ > > <refsect1> > <title>COMMANDS</title> >diff --git a/lib/tdb/tools/tdbtool.c b/lib/tdb/tools/tdbtool.c >index 01b9a14..c486117 100644 >--- a/lib/tdb/tools/tdbtool.c >+++ b/lib/tdb/tools/tdbtool.c >@@ -36,6 +36,7 @@ char *line; > TDB_DATA iterate_kbuf; > char cmdline[1024]; > static int disable_mmap; >+static int disable_lock; > > enum commands { > CMD_CREATE_TDB, >@@ -226,7 +227,10 @@ static void create_tdb(const char *tdbname) > log_ctx.log_fn = tdb_log; > > if (tdb) tdb_close(tdb); >- tdb = tdb_open_ex(tdbname, 0, TDB_CLEAR_IF_FIRST | (disable_mmap?TDB_NOMMAP:0), >+ tdb = tdb_open_ex(tdbname, 0, >+ TDB_CLEAR_IF_FIRST | >+ (disable_mmap?TDB_NOMMAP:0) | >+ (disable_lock?TDB_NOLOCK:0), > O_RDWR | O_CREAT | O_TRUNC, 0600, &log_ctx, NULL); > if (!tdb) { > printf("Could not create %s: %s\n", tdbname, strerror(errno)); >@@ -239,7 +243,10 @@ static void open_tdb(const char *tdbname) > log_ctx.log_fn = tdb_log; > > if (tdb) tdb_close(tdb); >- tdb = tdb_open_ex(tdbname, 0, disable_mmap?TDB_NOMMAP:0, O_RDWR, 0600, >+ tdb = tdb_open_ex(tdbname, 0, >+ (disable_mmap?TDB_NOMMAP:0) | >+ (disable_lock?TDB_NOLOCK:0), >+ O_RDWR, 0600, > &log_ctx, NULL); > if (!tdb) { > printf("Could not open %s: %s\n", tdbname, strerror(errno)); >@@ -736,6 +743,13 @@ int main(int argc, char *argv[]) > arg2 = NULL; > arg2len = 0; > >+ if (argv[1] && (strcmp(argv[1], "-l") == 0)) { >+ disable_lock = 1; >+ argv[1] = argv[0]; >+ argv += 1; >+ argc -= 1; >+ } >+ > if (argv[1]) { > cmdname = "open"; > arg1 = argv[1]; >-- >1.7.9.5 > > >From 4d704ad961fba6d57512d5aea5d52d70bd9c6068 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Thu, 21 Feb 2013 16:34:32 +0100 >Subject: [PATCH 03/25] tdb/tools: explicitly use TDB_NOLOCK in tdbdump > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/tdb/tools/tdbdump.c | 10 +++++++++- > 1 file changed, 9 
insertions(+), 1 deletion(-) > >diff --git a/lib/tdb/tools/tdbdump.c b/lib/tdb/tools/tdbdump.c >index a739f99..9a0a7fe 100644 >--- a/lib/tdb/tools/tdbdump.c >+++ b/lib/tdb/tools/tdbdump.c >@@ -99,8 +99,16 @@ static int dump_tdb(const char *fname, const char *keyname, bool emergency) > TDB_CONTEXT *tdb; > TDB_DATA key, value; > struct tdb_logging_context logfn = { log_stderr }; >+ int tdb_flags = TDB_DEFAULT; > >- tdb = tdb_open_ex(fname, 0, 0, O_RDONLY, 0, &logfn, NULL); >+ /* >+ * Note: that O_RDONLY implies TDB_NOLOCK, but we want to make it >+ * explicit as it's important when working on databases which were >+ * created with mutex locking. >+ */ >+ tdb_flags |= TDB_NOLOCK; >+ >+ tdb = tdb_open_ex(fname, 0, tdb_flags, O_RDONLY, 0, &logfn, NULL); > if (!tdb) { > printf("Failed to open %s\n", fname); > return 1; >-- >1.7.9.5 > > >From ddaeba8ba6a9be5475aa0eee4a70bdebbea8962e Mon Sep 17 00:00:00 2001 >From: Stefan Metzmacher <metze@samba.org> >Date: Tue, 6 May 2014 11:10:23 +0200 >Subject: [PATCH 04/25] tdb/test: correctly use stderr, not stdout, in fail() > >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Reviewed-by: Michael Adam <obnox@samba.org> >--- > lib/tdb/test/tap-interface.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > >diff --git a/lib/tdb/test/tap-interface.h b/lib/tdb/test/tap-interface.h >index b8ce7df..13193d5 100644 >--- a/lib/tdb/test/tap-interface.h >+++ b/lib/tdb/test/tap-interface.h >@@ -33,7 +33,7 @@ > #define plan_tests(num) > #define fail(...) do { \ > fprintf(stderr, __VA_ARGS__); \ >- fprintf(stdout, "\n"); \ >+ fprintf(stderr, "\n"); \ > fflush(stderr); \ > exit(1); \ > } while(0) >-- >1.7.9.5 > > >From 2a80bbf18164f382a4a30b49b3dc9694ca83af76 Mon Sep 17 00:00:00 2001 >From: Stefan Metzmacher <metze@samba.org> >Date: Tue, 6 May 2014 11:11:38 +0200 >Subject: [PATCH 05/25] tdb/test: add a "skip()" macro. 
> >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Reviewed-by: Michael Adam <obnox@samba.org> >--- > lib/tdb/test/tap-interface.h | 1 + > 1 file changed, 1 insertion(+) > >diff --git a/lib/tdb/test/tap-interface.h b/lib/tdb/test/tap-interface.h >index 13193d5..8f742d8 100644 >--- a/lib/tdb/test/tap-interface.h >+++ b/lib/tdb/test/tap-interface.h >@@ -54,4 +54,5 @@ > } \ > } while(0) > #define ok1(e) ok((e), "%s:%s", __location__, #e) >+#define skip(n, ...) diag(__VA_ARGS__) > #define exit_status() 0 >-- >1.7.9.5 > > >From ce316876c23ef00bfb0d5f6345035b301493656a Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Thu, 21 Feb 2013 16:34:32 +0100 >Subject: [PATCH 06/25] tdb/test: add shutdown_agent() helper function > >Signed-off-by: Volker Lendecke <vl@samba.org> >Signed-off-by: Stefan Metzmacher <metze@samba.org> >--- > lib/tdb/test/external-agent.c | 26 ++++++++++++++++++++------ > lib/tdb/test/external-agent.h | 1 + > 2 files changed, 21 insertions(+), 6 deletions(-) > >diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c >index 8710b47..0aca081 100644 >--- a/lib/tdb/test/external-agent.c >+++ b/lib/tdb/test/external-agent.c >@@ -99,29 +99,29 @@ static enum agent_return do_operation(enum operation op, const char *name) > > struct agent { > int cmdfd, responsefd; >+ pid_t pid; > }; > > /* Do this before doing any tdb stuff. Return handle, or NULL. 
*/ > struct agent *prepare_external_agent(void) > { >- int pid, ret; >+ int ret; > int command[2], response[2]; > char name[1+PATH_MAX]; >+ struct agent *agent = malloc(sizeof(*agent)); > > if (pipe(command) != 0 || pipe(response) != 0) { > fprintf(stderr, "pipe failed: %s\n", strerror(errno)); > exit(1); > } > >- pid = fork(); >- if (pid < 0) { >+ agent->pid = fork(); >+ if (agent->pid < 0) { > fprintf(stderr, "fork failed: %s\n", strerror(errno)); > exit(1); > } > >- if (pid != 0) { >- struct agent *agent = malloc(sizeof(*agent)); >- >+ if (agent->pid != 0) { > close(command[0]); > close(response[1]); > agent->cmdfd = command[1]; >@@ -146,6 +146,20 @@ struct agent *prepare_external_agent(void) > exit(0); > } > >+void shutdown_agent(struct agent *agent) >+{ >+ pid_t p; >+ >+ close(agent->cmdfd); >+ close(agent->responsefd); >+ p = waitpid(agent->pid, NULL, WNOHANG); >+ if (p == 0) { >+ kill(agent->pid, SIGKILL); >+ } >+ waitpid(agent->pid, NULL, 0); >+ free(agent); >+} >+ > /* Ask the external agent to try to do an operation. */ > enum agent_return external_agent_operation(struct agent *agent, > enum operation op, >diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h >index dffdca9..354f5b9 100644 >--- a/lib/tdb/test/external-agent.h >+++ b/lib/tdb/test/external-agent.h >@@ -17,6 +17,7 @@ enum operation { > > /* Do this before doing any tdb stuff. Return handle, or -1. 
*/ > struct agent *prepare_external_agent(void); >+void shutdown_agent(struct agent *agent); > > enum agent_return { > SUCCESS, >-- >1.7.9.5 > > >From ae7552f75f667e9bc7bc9fac84a67d3f945a1e0b Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Thu, 21 Feb 2013 16:34:32 +0100 >Subject: [PATCH 07/25] tdb/test: add PING command to external-agent.c > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/tdb/test/external-agent.c | 4 ++++ > lib/tdb/test/external-agent.h | 1 + > 2 files changed, 5 insertions(+) > >diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c >index 0aca081..57ed2f5 100644 >--- a/lib/tdb/test/external-agent.c >+++ b/lib/tdb/test/external-agent.c >@@ -87,6 +87,9 @@ static enum agent_return do_operation(enum operation op, const char *name) > ret = tdb_close(tdb) == 0 ? SUCCESS : OTHER_FAILURE; > tdb = NULL; > break; >+ case PING: >+ ret = SUCCESS; >+ break; > default: > ret = OTHER_FAILURE; > } >@@ -207,6 +210,7 @@ const char *operation_name(enum operation op) > case CHECK: return "CHECK"; > case NEEDS_RECOVERY: return "NEEDS_RECOVERY"; > case CLOSE: return "CLOSE"; >+ case PING: return "PING"; > } > return "**INVALID**"; > } >diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h >index 354f5b9..bf8a221 100644 >--- a/lib/tdb/test/external-agent.h >+++ b/lib/tdb/test/external-agent.h >@@ -13,6 +13,7 @@ enum operation { > CHECK, > NEEDS_RECOVERY, > CLOSE, >+ PING, > }; > > /* Do this before doing any tdb stuff. Return handle, or -1. 
*/ >-- >1.7.9.5 > > >From c5f8074840711604e4851d559d1903544e7f6928 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Thu, 21 Feb 2013 16:34:32 +0100 >Subject: [PATCH 08/25] tdb/test: add UNMAP command to external-agent.c > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/tdb/test/external-agent.c | 7 +++++++ > lib/tdb/test/external-agent.h | 1 + > 2 files changed, 8 insertions(+) > >diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c >index 57ed2f5..443d382 100644 >--- a/lib/tdb/test/external-agent.c >+++ b/lib/tdb/test/external-agent.c >@@ -90,6 +90,12 @@ static enum agent_return do_operation(enum operation op, const char *name) > case PING: > ret = SUCCESS; > break; >+ case UNMAP: >+ ret = tdb_munmap(tdb) == 0 ? SUCCESS : OTHER_FAILURE; >+ if (ret == SUCCESS) { >+ tdb->flags |= TDB_NOMMAP; >+ } >+ break; > default: > ret = OTHER_FAILURE; > } >@@ -211,6 +217,7 @@ const char *operation_name(enum operation op) > case NEEDS_RECOVERY: return "NEEDS_RECOVERY"; > case CLOSE: return "CLOSE"; > case PING: return "PING"; >+ case UNMAP: return "UNMAP"; > } > return "**INVALID**"; > } >diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h >index bf8a221..de9d0ac 100644 >--- a/lib/tdb/test/external-agent.h >+++ b/lib/tdb/test/external-agent.h >@@ -14,6 +14,7 @@ enum operation { > NEEDS_RECOVERY, > CLOSE, > PING, >+ UNMAP, > }; > > /* Do this before doing any tdb stuff. Return handle, or -1. */ >-- >1.7.9.5 > > >From 3e113a10c87ea78f96d057935b510331c0752d34 Mon Sep 17 00:00:00 2001 >From: Stefan Metzmacher <metze@samba.org> >Date: Thu, 16 May 2013 11:48:02 +0200 >Subject: [PATCH 09/25] wscript: set conf.env.replace_add_global_pthread = > True > >In Samba we currently add PTHREAD CFLAGS/LDFLAGS globally. >The following changes will move the configure checks to >lib/replace and the default of adding the flags globally will change there.
> >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Reviewed-by: Michael Adam <obnox@samba.org> >--- > wscript | 1 + > 1 file changed, 1 insertion(+) > >diff --git a/wscript b/wscript >index 80587ef..58af180 100644 >--- a/wscript >+++ b/wscript >@@ -96,6 +96,7 @@ def configure(conf): > > conf.ADD_EXTRA_INCLUDES('#include/public #source4 #lib #source4/lib #source4/include #include #lib/replace') > >+ conf.env.replace_add_global_pthread = True > conf.RECURSE('lib/replace') > > conf.find_program('perl', var='PERL', mandatory=True) >-- >1.7.9.5 > > >From 59bd3105cfe2bde8935ffc83ec988812bd84dc40 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Mon, 14 Jan 2013 14:56:25 +0100 >Subject: [PATCH 10/25] libreplace: Move thread checks from source3/wscript > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/replace/wscript | 32 ++++++++++++++++++++++++++++++++ > source3/wscript | 32 -------------------------------- > 2 files changed, 32 insertions(+), 32 deletions(-) > >diff --git a/lib/replace/wscript b/lib/replace/wscript >index fd5b386..45e5d90 100644 >--- a/lib/replace/wscript >+++ b/lib/replace/wscript >@@ -422,6 +422,38 @@ removeea setea > > conf.CHECK_FUNCS_IN('pthread_create', 'pthread', checklibc=True, headers='pthread.h') > >+ PTHREAD_CFLAGS='error' >+ PTHREAD_LDFLAGS='error' >+ >+ if PTHREAD_LDFLAGS == 'error': >+ if conf.CHECK_FUNCS_IN('pthread_attr_init', 'pthread'): >+ PTHREAD_CFLAGS='-D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS' >+ PTHREAD_LDFLAGS='-lpthread' >+ if PTHREAD_LDFLAGS == 'error': >+ if conf.CHECK_FUNCS_IN('pthread_attr_init', 'pthreads'): >+ PTHREAD_CFLAGS='-D_THREAD_SAFE' >+ PTHREAD_LDFLAGS='-lpthreads' >+ if PTHREAD_LDFLAGS == 'error': >+ if conf.CHECK_FUNCS_IN('pthread_attr_init', 'c_r'): >+ PTHREAD_CFLAGS='-D_THREAD_SAFE -pthread' >+ PTHREAD_LDFLAGS='-pthread' >+ if PTHREAD_LDFLAGS == 'error': >+ if conf.CHECK_FUNCS('pthread_attr_init'): >+ PTHREAD_CFLAGS='-D_REENTRANT' 
>+ PTHREAD_LDFLAGS='-lpthread' >+ # especially for HP-UX, where the CHECK_FUNC macro fails to test for >+ # pthread_attr_init. On pthread_mutex_lock it works there... >+ if PTHREAD_LDFLAGS == 'error': >+ if conf.CHECK_FUNCS_IN('pthread_mutex_lock', 'pthread'): >+ PTHREAD_CFLAGS='-D_REENTRANT' >+ PTHREAD_LDFLAGS='-lpthread' >+ >+ if PTHREAD_CFLAGS != 'error' and PTHREAD_LDFLAGS != 'error': >+ conf.ADD_CFLAGS(PTHREAD_CFLAGS) >+ conf.ADD_LDFLAGS(PTHREAD_LDFLAGS) >+ conf.CHECK_HEADERS('pthread.h') >+ conf.DEFINE('HAVE_PTHREAD', '1') >+ > conf.CHECK_FUNCS_IN('crypt', 'crypt', checklibc=True) > > conf.CHECK_VARIABLE('rl_event_hook', define='HAVE_DECL_RL_EVENT_HOOK', always=True, >diff --git a/source3/wscript b/source3/wscript >index 2bca8fa..7cc62cc 100644 >--- a/source3/wscript >+++ b/source3/wscript >@@ -1741,38 +1741,6 @@ main() { > if Options.options.with_profiling_data: > conf.DEFINE('WITH_PROFILE', 1); > >- PTHREAD_CFLAGS='error' >- PTHREAD_LDFLAGS='error' >- >- if PTHREAD_LDFLAGS == 'error': >- if conf.CHECK_FUNCS_IN('pthread_attr_init', 'pthread'): >- PTHREAD_CFLAGS='-D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS' >- PTHREAD_LDFLAGS='-lpthread' >- if PTHREAD_LDFLAGS == 'error': >- if conf.CHECK_FUNCS_IN('pthread_attr_init', 'pthreads'): >- PTHREAD_CFLAGS='-D_THREAD_SAFE' >- PTHREAD_LDFLAGS='-lpthreads' >- if PTHREAD_LDFLAGS == 'error': >- if conf.CHECK_FUNCS_IN('pthread_attr_init', 'c_r'): >- PTHREAD_CFLAGS='-D_THREAD_SAFE -pthread' >- PTHREAD_LDFLAGS='-pthread' >- if PTHREAD_LDFLAGS == 'error': >- if conf.CHECK_FUNCS('pthread_attr_init'): >- PTHREAD_CFLAGS='-D_REENTRANT' >- PTHREAD_LDFLAGS='-lpthread' >- # especially for HP-UX, where the CHECK_FUNC macro fails to test for >- # pthread_attr_init. On pthread_mutex_lock it works there... 
>- if PTHREAD_LDFLAGS == 'error': >- if conf.CHECK_FUNCS_IN('pthread_mutex_lock', 'pthread'): >- PTHREAD_CFLAGS='-D_REENTRANT' >- PTHREAD_LDFLAGS='-lpthread' >- >- if PTHREAD_CFLAGS != 'error' and PTHREAD_LDFLAGS != 'error': >- conf.ADD_CFLAGS(PTHREAD_CFLAGS) >- conf.ADD_LDFLAGS(PTHREAD_LDFLAGS) >- conf.CHECK_HEADERS('pthread.h') >- conf.DEFINE('HAVE_PTHREAD', '1') >- > if Options.options.with_pthreadpool: > if conf.CONFIG_SET('HAVE_PTHREAD'): > conf.DEFINE('WITH_PTHREADPOOL', '1') >-- >1.7.9.5 > > >From 2bf555ab30e1c0b3b56fb498640494def4f6ee0b Mon Sep 17 00:00:00 2001 >From: Stefan Metzmacher <metze@samba.org> >Date: Thu, 16 May 2013 11:50:38 +0200 >Subject: [PATCH 11/25] libreplace: only add PTHREAD CFLAGS and LDFLAGS > globally if asked for > >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Reviewed-by: Michael Adam <obnox@samba.org> >--- > lib/replace/wscript | 5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > >diff --git a/lib/replace/wscript b/lib/replace/wscript >index 45e5d90..483c0dc 100644 >--- a/lib/replace/wscript >+++ b/lib/replace/wscript >@@ -449,8 +449,9 @@ removeea setea > PTHREAD_LDFLAGS='-lpthread' > > if PTHREAD_CFLAGS != 'error' and PTHREAD_LDFLAGS != 'error': >- conf.ADD_CFLAGS(PTHREAD_CFLAGS) >- conf.ADD_LDFLAGS(PTHREAD_LDFLAGS) >+ if conf.CONFIG_SET('replace_add_global_pthread'): >+ conf.ADD_CFLAGS(PTHREAD_CFLAGS) >+ conf.ADD_LDFLAGS(PTHREAD_LDFLAGS) > conf.CHECK_HEADERS('pthread.h') > conf.DEFINE('HAVE_PTHREAD', '1') > >-- >1.7.9.5 > > >From 5d9b883a3558774ac9da61c1bb79562060d086fb Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Sat, 22 Dec 2012 08:42:48 +0100 >Subject: [PATCH 12/25] libreplace: Add support for > pthread_mutexattr_setrobust > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/replace/system/threads.h | 35 +++++++++++++++++++++++++++++++++++ > lib/replace/wscript | 14 ++++++++++++++ > 2 files changed, 49 insertions(+) > create mode 
100644 lib/replace/system/threads.h > >diff --git a/lib/replace/system/threads.h b/lib/replace/system/threads.h >new file mode 100644 >index 0000000..3d89bb2 >--- /dev/null >+++ b/lib/replace/system/threads.h >@@ -0,0 +1,35 @@ >+#ifndef _system_threads_h >+#define _system_threads_h >+/* >+ Unix SMB/CIFS implementation. >+ >+ macros to go along with the lib/replace/ portability layer code >+ >+ Copyright (C) Volker Lendecke 2012 >+ >+ ** NOTE! The following LGPL license applies to the replace >+ ** library. This does NOT imply that all of Samba is released >+ ** under the LGPL >+ >+ This library is free software; you can redistribute it and/or >+ modify it under the terms of the GNU Lesser General Public >+ License as published by the Free Software Foundation; either >+ version 3 of the License, or (at your option) any later version. >+ >+ This library is distributed in the hope that it will be useful, >+ but WITHOUT ANY WARRANTY; without even the implied warranty of >+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >+ Lesser General Public License for more details. >+ >+ You should have received a copy of the GNU Lesser General Public >+ License along with this library; if not, see <http://www.gnu.org/licenses/>. 
>+*/ >+ >+#include <pthread.h> >+ >+#if defined(HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP) && \ >+ !defined(HAVE_PTHREAD_MUTEXATTR_SETROBUST) >+#define pthread_mutexattr_setrobust pthread_mutexattr_setrobust_np >+#endif >+ >+#endif >diff --git a/lib/replace/wscript b/lib/replace/wscript >index 483c0dc..987b56b 100644 >--- a/lib/replace/wscript >+++ b/lib/replace/wscript >@@ -455,6 +455,20 @@ removeea setea > conf.CHECK_HEADERS('pthread.h') > conf.DEFINE('HAVE_PTHREAD', '1') > >+ if conf.CONFIG_SET('HAVE_PTHREAD'): >+ >+ conf.CHECK_DECLS('pthread_mutexattr_setrobust', headers='pthread.h') >+ conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust', 'pthread', >+ checklibc=True, headers='pthread.h') >+ >+ conf.CHECK_DECLS('pthread_mutexattr_setrobust_np', headers='pthread.h') >+ conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust_np', 'pthread', >+ checklibc=True, headers='pthread.h') >+ >+ if (conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST') or >+ conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP')): >+ conf.DEFINE('HAVE_ROBUST_MUTEXES', 1) >+ > conf.CHECK_FUNCS_IN('crypt', 'crypt', checklibc=True) > > conf.CHECK_VARIABLE('rl_event_hook', define='HAVE_DECL_RL_EVENT_HOOK', always=True, >-- >1.7.9.5 > > >From 7fa9e3ed9c01317ea9ad8d71ef677a08d27e7dd5 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Mon, 4 Feb 2013 12:26:47 +0100 >Subject: [PATCH 13/25] libreplace: Add support for pthread_mutex_consistent > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/replace/system/threads.h | 5 +++++ > lib/replace/wscript | 14 ++++++++++++-- > 2 files changed, 17 insertions(+), 2 deletions(-) > >diff --git a/lib/replace/system/threads.h b/lib/replace/system/threads.h >index 3d89bb2..3aca088 100644 >--- a/lib/replace/system/threads.h >+++ b/lib/replace/system/threads.h >@@ -32,4 +32,9 @@ > #define pthread_mutexattr_setrobust pthread_mutexattr_setrobust_np > #endif > >+#if defined(HAVE_PTHREAD_MUTEX_CONSISTENT_NP) 
&& \ >+ !defined(HAVE_PTHREAD_MUTEX_CONSISTENT) >+#define pthread_mutex_consistent pthread_mutex_consistent_np >+#endif >+ > #endif >diff --git a/lib/replace/wscript b/lib/replace/wscript >index 987b56b..fd53999 100644 >--- a/lib/replace/wscript >+++ b/lib/replace/wscript >@@ -465,8 +465,18 @@ removeea setea > conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust_np', 'pthread', > checklibc=True, headers='pthread.h') > >- if (conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST') or >- conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP')): >+ conf.CHECK_DECLS('pthread_mutex_consistent', headers='pthread.h') >+ conf.CHECK_FUNCS_IN('pthread_mutex_consistent', 'pthread', >+ checklibc=True, headers='pthread.h') >+ >+ conf.CHECK_DECLS('pthread_mutex_consistent_np', headers='pthread.h') >+ conf.CHECK_FUNCS_IN('pthread_mutex_consistent_np', 'pthread', >+ checklibc=True, headers='pthread.h') >+ >+ if ((conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST') or >+ conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP')) and >+ (conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT') or >+ conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT_NP'))): > conf.DEFINE('HAVE_ROBUST_MUTEXES', 1) > > conf.CHECK_FUNCS_IN('crypt', 'crypt', checklibc=True) >-- >1.7.9.5 > > >From 07219c5ce9c3f6a4f7699af23dd6377948b95793 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Wed, 6 Feb 2013 12:15:41 +0100 >Subject: [PATCH 14/25] libreplace-waf: Only check for _np functions if > standard functions are not available > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/replace/wscript | 22 ++++++++++++++-------- > 1 file changed, 14 insertions(+), 8 deletions(-) > >diff --git a/lib/replace/wscript b/lib/replace/wscript >index fd53999..a26de0f 100644 >--- a/lib/replace/wscript >+++ b/lib/replace/wscript >@@ -458,20 +458,26 @@ removeea setea > if conf.CONFIG_SET('HAVE_PTHREAD'): > > conf.CHECK_DECLS('pthread_mutexattr_setrobust', 
headers='pthread.h') >- conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust', 'pthread', >- checklibc=True, headers='pthread.h') >+ if not conf.CONFIG_SET('HAVE_DECL_PTHREAD_MUTEXATTR_SETROBUST'): >+ conf.CHECK_DECLS('pthread_mutexattr_setrobust_np', >+ headers='pthread.h') > >- conf.CHECK_DECLS('pthread_mutexattr_setrobust_np', headers='pthread.h') >- conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust_np', 'pthread', >+ conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust', 'pthread', > checklibc=True, headers='pthread.h') >+ if not conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST'): >+ conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust_np', 'pthread', >+ checklibc=True, headers='pthread.h') > > conf.CHECK_DECLS('pthread_mutex_consistent', headers='pthread.h') >- conf.CHECK_FUNCS_IN('pthread_mutex_consistent', 'pthread', >- checklibc=True, headers='pthread.h') >+ if not conf.CONFIG_SET('HAVE_DECL_PTHREAD_MUTEX_CONSISTENT'): >+ conf.CHECK_DECLS('pthread_mutex_consistent_np', >+ headers='pthread.h') > >- conf.CHECK_DECLS('pthread_mutex_consistent_np', headers='pthread.h') >- conf.CHECK_FUNCS_IN('pthread_mutex_consistent_np', 'pthread', >+ conf.CHECK_FUNCS_IN('pthread_mutex_consistent', 'pthread', > checklibc=True, headers='pthread.h') >+ if not conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT'): >+ conf.CHECK_FUNCS_IN('pthread_mutex_consistent_np', 'pthread', >+ checklibc=True, headers='pthread.h') > > if ((conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST') or > conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP')) and >-- >1.7.9.5 > > >From 8bfb475ee81514b18ba6d6266727fff24c5b19be Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Wed, 6 Feb 2013 12:16:02 +0100 >Subject: [PATCH 15/25] libreplace: Define PTHREAD_MUTEX_ROBUST along with > pthread_mutexattr_setrobust > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/replace/system/threads.h | 8 ++++++++ > 1 file changed, 8 insertions(+) > >diff 
--git a/lib/replace/system/threads.h b/lib/replace/system/threads.h >index 3aca088..25d3502 100644 >--- a/lib/replace/system/threads.h >+++ b/lib/replace/system/threads.h >@@ -29,7 +29,15 @@ > > #if defined(HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP) && \ > !defined(HAVE_PTHREAD_MUTEXATTR_SETROBUST) >+ > #define pthread_mutexattr_setrobust pthread_mutexattr_setrobust_np >+ >+/* >+ * We assume that PTHREAD_MUTEX_ROBUST_NP goes along with >+ * pthread_mutexattr_setrobust_np() >+ */ >+#define PTHREAD_MUTEX_ROBUST PTHREAD_MUTEX_ROBUST_NP >+ > #endif > > #if defined(HAVE_PTHREAD_MUTEX_CONSISTENT_NP) && \ >-- >1.7.9.5 > > >From 15e6728373a9a35795251edb78b33c91238d4bcc Mon Sep 17 00:00:00 2001 >From: Stefan Metzmacher <metze@samba.org> >Date: Tue, 13 May 2014 03:15:41 +0200 >Subject: [PATCH 16/25] tdb: use asprintf() to simplify tdb_summary() > >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Reviewed-by: Michael Adam <obnox@samba.org> >--- > lib/tdb/common/summary.c | 13 ++++++------- > 1 file changed, 6 insertions(+), 7 deletions(-) > >diff --git a/lib/tdb/common/summary.c b/lib/tdb/common/summary.c >index a22c17d..22404af 100644 >--- a/lib/tdb/common/summary.c >+++ b/lib/tdb/common/summary.c >@@ -92,7 +92,8 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) > struct tdb_record rec; > char *ret = NULL; > bool locked; >- size_t len, unc = 0; >+ size_t unc = 0; >+ int len; > struct tdb_record recovery; > > /* Read-only databases use no locking at all: it's best-effort. >@@ -163,13 +164,8 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) > for (off = 0; off < tdb->hash_size; off++) > tally_add(&hashval, get_hash_length(tdb, off)); > >- /* 20 is max length of a %zu. 
*/ >- len = strlen(SUMMARY_FORMAT) + 35*20 + 1; >- ret = (char *)malloc(len); >- if (!ret) >- goto unlock; > >- snprintf(ret, len, SUMMARY_FORMAT, >+ len = asprintf(&ret, SUMMARY_FORMAT, > tdb->map_size, keys.total+data.total, > keys.num, > (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no", >@@ -194,6 +190,9 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) > * 100.0 / tdb->map_size, > tdb->hash_size * sizeof(tdb_off_t) > * 100.0 / tdb->map_size); >+ if (len == -1) { >+ goto unlock; >+ } > > unlock: > if (locked) { >-- >1.7.9.5 > > >From eed5dd2d811b43d1cdbc4caa2b145616584f8d93 Mon Sep 17 00:00:00 2001 >From: Stefan Metzmacher <metze@samba.org> >Date: Tue, 4 Feb 2014 23:35:53 +0100 >Subject: [PATCH 17/25] tdb: introduce TDB_SUPPORTED_FEATURE_FLAGS > >This will allow to store a feature mask in the tdb header on disk, >so that openers can check if they can handle the features >other openers are using. > >Pair-Programmed-With: Volker Lendecke <vl@samba.org> >Pair-Programmed-With: Michael Adam <obnox@samba.org> >Signed-off-by: Stefan Metzmacher <metze@samba.org> >TODO: Signed-off-by: Volker Lendecke <vl@samba.org> >Signed-off-by: Michael Adam <obnox@samba.org> >--- > lib/tdb/common/check.c | 4 +++- > lib/tdb/common/open.c | 28 ++++++++++++++++++++++++++++ > lib/tdb/common/summary.c | 2 ++ > lib/tdb/common/tdb_private.h | 7 ++++++- > 4 files changed, 39 insertions(+), 2 deletions(-) > >diff --git a/lib/tdb/common/check.c b/lib/tdb/common/check.c >index 9f9d870..e632af5 100644 >--- a/lib/tdb/common/check.c >+++ b/lib/tdb/common/check.c >@@ -39,7 +39,9 @@ static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery) > if (hdr.version != TDB_VERSION) > goto corrupt; > >- if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) >+ if (hdr.rwlocks != 0 && >+ hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC && >+ hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) > goto corrupt; > > tdb_header_hash(tdb, &h1, &h2); >diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c >index 
d5c0432..17ab0b7 100644 >--- a/lib/tdb/common/open.c >+++ b/lib/tdb/common/open.c >@@ -76,6 +76,20 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, > if (tdb->flags & TDB_INCOMPATIBLE_HASH) > newdb->rwlocks = TDB_HASH_RWLOCK_MAGIC; > >+ /* >+ * If we have any features we add the FEATURE_FLAG_MAGIC, overwriting the >+ * TDB_HASH_RWLOCK_MAGIC above. >+ */ >+ if (newdb->feature_flags != 0) { >+ newdb->rwlocks = TDB_FEATURE_FLAG_MAGIC; >+ } >+ >+ /* >+ * It's required for some following code paths >+ * to have the fields on 'tdb' up-to-date. >+ */ >+ tdb->feature_flags = newdb->feature_flags; >+ > if (tdb->flags & TDB_INTERNAL) { > tdb->map_size = size; > tdb->map_ptr = (char *)newdb; >@@ -390,6 +404,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td > goto fail; > > if (header.rwlocks != 0 && >+ header.rwlocks != TDB_FEATURE_FLAG_MAGIC && > header.rwlocks != TDB_HASH_RWLOCK_MAGIC) { > TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n")); > errno = ENOSYS; >@@ -397,6 +412,19 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td > } > tdb->hash_size = header.hash_size; > >+ if (header.rwlocks == TDB_FEATURE_FLAG_MAGIC) { >+ tdb->feature_flags = header.feature_flags; >+ } >+ >+ if (tdb->feature_flags & ~TDB_SUPPORTED_FEATURE_FLAGS) { >+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: unsupported " >+ "features in tdb %s: 0x%08x (supported: 0x%08x)\n", >+ name, (unsigned)tdb->feature_flags, >+ (unsigned)TDB_SUPPORTED_FEATURE_FLAGS)); >+ errno = ENOSYS; >+ goto fail; >+ } >+ > if ((header.magic1_hash == 0) && (header.magic2_hash == 0)) { > /* older TDB without magic hash references */ > tdb->hash_fn = tdb_old_hash; >diff --git a/lib/tdb/common/summary.c b/lib/tdb/common/summary.c >index 22404af..6f2e0a9 100644 >--- a/lib/tdb/common/summary.c >+++ b/lib/tdb/common/summary.c >@@ -21,6 +21,7 @@ > "Size of file/data: %u/%zu\n" \ > "Number of records: 
%zu\n" \ > "Incompatible hash: %s\n" \ >+ "Active/supported feature flags: 0x%08x/0x%08x\n" \ > "Smallest/average/largest keys: %zu/%zu/%zu\n" \ > "Smallest/average/largest data: %zu/%zu/%zu\n" \ > "Smallest/average/largest padding: %zu/%zu/%zu\n" \ >@@ -169,6 +170,7 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) > tdb->map_size, keys.total+data.total, > keys.num, > (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no", >+ (unsigned)tdb->feature_flags, TDB_SUPPORTED_FEATURE_FLAGS, > keys.min, tally_mean(&keys), keys.max, > data.min, tally_mean(&data), data.max, > extra.min, tally_mean(&extra), extra.max, >diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h >index a672159..aa9dd55 100644 >--- a/lib/tdb/common/tdb_private.h >+++ b/lib/tdb/common/tdb_private.h >@@ -53,6 +53,7 @@ typedef uint32_t tdb_off_t; > #define TDB_RECOVERY_MAGIC (0xf53bc0e7U) > #define TDB_RECOVERY_INVALID_MAGIC (0x0) > #define TDB_HASH_RWLOCK_MAGIC (0xbad1a51U) >+#define TDB_FEATURE_FLAG_MAGIC (0xbad1a52U) > #define TDB_ALIGNMENT 4 > #define DEFAULT_HASH_SIZE 131 > #define FREELIST_TOP (sizeof(struct tdb_header)) >@@ -68,6 +69,8 @@ typedef uint32_t tdb_off_t; > #define TDB_PAD_BYTE 0x42 > #define TDB_PAD_U32 0x42424242 > >+#define TDB_SUPPORTED_FEATURE_FLAGS 0 >+ > /* NB assumes there is a local variable called "tdb" that is the > * current context, also takes doubly-parenthesized print-style > * argument. */ >@@ -152,7 +155,8 @@ struct tdb_header { > tdb_off_t sequence_number; /* used when TDB_SEQNUM is set */ > uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */ > uint32_t magic2_hash; /* hash of TDB_MAGIC. 
*/ >- tdb_off_t reserved[27]; >+ uint32_t feature_flags; >+ tdb_off_t reserved[26]; > }; > > struct tdb_lock_type { >@@ -200,6 +204,7 @@ struct tdb_context { > int lockrecs_array_length; > enum TDB_ERROR ecode; /* error code for last tdb error */ > uint32_t hash_size; >+ uint32_t feature_flags; > uint32_t flags; /* the flags passed to tdb_open */ > struct tdb_traverse_lock travlocks; /* current traversal locks */ > struct tdb_context *next; /* all tdbs to avoid multiple opens */ >-- >1.7.9.5 > > >From a25b118b5911a4952a92bc5b54997271158165e1 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Thu, 21 Feb 2013 16:34:32 +0100 >Subject: [PATCH 18/25] tdb: introduce tdb->hdr_ofs > >This makes it possible to have some extra headers before >the real tdb content starts in the file. > >This will be used e.g. to implement locking based on robust mutexes. > >Pair-Programmed-With: Stefan Metzmacher <metze@samba.org> >Pair-Programmed-With: Michael Adam <obnox@samba.org> >TODO: Signed-off-by: Volker Lendecke <vl@samba.org> >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Signed-off-by: Michael Adam <obnox@samba.org> >--- > lib/tdb/common/io.c | 101 +++++++++++++++++++++++++++++++++++++----- > lib/tdb/common/open.c | 53 +++++++++++++++------- > lib/tdb/common/summary.c | 22 +++++---- > lib/tdb/common/tdb_private.h | 3 ++ > lib/tdb/test/run-3G-file.c | 6 +-- > 5 files changed, 146 insertions(+), 39 deletions(-) > >diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c >index 11dfefd..07d22cc 100644 >--- a/lib/tdb/common/io.c >+++ b/lib/tdb/common/io.c >@@ -28,6 +28,70 @@ > > #include "tdb_private.h" > >+/* >+ * tdb->hdr_ofs is 0 for now. >+ * >+ * Note: that we only have the 4GB limit of tdb_off_t for >+ * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs! 
>+ */ >+ >+static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off) >+{ >+ off_t tmp = tdb->hdr_ofs + *off; >+ >+ if ((tmp < tdb->hdr_ofs) || (tmp < *off)) { >+ errno = EIO; >+ return false; >+ } >+ >+ *off = tmp; >+ return true; >+} >+ >+static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf, >+ size_t count, off_t offset) >+{ >+ if (!tdb_adjust_offset(tdb, &offset)) { >+ return -1; >+ } >+ return pwrite(tdb->fd, buf, count, offset); >+} >+ >+static ssize_t tdb_pread(struct tdb_context *tdb, void *buf, >+ size_t count, off_t offset) >+{ >+ if (!tdb_adjust_offset(tdb, &offset)) { >+ return -1; >+ } >+ return pread(tdb->fd, buf, count, offset); >+} >+ >+static int tdb_ftruncate(struct tdb_context *tdb, off_t length) >+{ >+ if (!tdb_adjust_offset(tdb, &length)) { >+ return -1; >+ } >+ return ftruncate(tdb->fd, length); >+} >+ >+static int tdb_fstat(struct tdb_context *tdb, struct stat *buf) >+{ >+ int ret; >+ >+ ret = fstat(tdb->fd, buf); >+ if (ret == -1) { >+ return -1; >+ } >+ >+ if (buf->st_size < tdb->hdr_ofs) { >+ errno = EIO; >+ return -1; >+ } >+ buf->st_size -= tdb->hdr_ofs; >+ >+ return ret; >+} >+ > /* check for an out of bounds access - if it is out of bounds then > see if the database has been expanded by someone else and expand > if necessary >@@ -58,7 +122,7 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, > return -1; > } > >- if (fstat(tdb->fd, &st) == -1) { >+ if (tdb_fstat(tdb, &st) == -1) { > tdb->ecode = TDB_ERR_IO; > return -1; > } >@@ -122,16 +186,18 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, > tdb->ecode = TDB_ERR_IO; > return -1; > #else >- ssize_t written = pwrite(tdb->fd, buf, len, off); >+ ssize_t written; >+ >+ written = tdb_pwrite(tdb, buf, len, off); >+ > if ((written != (ssize_t)len) && (written != -1)) { > /* try once more */ > tdb->ecode = TDB_ERR_IO; > TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only " > "%zi of %u bytes at %u, trying once more\n", > 
written, len, off)); >- written = pwrite(tdb->fd, (const char *)buf+written, >- len-written, >- off+written); >+ written = tdb_pwrite(tdb, (const char *)buf+written, >+ len-written, off+written); > } > if (written == -1) { > /* Ensure ecode is set for log fn. */ >@@ -176,7 +242,9 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, > tdb->ecode = TDB_ERR_IO; > return -1; > #else >- ssize_t ret = pread(tdb->fd, buf, len, off); >+ ssize_t ret; >+ >+ ret = tdb_pread(tdb, buf, len, off); > if (ret != (ssize_t)len) { > /* Ensure ecode is set for log fn. */ > tdb->ecode = TDB_ERR_IO; >@@ -258,7 +326,8 @@ int tdb_mmap(struct tdb_context *tdb) > if (should_mmap(tdb)) { > tdb->map_ptr = mmap(NULL, tdb->map_size, > PROT_READ|(tdb->read_only? 0:PROT_WRITE), >- MAP_SHARED|MAP_FILE, tdb->fd, 0); >+ MAP_SHARED|MAP_FILE, tdb->fd, >+ tdb->hdr_ofs); > > /* > * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! >@@ -303,12 +372,12 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad > return -1; > } > >- if (ftruncate(tdb->fd, new_size) == -1) { >+ if (tdb_ftruncate(tdb, new_size) == -1) { > char b = 0; >- ssize_t written = pwrite(tdb->fd, &b, 1, new_size - 1); >+ ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1); > if (written == 0) { > /* try once more, potentially revealing errno */ >- written = pwrite(tdb->fd, &b, 1, new_size - 1); >+ written = tdb_pwrite(tdb, &b, 1, new_size - 1); > } > if (written == 0) { > /* again - give up, guessing errno */ >@@ -328,10 +397,10 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad > memset(buf, TDB_PAD_BYTE, sizeof(buf)); > while (addition) { > size_t n = addition>sizeof(buf)?sizeof(buf):addition; >- ssize_t written = pwrite(tdb->fd, buf, n, size); >+ ssize_t written = tdb_pwrite(tdb, buf, n, size); > if (written == 0) { > /* prevent infinite loops: try _once_ more */ >- written = pwrite(tdb->fd, buf, n, size); >+ written = tdb_pwrite(tdb, buf, n, 
size); > } > if (written == 0) { > /* give up, trying to provide a useful errno */ >@@ -437,6 +506,14 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size) > /* must know about any previous expansions by another process */ > tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1); > >+ /* >+ * Note: that we don't care about tdb->hdr_ofs != 0 here >+ * >+ * The 4GB limitation is just related to tdb->map_size >+ * and the offset calculation in the records. >+ * >+ * The file on disk can be up to 4GB + tdb->hdr_ofs >+ */ > size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size); > > if (!tdb_add_off_t(tdb->map_size, size, &new_size)) { >diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c >index 17ab0b7..162f30d 100644 >--- a/lib/tdb/common/open.c >+++ b/lib/tdb/common/open.c >@@ -194,6 +194,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td > unsigned v; > const char *hash_alg; > uint32_t magic1, magic2; >+ int ret; > > ZERO_STRUCT(header); > >@@ -340,7 +341,6 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td > if ((tdb_flags & TDB_CLEAR_IF_FIRST) && > (!tdb->read_only) && > (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) { >- int ret; > ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, > TDB_LOCK_WAIT); > if (ret == -1) { >@@ -400,8 +400,18 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td > tdb->flags |= TDB_CONVERT; > tdb_convert(&header, sizeof(header)); > } >- if (fstat(tdb->fd, &st) == -1) >+ >+ /* >+ * We only use st.st_dev and st.st_ino from the raw fstat() >+ * call, everything else needs to use tdb_fstat() in order >+ * to skip tdb->hdr_ofs! 
>+ */ >+ if (fstat(tdb->fd, &st) == -1) { > goto fail; >+ } >+ tdb->device = st.st_dev; >+ tdb->inode = st.st_ino; >+ ZERO_STRUCT(st); > > if (header.rwlocks != 0 && > header.rwlocks != TDB_FEATURE_FLAG_MAGIC && >@@ -446,28 +456,27 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td > } > > /* Is it already in the open list? If so, fail. */ >- if (tdb_already_open(st.st_dev, st.st_ino)) { >+ if (tdb_already_open(tdb->device, tdb->inode)) { > TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " > "%s (%d,%d) is already open in this process\n", >- name, (int)st.st_dev, (int)st.st_ino)); >+ name, (int)tdb->device, (int)tdb->inode)); > errno = EBUSY; > goto fail; > } > >- /* Beware truncation! */ >- tdb->map_size = st.st_size; >- if (tdb->map_size != st.st_size) { >- /* Ensure ecode is set for log fn. */ >- tdb->ecode = TDB_ERR_IO; >- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " >- "len %llu too large!\n", (long long)st.st_size)); >+ /* >+ * We had tdb_mmap(tdb) here before, >+ * but we need to use tdb_fstat(), >+ * which is triggered from tdb_oob() before calling tdb_mmap(). >+ * As this skips tdb->hdr_ofs. >+ */ >+ tdb->map_size = 0; >+ ret = tdb->methods->tdb_oob(tdb, 0, 1, 0); >+ if (ret == -1) { > errno = EIO; > goto fail; > } > >- tdb->device = st.st_dev; >- tdb->inode = st.st_ino; >- tdb_mmap(tdb); > if (locked) { > if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { > TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " >@@ -649,6 +658,11 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) > TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: open failed (%s)\n", strerror(errno))); > goto fail; > } >+ /* >+ * We only use st.st_dev and st.st_ino from the raw fstat() >+ * call, everything else needs to use tdb_fstat() in order >+ * to skip tdb->hdr_ofs! 
>+ */ > if (fstat(tdb->fd, &st) != 0) { > TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno))); > goto fail; >@@ -657,7 +671,16 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) > TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n")); > goto fail; > } >- if (tdb_mmap(tdb) != 0) { >+ ZERO_STRUCT(st); >+ >+ /* >+ * We had tdb_mmap(tdb) here before, >+ * but we need to use tdb_fstat(), >+ * which is triggered from tdb_oob() before calling tdb_mmap(). >+ * As this skips tdb->hdr_ofs. >+ */ >+ tdb->map_size = 0; >+ if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) { > goto fail; > } > #endif /* fake pread or pwrite */ >diff --git a/lib/tdb/common/summary.c b/lib/tdb/common/summary.c >index 6f2e0a9..e9989f6 100644 >--- a/lib/tdb/common/summary.c >+++ b/lib/tdb/common/summary.c >@@ -18,7 +18,8 @@ > #include "tdb_private.h" > > #define SUMMARY_FORMAT \ >- "Size of file/data: %u/%zu\n" \ >+ "Size of file/data: %llu/%zu\n" \ >+ "Header offset/logical size: %zu/%zu\n" \ > "Number of records: %zu\n" \ > "Incompatible hash: %s\n" \ > "Active/supported feature flags: 0x%08x/0x%08x\n" \ >@@ -88,6 +89,7 @@ static size_t get_hash_length(struct tdb_context *tdb, unsigned int i) > > _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) > { >+ off_t file_size; > tdb_off_t off, rec_off; > struct tally freet, keys, data, dead, extra, hashval, uncoal; > struct tdb_record rec; >@@ -165,9 +167,11 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) > for (off = 0; off < tdb->hash_size; off++) > tally_add(&hashval, get_hash_length(tdb, off)); > >+ file_size = tdb->hdr_ofs + tdb->map_size; > > len = asprintf(&ret, SUMMARY_FORMAT, >- tdb->map_size, keys.total+data.total, >+ (unsigned long long)file_size, keys.total+data.total, >+ (size_t)tdb->hdr_ofs, (size_t)tdb->map_size, > keys.num, > (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no", > (unsigned)tdb->feature_flags, TDB_SUPPORTED_FEATURE_FLAGS, >@@ -182,16 +186,16 @@ 
_PUBLIC_ char *tdb_summary(struct tdb_context *tdb) > hashval.min, tally_mean(&hashval), hashval.max, > uncoal.total, > uncoal.min, tally_mean(&uncoal), uncoal.max, >- keys.total * 100.0 / tdb->map_size, >- data.total * 100.0 / tdb->map_size, >- extra.total * 100.0 / tdb->map_size, >- freet.total * 100.0 / tdb->map_size, >- dead.total * 100.0 / tdb->map_size, >+ keys.total * 100.0 / file_size, >+ data.total * 100.0 / file_size, >+ extra.total * 100.0 / file_size, >+ freet.total * 100.0 / file_size, >+ dead.total * 100.0 / file_size, > (keys.num + freet.num + dead.num) > * (sizeof(struct tdb_record) + sizeof(uint32_t)) >- * 100.0 / tdb->map_size, >+ * 100.0 / file_size, > tdb->hash_size * sizeof(tdb_off_t) >- * 100.0 / tdb->map_size); >+ * 100.0 / file_size); > if (len == -1) { > goto unlock; > } >diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h >index aa9dd55..4981e2c 100644 >--- a/lib/tdb/common/tdb_private.h >+++ b/lib/tdb/common/tdb_private.h >@@ -202,6 +202,9 @@ struct tdb_context { > int num_lockrecs; > struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ > int lockrecs_array_length; >+ >+ tdb_off_t hdr_ofs; /* this is 0 for now */ >+ > enum TDB_ERROR ecode; /* error code for last tdb error */ > uint32_t hash_size; > uint32_t feature_flags; >diff --git a/lib/tdb/test/run-3G-file.c b/lib/tdb/test/run-3G-file.c >index 67fd54f..900b1a6 100644 >--- a/lib/tdb/test/run-3G-file.c >+++ b/lib/tdb/test/run-3G-file.c >@@ -22,12 +22,12 @@ static int tdb_expand_file_sparse(struct tdb_context *tdb, > return -1; > } > >- if (ftruncate(tdb->fd, size+addition) == -1) { >+ if (tdb_ftruncate(tdb, size+addition) == -1) { > char b = 0; >- ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); >+ ssize_t written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); > if (written == 0) { > /* try once more, potentially revealing errno */ >- written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); >+ written = tdb_pwrite(tdb, &b, 1, 
(size+addition) - 1); > } > if (written == 0) { > /* again - give up, guessing errno */ >-- >1.7.9.5 > > >From f0e4535efc4891044d327cd06417a30f637c9bb1 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Thu, 21 Feb 2013 16:34:32 +0100 >Subject: [PATCH 19/25] tdb: add TDB_MUTEX_LOCKING support > >This adds optional support for locking based on >shared robust mutexes. > >The caller can use the TDB_MUTEX_LOCKING flag >together with TDB_CLEAR_IF_FIRST after verifying >with tdb_runtime_check_for_robust_mutexes() that >it's supported by the current system. > >The caller should be aware that using TDB_MUTEX_LOCKING >implies some limitations, e.g. it's not possible to >have multiple read chainlocks on a given hash chain >from multiple processes. > >Note: that this doesn't make tdb thread safe! > >Pair-Programmed-With: Stefan Metzmacher <metze@samba.org> >Pair-Programmed-With: Michael Adam <obnox@samba.org> >TODO: Signed-off-by: Volker Lendecke <vl@samba.org> >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Signed-off-by: Michael Adam <obnox@samba.org> >--- > lib/tdb/ABI/tdb-1.3.0.sigs | 68 ++ > lib/tdb/common/io.c | 3 +- > lib/tdb/common/lock.c | 79 ++- > lib/tdb/common/mutex.c | 1000 ++++++++++++++++++++++++++++ > lib/tdb/common/open.c | 200 ++++++ > lib/tdb/common/summary.c | 2 + > lib/tdb/common/tdb.c | 9 + > lib/tdb/common/tdb_private.h | 30 +- > lib/tdb/common/transaction.c | 3 +- > lib/tdb/docs/mutex.txt | 136 ++++ > lib/tdb/include/tdb.h | 34 + > lib/tdb/test/run-3G-file.c | 1 + > lib/tdb/test/run-bad-tdb-header.c | 1 + > lib/tdb/test/run-check.c | 1 + > lib/tdb/test/run-corrupt.c | 1 + > lib/tdb/test/run-die-during-transaction.c | 1 + > lib/tdb/test/run-endian.c | 1 + > lib/tdb/test/run-incompatible.c | 1 + > lib/tdb/test/run-nested-transactions.c | 1 + > lib/tdb/test/run-nested-traverse.c | 1 + > lib/tdb/test/run-no-lock-during-traverse.c | 1 + > lib/tdb/test/run-oldhash.c | 1 + > lib/tdb/test/run-open-during-transaction.c | 1 + > 
lib/tdb/test/run-readonly-check.c | 1 + > lib/tdb/test/run-rescue-find_entry.c | 1 + > lib/tdb/test/run-rescue.c | 1 + > lib/tdb/test/run-rwlock-check.c | 1 + > lib/tdb/test/run-summary.c | 1 + > lib/tdb/test/run-transaction-expand.c | 1 + > lib/tdb/test/run-traverse-in-transaction.c | 1 + > lib/tdb/test/run-wronghash-fail.c | 1 + > lib/tdb/test/run-zero-append.c | 1 + > lib/tdb/test/run.c | 1 + > lib/tdb/wscript | 36 +- > 34 files changed, 1601 insertions(+), 21 deletions(-) > create mode 100644 lib/tdb/ABI/tdb-1.3.0.sigs > create mode 100644 lib/tdb/common/mutex.c > create mode 100644 lib/tdb/docs/mutex.txt > >diff --git a/lib/tdb/ABI/tdb-1.3.0.sigs b/lib/tdb/ABI/tdb-1.3.0.sigs >new file mode 100644 >index 0000000..7d3e469 >--- /dev/null >+++ b/lib/tdb/ABI/tdb-1.3.0.sigs >@@ -0,0 +1,68 @@ >+tdb_add_flags: void (struct tdb_context *, unsigned int) >+tdb_append: int (struct tdb_context *, TDB_DATA, TDB_DATA) >+tdb_chainlock: int (struct tdb_context *, TDB_DATA) >+tdb_chainlock_mark: int (struct tdb_context *, TDB_DATA) >+tdb_chainlock_nonblock: int (struct tdb_context *, TDB_DATA) >+tdb_chainlock_read: int (struct tdb_context *, TDB_DATA) >+tdb_chainlock_unmark: int (struct tdb_context *, TDB_DATA) >+tdb_chainunlock: int (struct tdb_context *, TDB_DATA) >+tdb_chainunlock_read: int (struct tdb_context *, TDB_DATA) >+tdb_check: int (struct tdb_context *, int (*)(TDB_DATA, TDB_DATA, void *), void *) >+tdb_close: int (struct tdb_context *) >+tdb_delete: int (struct tdb_context *, TDB_DATA) >+tdb_dump_all: void (struct tdb_context *) >+tdb_enable_seqnum: void (struct tdb_context *) >+tdb_error: enum TDB_ERROR (struct tdb_context *) >+tdb_errorstr: const char *(struct tdb_context *) >+tdb_exists: int (struct tdb_context *, TDB_DATA) >+tdb_fd: int (struct tdb_context *) >+tdb_fetch: TDB_DATA (struct tdb_context *, TDB_DATA) >+tdb_firstkey: TDB_DATA (struct tdb_context *) >+tdb_freelist_size: int (struct tdb_context *) >+tdb_get_flags: int (struct tdb_context *) 
>+tdb_get_logging_private: void *(struct tdb_context *) >+tdb_get_seqnum: int (struct tdb_context *) >+tdb_hash_size: int (struct tdb_context *) >+tdb_increment_seqnum_nonblock: void (struct tdb_context *) >+tdb_jenkins_hash: unsigned int (TDB_DATA *) >+tdb_lock_nonblock: int (struct tdb_context *, int, int) >+tdb_lockall: int (struct tdb_context *) >+tdb_lockall_mark: int (struct tdb_context *) >+tdb_lockall_nonblock: int (struct tdb_context *) >+tdb_lockall_read: int (struct tdb_context *) >+tdb_lockall_read_nonblock: int (struct tdb_context *) >+tdb_lockall_unmark: int (struct tdb_context *) >+tdb_log_fn: tdb_log_func (struct tdb_context *) >+tdb_map_size: size_t (struct tdb_context *) >+tdb_name: const char *(struct tdb_context *) >+tdb_nextkey: TDB_DATA (struct tdb_context *, TDB_DATA) >+tdb_null: dptr = 0xXXXX, dsize = 0 >+tdb_open: struct tdb_context *(const char *, int, int, int, mode_t) >+tdb_open_ex: struct tdb_context *(const char *, int, int, int, mode_t, const struct tdb_logging_context *, tdb_hash_func) >+tdb_parse_record: int (struct tdb_context *, TDB_DATA, int (*)(TDB_DATA, TDB_DATA, void *), void *) >+tdb_printfreelist: int (struct tdb_context *) >+tdb_remove_flags: void (struct tdb_context *, unsigned int) >+tdb_reopen: int (struct tdb_context *) >+tdb_reopen_all: int (int) >+tdb_repack: int (struct tdb_context *) >+tdb_rescue: int (struct tdb_context *, void (*)(TDB_DATA, TDB_DATA, void *), void *) >+tdb_runtime_check_for_robust_mutexes: bool (void) >+tdb_set_logging_function: void (struct tdb_context *, const struct tdb_logging_context *) >+tdb_set_max_dead: void (struct tdb_context *, int) >+tdb_setalarm_sigptr: void (struct tdb_context *, volatile sig_atomic_t *) >+tdb_store: int (struct tdb_context *, TDB_DATA, TDB_DATA, int) >+tdb_summary: char *(struct tdb_context *) >+tdb_transaction_cancel: int (struct tdb_context *) >+tdb_transaction_commit: int (struct tdb_context *) >+tdb_transaction_prepare_commit: int (struct tdb_context *) 
>+tdb_transaction_start: int (struct tdb_context *) >+tdb_transaction_start_nonblock: int (struct tdb_context *) >+tdb_transaction_write_lock_mark: int (struct tdb_context *) >+tdb_transaction_write_lock_unmark: int (struct tdb_context *) >+tdb_traverse: int (struct tdb_context *, tdb_traverse_func, void *) >+tdb_traverse_read: int (struct tdb_context *, tdb_traverse_func, void *) >+tdb_unlock: int (struct tdb_context *, int, int) >+tdb_unlockall: int (struct tdb_context *) >+tdb_unlockall_read: int (struct tdb_context *) >+tdb_validate_freelist: int (struct tdb_context *, int *) >+tdb_wipe_all: int (struct tdb_context *) >diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c >index 07d22cc..fe47d18 100644 >--- a/lib/tdb/common/io.c >+++ b/lib/tdb/common/io.c >@@ -29,7 +29,8 @@ > #include "tdb_private.h" > > /* >- * tdb->hdr_ofs is 0 for now. >+ * We prepend the mutex area, so fixup offsets. See mutex.c for details. >+ * tdb->hdr_ofs is 0 or header.mutex_size. > * > * Note: that we only have the 4GB limit of tdb_off_t for > * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs! 
>diff --git a/lib/tdb/common/lock.c b/lib/tdb/common/lock.c >index 486de79..6644c40 100644 >--- a/lib/tdb/common/lock.c >+++ b/lib/tdb/common/lock.c >@@ -38,6 +38,15 @@ static int fcntl_lock(struct tdb_context *tdb, > struct flock fl; > int cmd; > >+#ifdef USE_TDB_MUTEX_LOCKING >+ { >+ int ret; >+ if (tdb_mutex_lock(tdb, rw, off, len, waitflag, &ret)) { >+ return ret; >+ } >+ } >+#endif >+ > fl.l_type = rw; > fl.l_whence = SEEK_SET; > fl.l_start = off; >@@ -110,6 +119,15 @@ static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len) > fclose(locks); > #endif > >+#ifdef USE_TDB_MUTEX_LOCKING >+ { >+ int ret; >+ if (tdb_mutex_unlock(tdb, rw, off, len, &ret)) { >+ return ret; >+ } >+ } >+#endif >+ > fl.l_type = F_UNLCK; > fl.l_whence = SEEK_SET; > fl.l_start = off; >@@ -248,13 +266,27 @@ int tdb_allrecord_upgrade(struct tdb_context *tdb) > return -1; > } > >- ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, >- TDB_LOCK_WAIT|TDB_LOCK_PROBE); >+ if (tdb_have_mutexes(tdb)) { >+ ret = tdb_mutex_allrecord_upgrade(tdb); >+ if (ret == -1) { >+ goto fail; >+ } >+ ret = tdb_brlock_retry(tdb, F_WRLCK, lock_offset(tdb->hash_size), >+ 0, TDB_LOCK_WAIT|TDB_LOCK_PROBE); >+ if (ret == -1) { >+ tdb_mutex_allrecord_downgrade(tdb); >+ } >+ } else { >+ ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, >+ TDB_LOCK_WAIT|TDB_LOCK_PROBE); >+ } >+ > if (ret == 0) { > tdb->allrecord_lock.ltype = F_WRLCK; > tdb->allrecord_lock.off = 0; > return 0; > } >+fail: > TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_allrecord_upgrade failed\n")); > return -1; > } >@@ -593,6 +625,8 @@ static int tdb_chainlock_gradual(struct tdb_context *tdb, > int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, > enum tdb_lock_flags flags, bool upgradable) > { >+ int ret; >+ > switch (tdb_allrecord_check(tdb, ltype, flags, upgradable)) { > case -1: > return -1; >@@ -607,16 +641,27 @@ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, > * > * It is (1) which cause the starvation problem, so 
we're only > * gradual for that. */ >- if (tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, >- tdb->hash_size * 4) == -1) { >+ >+ if (tdb_have_mutexes(tdb)) { >+ ret = tdb_mutex_allrecord_lock(tdb, ltype, flags); >+ } else { >+ ret = tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, >+ tdb->hash_size * 4); >+ } >+ >+ if (ret == -1) { > return -1; > } > > /* Grab individual record locks. */ > if (tdb_brlock(tdb, ltype, lock_offset(tdb->hash_size), 0, > flags) == -1) { >- tdb_brunlock(tdb, ltype, FREELIST_TOP, >- tdb->hash_size * 4); >+ if (tdb_have_mutexes(tdb)) { >+ tdb_mutex_allrecord_unlock(tdb); >+ } else { >+ tdb_brunlock(tdb, ltype, FREELIST_TOP, >+ tdb->hash_size * 4); >+ } > return -1; > } > >@@ -672,9 +717,25 @@ int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock) > return 0; > } > >- if (!mark_lock && tdb_brunlock(tdb, ltype, FREELIST_TOP, 0)) { >- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno))); >- return -1; >+ if (!mark_lock) { >+ int ret; >+ >+ if (tdb_have_mutexes(tdb)) { >+ ret = tdb_mutex_allrecord_unlock(tdb); >+ if (ret == 0) { >+ ret = tdb_brunlock(tdb, ltype, >+ lock_offset(tdb->hash_size), >+ 0); >+ } >+ } else { >+ ret = tdb_brunlock(tdb, ltype, FREELIST_TOP, 0); >+ } >+ >+ if (ret != 0) { >+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed " >+ "(%s)\n", strerror(errno))); >+ return -1; >+ } > } > > tdb->allrecord_lock.count = 0; >diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c >new file mode 100644 >index 0000000..bdc4c28 >--- /dev/null >+++ b/lib/tdb/common/mutex.c >@@ -0,0 +1,1000 @@ >+/* >+ Unix SMB/CIFS implementation. >+ >+ trivial database library >+ >+ Copyright (C) Volker Lendecke 2012,2013 >+ Copyright (C) Stefan Metzmacher 2013,2014 >+ Copyright (C) Michael Adam 2014 >+ >+ ** NOTE! The following LGPL license applies to the tdb >+ ** library. 
This does NOT imply that all of Samba is released >+ ** under the LGPL >+ >+ This library is free software; you can redistribute it and/or >+ modify it under the terms of the GNU Lesser General Public >+ License as published by the Free Software Foundation; either >+ version 3 of the License, or (at your option) any later version. >+ >+ This library is distributed in the hope that it will be useful, >+ but WITHOUT ANY WARRANTY; without even the implied warranty of >+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >+ Lesser General Public License for more details. >+ >+ You should have received a copy of the GNU Lesser General Public >+ License along with this library; if not, see <http://www.gnu.org/licenses/>. >+*/ >+#include "tdb_private.h" >+#include "system/threads.h" >+ >+#ifdef USE_TDB_MUTEX_LOCKING >+ >+/* >+ * If we run with mutexes, we store the "struct tdb_mutexes" at the >+ * beginning of the file. We store an additional tdb_header right >+ * beyond the mutex area, page aligned. All the offsets within the tdb >+ * are relative to the area behind the mutex area. tdb->map_ptr points >+ * behind the mmap area as well, so the read and write path in the >+ * mutex case can remain unchanged. >+ * >+ * Early in the mutex development the mutexes were placed between the hash >+ * chain pointers and the real tdb data. This had two drawbacks: First, it >+ * made pointer calculations more complex. Second, we had to mmap the mutex >+ * area twice. One was the normal map_ptr in the tdb. This frequently changed >+ * from within tdb_oob. At least the Linux glibc robust mutex code assumes >+ * constant pointers in memory, so a constantly changing mmap area destroys >+ * the mutex list. So we had to mmap the first bytes of the file with a second >+ * mmap call. With that scheme, very weird errors happened that could be >+ * easily fixed by doing the mutex mmap in a second file. 
It seemed that >+ * mapping the same memory area twice does not end up in accessing the same >+ * physical page, looking at the mutexes in gdb it seemed that old data showed >+ * up after some re-mapping. To avoid a separate mutex file, the code now puts >+ * the real content of the tdb file after the mutex area. This way we do not >+ * have overlapping mmap areas, the mutex area is mmapped once and not >+ * changed, the tdb data area's mmap is constantly changed but does not >+ * overlap. >+ */ >+ >+struct tdb_mutexes { >+ struct tdb_header hdr; >+ >+ /* protect allrecord_lock */ >+ pthread_mutex_t allrecord_mutex; >+ >+ /* >+ * F_UNLCK: free, >+ * F_RDLCK: shared, >+ * F_WRLCK: exclusive >+ */ >+ short int allrecord_lock; >+ >+ /* >+ * Index 0 is the freelist mutex, followed by >+ * one mutex per hashchain. >+ */ >+ pthread_mutex_t hashchains[1]; >+}; >+ >+bool tdb_have_mutexes(struct tdb_context *tdb) >+{ >+ return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0); >+} >+ >+size_t tdb_mutex_size(struct tdb_context *tdb) >+{ >+ size_t mutex_size; >+ >+ if (!tdb_have_mutexes(tdb)) { >+ return 0; >+ } >+ >+ mutex_size = sizeof(struct tdb_mutexes); >+ mutex_size += tdb->hash_size * sizeof(pthread_mutex_t); >+ >+ return TDB_ALIGN(mutex_size, tdb->page_size); >+} >+ >+/* >+ * Get the index for a chain mutex >+ */ >+static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len, >+ unsigned *idx) >+{ >+ /* >+ * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before >+ * the 4 bytes of the freelist start and the hash chain that is about >+ * to be locked. See lock_offset() where the freelist is -1 vs the >+ * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in >+ * the tdb file itself as data, we need to adjust the offset here. 
>+ */ >+ const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t); >+ >+ if (!tdb_have_mutexes(tdb)) { >+ return false; >+ } >+ if (len != 1) { >+ /* Possibly the allrecord lock */ >+ return false; >+ } >+ if (off < freelist_lock_ofs) { >+ /* One of the special locks */ >+ return false; >+ } >+ if (tdb->hash_size == 0) { >+ /* tdb not initialized yet, called from tdb_open_ex() */ >+ return false; >+ } >+ if (off >= TDB_DATA_START(tdb->hash_size)) { >+ /* Single record lock from traverses */ >+ return false; >+ } >+ >+ /* >+ * Now we know it's a freelist or hash chain lock. Those are always 4 >+ * byte aligned. Paranoia check. >+ */ >+ if ((off % sizeof(tdb_off_t)) != 0) { >+ abort(); >+ } >+ >+ /* >+ * Re-index the fcntl offset into an offset into the mutex array >+ */ >+ off -= freelist_lock_ofs; /* rebase to index 0 */ >+ off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */ >+ >+ *idx = off; >+ return true; >+} >+ >+static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb) >+{ >+ size_t i; >+ >+ for (i=0; i < tdb->num_lockrecs; i++) { >+ bool ret; >+ unsigned idx; >+ >+ ret = tdb_mutex_index(tdb, >+ tdb->lockrecs[i].off, >+ tdb->lockrecs[i].count, >+ &idx); >+ if (!ret) { >+ continue; >+ } >+ >+ if (idx == 0) { >+ /* this is the freelist mutex */ >+ continue; >+ } >+ >+ return true; >+ } >+ >+ return false; >+} >+ >+static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag) >+{ >+ int ret; >+ >+ if (waitflag) { >+ ret = pthread_mutex_lock(m); >+ } else { >+ ret = pthread_mutex_trylock(m); >+ } >+ if (ret != EOWNERDEAD) { >+ return ret; >+ } >+ >+ /* >+ * For chainlocks, we don't do any cleanup (yet?) 
>+ */ >+ return pthread_mutex_consistent(m); >+} >+ >+static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag) >+{ >+ int ret; >+ >+ if (waitflag) { >+ ret = pthread_mutex_lock(&m->allrecord_mutex); >+ } else { >+ ret = pthread_mutex_trylock(&m->allrecord_mutex); >+ } >+ if (ret != EOWNERDEAD) { >+ return ret; >+ } >+ >+ /* >+ * The allrecord lock holder died. We need to reset the allrecord_lock >+ * to F_UNLCK. This should also be the indication for >+ * tdb_needs_recovery. >+ */ >+ m->allrecord_lock = F_UNLCK; >+ >+ return pthread_mutex_consistent(&m->allrecord_mutex); >+} >+ >+bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, >+ bool waitflag, int *pret) >+{ >+ struct tdb_mutexes *m = tdb->mutexes; >+ pthread_mutex_t *chain; >+ int ret; >+ unsigned idx; >+ bool allrecord_ok; >+ >+ if (!tdb_mutex_index(tdb, off, len, &idx)) { >+ return false; >+ } >+ chain = &m->hashchains[idx]; >+ >+again: >+ ret = chain_mutex_lock(chain, waitflag); >+ if (ret == EBUSY) { >+ ret = EAGAIN; >+ } >+ if (ret != 0) { >+ errno = ret; >+ goto fail; >+ } >+ >+ if (idx == 0) { >+ /* >+ * This is a freelist lock, which is independent to >+ * the allrecord lock. So we're done once we got the >+ * freelist mutex. >+ */ >+ *pret = 0; >+ return true; >+ } >+ >+ if (tdb_have_mutex_chainlocks(tdb)) { >+ /* >+ * We can only check the allrecord lock once. If we do it with >+ * one chain mutex locked, we will deadlock with the allrecord >+ * locker process in the following way: We lock the first hash >+ * chain, we check for the allrecord lock. We keep the hash >+ * chain locked. Then the allrecord locker locks the >+ * allrecord_mutex. It walks the list of chain mutexes, >+ * locking them all in sequence. Meanwhile, we have the chain >+ * mutex locked, so the allrecord locker blocks trying to lock >+ * our chain mutex. Then we come in and try to lock the second >+ * chain lock, which in most cases will be the freelist. 
We >+ * see that the allrecord lock is locked and put ourselves on >+ * the allrecord_mutex. This will never be signalled though >+ * because the allrecord locker waits for us to give up the >+ * chain lock. >+ */ >+ >+ *pret = 0; >+ return true; >+ } >+ >+ /* >+ * Check if someone is has the allrecord lock: queue if so. >+ */ >+ >+ allrecord_ok = false; >+ >+ if (m->allrecord_lock == F_UNLCK) { >+ /* >+ * allrecord lock not taken >+ */ >+ allrecord_ok = true; >+ } >+ >+ if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) { >+ /* >+ * allrecord shared lock taken, but we only want to read >+ */ >+ allrecord_ok = true; >+ } >+ >+ if (allrecord_ok) { >+ *pret = 0; >+ return true; >+ } >+ >+ ret = pthread_mutex_unlock(chain); >+ if (ret != 0) { >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" >+ "(chain_mutex) failed: %s\n", strerror(ret))); >+ errno = ret; >+ goto fail; >+ } >+ ret = allrecord_mutex_lock(m, waitflag); >+ if (ret == EBUSY) { >+ ret = EAGAIN; >+ } >+ if (ret != 0) { >+ if (waitflag || (ret != EAGAIN)) { >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock" >+ "(allrecord_mutex) failed: %s\n", >+ waitflag ? 
"" : "try_", strerror(ret))); >+ } >+ errno = ret; >+ goto fail; >+ } >+ ret = pthread_mutex_unlock(&m->allrecord_mutex); >+ if (ret != 0) { >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" >+ "(allrecord_mutex) failed: %s\n", strerror(ret))); >+ errno = ret; >+ goto fail; >+ } >+ goto again; >+ >+fail: >+ *pret = -1; >+ return true; >+} >+ >+bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, >+ int *pret) >+{ >+ struct tdb_mutexes *m = tdb->mutexes; >+ pthread_mutex_t *chain; >+ int ret; >+ unsigned idx; >+ >+ if (!tdb_mutex_index(tdb, off, len, &idx)) { >+ return false; >+ } >+ chain = &m->hashchains[idx]; >+ >+ ret = pthread_mutex_unlock(chain); >+ if (ret == 0) { >+ *pret = 0; >+ return true; >+ } >+ errno = ret; >+ *pret = -1; >+ return true; >+} >+ >+int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, >+ enum tdb_lock_flags flags) >+{ >+ struct tdb_mutexes *m = tdb->mutexes; >+ int ret; >+ uint32_t i; >+ bool waitflag = (flags & TDB_LOCK_WAIT); >+ int saved_errno; >+ >+ if (tdb->flags & TDB_NOLOCK) { >+ return 0; >+ } >+ >+ if (flags & TDB_LOCK_MARK_ONLY) { >+ return 0; >+ } >+ >+ ret = allrecord_mutex_lock(m, waitflag); >+ if (!waitflag && (ret == EBUSY)) { >+ errno = EAGAIN; >+ tdb->ecode = TDB_ERR_LOCK; >+ return -1; >+ } >+ if (ret != 0) { >+ if (!(flags & TDB_LOCK_PROBE)) { >+ TDB_LOG((tdb, TDB_DEBUG_TRACE, >+ "allrecord_mutex_lock() failed: %s\n", >+ strerror(ret))); >+ } >+ tdb->ecode = TDB_ERR_LOCK; >+ return -1; >+ } >+ >+ if (m->allrecord_lock != F_UNLCK) { >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", >+ (int)m->allrecord_lock)); >+ goto fail_unlock_allrecord_mutex; >+ } >+ m->allrecord_lock = (ltype == F_RDLCK) ? 
F_RDLCK : F_WRLCK; >+ >+ for (i=0; i<tdb->hash_size; i++) { >+ >+ /* ignore hashchains[0], the freelist */ >+ pthread_mutex_t *chain = &m->hashchains[i+1]; >+ >+ ret = chain_mutex_lock(chain, waitflag); >+ if (!waitflag && (ret == EBUSY)) { >+ errno = EAGAIN; >+ goto fail_unroll_allrecord_lock; >+ } >+ if (ret != 0) { >+ if (!(flags & TDB_LOCK_PROBE)) { >+ TDB_LOG((tdb, TDB_DEBUG_TRACE, >+ "chain_mutex_lock() failed: %s\n", >+ strerror(ret))); >+ } >+ errno = ret; >+ goto fail_unroll_allrecord_lock; >+ } >+ >+ ret = pthread_mutex_unlock(chain); >+ if (ret != 0) { >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" >+ "(chainlock) failed: %s\n", strerror(ret))); >+ errno = ret; >+ goto fail_unroll_allrecord_lock; >+ } >+ } >+ /* >+ * We leave this routine with m->allrecord_mutex locked >+ */ >+ return 0; >+ >+fail_unroll_allrecord_lock: >+ m->allrecord_lock = F_UNLCK; >+ >+fail_unlock_allrecord_mutex: >+ saved_errno = errno; >+ ret = pthread_mutex_unlock(&m->allrecord_mutex); >+ if (ret != 0) { >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" >+ "(allrecord_mutex) failed: %s\n", strerror(ret))); >+ } >+ errno = saved_errno; >+ tdb->ecode = TDB_ERR_LOCK; >+ return -1; >+} >+ >+int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) >+{ >+ struct tdb_mutexes *m = tdb->mutexes; >+ int ret; >+ uint32_t i; >+ >+ if (tdb->flags & TDB_NOLOCK) { >+ return 0; >+ } >+ >+ /* >+ * Our only caller tdb_allrecord_upgrade() >+ * garantees that we already own the allrecord lock. >+ * >+ * Which means m->allrecord_mutex is still locked by us. 
>+ */ >+ >+ if (m->allrecord_lock != F_RDLCK) { >+ tdb->ecode = TDB_ERR_LOCK; >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", >+ (int)m->allrecord_lock)); >+ return -1; >+ } >+ >+ m->allrecord_lock = F_WRLCK; >+ >+ for (i=0; i<tdb->hash_size; i++) { >+ >+ /* ignore hashchains[0], the freelist */ >+ pthread_mutex_t *chain = &m->hashchains[i+1]; >+ >+ ret = chain_mutex_lock(chain, true); >+ if (ret != 0) { >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock" >+ "(chainlock) failed: %s\n", strerror(ret))); >+ goto fail_unroll_allrecord_lock; >+ } >+ >+ ret = pthread_mutex_unlock(chain); >+ if (ret != 0) { >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" >+ "(chainlock) failed: %s\n", strerror(ret))); >+ goto fail_unroll_allrecord_lock; >+ } >+ } >+ >+ return 0; >+ >+fail_unroll_allrecord_lock: >+ m->allrecord_lock = F_RDLCK; >+ tdb->ecode = TDB_ERR_LOCK; >+ return -1; >+} >+ >+void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) >+{ >+ struct tdb_mutexes *m = tdb->mutexes; >+ >+ /* >+ * Our only caller tdb_allrecord_upgrade() (in the error case) >+ * garantees that we already own the allrecord lock. >+ * >+ * Which means m->allrecord_mutex is still locked by us. >+ */ >+ >+ if (m->allrecord_lock != F_WRLCK) { >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", >+ (int)m->allrecord_lock)); >+ return; >+ } >+ >+ m->allrecord_lock = F_RDLCK; >+ return; >+} >+ >+ >+int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) >+{ >+ struct tdb_mutexes *m = tdb->mutexes; >+ short old; >+ int ret; >+ >+ if (tdb->flags & TDB_NOLOCK) { >+ return 0; >+ } >+ >+ /* >+ * Our only callers tdb_allrecord_unlock() and >+ * tdb_allrecord_lock() (in the error path) >+ * garantee that we already own the allrecord lock. >+ * >+ * Which means m->allrecord_mutex is still locked by us. 
>+ */ >+ >+ if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) { >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", >+ (int)m->allrecord_lock)); >+ return -1; >+ } >+ >+ old = m->allrecord_lock; >+ m->allrecord_lock = F_UNLCK; >+ >+ ret = pthread_mutex_unlock(&m->allrecord_mutex); >+ if (ret != 0) { >+ m->allrecord_lock = old; >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" >+ "(allrecord_mutex) failed: %s\n", strerror(ret))); >+ return -1; >+ } >+ return 0; >+} >+ >+int tdb_mutex_init(struct tdb_context *tdb) >+{ >+ struct tdb_mutexes *m; >+ pthread_mutexattr_t ma; >+ int i, ret; >+ >+ ret = tdb_mutex_mmap(tdb); >+ if (ret == -1) { >+ return -1; >+ } >+ m = tdb->mutexes; >+ >+ ret = pthread_mutexattr_init(&ma); >+ if (ret != 0) { >+ goto fail_munmap; >+ } >+ ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); >+ if (ret != 0) { >+ goto fail; >+ } >+ ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); >+ if (ret != 0) { >+ goto fail; >+ } >+ ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); >+ if (ret != 0) { >+ goto fail; >+ } >+ >+ for (i=0; i<tdb->hash_size+1; i++) { >+ pthread_mutex_t *chain = &m->hashchains[i]; >+ >+ ret = pthread_mutex_init(chain, &ma); >+ if (ret != 0) { >+ goto fail; >+ } >+ } >+ >+ m->allrecord_lock = F_UNLCK; >+ >+ ret = pthread_mutex_init(&m->allrecord_mutex, &ma); >+ if (ret != 0) { >+ goto fail; >+ } >+ ret = 0; >+fail: >+ pthread_mutexattr_destroy(&ma); >+fail_munmap: >+ tdb_mutex_munmap(tdb); >+ >+ if (ret == 0) { >+ return 0; >+ } >+ >+ errno = ret; >+ return -1; >+} >+ >+int tdb_mutex_mmap(struct tdb_context *tdb) >+{ >+ size_t len; >+ void *ptr; >+ >+ len = tdb_mutex_size(tdb); >+ if (len == 0) { >+ return 0; >+ } >+ >+ ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE, >+ tdb->fd, 0); >+ if (ptr == MAP_FAILED) { >+ return -1; >+ } >+ tdb->mutexes = (struct tdb_mutexes *)ptr; >+ >+ return 0; >+} >+ >+int tdb_mutex_munmap(struct tdb_context 
*tdb) >+{ >+ size_t len; >+ >+ len = tdb_mutex_size(tdb); >+ if (len == 0) { >+ return 0; >+ } >+ >+ return munmap(tdb->mutexes, len); >+} >+ >+static bool tdb_mutex_locking_cached; >+ >+static bool tdb_mutex_locking_supported(void) >+{ >+ pthread_mutexattr_t ma; >+ pthread_mutex_t m; >+ int ret; >+ static bool initialized; >+ >+ if (initialized) { >+ return tdb_mutex_locking_cached; >+ } >+ >+ initialized = true; >+ >+ ret = pthread_mutexattr_init(&ma); >+ if (ret != 0) { >+ return false; >+ } >+ ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); >+ if (ret != 0) { >+ goto cleanup_ma; >+ } >+ ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); >+ if (ret != 0) { >+ goto cleanup_ma; >+ } >+ ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); >+ if (ret != 0) { >+ goto cleanup_ma; >+ } >+ ret = pthread_mutex_init(&m, &ma); >+ if (ret != 0) { >+ goto cleanup_ma; >+ } >+ ret = pthread_mutex_lock(&m); >+ if (ret != 0) { >+ goto cleanup_m; >+ } >+ /* >+ * This makes sure we have real mutexes >+ * from a threading library instead of just >+ * stubs from libc. 
>+ */ >+ ret = pthread_mutex_lock(&m); >+ if (ret != EDEADLK) { >+ goto cleanup_lock; >+ } >+ ret = pthread_mutex_unlock(&m); >+ if (ret != 0) { >+ goto cleanup_m; >+ } >+ >+ tdb_mutex_locking_cached = true; >+ goto cleanup_m; >+ >+cleanup_lock: >+ pthread_mutex_unlock(&m); >+cleanup_m: >+ pthread_mutex_destroy(&m); >+cleanup_ma: >+ pthread_mutexattr_destroy(&ma); >+ return tdb_mutex_locking_cached; >+} >+ >+static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR; >+static pid_t tdb_robust_mutex_pid = -1; >+ >+static void tdb_robust_mutex_handler(int sig) >+{ >+ if (tdb_robust_mutex_pid != -1) { >+ pid_t pid; >+ int status; >+ >+ pid = waitpid(tdb_robust_mutex_pid, &status, WNOHANG); >+ if (pid == tdb_robust_mutex_pid) { >+ tdb_robust_mutex_pid = -1; >+ return; >+ } >+ } >+ >+ if (tdb_robust_mutext_old_handler == SIG_DFL) { >+ return; >+ } >+ if (tdb_robust_mutext_old_handler == SIG_IGN) { >+ return; >+ } >+ if (tdb_robust_mutext_old_handler == SIG_ERR) { >+ return; >+ } >+ >+ tdb_robust_mutext_old_handler(sig); >+} >+ >+_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) >+{ >+ void *ptr; >+ pthread_mutex_t *m; >+ pthread_mutexattr_t ma; >+ int ret = 1; >+ int pipe_down[2] = { -1, -1 }; >+ int pipe_up[2] = { -1, -1 }; >+ ssize_t nread; >+ char c = 0; >+ bool ok; >+ int status; >+ static bool initialized; >+ >+ if (initialized) { >+ return tdb_mutex_locking_cached; >+ } >+ >+ initialized = true; >+ >+ ok = tdb_mutex_locking_supported(); >+ if (!ok) { >+ return false; >+ } >+ >+ tdb_mutex_locking_cached = false; >+ >+ ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE, >+ MAP_SHARED|MAP_ANON, -1 /* fd */, 0); >+ if (ptr == MAP_FAILED) { >+ return false; >+ } >+ m = (pthread_mutex_t *)ptr; >+ >+ ret = pipe(pipe_down); >+ if (ret != 0) { >+ goto cleanup_mmap; >+ } >+ ret = pipe(pipe_up); >+ if (ret != 0) { >+ goto cleanup_pipe; >+ } >+ >+ ret = pthread_mutexattr_init(&ma); >+ if (ret != 0) { >+ goto cleanup_pipe; >+ } >+ ret = 
pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); >+ if (ret != 0) { >+ goto cleanup_ma; >+ } >+ ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); >+ if (ret != 0) { >+ goto cleanup_ma; >+ } >+ ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); >+ if (ret != 0) { >+ goto cleanup_ma; >+ } >+ ret = pthread_mutex_init(m, &ma); >+ if (ret != 0) { >+ goto cleanup_ma; >+ } >+ >+ tdb_robust_mutext_old_handler = signal(SIGCHLD, >+ tdb_robust_mutex_handler); >+ >+ tdb_robust_mutex_pid = fork(); >+ if (tdb_robust_mutex_pid == 0) { >+ size_t nwritten; >+ close(pipe_down[1]); >+ close(pipe_up[0]); >+ ret = pthread_mutex_lock(m); >+ nwritten = write(pipe_up[1], &ret, sizeof(ret)); >+ if (nwritten != sizeof(ret)) { >+ exit(1); >+ } >+ if (ret != 0) { >+ exit(1); >+ } >+ nread = read(pipe_down[0], &c, 1); >+ if (nread != 1) { >+ exit(1); >+ } >+ /* leave locked */ >+ exit(0); >+ } >+ if (tdb_robust_mutex_pid == -1) { >+ goto cleanup_sig_child; >+ } >+ close(pipe_down[0]); >+ pipe_down[0] = -1; >+ close(pipe_up[1]); >+ pipe_up[1] = -1; >+ >+ nread = read(pipe_up[0], &ret, sizeof(ret)); >+ if (nread != sizeof(ret)) { >+ goto cleanup_child; >+ } >+ >+ ret = pthread_mutex_trylock(m); >+ if (ret != EBUSY) { >+ if (ret == 0) { >+ pthread_mutex_unlock(m); >+ } >+ goto cleanup_child; >+ } >+ >+ if (write(pipe_down[1], &c, 1) != 1) { >+ goto cleanup_child; >+ } >+ >+ nread = read(pipe_up[0], &c, 1); >+ if (nread != 0) { >+ goto cleanup_child; >+ } >+ >+ while (tdb_robust_mutex_pid > 0) { >+ pid_t pid; >+ >+ errno = 0; >+ pid = waitpid(tdb_robust_mutex_pid, &status, 0); >+ if (pid == tdb_robust_mutex_pid) { >+ tdb_robust_mutex_pid = -1; >+ break; >+ } >+ if (pid == -1 && errno != EINTR) { >+ goto cleanup_child; >+ } >+ } >+ signal(SIGCHLD, tdb_robust_mutext_old_handler); >+ >+ ret = pthread_mutex_trylock(m); >+ if (ret != EOWNERDEAD) { >+ if (ret == 0) { >+ pthread_mutex_unlock(m); >+ } >+ goto cleanup_m; >+ } >+ >+ ret = pthread_mutex_consistent(m); >+ if 
(ret != 0) { >+ goto cleanup_m; >+ } >+ >+ ret = pthread_mutex_trylock(m); >+ if (ret != EDEADLK) { >+ pthread_mutex_unlock(m); >+ goto cleanup_m; >+ } >+ >+ ret = pthread_mutex_unlock(m); >+ if (ret != 0) { >+ goto cleanup_m; >+ } >+ >+ tdb_mutex_locking_cached = true; >+ goto cleanup_m; >+ >+cleanup_child: >+ while (tdb_robust_mutex_pid > 0) { >+ pid_t pid; >+ >+ kill(tdb_robust_mutex_pid, SIGKILL); >+ >+ errno = 0; >+ pid = waitpid(tdb_robust_mutex_pid, &status, 0); >+ if (pid == tdb_robust_mutex_pid) { >+ tdb_robust_mutex_pid = -1; >+ break; >+ } >+ if (pid == -1 && errno != EINTR) { >+ break; >+ } >+ } >+cleanup_sig_child: >+ signal(SIGCHLD, tdb_robust_mutext_old_handler); >+cleanup_m: >+ pthread_mutex_destroy(m); >+cleanup_ma: >+ pthread_mutexattr_destroy(&ma); >+cleanup_pipe: >+ if (pipe_down[0] != -1) { >+ close(pipe_down[0]); >+ } >+ if (pipe_down[1] != -1) { >+ close(pipe_down[1]); >+ } >+ if (pipe_up[0] != -1) { >+ close(pipe_up[0]); >+ } >+ if (pipe_up[1] != -1) { >+ close(pipe_up[1]); >+ } >+cleanup_mmap: >+ munmap(ptr, sizeof(pthread_mutex_t)); >+ >+ return tdb_mutex_locking_cached; >+} >+ >+#else >+ >+size_t tdb_mutex_size(struct tdb_context *tdb) >+{ >+ return 0; >+} >+ >+bool tdb_have_mutexes(struct tdb_context *tdb) >+{ >+ return false; >+} >+ >+int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, >+ enum tdb_lock_flags flags) >+{ >+ tdb->ecode = TDB_ERR_LOCK; >+ return -1; >+} >+ >+int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) >+{ >+ return -1; >+} >+ >+int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) >+{ >+ tdb->ecode = TDB_ERR_LOCK; >+ return -1; >+} >+ >+void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) >+{ >+ return; >+} >+ >+int tdb_mutex_mmap(struct tdb_context *tdb) >+{ >+ errno = ENOSYS; >+ return -1; >+} >+ >+int tdb_mutex_munmap(struct tdb_context *tdb) >+{ >+ errno = ENOSYS; >+ return -1; >+} >+ >+int tdb_mutex_init(struct tdb_context *tdb) >+{ >+ errno = ENOSYS; >+ return -1; >+} >+ >+_PUBLIC_ 
bool tdb_runtime_check_for_robust_mutexes(void) >+{ >+ return false; >+} >+ >+#endif >diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c >index 162f30d..16a76a3 100644 >--- a/lib/tdb/common/open.c >+++ b/lib/tdb/common/open.c >@@ -77,6 +77,15 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, > newdb->rwlocks = TDB_HASH_RWLOCK_MAGIC; > > /* >+ * We create a tdb with TDB_FEATURE_FLAG_MUTEX support, >+ * the flag combination and runtime feature checks >+ * are done by the caller already. >+ */ >+ if (tdb->flags & TDB_MUTEX_LOCKING) { >+ newdb->feature_flags |= TDB_FEATURE_FLAG_MUTEX; >+ } >+ >+ /* > * If we have any features we add the FEATURE_FLAG_MAGIC, overwriting the > * TDB_HASH_RWLOCK_MAGIC above. > */ >@@ -87,8 +96,11 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, > /* > * It's required for some following code pathes > * to have the fields on 'tdb' up-to-date. >+ * >+ * E.g. tdb_mutex_size() requires it > */ > tdb->feature_flags = newdb->feature_flags; >+ tdb->hash_size = newdb->hash_size; > > if (tdb->flags & TDB_INTERNAL) { > tdb->map_size = size; >@@ -104,6 +116,11 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, > if (ftruncate(tdb->fd, 0) == -1) > goto fail; > >+ if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { >+ newdb->mutex_size = tdb_mutex_size(tdb); >+ tdb->hdr_ofs = newdb->mutex_size; >+ } >+ > /* This creates an endian-converted header, as if read from disk */ > CONVERT(*newdb); > memcpy(header, newdb, sizeof(*header)); >@@ -113,6 +130,37 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, > if (!tdb_write_all(tdb->fd, newdb, size)) > goto fail; > >+ if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { >+ >+ /* >+ * Now we init the mutex area >+ * followed by a second header. 
>+ */ >+ >+ ret = ftruncate( >+ tdb->fd, >+ newdb->mutex_size + sizeof(struct tdb_header)); >+ if (ret == -1) { >+ goto fail; >+ } >+ ret = tdb_mutex_init(tdb); >+ if (ret == -1) { >+ goto fail; >+ } >+ >+ /* >+ * Write a second header behind the mutexes. That's the area >+ * that will be mmapp'ed. >+ */ >+ ret = lseek(tdb->fd, newdb->mutex_size, SEEK_SET); >+ if (ret == -1) { >+ goto fail; >+ } >+ if (!tdb_write_all(tdb->fd, newdb, size)) { >+ goto fail; >+ } >+ } >+ > ret = 0; > fail: > SAFE_FREE(newdb); >@@ -179,6 +227,70 @@ static bool check_header_hash(struct tdb_context *tdb, > return check_header_hash(tdb, header, false, m1, m2); > } > >+static bool tdb_mutex_open_ok(struct tdb_context *tdb, >+ const struct tdb_header *header) >+{ >+ int locked; >+ >+ locked = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, >+ TDB_LOCK_NOWAIT|TDB_LOCK_PROBE); >+ >+ if ((locked == -1) && (tdb->ecode == TDB_ERR_LOCK)) { >+ /* >+ * CLEAR_IF_FIRST still active. The tdb was created on this >+ * host, so we can assume the mutex implementation is >+ * compatible. Important for tools like tdbdump on a still >+ * open locking.tdb. >+ */ >+ goto check_local_settings; >+ } >+ >+ /* >+ * We got the CLEAR_IF_FIRST lock. That means the database was >+ * potentially copied from somewhere else. The mutex implementation >+ * might be incompatible. >+ */ >+ >+ if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { >+ /* >+ * Should not happen >+ */ >+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok: " >+ "failed to release ACTIVE_LOCK on %s: %s\n", >+ tdb->name, strerror(errno))); >+ return false; >+ } >+ >+ if (tdb->flags & TDB_NOLOCK) { >+ /* >+ * We don't look at locks, so it does not matter to have a >+ * compatible mutex implementation. Allow the open. 
>+ */ >+ return true; >+ } >+ >+check_local_settings: >+ >+ if (!(tdb->flags & TDB_MUTEX_LOCKING)) { >+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok[%s]: " >+ "Can use mutexes only with " >+ "MUTEX_LOCKING or NOLOCK\n", >+ tdb->name)); >+ return false; >+ } >+ >+ if (tdb_mutex_size(tdb) != header->mutex_size) { >+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok[%s]: " >+ "Mutex size changed from %u to %u\n.", >+ tdb->name, >+ (unsigned int)header->mutex_size, >+ (unsigned int)tdb_mutex_size(tdb))); >+ return false; >+ } >+ >+ return true; >+} >+ > _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, > int open_flags, mode_t mode, > const struct tdb_logging_context *log_ctx, >@@ -208,6 +320,9 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td > if (tdb_flags & TDB_INTERNAL) { > tdb_flags |= TDB_INCOMPATIBLE_HASH; > } >+ if (tdb_flags & TDB_MUTEX_LOCKING) { >+ tdb_flags |= TDB_INCOMPATIBLE_HASH; >+ } > > tdb->fd = -1; > #ifdef TDB_TRACE >@@ -296,6 +411,64 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td > goto fail; > } > >+ if (tdb->flags & TDB_MUTEX_LOCKING) { >+ /* >+ * Here we catch bugs in the callers, >+ * the runtime check for existing tdb's comes later. 
>+ */ >+ >+ if (!(tdb->flags & TDB_CLEAR_IF_FIRST)) { >+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " >+ "invalid flags for %s - TDB_MUTEX_LOCKING " >+ "requires TDB_CLEAR_IF_FIRST\n", name)); >+ errno = EINVAL; >+ goto fail; >+ } >+ >+ if (tdb->flags & TDB_INTERNAL) { >+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " >+ "invalid flags for %s - TDB_MUTEX_LOCKING and " >+ "TDB_INTERNAL are not allowed together\n", name)); >+ errno = EINVAL; >+ goto fail; >+ } >+ >+ if (tdb->flags & TDB_NOMMAP) { >+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " >+ "invalid flags for %s - TDB_MUTEX_LOCKING and " >+ "TDB_NOMMAP are not allowed together\n", name)); >+ errno = EINVAL; >+ goto fail; >+ } >+ >+ if (tdb->read_only) { >+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " >+ "invalid flags for %s - TDB_MUTEX_LOCKING " >+ "not allowed read only\n", name)); >+ errno = EINVAL; >+ goto fail; >+ } >+ >+ /* >+ * The callers should have called >+ * tdb_runtime_check_for_robust_mutexes() >+ * before using TDB_MUTEX_LOCKING! >+ * >+ * This makes sure the caller understands >+ * that the locking may behave a bit differently >+ * than with pure fcntl locking. E.g. multiple >+ * read locks are not supported. >+ */ >+ if (!tdb_runtime_check_for_robust_mutexes()) { >+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " >+ "invalid flags for %s - TDB_MUTEX_LOCKING " >+ "requires support for robust_mutexes\n", >+ name)); >+ errno = ENOSYS; >+ goto fail; >+ } >+ } >+ > if (getenv("TDB_NO_FSYNC")) { > tdb->flags |= TDB_NOSYNC; > } >@@ -435,6 +608,21 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td > goto fail; > } > >+ if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { >+ if (!tdb_mutex_open_ok(tdb, &header)) { >+ errno = EINVAL; >+ goto fail; >+ } >+ >+ /* >+ * We need to remember the hdr_ofs >+ * also for the TDB_NOLOCK case >+ * if the current library doesn't support >+ * mutex locking. 
>+ */ >+ tdb->hdr_ofs = header.mutex_size; >+ } >+ > if ((header.magic1_hash == 0) && (header.magic2_hash == 0)) { > /* older TDB without magic hash references */ > tdb->hash_fn = tdb_old_hash; >@@ -477,6 +665,15 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td > goto fail; > } > >+ if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { >+ if (!(tdb->flags & TDB_NOLOCK)) { >+ ret = tdb_mutex_mmap(tdb); >+ if (ret != 0) { >+ goto fail; >+ } >+ } >+ } >+ > if (locked) { > if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { > TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " >@@ -587,6 +784,9 @@ _PUBLIC_ int tdb_close(struct tdb_context *tdb) > else > tdb_munmap(tdb); > } >+ >+ tdb_mutex_munmap(tdb); >+ > SAFE_FREE(tdb->name); > if (tdb->fd != -1) { > ret = close(tdb->fd); >diff --git a/lib/tdb/common/summary.c b/lib/tdb/common/summary.c >index e9989f6..d786132 100644 >--- a/lib/tdb/common/summary.c >+++ b/lib/tdb/common/summary.c >@@ -23,6 +23,7 @@ > "Number of records: %zu\n" \ > "Incompatible hash: %s\n" \ > "Active/supported feature flags: 0x%08x/0x%08x\n" \ >+ "Robust mutexes locking: %s\n" \ > "Smallest/average/largest keys: %zu/%zu/%zu\n" \ > "Smallest/average/largest data: %zu/%zu/%zu\n" \ > "Smallest/average/largest padding: %zu/%zu/%zu\n" \ >@@ -175,6 +176,7 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) > keys.num, > (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no", > (unsigned)tdb->feature_flags, TDB_SUPPORTED_FEATURE_FLAGS, >+ (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX)?"yes":"no", > keys.min, tally_mean(&keys), keys.max, > data.min, tally_mean(&data), data.max, > extra.min, tally_mean(&extra), extra.max, >diff --git a/lib/tdb/common/tdb.c b/lib/tdb/common/tdb.c >index ebd4ffe..ae98c96 100644 >--- a/lib/tdb/common/tdb.c >+++ b/lib/tdb/common/tdb.c >@@ -723,6 +723,15 @@ _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags) > return; > } > >+ if ((flags & TDB_NOLOCK) && >+ (tdb->feature_flags & 
TDB_FEATURE_FLAG_MUTEX) && >+ (tdb->mutexes == NULL)) { >+ tdb->ecode = TDB_ERR_LOCK; >+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: " >+ "Can not remove NOLOCK flag on mutexed databases")); >+ return; >+ } >+ > if (flags & TDB_ALLOW_NESTING) { > tdb->flags |= TDB_DISALLOW_NESTING; > } >diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h >index 4981e2c..de8d9e6 100644 >--- a/lib/tdb/common/tdb_private.h >+++ b/lib/tdb/common/tdb_private.h >@@ -69,7 +69,11 @@ typedef uint32_t tdb_off_t; > #define TDB_PAD_BYTE 0x42 > #define TDB_PAD_U32 0x42424242 > >-#define TDB_SUPPORTED_FEATURE_FLAGS 0 >+#define TDB_FEATURE_FLAG_MUTEX 0x00000001 >+ >+#define TDB_SUPPORTED_FEATURE_FLAGS ( \ >+ TDB_FEATURE_FLAG_MUTEX | \ >+ 0) > > /* NB assumes there is a local variable called "tdb" that is the > * current context, also takes doubly-parenthesized print-style >@@ -156,7 +160,8 @@ struct tdb_header { > uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */ > uint32_t magic2_hash; /* hash of TDB_MAGIC. 
*/ > uint32_t feature_flags; >- tdb_off_t reserved[26]; >+ tdb_len_t mutex_size; /* set if TDB_FEATURE_FLAG_MUTEX is set */ >+ tdb_off_t reserved[25]; > }; > > struct tdb_lock_type { >@@ -190,6 +195,8 @@ struct tdb_methods { > int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t ); > }; > >+struct tdb_mutexes; >+ > struct tdb_context { > char *name; /* the name of the database */ > void *map_ptr; /* where it is currently mapped */ >@@ -203,7 +210,8 @@ struct tdb_context { > struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ > int lockrecs_array_length; > >- tdb_off_t hdr_ofs; /* this is 0 for now */ >+ tdb_off_t hdr_ofs; /* this is 0 or header.mutex_size */ >+ struct tdb_mutexes *mutexes; /* mmap of the mutex area */ > > enum TDB_ERROR ecode; /* error code for last tdb error */ > uint32_t hash_size; >@@ -300,4 +308,20 @@ bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret); > > /* tdb_off_t and tdb_len_t right now are both uint32_t */ > #define tdb_add_len_t tdb_add_off_t >+ >+size_t tdb_mutex_size(struct tdb_context *tdb); >+bool tdb_have_mutexes(struct tdb_context *tdb); >+int tdb_mutex_init(struct tdb_context *tdb); >+int tdb_mutex_mmap(struct tdb_context *tdb); >+int tdb_mutex_munmap(struct tdb_context *tdb); >+bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, >+ bool waitflag, int *pret); >+bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, >+ int *pret); >+int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, >+ enum tdb_lock_flags flags); >+int tdb_mutex_allrecord_unlock(struct tdb_context *tdb); >+int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb); >+void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb); >+ > #endif /* TDB_PRIVATE_H */ >diff --git a/lib/tdb/common/transaction.c b/lib/tdb/common/transaction.c >index a2c3bbd..caef0be 100644 >--- a/lib/tdb/common/transaction.c >+++ b/lib/tdb/common/transaction.c >@@ -421,7 +421,8 @@ static int 
_tdb_transaction_start(struct tdb_context *tdb, > enum tdb_lock_flags lockflags) > { > /* some sanity checks */ >- if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) { >+ if (tdb->read_only || (tdb->flags & (TDB_INTERNAL|TDB_MUTEX_LOCKING)) >+ || tdb->traverse_read) { > TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); > tdb->ecode = TDB_ERR_EINVAL; > return -1; >diff --git a/lib/tdb/docs/mutex.txt b/lib/tdb/docs/mutex.txt >new file mode 100644 >index 0000000..7625662 >--- /dev/null >+++ b/lib/tdb/docs/mutex.txt >@@ -0,0 +1,136 @@ >+Tdb is a hashtable database with multiple concurrent writer and external >+record lock support. For speed reasons, wherever possible tdb uses a shared >+memory mapped area for data access. In its currently released form, it uses >+fcntl byte-range locks to coordinate access to the data itself. >+ >+The tdb data is organized as a hashtable. Hash collisions are dealt with by >+forming a linked list of records that share a hash value. The individual >+linked lists are protected across processes with 1-byte fcntl locks on the >+starting pointer of the linked list representing a hash value. >+ >+The external locking API of tdb allows to lock individual records. Instead of >+really locking individual records, the tdb API locks a complete linked list >+with a fcntl lock. >+ >+The external locking API of tdb also allows to lock the complete database, and >+ctdb uses this facility to freeze databases during a recovery. While the >+so-called allrecord lock is held, all linked lists and all individual records >+are frozen altogether. Tdb achieves this by locking the complete file range >+with a single fcntl lock. Individual 1-byte locks for the linked lists >+conflict with this. Access to records is prevented by the one large fcntl byte >+range lock. 
>+ >+Fcntl locks have been chosen for tdb for two reasons: First they are portable >+across all current unixes. Secondly they provide auto-cleanup. If a process >+dies while holding a fcntl lock, the lock is given up as if it was explicitly >+unlocked. Thus fcntl locks provide a very robust locking scheme, if a process >+dies for any reason the database will not stay blocked until reboot. This >+robustness is very important for long-running services, a reboot is not an >+option for most users of tdb. >+ >+Unfortunately, during stress testing, fcntl locks have turned out to be a major >+problem for performance. The particular problem that was seen happens when >+ctdb on a busy server does a recovery. A recovery means that ctdb has to >+freeze all tdb databases for some time, usually a few seconds. This is done >+with the allrecord lock. During the recovery phase on a busy server many smbd >+processes try to access the tdb file with blocking fcntl calls. The specific >+test in question easily reproduces 7,000 processes piling up waiting for >+1-byte fcntl locks. When ctdb is done with the recovery, it gives up the >+allrecord lock, covering the whole file range. All 7,000 processes waiting for >+1-byte fcntl locks are woken up, trying to acquire their lock. The special >+implementation of fcntl locks in Linux (up to 2013-02-12 at least) protects >+all fcntl lock operations with a single system-wide spinlock. If 7,000 processes >+are waiting for the allrecord lock to be released, this leads to a thundering >+herd condition, all CPUs are spinning on that single spinlock. >+ >+Functionally the kernel is fine, eventually the thundering herd slows down and >+every process correctly gets its share and locking range, but the performance >+of the system while the herd is active is worse than expected. >+ >+The thundering herd is only the worst case scenario for fcntl lock use. The >+single spinlock for fcntl operations is also a performance penalty for normal >+operations. 
In the cluster case, every read and write SMB request has to do >+two fcntl calls to provide correct SMB mandatory locks. The single spinlock >+is one source of serialization for the SMB read/write requests, limiting the >+parallelism that can be achieved in a multi-core system. >+ >+While trying to tune his servers, Ira Cooper, Samba Team member, found fcntl >+locks to be a problem on Solaris as well. Ira pointed out that there is a >+potential alternative locking mechanism that might be more scalable: Process >+shared robust mutexes, as defined by Posix 2008 for example via >+ >+http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutexattr_setpshared.html >+http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutexattr_setrobust.html >+ >+Pthread mutexes provide one of the core mechanisms in posix threads to protect >+in-process data structures from concurrent access by multiple threads. In the >+Linux implementation, a pthread_mutex_t is represented by a data structure in >+user space that requires no kernel calls in the uncontended case for locking >+and unlocking. Locking and unlocking in the uncontended case is implemented >+purely in user space with atomic CPU instructions and thus is very fast. >+ >+The setpshared functions indicate to the kernel that the mutex is about to be >+shared between processes in a common shared memory area. >+ >+The process shared posix mutexes have the potential to replace fcntl locking >+to coordinate mmap access for tdbs. However, they are missing the critical >+auto-cleanup property that fcntl provides when a process dies. A process that >+dies hard while holding a shared mutex has no chance to clean up the protected >+data structures and unlock the shared mutex. Thus with a pure process shared >+mutex the mutex will remain locked forever until the data structures are >+re-initialized from scratch.
>+ >+With the robust mutexes defined by Posix the process shared mutexes have been >+extended with a limited auto-cleanup property. If a mutex has been declared >+robust, when a process exits while holding that mutex, the next process trying >+to lock the mutex will get the special error message EOWNERDEAD. This informs >+the caller that the data structures the mutex protects are potentially corrupt >+and need to be cleaned up. >+ >+The error message EOWNERDEAD when trying to lock a mutex is an extension over >+the fcntl functionality. A process that does a blocking fcntl lock call is not >+informed about whether the lock was explicitly freed by a process still alive >+or due to an unplanned process exit. At the time of this writing (February >+2013), at least Linux and OpenSolaris also implement the robustness feature of >+process-shared mutexes. >+ >+Converting the tdb locking mechanism from fcntl to mutexes has to take care of >+both types of locks that are used on tdb files. >+ >+The easy part is to use mutexes to replace the 1-byte linked list locks >+covering the individual hashes. Those can be represented by a mutex each. >+ >+Covering the allrecord lock is more difficult. The allrecord lock uses a fcntl >+lock spanning all hash list locks simultaneously. This basic functionality is >+not easily possible with mutexes. A mutex carries 1 bit of information, a >+fcntl lock can carry an arbitrary amount of information. >+ >+In order to support the allrecord lock, we have an allrecord_lock variable >+protected by an allrecord_mutex. The coordination between the allrecord lock >+and the chainlocks works like this: >+ >+- Getting a chain lock works like this: >+ >+ 1. get chain mutex >+ 2. return success if allrecord_lock is F_UNLCK (not locked) >+ 3. return success if allrecord_lock is F_RDLCK (locked readonly) >+ and we only need a read lock. >+ 4. release chain mutex >+ 5. wait for allrecord_mutex >+ 6. unlock allrecord_mutex >+ 7. goto 1. 
>+ >+- Getting the allrecord lock: >+ >+ 1. get the allrecord mutex >+ 2. return error if allrecord_lock is not F_UNLCK (it's locked) >+ 3. set allrecord_lock to the desired value. >+ 4. in a loop: lock(blocking) / unlock each chain mutex. >+ 5. return success. >+ >+- allrecord lock upgrade: >+ >+ 1. check we already have the allrecord lock with F_RDLCK. >+ 3. set allrecord_lock to F_WRLCK >+ 4. in a loop: lock(blocking) / unlock each chain mutex. >+ 5. return success. >diff --git a/lib/tdb/include/tdb.h b/lib/tdb/include/tdb.h >index a34f089..5ea5e60 100644 >--- a/lib/tdb/include/tdb.h >+++ b/lib/tdb/include/tdb.h >@@ -80,6 +80,9 @@ extern "C" { > #define TDB_ALLOW_NESTING 512 /** Allow transactions to nest */ > #define TDB_DISALLOW_NESTING 1024 /** Disallow transactions to nest */ > #define TDB_INCOMPATIBLE_HASH 2048 /** Better hashing: can't be opened by tdb < 1.2.6. */ >+#define TDB_MUTEX_LOCKING 4096 /** optimized locking using robust mutexes if supported, >+ only with tdb >= 1.3.0 and TDB_CLEAR_IF_FIRST >+ after checking tdb_runtime_check_for_robust_mutexes() */ > > /** The tdb error codes */ > enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK, >@@ -143,6 +146,11 @@ struct tdb_logging_context { > * default 5.\n > * TDB_ALLOW_NESTING - Allow transactions to nest.\n > * TDB_DISALLOW_NESTING - Disallow transactions to nest.\n >+ * TDB_INCOMPATIBLE_HASH - Better hashing: can't be opened by tdb < 1.2.6.\n >+ * TDB_MUTEX_LOCKING - Optimized locking using robust mutexes if supported, >+ * can't be opened by tdb < 1.3.0. >+ * Only valid in combination with TDB_CLEAR_IF_FIRST >+ * after checking tdb_runtime_check_for_robust_mutexes()\n > * > * @param[in] open_flags Flags for the open(2) function. 
> * >@@ -179,6 +187,11 @@ struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags, > * default 5.\n > * TDB_ALLOW_NESTING - Allow transactions to nest.\n > * TDB_DISALLOW_NESTING - Disallow transactions to nest.\n >+ * TDB_INCOMPATIBLE_HASH - Better hashing: can't be opened by tdb < 1.2.6.\n >+ * TDB_MUTEX_LOCKING - Optimized locking using robust mutexes if supported, >+ * can't be opened by tdb < 1.3.0. >+ * Only valid in combination with TDB_CLEAR_IF_FIRST >+ * after checking tdb_runtime_check_for_robust_mutexes()\n > * > * @param[in] open_flags Flags for the open(2) function. > * >@@ -842,6 +855,27 @@ int tdb_rescue(struct tdb_context *tdb, > void (*walk) (TDB_DATA key, TDB_DATA data, void *private_data), > void *private_data); > >+/** >+ * @brief Check if support for TDB_MUTEX_LOCKING is available at runtime. >+ * >+ * On some systems the API for pthread_mutexattr_setrobust() is not available. >+ * On other systems there are some bugs in the interaction between glibc and >+ * the Linux kernel. >+ * >+ * This function provides a runtime check if robust mutexes are really >+ * available. >+ * >+ * This needs to be called and return true before TDB_MUTEX_LOCKING >+ * can be used at runtime. >+ * >+ * @note This calls fork(), but the SIGCHLD handling should be transparent. >+ * >+ * @return true if supported, false otherwise.
>+ * >+ * @see TDB_MUTEX_LOCKING >+ */ >+bool tdb_runtime_check_for_robust_mutexes(void); >+ > /* @} ******************************************************************/ > > /* Low level locking functions: use with care */ >diff --git a/lib/tdb/test/run-3G-file.c b/lib/tdb/test/run-3G-file.c >index 900b1a6..748c972 100644 >--- a/lib/tdb/test/run-3G-file.c >+++ b/lib/tdb/test/run-3G-file.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-bad-tdb-header.c b/lib/tdb/test/run-bad-tdb-header.c >index b00fb89..9d29fdf 100644 >--- a/lib/tdb/test/run-bad-tdb-header.c >+++ b/lib/tdb/test/run-bad-tdb-header.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-check.c b/lib/tdb/test/run-check.c >index b275691..ce389a2 100644 >--- a/lib/tdb/test/run-check.c >+++ b/lib/tdb/test/run-check.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-corrupt.c b/lib/tdb/test/run-corrupt.c >index 93eae42..e6fc751 100644 >--- a/lib/tdb/test/run-corrupt.c >+++ b/lib/tdb/test/run-corrupt.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-die-during-transaction.c b/lib/tdb/test/run-die-during-transaction.c >index 9b90415..c636d87 100644 >--- a/lib/tdb/test/run-die-during-transaction.c >+++ b/lib/tdb/test/run-die-during-transaction.c 
>@@ -19,6 +19,7 @@ static int ftruncate_check(int fd, off_t length); > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include <stdbool.h> >diff --git a/lib/tdb/test/run-endian.c b/lib/tdb/test/run-endian.c >index 3116f7d..9d4d5f5 100644 >--- a/lib/tdb/test/run-endian.c >+++ b/lib/tdb/test/run-endian.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-incompatible.c b/lib/tdb/test/run-incompatible.c >index af01ca6..b8e95b5 100644 >--- a/lib/tdb/test/run-incompatible.c >+++ b/lib/tdb/test/run-incompatible.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > >diff --git a/lib/tdb/test/run-nested-transactions.c b/lib/tdb/test/run-nested-transactions.c >index bf08e55..864adf2 100644 >--- a/lib/tdb/test/run-nested-transactions.c >+++ b/lib/tdb/test/run-nested-transactions.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include <stdbool.h> >diff --git a/lib/tdb/test/run-nested-traverse.c b/lib/tdb/test/run-nested-traverse.c >index 361dc2e..22ee3e2 100644 >--- a/lib/tdb/test/run-nested-traverse.c >+++ b/lib/tdb/test/run-nested-traverse.c >@@ -11,6 +11,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #undef fcntl > #include <stdlib.h> >diff --git a/lib/tdb/test/run-no-lock-during-traverse.c b/lib/tdb/test/run-no-lock-during-traverse.c >index b5e31dc..737a32f 100644 >--- 
a/lib/tdb/test/run-no-lock-during-traverse.c >+++ b/lib/tdb/test/run-no-lock-during-traverse.c >@@ -13,6 +13,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-oldhash.c b/lib/tdb/test/run-oldhash.c >index 535336c..aaee6f6 100644 >--- a/lib/tdb/test/run-oldhash.c >+++ b/lib/tdb/test/run-oldhash.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-open-during-transaction.c b/lib/tdb/test/run-open-during-transaction.c >index 04ba956..1605376 100644 >--- a/lib/tdb/test/run-open-during-transaction.c >+++ b/lib/tdb/test/run-open-during-transaction.c >@@ -20,6 +20,7 @@ static int ftruncate_check(int fd, off_t length); > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include <stdbool.h> >diff --git a/lib/tdb/test/run-readonly-check.c b/lib/tdb/test/run-readonly-check.c >index e518532..c5e0f7d 100644 >--- a/lib/tdb/test/run-readonly-check.c >+++ b/lib/tdb/test/run-readonly-check.c >@@ -11,6 +11,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-rescue-find_entry.c b/lib/tdb/test/run-rescue-find_entry.c >index 25f4f1c..5d6f8f7 100644 >--- a/lib/tdb/test/run-rescue-find_entry.c >+++ b/lib/tdb/test/run-rescue-find_entry.c >@@ -10,6 +10,7 @@ > #include "../common/check.c" > #include "../common/hash.c" > #include "../common/rescue.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include 
<stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-rescue.c b/lib/tdb/test/run-rescue.c >index 7c806a4..e43f53b 100644 >--- a/lib/tdb/test/run-rescue.c >+++ b/lib/tdb/test/run-rescue.c >@@ -10,6 +10,7 @@ > #include "../common/check.c" > #include "../common/hash.c" > #include "../common/rescue.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-rwlock-check.c b/lib/tdb/test/run-rwlock-check.c >index 8b8072d..2ac9dc3 100644 >--- a/lib/tdb/test/run-rwlock-check.c >+++ b/lib/tdb/test/run-rwlock-check.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > >diff --git a/lib/tdb/test/run-summary.c b/lib/tdb/test/run-summary.c >index 2231284..8b9a1a0 100644 >--- a/lib/tdb/test/run-summary.c >+++ b/lib/tdb/test/run-summary.c >@@ -10,6 +10,7 @@ > #include "../common/check.c" > #include "../common/hash.c" > #include "../common/summary.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > >diff --git a/lib/tdb/test/run-transaction-expand.c b/lib/tdb/test/run-transaction-expand.c >index ddf1f24..d36b894 100644 >--- a/lib/tdb/test/run-transaction-expand.c >+++ b/lib/tdb/test/run-transaction-expand.c >@@ -37,6 +37,7 @@ static inline int fake_fdatasync(int fd) > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run-traverse-in-transaction.c b/lib/tdb/test/run-traverse-in-transaction.c >index 48194b8..17d6412 100644 >--- a/lib/tdb/test/run-traverse-in-transaction.c >+++ b/lib/tdb/test/run-traverse-in-transaction.c >@@ -11,6 +11,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include 
"../common/mutex.c" > #include "tap-interface.h" > #undef fcntl_with_lockcheck > #include <stdlib.h> >diff --git a/lib/tdb/test/run-wronghash-fail.c b/lib/tdb/test/run-wronghash-fail.c >index 9c78fc5..c44b0f5 100644 >--- a/lib/tdb/test/run-wronghash-fail.c >+++ b/lib/tdb/test/run-wronghash-fail.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > >diff --git a/lib/tdb/test/run-zero-append.c b/lib/tdb/test/run-zero-append.c >index a2324c4..f9eba1b 100644 >--- a/lib/tdb/test/run-zero-append.c >+++ b/lib/tdb/test/run-zero-append.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/test/run.c b/lib/tdb/test/run.c >index f61fcf6..c744c4d 100644 >--- a/lib/tdb/test/run.c >+++ b/lib/tdb/test/run.c >@@ -9,6 +9,7 @@ > #include "../common/open.c" > #include "../common/check.c" > #include "../common/hash.c" >+#include "../common/mutex.c" > #include "tap-interface.h" > #include <stdlib.h> > #include "logging.h" >diff --git a/lib/tdb/wscript b/lib/tdb/wscript >index 7019693..6243ccf 100644 >--- a/lib/tdb/wscript >+++ b/lib/tdb/wscript >@@ -1,7 +1,7 @@ > #!/usr/bin/env python > > APPNAME = 'tdb' >-VERSION = '1.2.13' >+VERSION = '1.3.0' > > blddir = 'bin' > >@@ -46,6 +46,10 @@ def set_options(opt): > opt.BUILTIN_DEFAULT('replace') > opt.PRIVATE_EXTENSION_DEFAULT('tdb', noextension='tdb') > opt.RECURSE('lib/replace') >+ opt.add_option('--disable-tdb-mutex-locking', >+ help=("Disable the use of pthread robust mutexes"), >+ action="store_true", dest='disable_tdb_mutex_locking', >+ default=False) > if opt.IN_LAUNCH_DIR(): > opt.add_option('--disable-python', > help=("disable the pytdb module"), >@@ -53,6 +57,11 @@ def set_options(opt): > > > def configure(conf): >+ 
conf.env.disable_tdb_mutex_locking = getattr(Options.options, >+ 'disable_tdb_mutex_locking', >+ False) >+ if not conf.env.disable_tdb_mutex_locking: >+ conf.env.replace_add_global_pthread = True > conf.RECURSE('lib/replace') > > conf.env.standalone_tdb = conf.IN_LAUNCH_DIR() >@@ -68,6 +77,11 @@ def configure(conf): > > conf.env.disable_python = getattr(Options.options, 'disable_python', False) > >+ if (conf.CONFIG_SET('HAVE_ROBUST_MUTEXES') and >+ conf.env.building_tdb and >+ not conf.env.disable_tdb_mutex_locking): >+ conf.define('USE_TDB_MUTEX_LOCKING', 1) >+ > conf.CHECK_XSLTPROC_MANPAGES() > > if not conf.env.disable_python: >@@ -87,10 +101,12 @@ def configure(conf): > def build(bld): > bld.RECURSE('lib/replace') > >- COMMON_SRC = bld.SUBDIR('common', >- '''check.c error.c tdb.c traverse.c >- freelistcheck.c lock.c dump.c freelist.c >- io.c open.c transaction.c hash.c summary.c rescue.c''') >+ COMMON_FILES='''check.c error.c tdb.c traverse.c >+ freelistcheck.c lock.c dump.c freelist.c >+ io.c open.c transaction.c hash.c summary.c rescue.c >+ mutex.c''' >+ >+ COMMON_SRC = bld.SUBDIR('common', COMMON_FILES) > > if bld.env.standalone_tdb: > bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig' >@@ -99,9 +115,15 @@ def build(bld): > private_library = True > > if not bld.CONFIG_SET('USING_SYSTEM_TDB'): >+ >+ tdb_deps = 'replace' >+ >+ if bld.CONFIG_SET('USE_TDB_MUTEX_LOCKING'): >+ tdb_deps += ' pthread' >+ > bld.SAMBA_LIBRARY('tdb', > COMMON_SRC, >- deps='replace', >+ deps=tdb_deps, > includes='include', > abi_directory='ABI', > abi_match='tdb_*', >@@ -137,7 +159,7 @@ def build(bld): > # FIXME: This hardcoded list is stupid, stupid, stupid. 
> bld.SAMBA_SUBSYSTEM('tdb-test-helpers', > 'test/external-agent.c test/lock-tracking.c test/logging.c', >- 'replace', >+ tdb_deps, > includes='include') > > for t in tdb1_unit_tests: >-- >1.7.9.5 > > >From 8d98154b5a306f390df17e9d0c0dee9a869f156d Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Thu, 21 Feb 2013 16:34:32 +0100 >Subject: [PATCH 20/25] tdb/test: add mutex related tests > >Pair-Programmed-With: Stefan Metzmacher <metze@samba.org> >Pair-Programmed-With: Michael Adam <obnox@samba.org> >TODO: Signed-off-by: Volker Lendecke <vl@samba.org> >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Signed-off-by: Michael Adam <obnox@samba.org> >--- > lib/tdb/test/run-mutex-allrecord-bench.c | 82 +++++++++ > lib/tdb/test/run-mutex-allrecord-block.c | 120 +++++++++++++ > lib/tdb/test/run-mutex-allrecord-trylock.c | 113 ++++++++++++ > lib/tdb/test/run-mutex-die.c | 269 ++++++++++++++++++++++++++++ > lib/tdb/test/run-mutex-openflags2.c | 152 ++++++++++++++++ > lib/tdb/test/run-mutex-trylock.c | 122 +++++++++++++ > lib/tdb/test/run-mutex1.c | 138 ++++++++++++++ > lib/tdb/wscript | 9 +- > 8 files changed, 1004 insertions(+), 1 deletion(-) > create mode 100644 lib/tdb/test/run-mutex-allrecord-bench.c > create mode 100644 lib/tdb/test/run-mutex-allrecord-block.c > create mode 100644 lib/tdb/test/run-mutex-allrecord-trylock.c > create mode 100644 lib/tdb/test/run-mutex-die.c > create mode 100644 lib/tdb/test/run-mutex-openflags2.c > create mode 100644 lib/tdb/test/run-mutex-trylock.c > create mode 100644 lib/tdb/test/run-mutex1.c > >diff --git a/lib/tdb/test/run-mutex-allrecord-bench.c b/lib/tdb/test/run-mutex-allrecord-bench.c >new file mode 100644 >index 0000000..b81e597 >--- /dev/null >+++ b/lib/tdb/test/run-mutex-allrecord-bench.c >@@ -0,0 +1,82 @@ >+#include "../common/tdb_private.h" >+#include "../common/io.c" >+#include "../common/tdb.c" >+#include "../common/lock.c" >+#include "../common/freelist.c" >+#include "../common/traverse.c" >+#include 
"../common/transaction.c" >+#include "../common/error.c" >+#include "../common/open.c" >+#include "../common/check.c" >+#include "../common/hash.c" >+#include "../common/mutex.c" >+#include "tap-interface.h" >+#include <stdlib.h> >+#include <sys/types.h> >+#include <sys/wait.h> >+#include <stdarg.h> >+ >+static TDB_DATA key, data; >+ >+static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, >+ const char *fmt, ...) >+{ >+ va_list ap; >+ va_start(ap, fmt); >+ vfprintf(stderr, fmt, ap); >+ va_end(ap); >+} >+ >+static double timeval_elapsed2(const struct timeval *tv1, const struct timeval *tv2) >+{ >+ return (tv2->tv_sec - tv1->tv_sec) + >+ (tv2->tv_usec - tv1->tv_usec)*1.0e-6; >+} >+ >+static double timeval_elapsed(const struct timeval *tv) >+{ >+ struct timeval tv2; >+ gettimeofday(&tv2, NULL); >+ return timeval_elapsed2(tv, &tv2); >+} >+ >+/* The code should barf on TDBs created with rwlocks. */ >+int main(int argc, char *argv[]) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ int ret; >+ struct timeval start; >+ double elapsed; >+ bool runtime_support; >+ >+ runtime_support = tdb_runtime_check_for_robust_mutexes(); >+ >+ if (!runtime_support) { >+ skip(1, "No robust mutex support"); >+ return exit_status(); >+ } >+ >+ key.dsize = strlen("hi"); >+ key.dptr = discard_const_p(uint8_t, "hi"); >+ data.dsize = strlen("world"); >+ data.dptr = discard_const_p(uint8_t, "world"); >+ >+ tdb = tdb_open_ex("mutex-allrecord-bench.tdb", 1000000, >+ TDB_INCOMPATIBLE_HASH| >+ TDB_MUTEX_LOCKING| >+ TDB_CLEAR_IF_FIRST, >+ O_RDWR|O_CREAT, 0755, &log_ctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ gettimeofday(&start, NULL); >+ ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); >+ elapsed = timeval_elapsed(&start); >+ >+ ok(ret == 0, "tdb_allrecord_lock should succeed"); >+ >+ diag("allrecord_lock took %f seconds", elapsed); >+ >+ return exit_status(); >+} >diff --git 
a/lib/tdb/test/run-mutex-allrecord-block.c b/lib/tdb/test/run-mutex-allrecord-block.c >new file mode 100644 >index 0000000..fcd3b4f >--- /dev/null >+++ b/lib/tdb/test/run-mutex-allrecord-block.c >@@ -0,0 +1,120 @@ >+#include "../common/tdb_private.h" >+#include "../common/io.c" >+#include "../common/tdb.c" >+#include "../common/lock.c" >+#include "../common/freelist.c" >+#include "../common/traverse.c" >+#include "../common/transaction.c" >+#include "../common/error.c" >+#include "../common/open.c" >+#include "../common/check.c" >+#include "../common/hash.c" >+#include "../common/mutex.c" >+#include "tap-interface.h" >+#include <stdlib.h> >+#include <sys/types.h> >+#include <sys/wait.h> >+#include <stdarg.h> >+ >+static TDB_DATA key, data; >+ >+static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, >+ const char *fmt, ...) >+{ >+ va_list ap; >+ va_start(ap, fmt); >+ vfprintf(stderr, fmt, ap); >+ va_end(ap); >+} >+ >+static int do_child(int tdb_flags, int to, int from) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ int ret; >+ char c = 0; >+ >+ tdb = tdb_open_ex("mutex-allrecord-block.tdb", 3, tdb_flags, >+ O_RDWR|O_CREAT, 0755, &log_ctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); >+ ok(ret == 0, "tdb_allrecord_lock should succeed"); >+ >+ write(to, &c, sizeof(c)); >+ >+ read(from, &c, sizeof(c)); >+ >+ ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); >+ ok(ret == 0, "tdb_allrecord_unlock should succeed"); >+ >+ return 0; >+} >+ >+/* The code should barf on TDBs created with rwlocks. 
*/ >+int main(int argc, char *argv[]) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ int ret, status; >+ pid_t child, wait_ret; >+ int fromchild[2]; >+ int tochild[2]; >+ char c; >+ int tdb_flags; >+ bool runtime_support; >+ >+ runtime_support = tdb_runtime_check_for_robust_mutexes(); >+ >+ if (!runtime_support) { >+ skip(1, "No robust mutex support"); >+ return exit_status(); >+ } >+ >+ key.dsize = strlen("hi"); >+ key.dptr = discard_const_p(uint8_t, "hi"); >+ data.dsize = strlen("world"); >+ data.dptr = discard_const_p(uint8_t, "world"); >+ >+ pipe(fromchild); >+ pipe(tochild); >+ >+ tdb_flags = TDB_INCOMPATIBLE_HASH| >+ TDB_MUTEX_LOCKING| >+ TDB_CLEAR_IF_FIRST; >+ >+ child = fork(); >+ if (child == 0) { >+ close(fromchild[0]); >+ close(tochild[1]); >+ return do_child(tdb_flags, fromchild[1], tochild[0]); >+ } >+ close(fromchild[1]); >+ close(tochild[0]); >+ >+ read(fromchild[0], &c, sizeof(c)); >+ >+ tdb = tdb_open_ex("mutex-allrecord-block.tdb", 0, >+ tdb_flags, O_RDWR|O_CREAT, 0755, >+ &log_ctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ ret = tdb_chainlock_nonblock(tdb, key); >+ ok(ret == -1, "tdb_chainlock_nonblock should not succeed"); >+ >+ write(tochild[1], &c, sizeof(c)); >+ >+ ret = tdb_chainlock(tdb, key); >+ ok(ret == 0, "tdb_chainlock should not succeed"); >+ >+ ret = tdb_chainunlock(tdb, key); >+ ok(ret == 0, "tdb_chainunlock should succeed"); >+ >+ wait_ret = wait(&status); >+ ok(wait_ret == child, "child should have exited correctly"); >+ >+ diag("done"); >+ return exit_status(); >+} >diff --git a/lib/tdb/test/run-mutex-allrecord-trylock.c b/lib/tdb/test/run-mutex-allrecord-trylock.c >new file mode 100644 >index 0000000..4b683db >--- /dev/null >+++ b/lib/tdb/test/run-mutex-allrecord-trylock.c >@@ -0,0 +1,113 @@ >+#include "../common/tdb_private.h" >+#include "../common/io.c" >+#include "../common/tdb.c" >+#include "../common/lock.c" >+#include 
"../common/freelist.c" >+#include "../common/traverse.c" >+#include "../common/transaction.c" >+#include "../common/error.c" >+#include "../common/open.c" >+#include "../common/check.c" >+#include "../common/hash.c" >+#include "../common/mutex.c" >+#include "tap-interface.h" >+#include <stdlib.h> >+#include <sys/types.h> >+#include <sys/wait.h> >+#include <stdarg.h> >+ >+static TDB_DATA key, data; >+ >+static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, >+ const char *fmt, ...) >+{ >+ va_list ap; >+ va_start(ap, fmt); >+ vfprintf(stderr, fmt, ap); >+ va_end(ap); >+} >+ >+static int do_child(int tdb_flags, int to, int from) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ int ret; >+ char c = 0; >+ >+ tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 3, tdb_flags, >+ O_RDWR|O_CREAT, 0755, &log_ctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ ret = tdb_chainlock(tdb, key); >+ ok(ret == 0, "tdb_chainlock should succeed"); >+ >+ write(to, &c, sizeof(c)); >+ >+ read(from, &c, sizeof(c)); >+ >+ ret = tdb_chainunlock(tdb, key); >+ ok(ret == 0, "tdb_chainunlock should succeed"); >+ >+ return 0; >+} >+ >+/* The code should barf on TDBs created with rwlocks. 
*/ >+int main(int argc, char *argv[]) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ int ret, status; >+ pid_t child, wait_ret; >+ int fromchild[2]; >+ int tochild[2]; >+ char c; >+ int tdb_flags; >+ bool runtime_support; >+ >+ runtime_support = tdb_runtime_check_for_robust_mutexes(); >+ >+ if (!runtime_support) { >+ skip(1, "No robust mutex support"); >+ return exit_status(); >+ } >+ >+ key.dsize = strlen("hi"); >+ key.dptr = discard_const_p(uint8_t, "hi"); >+ data.dsize = strlen("world"); >+ data.dptr = discard_const_p(uint8_t, "world"); >+ >+ pipe(fromchild); >+ pipe(tochild); >+ >+ tdb_flags = TDB_INCOMPATIBLE_HASH| >+ TDB_MUTEX_LOCKING| >+ TDB_CLEAR_IF_FIRST; >+ >+ child = fork(); >+ if (child == 0) { >+ close(fromchild[0]); >+ close(tochild[1]); >+ return do_child(tdb_flags, fromchild[1], tochild[0]); >+ } >+ close(fromchild[1]); >+ close(tochild[0]); >+ >+ read(fromchild[0], &c, sizeof(c)); >+ >+ tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 0, tdb_flags, >+ O_RDWR|O_CREAT, 0755, &log_ctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_NOWAIT, false); >+ ok(ret == -1, "tdb_allrecord_lock (nowait) should not succeed"); >+ >+ write(tochild[1], &c, sizeof(c)); >+ >+ wait_ret = wait(&status); >+ ok(wait_ret == child, "child should have exited correctly"); >+ >+ diag("done"); >+ return exit_status(); >+} >diff --git a/lib/tdb/test/run-mutex-die.c b/lib/tdb/test/run-mutex-die.c >new file mode 100644 >index 0000000..4b8eac1 >--- /dev/null >+++ b/lib/tdb/test/run-mutex-die.c >@@ -0,0 +1,269 @@ >+#include "../common/tdb_private.h" >+#include "lock-tracking.h" >+static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset); >+static ssize_t write_check(int fd, const void *buf, size_t count); >+static int ftruncate_check(int fd, off_t length); >+ >+#define pwrite pwrite_check >+#define write write_check >+#define 
fcntl fcntl_with_lockcheck >+#define ftruncate ftruncate_check >+ >+#include "../common/io.c" >+#include "../common/tdb.c" >+#include "../common/lock.c" >+#include "../common/freelist.c" >+#include "../common/traverse.c" >+#include "../common/transaction.c" >+#include "../common/error.c" >+#include "../common/open.c" >+#include "../common/check.c" >+#include "../common/hash.c" >+#include "../common/mutex.c" >+#include "tap-interface.h" >+#include <stdlib.h> >+#include <stdbool.h> >+#include <stdarg.h> >+#include "external-agent.h" >+#include "logging.h" >+ >+#undef write >+#undef pwrite >+#undef fcntl >+#undef ftruncate >+ >+static int target, current; >+#define TEST_DBNAME "run-mutex-die.tdb" >+#define KEY_STRING "helloworld" >+ >+static void maybe_die(int fd) >+{ >+ if (target == 0) { >+ return; >+ } >+ current += 1; >+ if (current == target) { >+ _exit(1); >+ } >+} >+ >+static ssize_t pwrite_check(int fd, >+ const void *buf, size_t count, off_t offset) >+{ >+ ssize_t ret; >+ >+ maybe_die(fd); >+ >+ ret = pwrite(fd, buf, count, offset); >+ if (ret != count) >+ return ret; >+ >+ maybe_die(fd); >+ return ret; >+} >+ >+static ssize_t write_check(int fd, const void *buf, size_t count) >+{ >+ ssize_t ret; >+ >+ maybe_die(fd); >+ >+ ret = write(fd, buf, count); >+ if (ret != count) >+ return ret; >+ >+ maybe_die(fd); >+ return ret; >+} >+ >+static int ftruncate_check(int fd, off_t length) >+{ >+ int ret; >+ >+ maybe_die(fd); >+ >+ ret = ftruncate(fd, length); >+ >+ maybe_die(fd); >+ return ret; >+} >+ >+static enum agent_return flakey_ops(struct agent *a) >+{ >+ enum agent_return ret; >+ >+ /* >+ * Run in the external agent child >+ */ >+ >+ ret = external_agent_operation(a, OPEN_WITH_CLEAR_IF_FIRST, TEST_DBNAME); >+ if (ret != SUCCESS) { >+ fprintf(stderr, "Agent failed to open: %s\n", >+ agent_return_name(ret)); >+ return ret; >+ } >+ ret = external_agent_operation(a, UNMAP, ""); >+ if (ret != SUCCESS) { >+ fprintf(stderr, "Agent failed to unmap: %s\n", >+ 
agent_return_name(ret)); >+ return ret; >+ } >+ ret = external_agent_operation(a, STORE, "xyz"); >+ if (ret != SUCCESS) { >+ fprintf(stderr, "Agent failed to store: %s\n", >+ agent_return_name(ret)); >+ return ret; >+ } >+ ret = external_agent_operation(a, STORE, KEY_STRING); >+ if (ret != SUCCESS) { >+ fprintf(stderr, "Agent failed store: %s\n", >+ agent_return_name(ret)); >+ return ret; >+ } >+ ret = external_agent_operation(a, FETCH, KEY_STRING); >+ if (ret != SUCCESS) { >+ fprintf(stderr, "Agent failed find key: %s\n", >+ agent_return_name(ret)); >+ return ret; >+ } >+ ret = external_agent_operation(a, PING, ""); >+ if (ret != SUCCESS) { >+ fprintf(stderr, "Agent failed ping: %s\n", >+ agent_return_name(ret)); >+ return ret; >+ } >+ return ret; >+} >+ >+static bool prep_db(void) { >+ struct tdb_context *tdb; >+ TDB_DATA key; >+ TDB_DATA data; >+ >+ key.dptr = discard_const_p(uint8_t, KEY_STRING); >+ key.dsize = strlen((char *)key.dptr); >+ data.dptr = discard_const_p(uint8_t, "foo"); >+ data.dsize = strlen((char *)data.dptr); >+ >+ unlink(TEST_DBNAME); >+ >+ tdb = tdb_open_ex( >+ TEST_DBNAME, 2, >+ TDB_INCOMPATIBLE_HASH|TDB_MUTEX_LOCKING|TDB_CLEAR_IF_FIRST, >+ O_CREAT|O_TRUNC|O_RDWR, 0600, &taplogctx, NULL); >+ if (tdb == NULL) { >+ return false; >+ } >+ >+ if (tdb_store(tdb, key, data, TDB_INSERT) != 0) { >+ return false; >+ } >+ >+ tdb_close(tdb); >+ tdb = NULL; >+ >+ forget_locking(); >+ >+ return true; >+} >+ >+static bool test_db(void) { >+ struct tdb_context *tdb; >+ int ret; >+ >+ tdb = tdb_open_ex( >+ TEST_DBNAME, 1024, TDB_INCOMPATIBLE_HASH, >+ O_RDWR, 0600, &taplogctx, NULL); >+ >+ if (tdb == NULL) { >+ perror("tdb_open_ex failed"); >+ return false; >+ } >+ >+ ret = tdb_traverse(tdb, NULL, NULL); >+ if (ret == -1) { >+ perror("traverse failed"); >+ goto fail; >+ } >+ >+ tdb_close(tdb); >+ >+ forget_locking(); >+ >+ return true; >+ >+fail: >+ tdb_close(tdb); >+ return false; >+} >+ >+static bool test_one(void) >+{ >+ enum agent_return ret; >+ >+ ret = 
AGENT_DIED; >+ target = 19; >+ >+ while (ret != SUCCESS) { >+ struct agent *agent; >+ >+ { >+ int child_target = target; >+ bool pret; >+ target = 0; >+ pret = prep_db(); >+ ok1(pret); >+ target = child_target; >+ } >+ >+ agent = prepare_external_agent(); >+ >+ ret = flakey_ops(agent); >+ >+ diag("Agent (target=%d) returns %s", >+ target, agent_return_name(ret)); >+ >+ if (ret == SUCCESS) { >+ ok((target > 19), "At least one AGENT_DIED expected"); >+ } else { >+ ok(ret == AGENT_DIED, "AGENT_DIED expected"); >+ } >+ >+ shutdown_agent(agent); >+ >+ { >+ int child_target = target; >+ bool tret; >+ target = 0; >+ tret = test_db(); >+ ok1(tret); >+ target = child_target; >+ } >+ >+ target += 1; >+ } >+ >+ return true; >+} >+ >+int main(int argc, char *argv[]) >+{ >+ bool ret; >+ bool runtime_support; >+ >+ runtime_support = tdb_runtime_check_for_robust_mutexes(); >+ >+ if (!runtime_support) { >+ skip(1, "No robust mutex support"); >+ return exit_status(); >+ } >+ >+ plan_tests(12); >+ unlock_callback = maybe_die; >+ >+ ret = test_one(); >+ ok1(ret); >+ >+ diag("done"); >+ return exit_status(); >+} >diff --git a/lib/tdb/test/run-mutex-openflags2.c b/lib/tdb/test/run-mutex-openflags2.c >new file mode 100644 >index 0000000..57ac7e3 >--- /dev/null >+++ b/lib/tdb/test/run-mutex-openflags2.c >@@ -0,0 +1,152 @@ >+#include "../common/tdb_private.h" >+#include "../common/io.c" >+#include "../common/tdb.c" >+#include "../common/lock.c" >+#include "../common/freelist.c" >+#include "../common/traverse.c" >+#include "../common/transaction.c" >+#include "../common/error.c" >+#include "../common/open.c" >+#include "../common/check.c" >+#include "../common/hash.c" >+#include "../common/mutex.c" >+#include "tap-interface.h" >+#include <stdlib.h> >+#include <sys/types.h> >+#include <sys/wait.h> >+#include <poll.h> >+#include <stdarg.h> >+ >+static TDB_DATA key, data; >+ >+static void log_void(struct tdb_context *tdb, enum tdb_debug_level level, >+ const char *fmt, ...) 
>+{ >+} >+ >+static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, >+ const char *fmt, ...) >+{ >+ va_list ap; >+ va_start(ap, fmt); >+ vfprintf(stderr, fmt, ap); >+ va_end(ap); >+} >+ >+static int do_child(int fd) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ struct tdb_logging_context nolog_ctx = { log_void, NULL }; >+ char c; >+ >+ read(fd, &c, 1); >+ >+ tdb = tdb_open_ex("mutex-openflags2.tdb", 0, >+ TDB_DEFAULT, >+ O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); >+ ok((tdb == NULL) && (errno == EINVAL), "TDB_DEFAULT without " >+ "TDB_MUTEX_LOCKING should fail with EINVAL - %d", errno); >+ >+ tdb = tdb_open_ex("mutex-openflags2.tdb", 0, >+ TDB_CLEAR_IF_FIRST, >+ O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); >+ ok((tdb == NULL) && (errno == EINVAL), "TDB_CLEAR_IF_FIRST without " >+ "TDB_MUTEX_LOCKING should fail with EINVAL - %d", errno); >+ >+ tdb = tdb_open_ex("mutex-openflags2.tdb", 0, >+ TDB_CLEAR_IF_FIRST | >+ TDB_MUTEX_LOCKING | >+ TDB_INTERNAL, >+ O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); >+ ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING with " >+ "TDB_INTERNAL should fail with EINVAL - %d", errno); >+ >+ tdb = tdb_open_ex("mutex-openflags2.tdb", 0, >+ TDB_CLEAR_IF_FIRST | >+ TDB_MUTEX_LOCKING | >+ TDB_NOMMAP, >+ O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); >+ ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING with " >+ "TDB_NOMMAP should fail with EINVAL - %d", errno); >+ >+ tdb = tdb_open_ex("mutex-openflags2.tdb", 0, >+ TDB_CLEAR_IF_FIRST | >+ TDB_MUTEX_LOCKING, >+ O_RDONLY, 0755, &nolog_ctx, NULL); >+ ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING with " >+ "O_RDONLY should fail with EINVAL - %d", errno); >+ >+ tdb = tdb_open_ex("mutex-openflags2.tdb", 0, >+ TDB_CLEAR_IF_FIRST | >+ TDB_MUTEX_LOCKING, >+ O_RDWR|O_CREAT, 0755, &log_ctx, NULL); >+ ok((tdb != NULL), "TDB_MUTEX_LOCKING with TDB_CLEAR_IF_FIRST" >+ "TDB_NOMMAP should work - %d", 
errno); >+ >+ return 0; >+} >+ >+/* The code should barf on TDBs created with rwlocks. */ >+int main(int argc, char *argv[]) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ struct tdb_logging_context nolog_ctx = { log_void, NULL }; >+ int ret, status; >+ pid_t child, wait_ret; >+ int pipefd[2]; >+ char c = 0; >+ bool runtime_support; >+ >+ runtime_support = tdb_runtime_check_for_robust_mutexes(); >+ >+ ret = pipe(pipefd); >+ ok1(ret == 0); >+ >+ key.dsize = strlen("hi"); >+ key.dptr = discard_const_p(uint8_t, "hi"); >+ data.dsize = strlen("world"); >+ data.dptr = discard_const_p(uint8_t, "world"); >+ >+ tdb = tdb_open_ex("mutex-openflags2.tdb", 0, >+ TDB_INCOMPATIBLE_HASH| >+ TDB_MUTEX_LOCKING, >+ O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); >+ ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING without " >+ "TDB_CLEAR_IF_FIRST should fail with EINVAL - %d", errno); >+ >+ if (!runtime_support) { >+ tdb = tdb_open_ex("mutex-openflags2.tdb", 0, >+ TDB_CLEAR_IF_FIRST| >+ TDB_MUTEX_LOCKING, >+ O_RDWR|O_CREAT, 0755, &nolog_ctx, NULL); >+ ok((tdb == NULL) && (errno == ENOSYS), "TDB_MUTEX_LOCKING without " >+ "runtime support should fail with ENOSYS - %d", errno); >+ >+ skip(1, "No robust mutex support"); >+ return exit_status(); >+ } >+ >+ child = fork(); >+ if (child == 0) { >+ return do_child(pipefd[0]); >+ } >+ >+ tdb = tdb_open_ex("mutex-openflags2.tdb", 0, >+ TDB_CLEAR_IF_FIRST| >+ TDB_MUTEX_LOCKING, >+ O_RDWR|O_CREAT, 0755, &log_ctx, NULL); >+ ok((tdb != NULL), "tdb_open_ex with mutexes should succeed"); >+ >+ write(pipefd[1], &c, 1); >+ >+ wait_ret = wait(&status); >+ ok((wait_ret == child) && (status == 0), >+ "child should have exited correctly"); >+ >+ diag("done"); >+ return exit_status(); >+} >diff --git a/lib/tdb/test/run-mutex-trylock.c b/lib/tdb/test/run-mutex-trylock.c >new file mode 100644 >index 0000000..c96b635 >--- /dev/null >+++ b/lib/tdb/test/run-mutex-trylock.c >@@ -0,0 
+1,122 @@ >+#include "../common/tdb_private.h" >+#include "../common/io.c" >+#include "../common/tdb.c" >+#include "../common/lock.c" >+#include "../common/freelist.c" >+#include "../common/traverse.c" >+#include "../common/transaction.c" >+#include "../common/error.c" >+#include "../common/open.c" >+#include "../common/check.c" >+#include "../common/hash.c" >+#include "../common/mutex.c" >+#include "tap-interface.h" >+#include <stdlib.h> >+#include <sys/types.h> >+#include <sys/wait.h> >+#include <stdarg.h> >+ >+static TDB_DATA key, data; >+ >+static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, >+ const char *fmt, ...) >+{ >+ va_list ap; >+ va_start(ap, fmt); >+ vfprintf(stderr, fmt, ap); >+ va_end(ap); >+} >+ >+static int do_child(int tdb_flags, int to, int from) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ int ret; >+ char c = 0; >+ >+ tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, >+ O_RDWR|O_CREAT, 0755, &log_ctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ ret = tdb_chainlock(tdb, key); >+ ok(ret == 0, "tdb_chainlock should succeed"); >+ >+ write(to, &c, sizeof(c)); >+ >+ read(from, &c, sizeof(c)); >+ >+ ret = tdb_chainunlock(tdb, key); >+ ok(ret == 0, "tdb_chainunlock should succeed"); >+ >+ write(to, &c, sizeof(c)); >+ >+ return 0; >+} >+ >+/* The code should barf on TDBs created with rwlocks. 
*/ >+int main(int argc, char *argv[]) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ int ret, status; >+ pid_t child, wait_ret; >+ int fromchild[2]; >+ int tochild[2]; >+ char c; >+ int tdb_flags; >+ bool runtime_support; >+ >+ runtime_support = tdb_runtime_check_for_robust_mutexes(); >+ >+ if (!runtime_support) { >+ skip(1, "No robust mutex support"); >+ return exit_status(); >+ } >+ >+ key.dsize = strlen("hi"); >+ key.dptr = discard_const_p(uint8_t, "hi"); >+ data.dsize = strlen("world"); >+ data.dptr = discard_const_p(uint8_t, "world"); >+ >+ pipe(fromchild); >+ pipe(tochild); >+ >+ tdb_flags = TDB_INCOMPATIBLE_HASH| >+ TDB_MUTEX_LOCKING| >+ TDB_CLEAR_IF_FIRST; >+ >+ child = fork(); >+ if (child == 0) { >+ close(fromchild[0]); >+ close(tochild[1]); >+ return do_child(tdb_flags, fromchild[1], tochild[0]); >+ } >+ close(fromchild[1]); >+ close(tochild[0]); >+ >+ read(fromchild[0], &c, sizeof(c)); >+ >+ tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, >+ O_RDWR|O_CREAT, 0755, &log_ctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ ret = tdb_chainlock_nonblock(tdb, key); >+ ok(ret == -1, "tdb_chainlock_nonblock should not succeed"); >+ >+ write(tochild[1], &c, sizeof(c)); >+ >+ read(fromchild[0], &c, sizeof(c)); >+ >+ ret = tdb_chainlock_nonblock(tdb, key); >+ ok(ret == 0, "tdb_chainlock_nonblock should succeed"); >+ ret = tdb_chainunlock(tdb, key); >+ ok(ret == 0, "tdb_chainunlock should succeed"); >+ >+ wait_ret = wait(&status); >+ ok(wait_ret == child, "child should have exited correctly"); >+ >+ diag("done"); >+ return exit_status(); >+} >diff --git a/lib/tdb/test/run-mutex1.c b/lib/tdb/test/run-mutex1.c >new file mode 100644 >index 0000000..eb75946 >--- /dev/null >+++ b/lib/tdb/test/run-mutex1.c >@@ -0,0 +1,138 @@ >+#include "../common/tdb_private.h" >+#include "../common/io.c" >+#include "../common/tdb.c" >+#include "../common/lock.c" >+#include "../common/freelist.c" 
>+#include "../common/traverse.c" >+#include "../common/transaction.c" >+#include "../common/error.c" >+#include "../common/open.c" >+#include "../common/check.c" >+#include "../common/hash.c" >+#include "../common/mutex.c" >+#include "tap-interface.h" >+#include <stdlib.h> >+#include <sys/types.h> >+#include <sys/wait.h> >+#include <stdarg.h> >+ >+static TDB_DATA key, data; >+ >+static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, >+ const char *fmt, ...) >+{ >+ va_list ap; >+ va_start(ap, fmt); >+ vfprintf(stderr, fmt, ap); >+ va_end(ap); >+} >+ >+static int do_child(int tdb_flags, int to, int from) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ int ret; >+ char c = 0; >+ >+ tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, >+ O_RDWR|O_CREAT, 0755, &log_ctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ ret = tdb_chainlock(tdb, key); >+ ok(ret == 0, "tdb_chainlock should succeed"); >+ >+ write(to, &c, sizeof(c)); >+ read(from, &c, sizeof(c)); >+ >+ ret = tdb_chainunlock(tdb, key); >+ ok(ret == 0, "tdb_chainunlock should succeed"); >+ >+ write(to, &c, sizeof(c)); >+ read(from, &c, sizeof(c)); >+ >+ ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); >+ ok(ret == 0, "tdb_allrecord_lock should succeed"); >+ >+ write(to, &c, sizeof(c)); >+ read(from, &c, sizeof(c)); >+ >+ ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); >+ ok(ret == 0, "tdb_allrecord_lock should succeed"); >+ >+ return 0; >+} >+ >+/* The code should barf on TDBs created with rwlocks. 
*/ >+int main(int argc, char *argv[]) >+{ >+ struct tdb_context *tdb; >+ unsigned int log_count; >+ struct tdb_logging_context log_ctx = { log_fn, &log_count }; >+ int ret, status; >+ pid_t child, wait_ret; >+ int fromchild[2]; >+ int tochild[2]; >+ char c; >+ int tdb_flags; >+ bool runtime_support; >+ >+ runtime_support = tdb_runtime_check_for_robust_mutexes(); >+ >+ if (!runtime_support) { >+ skip(1, "No robust mutex support"); >+ return exit_status(); >+ } >+ >+ key.dsize = strlen("hi"); >+ key.dptr = discard_const_p(uint8_t, "hi"); >+ data.dsize = strlen("world"); >+ data.dptr = discard_const_p(uint8_t, "world"); >+ >+ pipe(fromchild); >+ pipe(tochild); >+ >+ tdb_flags = TDB_INCOMPATIBLE_HASH| >+ TDB_MUTEX_LOCKING| >+ TDB_CLEAR_IF_FIRST; >+ >+ child = fork(); >+ if (child == 0) { >+ close(fromchild[0]); >+ close(tochild[1]); >+ return do_child(tdb_flags, fromchild[1], tochild[0]); >+ } >+ close(fromchild[1]); >+ close(tochild[0]); >+ >+ read(fromchild[0], &c, sizeof(c)); >+ >+ tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, >+ O_RDWR|O_CREAT, 0755, &log_ctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ write(tochild[1], &c, sizeof(c)); >+ read(fromchild[0], &c, sizeof(c)); >+ >+ ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); >+ ok(ret == 0, "tdb_allrecord_lock should succeed"); >+ >+ ret = tdb_store(tdb, key, data, 0); >+ ok(ret == 0, "tdb_store should succeed"); >+ >+ ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); >+ ok(ret == 0, "tdb_allrecord_unlock should succeed"); >+ >+ write(tochild[1], &c, sizeof(c)); >+ read(fromchild[0], &c, sizeof(c)); >+ write(tochild[1], &c, sizeof(c)); >+ >+ ret = tdb_delete(tdb, key); >+ ok(ret == 0, "tdb_delete should succeed"); >+ >+ wait_ret = wait(&status); >+ ok(wait_ret == child, "child should have exited correctly"); >+ >+ diag("done"); >+ return exit_status(); >+} >diff --git a/lib/tdb/wscript b/lib/tdb/wscript >index 6243ccf..c4f8b6a 100644 >--- a/lib/tdb/wscript >+++ b/lib/tdb/wscript >@@ -39,7 
+39,14 @@ tdb1_unit_tests = [ > 'run-transaction-expand', > 'run-traverse-in-transaction', > 'run-wronghash-fail', >- 'run-zero-append' >+ 'run-zero-append', >+ 'run-mutex-openflags2', >+ 'run-mutex-trylock', >+ 'run-mutex-allrecord-bench', >+ 'run-mutex-allrecord-trylock', >+ 'run-mutex-allrecord-block', >+ 'run-mutex-die', >+ 'run-mutex1', > ] > > def set_options(opt): >-- >1.7.9.5 > > >From 0b0e5e62e879421b7243fc3b783bc00c2f3f4265 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Wed, 22 Jan 2014 11:15:55 +0100 >Subject: [PATCH 21/25] tdb/test: add marklock deadlock test > >Pair-Programmed-With: Stefan Metzmacher <metze@samba.org> >Pair-Programmed-With: Michael Adam <obnox@samba.org> >TODO: Signed-off-by: Volker Lendecke <vl@samba.org> >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Signed-off-by: Michael Adam <obnox@samba.org> >--- > lib/tdb/test/run-marklock-deadlock.c | 278 ++++++++++++++++++++++++++++++++++ > lib/tdb/wscript | 1 + > 2 files changed, 279 insertions(+) > create mode 100644 lib/tdb/test/run-marklock-deadlock.c > >diff --git a/lib/tdb/test/run-marklock-deadlock.c b/lib/tdb/test/run-marklock-deadlock.c >new file mode 100644 >index 0000000..ff03a11 >--- /dev/null >+++ b/lib/tdb/test/run-marklock-deadlock.c >@@ -0,0 +1,278 @@ >+#include "../common/tdb_private.h" >+#include "../common/io.c" >+#include "../common/tdb.c" >+#include "../common/lock.c" >+#include "../common/freelist.c" >+#include "../common/traverse.c" >+#include "../common/transaction.c" >+#include "../common/error.c" >+#include "../common/open.c" >+#include "../common/check.c" >+#include "../common/hash.c" >+#include "../common/mutex.c" >+#include "tap-interface.h" >+#include <stdlib.h> >+#include <sys/types.h> >+#include <sys/wait.h> >+#include <stdarg.h> >+#include "logging.h" >+ >+static TDB_DATA key, data; >+ >+static void do_chainlock(const char *name, int tdb_flags, int up, int down) >+{ >+ struct tdb_context *tdb; >+ int ret; >+ ssize_t nread, nwritten; 
>+ char c = 0; >+ >+ tdb = tdb_open_ex(name, 3, tdb_flags, >+ O_RDWR|O_CREAT, 0755, &taplogctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ ret = tdb_chainlock(tdb, key); >+ ok(ret == 0, "tdb_chainlock should succeed"); >+ >+ nwritten = write(up, &c, sizeof(c)); >+ ok(nwritten == sizeof(c), "write should succeed"); >+ >+ nread = read(down, &c, sizeof(c)); >+ ok(nread == sizeof(c), "read should succeed"); >+ >+ exit(0); >+} >+ >+static void do_allrecord_lock(const char *name, int tdb_flags, int up, int down) >+{ >+ struct tdb_context *tdb; >+ int ret; >+ ssize_t nread, nwritten; >+ char c = 0; >+ >+ tdb = tdb_open_ex(name, 3, tdb_flags, >+ O_RDWR|O_CREAT, 0755, &taplogctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); >+ ok(ret == 0, "tdb_allrecord_lock should succeed"); >+ >+ nwritten = write(up, &c, sizeof(c)); >+ ok(nwritten == sizeof(c), "write should succeed"); >+ >+ nread = read(down, &c, sizeof(c)); >+ ok(nread == sizeof(c), "read should succeed"); >+ >+ exit(0); >+} >+ >+/* The code should barf on TDBs created with rwlocks. 
*/ >+static int do_tests(const char *name, int tdb_flags) >+{ >+ struct tdb_context *tdb; >+ int ret; >+ pid_t chainlock_child, allrecord_child; >+ int chainlock_down[2]; >+ int chainlock_up[2]; >+ int allrecord_down[2]; >+ int allrecord_up[2]; >+ char c; >+ ssize_t nread, nwritten; >+ >+ key.dsize = strlen("hi"); >+ key.dptr = discard_const_p(uint8_t, "hi"); >+ data.dsize = strlen("world"); >+ data.dptr = discard_const_p(uint8_t, "world"); >+ >+ ret = pipe(chainlock_down); >+ ok(ret == 0, "pipe should succeed"); >+ >+ ret = pipe(chainlock_up); >+ ok(ret == 0, "pipe should succeed"); >+ >+ ret = pipe(allrecord_down); >+ ok(ret == 0, "pipe should succeed"); >+ >+ ret = pipe(allrecord_up); >+ ok(ret == 0, "pipe should succeed"); >+ >+ chainlock_child = fork(); >+ ok(chainlock_child != -1, "fork should succeed"); >+ >+ if (chainlock_child == 0) { >+ close(chainlock_up[0]); >+ close(chainlock_down[1]); >+ close(allrecord_up[0]); >+ close(allrecord_up[1]); >+ close(allrecord_down[0]); >+ close(allrecord_down[1]); >+ do_chainlock(name, tdb_flags, >+ chainlock_up[1], chainlock_down[0]); >+ exit(0); >+ } >+ close(chainlock_up[1]); >+ close(chainlock_down[0]); >+ >+ nread = read(chainlock_up[0], &c, sizeof(c)); >+ ok(nread == sizeof(c), "read should succeed"); >+ >+ /* >+ * Now we have a process holding a chainlock. Start another process >+ * trying the allrecord lock. This will block. 
>+ */ >+ >+ allrecord_child = fork(); >+ ok(allrecord_child != -1, "fork should succeed"); >+ >+ if (allrecord_child == 0) { >+ close(chainlock_up[0]); >+ close(chainlock_up[1]); >+ close(chainlock_down[0]); >+ close(chainlock_down[1]); >+ close(allrecord_up[0]); >+ close(allrecord_down[1]); >+ do_allrecord_lock(name, tdb_flags, >+ allrecord_up[1], allrecord_down[0]); >+ exit(0); >+ } >+ close(allrecord_up[1]); >+ close(allrecord_down[0]); >+ >+ poll(NULL, 0, 500); >+ >+ tdb = tdb_open_ex(name, 3, tdb_flags, >+ O_RDWR|O_CREAT, 0755, &taplogctx, NULL); >+ ok(tdb, "tdb_open_ex should succeed"); >+ >+ /* >+ * Someone already holds a chainlock, but we're able to get the >+ * freelist lock. >+ * >+ * The freelist lock/mutex is independent from the allrecord lock/mutex. >+ */ >+ >+ ret = tdb_chainlock_nonblock(tdb, key); >+ ok(ret == -1, "tdb_chainlock_nonblock should not succeed"); >+ >+ ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); >+ ok(ret == 0, "tdb_lock_nonblock should succeed"); >+ >+ ret = tdb_unlock(tdb, -1, F_WRLCK); >+ ok(ret == 0, "tdb_unlock should succeed"); >+ >+ /* >+ * We have someone else having done the lock for us. Just mark it. >+ */ >+ >+ ret = tdb_chainlock_mark(tdb, key); >+ ok(ret == 0, "tdb_chainlock_mark should succeed"); >+ >+ /* >+ * The tdb_store below will block the freelist. In one version of the >+ * mutex patches, the freelist was already blocked here by the >+ * allrecord child, which was waiting for the chainlock child to give >+ * up its chainlock. Make sure that we don't run into this >+ * deadlock. To exercise the deadlock, just comment out the "ok" >+ * line. >+ * >+ * The freelist lock/mutex is independent from the allrecord lock/mutex. 
>+ */ >+ >+ ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); >+ ok(ret == 0, "tdb_lock_nonblock should succeed"); >+ >+ ret = tdb_unlock(tdb, -1, F_WRLCK); >+ ok(ret == 0, "tdb_unlock should succeed"); >+ >+ ret = tdb_store(tdb, key, data, TDB_INSERT); >+ ok(ret == 0, "tdb_store should succeed"); >+ >+ ret = tdb_chainlock_unmark(tdb, key); >+ ok(ret == 0, "tdb_chainlock_unmark should succeed"); >+ >+ nwritten = write(chainlock_down[1], &c, sizeof(c)); >+ ok(nwritten == sizeof(c), "write should succeed"); >+ >+ nread = read(chainlock_up[0], &c, sizeof(c)); >+ ok(nread == 0, "read should succeed"); >+ >+ nread = read(allrecord_up[0], &c, sizeof(c)); >+ ok(nread == sizeof(c), "read should succeed"); >+ >+ /* >+ * Someone already holds the allrecord lock, but we're able to get the >+ * freelist lock. >+ * >+ * The freelist lock/mutex is independent from the allrecord lock/mutex. >+ */ >+ >+ ret = tdb_chainlock_nonblock(tdb, key); >+ ok(ret == -1, "tdb_chainlock_nonblock should not succeed"); >+ >+ ret = tdb_lockall_nonblock(tdb); >+ ok(ret == -1, "tdb_lockall_nonblock should not succeed"); >+ >+ ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); >+ ok(ret == 0, "tdb_lock_nonblock should succeed"); >+ >+ ret = tdb_unlock(tdb, -1, F_WRLCK); >+ ok(ret == 0, "tdb_unlock should succeed"); >+ >+ /* >+ * We have someone else having done the lock for us. Just mark it. 
>+ */ >+ >+ ret = tdb_lockall_mark(tdb); >+ ok(ret == 0, "tdb_lockall_mark should succeed"); >+ >+ ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); >+ ok(ret == 0, "tdb_lock_nonblock should succeed"); >+ >+ ret = tdb_unlock(tdb, -1, F_WRLCK); >+ ok(ret == 0, "tdb_unlock should succeed"); >+ >+ ret = tdb_store(tdb, key, data, TDB_REPLACE); >+ ok(ret == 0, "tdb_store should succeed"); >+ >+ ret = tdb_lockall_unmark(tdb); >+ ok(ret == 0, "tdb_lockall_unmark should succeed"); >+ >+ nwritten = write(allrecord_down[1], &c, sizeof(c)); >+ ok(nwritten == sizeof(c), "write should succeed"); >+ >+ nread = read(allrecord_up[0], &c, sizeof(c)); >+ ok(nread == 0, "read should succeed"); >+ >+ close(chainlock_up[0]); >+ close(chainlock_down[1]); >+ close(allrecord_up[0]); >+ close(allrecord_down[1]); >+ diag("%s tests done", name); >+ return exit_status(); >+} >+ >+int main(int argc, char *argv[]) >+{ >+ int ret; >+ bool mutex_support; >+ >+ mutex_support = tdb_runtime_check_for_robust_mutexes(); >+ >+ ret = do_tests("marklock-deadlock-fcntl.tdb", >+ TDB_CLEAR_IF_FIRST | >+ TDB_INCOMPATIBLE_HASH); >+ ok(ret == 0, "marklock-deadlock-fcntl.tdb tests should succeed"); >+ >+ if (!mutex_support) { >+ skip(1, "No robust mutex support, " >+ "skipping marklock-deadlock-mutex.tdb tests"); >+ return exit_status(); >+ } >+ >+ ret = do_tests("marklock-deadlock-mutex.tdb", >+ TDB_CLEAR_IF_FIRST | >+ TDB_MUTEX_LOCKING | >+ TDB_INCOMPATIBLE_HASH); >+ ok(ret == 0, "marklock-deadlock-mutex.tdb tests should succeed"); >+ >+ return exit_status(); >+} >diff --git a/lib/tdb/wscript b/lib/tdb/wscript >index c4f8b6a..885548d 100644 >--- a/lib/tdb/wscript >+++ b/lib/tdb/wscript >@@ -40,6 +40,7 @@ tdb1_unit_tests = [ > 'run-traverse-in-transaction', > 'run-wronghash-fail', > 'run-zero-append', >+ 'run-marklock-deadlock', > 'run-mutex-openflags2', > 'run-mutex-trylock', > 'run-mutex-allrecord-bench', >-- >1.7.9.5 > > >From 0e548d1b65f61455f98ff05e3610744659f67f60 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke 
<vl@samba.org> >Date: Thu, 21 Feb 2013 16:34:32 +0100 >Subject: [PATCH 22/25] tdb/tools: add -m option to tdbtorture > >This allows tdbtorture to run with mutexes. > >Signed-off-by: Volker Lendecke <vl@samba.org> >Reviewed-by: Stefan Metzmacher <metze@samba.org> >--- > lib/tdb/tools/tdbtorture.c | 29 +++++++++++++++++++++++++---- > 1 file changed, 25 insertions(+), 4 deletions(-) > >diff --git a/lib/tdb/tools/tdbtorture.c b/lib/tdb/tools/tdbtorture.c >index 5ae08f6..3e26f65 100644 >--- a/lib/tdb/tools/tdbtorture.c >+++ b/lib/tdb/tools/tdbtorture.c >@@ -33,6 +33,7 @@ static int always_transaction = 0; > static int hash_size = 2; > static int loopnum; > static int count_pipe; >+static bool mutex = false; > static struct tdb_logging_context log_ctx; > > #ifdef PRINTF_ATTRIBUTE >@@ -119,6 +120,7 @@ static void addrec_db(void) > > #if TRANSACTION_PROB > if (in_transaction == 0 && >+ ((tdb_get_flags(db) & TDB_MUTEX_LOCKING) == 0) && > (always_transaction || random() % TRANSACTION_PROB == 0)) { > if (tdb_transaction_start(db) != 0) { > fatal("tdb_transaction_start failed"); >@@ -216,7 +218,7 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, > > static void usage(void) > { >- printf("Usage: tdbtorture [-t] [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n"); >+ printf("Usage: tdbtorture [-t] [-k] [-m] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n"); > exit(0); > } > >@@ -230,7 +232,13 @@ static void send_count_and_suicide(int sig) > > static int run_child(const char *filename, int i, int seed, unsigned num_loops, unsigned start) > { >- db = tdb_open_ex(filename, hash_size, TDB_DEFAULT, >+ int tdb_flags = TDB_DEFAULT|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH; >+ >+ if (mutex) { >+ tdb_flags |= TDB_MUTEX_LOCKING; >+ } >+ >+ db = tdb_open_ex(filename, hash_size, tdb_flags, > O_RDWR | O_CREAT, 0600, &log_ctx, NULL); > if (!db) { > fatal("db open failed"); >@@ -302,7 +310,7 @@ int main(int argc, char * const *argv) > > 
log_ctx.log_fn = tdb_log; > >- while ((c = getopt(argc, argv, "n:l:s:H:thk")) != -1) { >+ while ((c = getopt(argc, argv, "n:l:s:H:thkm")) != -1) { > switch (c) { > case 'n': > num_procs = strtol(optarg, NULL, 0); >@@ -322,6 +330,13 @@ int main(int argc, char * const *argv) > case 'k': > kill_random = 1; > break; >+ case 'm': >+ mutex = tdb_runtime_check_for_robust_mutexes(); >+ if (!mutex) { >+ printf("tdb_runtime_check_for_robust_mutexes() returned false\n"); >+ exit(1); >+ } >+ break; > default: > usage(); > } >@@ -443,7 +458,13 @@ int main(int argc, char * const *argv) > > done: > if (error_count == 0) { >- db = tdb_open_ex(test_tdb, hash_size, TDB_DEFAULT, >+ int tdb_flags = TDB_DEFAULT; >+ >+ if (mutex) { >+ tdb_flags |= TDB_NOLOCK; >+ } >+ >+ db = tdb_open_ex(test_tdb, hash_size, tdb_flags, > O_RDWR, 0, &log_ctx, NULL); > if (!db) { > fatal("db open failed\n"); >-- >1.7.9.5 > > >From 9a17f6ef53cf46ea52fb23982b2c0ed79729fe38 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Fri, 15 Nov 2013 12:57:06 +0100 >Subject: [PATCH 23/25] tdb/tools: Allow tdbtool to r/o open mutexed tdbs > >Pair-Programmed-With: Stefan Metzmacher <metze@samba.org> >TODO: Signed-off-by: Volker Lendecke <vl@samba.org> >Signed-off-by: Stefan Metzmacher <metze@samba.org> >--- > lib/tdb/tools/tdbtool.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 45 insertions(+), 1 deletion(-) > >diff --git a/lib/tdb/tools/tdbtool.c b/lib/tdb/tools/tdbtool.c >index c486117..2f93e33 100644 >--- a/lib/tdb/tools/tdbtool.c >+++ b/lib/tdb/tools/tdbtool.c >@@ -119,6 +119,33 @@ static double _end_timer(void) > } > > #ifdef PRINTF_ATTRIBUTE >+static void tdb_log_open(struct tdb_context *tdb, enum tdb_debug_level level, >+ const char *format, ...) PRINTF_ATTRIBUTE(3,4); >+#endif >+static void tdb_log_open(struct tdb_context *tdb, enum tdb_debug_level level, >+ const char *format, ...) 
>+{ >+ const char *mutex_msg = >+ "Can use mutexes only with MUTEX_LOCKING or NOLOCK\n"; >+ char *p; >+ va_list ap; >+ >+ p = strstr(format, mutex_msg); >+ if (p != NULL) { >+ /* >+ * Yes, this is a hack, but we don't want to see this >+ * message on first open, but we want to see >+ * everything else. >+ */ >+ return; >+ } >+ >+ va_start(ap, format); >+ vfprintf(stderr, format, ap); >+ va_end(ap); >+} >+ >+#ifdef PRINTF_ATTRIBUTE > static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) PRINTF_ATTRIBUTE(3,4); > #endif > static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) >@@ -240,7 +267,7 @@ static void create_tdb(const char *tdbname) > static void open_tdb(const char *tdbname) > { > struct tdb_logging_context log_ctx = { NULL, NULL }; >- log_ctx.log_fn = tdb_log; >+ log_ctx.log_fn = tdb_log_open; > > if (tdb) tdb_close(tdb); > tdb = tdb_open_ex(tdbname, 0, >@@ -248,6 +275,23 @@ static void open_tdb(const char *tdbname) > (disable_lock?TDB_NOLOCK:0), > O_RDWR, 0600, > &log_ctx, NULL); >+ >+ log_ctx.log_fn = tdb_log; >+ if (tdb != NULL) { >+ tdb_set_logging_function(tdb, &log_ctx); >+ } >+ >+ if ((tdb == NULL) && (errno == EINVAL)) { >+ /* >+ * Retry NOLOCK and readonly. There we want to see all >+ * error messages. 
>+ */ >+ tdb = tdb_open_ex(tdbname, 0, >+ (disable_mmap?TDB_NOMMAP:0) |TDB_NOLOCK, >+ O_RDONLY, 0600, >+ &log_ctx, NULL); >+ } >+ > if (!tdb) { > printf("Could not open %s: %s\n", tdbname, strerror(errno)); > } >-- >1.7.9.5 > > >From 04db190fbe90b4b5a5a833dd0d22ccb46f871c47 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Tue, 19 Mar 2013 12:02:22 +0100 >Subject: [PATCH 24/25] dbwrap_tdb: Use mutexes on demand > >Pair-Programmed-With: Stefan Metzmacher <metze@samba.org> >TODO: Signed-off-by: Volker Lendecke <vl@samba.org> >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Reviewed-by: Michael Adam <obnox@samba.org> >--- > source3/lib/dbwrap/dbwrap_open.c | 19 +++++++++++++++++++ > 1 file changed, 19 insertions(+) > >diff --git a/source3/lib/dbwrap/dbwrap_open.c b/source3/lib/dbwrap/dbwrap_open.c >index 81f20b0..64f484e 100644 >--- a/source3/lib/dbwrap/dbwrap_open.c >+++ b/source3/lib/dbwrap/dbwrap_open.c >@@ -93,6 +93,25 @@ struct db_context *db_open(TALLOC_CTX *mem_ctx, > } > } > >+ if (tdb_flags & TDB_CLEAR_IF_FIRST) { >+ const char *base; >+ bool try_mutex = false; >+ >+ base = strrchr_m(name, '/'); >+ if (base != NULL) { >+ base += 1; >+ } else { >+ base = name; >+ } >+ >+ try_mutex = lp_parm_bool(-1, "dbwrap_tdb_mutexes", "*", try_mutex); >+ try_mutex = lp_parm_bool(-1, "dbwrap_tdb_mutexes", base, try_mutex); >+ >+ if (try_mutex && tdb_runtime_check_for_robust_mutexes()) { >+ tdb_flags |= TDB_MUTEX_LOCKING; >+ } >+ } >+ > sockname = lp_ctdbd_socket(); > > if (lp_clustering()) { >-- >1.7.9.5 > > >From c2c1ce98bf0af1821ef3f4e14b05f2ffa0b173ae Mon Sep 17 00:00:00 2001 >From: Stefan Metzmacher <metze@samba.org> >Date: Mon, 13 May 2013 11:14:26 +0200 >Subject: [PATCH 25/25] selftest: use dbwrap_tdb_mutexes:* = yes for > "plugin_s4_dc" and "member" > >Signed-off-by: Stefan Metzmacher <metze@samba.org> >Reviewed-by: Michael Adam <obnox@samba.org> >--- > selftest/target/Samba3.pm | 1 + > selftest/target/Samba4.pm | 2 ++ > 2 files changed, 3 
insertions(+) > >diff --git a/selftest/target/Samba3.pm b/selftest/target/Samba3.pm >index 489fec1..f502783 100755 >--- a/selftest/target/Samba3.pm >+++ b/selftest/target/Samba3.pm >@@ -247,6 +247,7 @@ sub setup_member($$$) > my $member_options = " > security = domain > server signing = on >+ dbwrap_tdb_mutexes:* = yes > "; > my $ret = $self->provision($prefix, > "LOCALMEMBER3", >diff --git a/selftest/target/Samba4.pm b/selftest/target/Samba4.pm >index 4e7cbd5..c4e8f31 100755 >--- a/selftest/target/Samba4.pm >+++ b/selftest/target/Samba4.pm >@@ -1446,6 +1446,8 @@ sub provision_plugin_s4_dc($$) > server services = -smb +s3fs > xattr_tdb:file = $prefix_abs/statedir/xattr.tdb > >+ dbwrap_tdb_mutexes:* = yes >+ > kernel oplocks = no > kernel change notify = no > >-- >1.7.9.5 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 10525
:
9839
|
9858
|
9939
|
9959
|
9964
|
9965