The Samba-Bugzilla – Attachment 5581 Details for
Bug 7232
CTDB persistent transactions are racy
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
Patchset to fix the transactions for 3.5
bug-7232.patches (text/plain), 106.33 KB, created by
Michael Adam
on 2010-03-31 10:00:37 UTC
(
hide
)
Description:
Patchset to fix the transactions for 3.5
Filename:
MIME Type:
Creator:
Michael Adam
Created:
2010-03-31 10:00:37 UTC
Size:
106.33 KB
patch
obsolete
>From fb1d5ec061a3f49e36064a7520f5806ba27712df Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Sun, 25 Oct 2009 16:12:12 +0100 >Subject: [PATCH 01/32] s3: Implement global locks in a g_lock tdb > >This is the basis to implement global locks in ctdb without depending on a >shared file system. The initial goal is to make ctdb persistent transactions >deterministic without too many timeouts. >(cherry picked from commit 4c1c3f2549f32fd069e0e7bf3aec299213f1e85b) >--- > source3/Makefile.in | 2 + > source3/include/ctdbd_conn.h | 2 + > source3/include/g_lock.h | 55 +++ > source3/lib/ctdbd_conn.c | 73 ++++- > source3/lib/g_lock.c | 594 ++++++++++++++++++++++++++++++++ > source3/librpc/gen_ndr/messaging.h | 4 +- > source3/librpc/gen_ndr/ndr_messaging.c | 1 + > source3/librpc/idl/messaging.idl | 3 +- > source3/utils/net.c | 7 + > source3/utils/net_g_lock.c | 213 ++++++++++++ > source3/utils/net_proto.h | 3 + > 11 files changed, 950 insertions(+), 7 deletions(-) > create mode 100644 source3/include/g_lock.h > create mode 100644 source3/lib/g_lock.c > create mode 100644 source3/utils/net_g_lock.c > >diff --git a/source3/Makefile.in b/source3/Makefile.in >index f84ed20..f58bb70 100644 >--- a/source3/Makefile.in >+++ b/source3/Makefile.in >@@ -265,6 +265,7 @@ EXTRA_ALL_TARGETS = @EXTRA_ALL_TARGETS@ > TDB_LIB_OBJ = lib/util_tdb.o ../lib/util/util_tdb.o \ > lib/dbwrap.o lib/dbwrap_tdb.o \ > lib/dbwrap_ctdb.o \ >+ lib/g_lock.o \ > lib/dbwrap_rbt.o > > TDB_VALIDATE_OBJ = lib/tdb_validate.o >@@ -1011,6 +1012,7 @@ NET_OBJ1 = utils/net.o utils/net_ads.o utils/net_help.o \ > utils/net_conf.o utils/net_join.o utils/net_user.o \ > utils/net_group.o utils/net_file.o utils/net_registry.o \ > auth/token_util.o utils/net_dom.o utils/net_share.o \ >+ utils/net_g_lock.o \ > utils/net_eventlog.o > > # these are not processed by make proto >diff --git a/source3/include/ctdbd_conn.h b/source3/include/ctdbd_conn.h >index d721235..96e7aa0 100644 >--- 
a/source3/include/ctdbd_conn.h >+++ b/source3/include/ctdbd_conn.h >@@ -73,5 +73,7 @@ NTSTATUS ctdbd_control_local(struct ctdbd_connection *conn, uint32 opcode, > uint64_t srvid, uint32_t flags, TDB_DATA data, > TALLOC_CTX *mem_ctx, TDB_DATA *outdata, > int *cstatus); >+NTSTATUS ctdb_watch_us(struct ctdbd_connection *conn); >+NTSTATUS ctdb_unwatch(struct ctdbd_connection *conn); > > #endif /* _CTDBD_CONN_H */ >diff --git a/source3/include/g_lock.h b/source3/include/g_lock.h >new file mode 100644 >index 0000000..c0eed38 >--- /dev/null >+++ b/source3/include/g_lock.h >@@ -0,0 +1,55 @@ >+/* >+ Unix SMB/CIFS implementation. >+ global locks based on ctdb >+ Copyright (C) 2009 by Volker Lendecke >+ >+ This program is free software; you can redistribute it and/or modify >+ it under the terms of the GNU General Public License as published by >+ the Free Software Foundation; either version 3 of the License, or >+ (at your option) any later version. >+ >+ This program is distributed in the hope that it will be useful, >+ but WITHOUT ANY WARRANTY; without even the implied warranty of >+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ GNU General Public License for more details. >+ >+ You should have received a copy of the GNU General Public License >+ along with this program. If not, see <http://www.gnu.org/licenses/>. 
>+*/ >+ >+#ifndef _G_LOCK_H_ >+#define _G_LOCK_H_ >+ >+#include "dbwrap.h" >+ >+struct g_lock_ctx; >+ >+enum g_lock_type { >+ G_LOCK_READ = 0, >+ G_LOCK_WRITE = 1, >+}; >+ >+/* >+ * Or'ed with g_lock_type >+ */ >+#define G_LOCK_PENDING (2) >+ >+struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx, >+ struct messaging_context *msg); >+ >+NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, >+ enum g_lock_type lock_type, struct timeval timeout); >+NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name); >+NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name, >+ struct server_id *pid); >+ >+int g_lock_locks(struct g_lock_ctx *ctx, >+ int (*fn)(const char *name, void *private_data), >+ void *private_data); >+NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name, >+ int (*fn)(struct server_id pid, >+ enum g_lock_type lock_type, >+ void *private_data), >+ void *private_data); >+ >+#endif >diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c >index 8ddb12a..900fa34 100644 >--- a/source3/lib/ctdbd_conn.c >+++ b/source3/lib/ctdbd_conn.c >@@ -361,10 +361,18 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid, > goto next_pkt; > } > >- if (msg->srvid == CTDB_SRVID_RECONFIGURE) { >- DEBUG(0,("Got cluster reconfigure message in ctdb_read_req\n")); >+ if ((msg->srvid == CTDB_SRVID_RECONFIGURE) >+ || (msg->srvid == CTDB_SRVID_SAMBA_NOTIFY)) { >+ >+ DEBUG(1, ("ctdb_read_req: Got %s message\n", >+ (msg->srvid == CTDB_SRVID_RECONFIGURE) >+ ? 
"cluster reconfigure" : "SAMBA_NOTIFY")); >+ > messaging_send(conn->msg_ctx, procid_self(), > MSG_SMB_BRL_VALIDATE, &data_blob_null); >+ messaging_send(conn->msg_ctx, procid_self(), >+ MSG_DBWRAP_G_LOCK_RETRY, >+ &data_blob_null); > TALLOC_FREE(hdr); > goto next_pkt; > } >@@ -493,6 +501,11 @@ NTSTATUS ctdbd_messaging_connection(TALLOC_CTX *mem_ctx, > goto fail; > } > >+ status = register_with_ctdbd(conn, CTDB_SRVID_SAMBA_NOTIFY); >+ if (!NT_STATUS_IS_OK(status)) { >+ goto fail; >+ } >+ > *pconn = conn; > return NT_STATUS_OK; > >@@ -533,15 +546,21 @@ static NTSTATUS ctdb_handle_message(uint8_t *buf, size_t length, > > SMB_ASSERT(conn->msg_ctx != NULL); > >- if (msg->srvid == CTDB_SRVID_RECONFIGURE) { >+ if ((msg->srvid == CTDB_SRVID_RECONFIGURE) >+ || (msg->srvid == CTDB_SRVID_SAMBA_NOTIFY)){ > DEBUG(0,("Got cluster reconfigure message\n")); > /* >- * when the cluster is reconfigured, we need to clean the brl >- * database >+ * when the cluster is reconfigured or someone of the >+ * family has passed away (SAMBA_NOTIFY), we need to >+ * clean the brl database > */ > messaging_send(conn->msg_ctx, procid_self(), > MSG_SMB_BRL_VALIDATE, &data_blob_null); > >+ messaging_send(conn->msg_ctx, procid_self(), >+ MSG_DBWRAP_G_LOCK_RETRY, >+ &data_blob_null); >+ > TALLOC_FREE(buf); > return NT_STATUS_OK; > } >@@ -1302,6 +1321,50 @@ NTSTATUS ctdbd_control_local(struct ctdbd_connection *conn, uint32 opcode, > return ctdbd_control(conn, CTDB_CURRENT_NODE, opcode, srvid, flags, data, mem_ctx, outdata, cstatus); > } > >+NTSTATUS ctdb_watch_us(struct ctdbd_connection *conn) >+{ >+ struct ctdb_client_notify_register reg_data; >+ size_t struct_len; >+ NTSTATUS status; >+ int cstatus; >+ >+ reg_data.srvid = CTDB_SRVID_SAMBA_NOTIFY; >+ reg_data.len = 1; >+ reg_data.notify_data[0] = 0; >+ >+ struct_len = offsetof(struct ctdb_client_notify_register, >+ notify_data) + reg_data.len; >+ >+ status = ctdbd_control_local( >+ conn, CTDB_CONTROL_REGISTER_NOTIFY, conn->rand_srvid, 0, >+ 
make_tdb_data((uint8_t *)&reg_data, struct_len), >+ NULL, NULL, &cstatus); >+ if (!NT_STATUS_IS_OK(status)) { >+ DEBUG(1, ("ctdbd_control_local failed: %s\n", >+ nt_errstr(status))); >+ } >+ return status; >+} >+ >+NTSTATUS ctdb_unwatch(struct ctdbd_connection *conn) >+{ >+ struct ctdb_client_notify_deregister dereg_data; >+ NTSTATUS status; >+ int cstatus; >+ >+ dereg_data.srvid = CTDB_SRVID_SAMBA_NOTIFY; >+ >+ status = ctdbd_control_local( >+ conn, CTDB_CONTROL_DEREGISTER_NOTIFY, conn->rand_srvid, 0, >+ make_tdb_data((uint8_t *)&dereg_data, sizeof(dereg_data)), >+ NULL, NULL, &cstatus); >+ if (!NT_STATUS_IS_OK(status)) { >+ DEBUG(1, ("ctdbd_control_local failed: %s\n", >+ nt_errstr(status))); >+ } >+ return status; >+} >+ > #else > > NTSTATUS ctdbd_init_connection(TALLOC_CTX *mem_ctx, >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >new file mode 100644 >index 0000000..6508b39 >--- /dev/null >+++ b/source3/lib/g_lock.c >@@ -0,0 +1,594 @@ >+/* >+ Unix SMB/CIFS implementation. >+ global locks based on dbwrap and messaging >+ Copyright (C) 2009 by Volker Lendecke >+ >+ This program is free software; you can redistribute it and/or modify >+ it under the terms of the GNU General Public License as published by >+ the Free Software Foundation; either version 3 of the License, or >+ (at your option) any later version. >+ >+ This program is distributed in the hope that it will be useful, >+ but WITHOUT ANY WARRANTY; without even the implied warranty of >+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ GNU General Public License for more details. >+ >+ You should have received a copy of the GNU General Public License >+ along with this program. If not, see <http://www.gnu.org/licenses/>. 
>+*/ >+ >+#include "includes.h" >+#include "g_lock.h" >+ >+static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, >+ struct server_id pid); >+ >+struct g_lock_ctx { >+ struct db_context *db; >+ struct messaging_context *msg; >+}; >+ >+/* >+ * The "g_lock.tdb" file contains records, indexed by the 0-terminated >+ * lockname. The record contains an array of "struct g_lock_rec" >+ * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed. >+ */ >+ >+struct g_lock_rec { >+ enum g_lock_type lock_type; >+ struct server_id pid; >+}; >+ >+struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx, >+ struct messaging_context *msg) >+{ >+ struct g_lock_ctx *result; >+ >+ result = talloc(mem_ctx, struct g_lock_ctx); >+ if (result == NULL) { >+ return NULL; >+ } >+ result->msg = msg; >+ >+ result->db = db_open(result, lock_path("g_lock.tdb"), 0, >+ TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT, 0700); >+ if (result->db == NULL) { >+ DEBUG(1, ("g_lock_init: Could not open g_lock.tdb")); >+ TALLOC_FREE(result); >+ return NULL; >+ } >+ return result; >+} >+ >+static bool g_lock_conflicts(enum g_lock_type lock_type, >+ const struct g_lock_rec *rec) >+{ >+ enum g_lock_type rec_lock = rec->lock_type; >+ >+ if ((rec_lock & G_LOCK_PENDING) != 0) { >+ return false; >+ } >+ >+ /* >+ * Only tested write locks so far. Very likely this routine >+ * needs to be fixed for read locks.... 
>+ */ >+ if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) { >+ return false; >+ } >+ return true; >+} >+ >+static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data, >+ int *pnum_locks, struct g_lock_rec **plocks) >+{ >+ int i, num_locks; >+ struct g_lock_rec *locks; >+ >+ if ((data.dsize % sizeof(struct g_lock_rec)) != 0) { >+ DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize)); >+ return false; >+ } >+ >+ num_locks = data.dsize / sizeof(struct g_lock_rec); >+ locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks); >+ if (locks == NULL) { >+ DEBUG(1, ("talloc failed\n")); >+ return false; >+ } >+ >+ memcpy(locks, data.dptr, data.dsize); >+ >+ DEBUG(10, ("locks:\n")); >+ for (i=0; i<num_locks; i++) { >+ DEBUGADD(10, ("%s: %s %s\n", >+ procid_str(talloc_tos(), &locks[i].pid), >+ ((locks[i].lock_type & 1) == G_LOCK_READ) ? >+ "read" : "write", >+ (locks[i].lock_type & G_LOCK_PENDING) ? >+ "(pending)" : "(owner)")); >+ >+ if (process_exists(locks[i].pid)) { >+ continue; >+ } >+ DEBUGADD(10, ("%s does not exist -- discarding\n", >+ procid_str(talloc_tos(), &locks[i].pid))); >+ >+ if (i < (num_locks-1)) { >+ locks[i] = locks[num_locks-1]; >+ } >+ num_locks -= 1; >+ } >+ >+ *plocks = locks; >+ *pnum_locks = num_locks; >+ return true; >+} >+ >+static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx, >+ struct g_lock_rec *locks, >+ int num_locks, >+ const struct server_id pid, >+ enum g_lock_type lock_type) >+{ >+ struct g_lock_rec *result; >+ >+ result = talloc_realloc(mem_ctx, locks, struct g_lock_rec, >+ num_locks+1); >+ if (result == NULL) { >+ return NULL; >+ } >+ >+ result[num_locks].pid = pid; >+ result[num_locks].lock_type = lock_type; >+ return result; >+} >+ >+static void g_lock_got_retry(struct messaging_context *msg, >+ void *private_data, >+ uint32_t msg_type, >+ struct server_id server_id, >+ DATA_BLOB *data); >+static void g_lock_timedout(struct tevent_context *ev, >+ struct tevent_timer *te, >+ struct timeval current_time, 
>+ void *private_data); >+ >+static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name, >+ enum g_lock_type lock_type) >+{ >+ struct db_record *rec = NULL; >+ struct g_lock_rec *locks = NULL; >+ int i, num_locks; >+ struct server_id self; >+ int our_index; >+ TDB_DATA data; >+ NTSTATUS status = NT_STATUS_OK; >+ NTSTATUS store_status; >+ >+again: >+ rec = ctx->db->fetch_locked(ctx->db, talloc_tos(), >+ string_term_tdb_data(name)); >+ if (rec == NULL) { >+ DEBUG(10, ("fetch_locked(\"%s\") failed\n", name)); >+ status = NT_STATUS_LOCK_NOT_GRANTED; >+ goto done; >+ } >+ >+ if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) { >+ DEBUG(10, ("g_lock_parse for %s failed\n", name)); >+ status = NT_STATUS_INTERNAL_ERROR; >+ goto done; >+ } >+ >+ self = procid_self(); >+ our_index = -1; >+ >+ for (i=0; i<num_locks; i++) { >+ if (procid_equal(&self, &locks[i].pid)) { >+ if (our_index != -1) { >+ DEBUG(1, ("g_lock_trylock: Added ourself " >+ "twice!\n")); >+ status = NT_STATUS_INTERNAL_ERROR; >+ goto done; >+ } >+ if ((locks[i].lock_type & G_LOCK_PENDING) == 0) { >+ DEBUG(1, ("g_lock_trylock: Found ourself not " >+ "pending!\n")); >+ status = NT_STATUS_INTERNAL_ERROR; >+ goto done; >+ } >+ >+ our_index = i; >+ >+ /* never conflict with ourself */ >+ continue; >+ } >+ if (g_lock_conflicts(lock_type, &locks[i])) { >+ struct server_id pid = locks[i].pid; >+ >+ if (!process_exists(pid)) { >+ TALLOC_FREE(locks); >+ TALLOC_FREE(rec); >+ status = g_lock_force_unlock(ctx, name, pid); >+ if (!NT_STATUS_IS_OK(status)) { >+ DEBUG(1, ("Could not unlock dead lock " >+ "holder!\n")); >+ goto done; >+ } >+ goto again; >+ } >+ lock_type |= G_LOCK_PENDING; >+ } >+ } >+ >+ if (our_index == -1) { >+ /* First round, add ourself */ >+ >+ locks = g_lock_addrec(talloc_tos(), locks, num_locks, >+ self, lock_type); >+ if (locks == NULL) { >+ DEBUG(10, ("g_lock_addrec failed\n")); >+ status = NT_STATUS_NO_MEMORY; >+ goto done; >+ } >+ } else { >+ /* >+ * Retry. 
We were pending last time. Overwrite the >+ * stored lock_type with what we calculated, we might >+ * have acquired the lock this time. >+ */ >+ locks[our_index].lock_type = lock_type; >+ } >+ >+ data = make_tdb_data((uint8_t *)locks, talloc_get_size(locks)); >+ store_status = rec->store(rec, data, 0); >+ if (!NT_STATUS_IS_OK(store_status)) { >+ DEBUG(1, ("rec->store failed: %s\n", >+ nt_errstr(store_status))); >+ status = store_status; >+ } >+ >+done: >+ TALLOC_FREE(locks); >+ TALLOC_FREE(rec); >+ >+ if (NT_STATUS_IS_OK(status) && (lock_type & G_LOCK_PENDING) != 0) { >+ return STATUS_PENDING; >+ } >+ >+ return NT_STATUS_OK; >+} >+ >+NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, >+ enum g_lock_type lock_type, struct timeval timeout) >+{ >+ struct tevent_timer *te = NULL; >+ NTSTATUS status; >+ bool retry = false; >+ bool timedout = false; >+ >+ DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type, >+ name)); >+ >+ if (lock_type & ~1) { >+ DEBUG(1, ("Got invalid lock type %d for %s\n", >+ (int)lock_type, name)); >+ return NT_STATUS_INVALID_PARAMETER; >+ } >+ >+#ifdef CLUSTER_SUPPORT >+ if (lp_clustering()) { >+ status = ctdb_watch_us(messaging_ctdbd_connection()); >+ if (!NT_STATUS_IS_OK(status)) { >+ DEBUG(10, ("could not register retry with ctdb: %s\n", >+ nt_errstr(status))); >+ goto done; >+ } >+ } >+#endif >+ >+ status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY, >+ g_lock_got_retry); >+ if (!NT_STATUS_IS_OK(status)) { >+ DEBUG(10, ("messaging_register failed: %s\n", >+ nt_errstr(status))); >+ return status; >+ } >+again: >+ retry = false; >+ >+ status = g_lock_trylock(ctx, name, lock_type); >+ if (NT_STATUS_IS_OK(status)) { >+ DEBUG(10, ("Got lock %s\n", name)); >+ goto done; >+ } >+ if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) { >+ DEBUG(10, ("g_lock_trylock failed: %s\n", >+ nt_errstr(status))); >+ goto done; >+ } >+ >+ if (retry) { >+ goto again; >+ } >+ >+ DEBUG(10, ("g_lock_trylock: Did not get lock, 
waiting...\n")); >+ >+ if (te == NULL) { >+ te = tevent_add_timer( >+ ctx->msg->event_ctx, talloc_tos(), >+ timeval_current_ofs(timeout.tv_sec, timeout.tv_usec), >+ g_lock_timedout, &timedout); >+ if (te == NULL) { >+ DEBUG(10, ("tevent_add_timer failed\n")); >+ status = NT_STATUS_NO_MEMORY; >+ goto done; >+ } >+ } >+ >+ while (true) { >+ if (tevent_loop_once(ctx->msg->event_ctx) == -1) { >+ DEBUG(1, ("tevent_loop_once failed\n")); >+ status = NT_STATUS_INTERNAL_ERROR; >+ goto done; >+ } >+ if (retry) { >+ goto again; >+ } >+ if (timedout) { >+ DEBUG(10, ("g_lock_lock timed out\n")); >+ >+ te = NULL; >+ >+ status = NT_STATUS_LOCK_NOT_GRANTED; >+ goto done; >+ } >+ } >+done: >+ >+ if (!NT_STATUS_IS_OK(status)) { >+ NTSTATUS unlock_status; >+ >+ unlock_status = g_lock_unlock(ctx, name); >+ >+ if (!NT_STATUS_IS_OK(unlock_status)) { >+ DEBUG(1, ("Could not remove ourself from the locking " >+ "db: %s\n", nt_errstr(status))); >+ } >+ } >+ >+ messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry); >+ TALLOC_FREE(te); >+ >+ return status; >+} >+ >+static void g_lock_got_retry(struct messaging_context *msg, >+ void *private_data, >+ uint32_t msg_type, >+ struct server_id server_id, >+ DATA_BLOB *data) >+{ >+ bool *pretry = (bool *)private_data; >+ >+ DEBUG(10, ("Got retry message from pid %s\n", >+ procid_str(talloc_tos(), &server_id))); >+ >+ *pretry = true; >+} >+ >+static void g_lock_timedout(struct tevent_context *ev, >+ struct tevent_timer *te, >+ struct timeval current_time, >+ void *private_data) >+{ >+ bool *ptimedout = (bool *)private_data; >+ *ptimedout = true; >+ TALLOC_FREE(te); >+} >+ >+static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, >+ struct server_id pid) >+{ >+ struct db_record *rec = NULL; >+ struct g_lock_rec *locks = NULL; >+ int i, num_locks; >+ enum g_lock_type lock_type; >+ NTSTATUS status; >+ >+ rec = ctx->db->fetch_locked(ctx->db, talloc_tos(), >+ string_term_tdb_data(name)); >+ if (rec == NULL) { >+ DEBUG(10, 
("fetch_locked(\"%s\") failed\n", name)); >+ status = NT_STATUS_INTERNAL_ERROR; >+ goto done; >+ } >+ >+ if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) { >+ DEBUG(10, ("g_lock_parse for %s failed\n", name)); >+ status = NT_STATUS_INTERNAL_ERROR; >+ goto done; >+ } >+ >+ for (i=0; i<num_locks; i++) { >+ if (procid_equal(&pid, &locks[i].pid)) { >+ break; >+ } >+ } >+ >+ if (i == num_locks) { >+ DEBUG(10, ("g_lock_force_unlock: Lock not found\n")); >+ status = NT_STATUS_INTERNAL_ERROR; >+ goto done; >+ } >+ >+ lock_type = locks[i].lock_type; >+ >+ if (i < (num_locks-1)) { >+ locks[i] = locks[num_locks-1]; >+ } >+ num_locks -= 1; >+ >+ if (num_locks == 0) { >+ status = rec->delete_rec(rec); >+ } else { >+ TDB_DATA data; >+ data = make_tdb_data((uint8_t *)locks, >+ sizeof(struct g_lock_rec) * num_locks); >+ status = rec->store(rec, data, 0); >+ } >+ >+ if (!NT_STATUS_IS_OK(status)) { >+ DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n", >+ nt_errstr(status))); >+ goto done; >+ } >+ >+ if ((lock_type & G_LOCK_PENDING) == 0) { >+ /* >+ * We've been the lock holder. Tell all others to retry. 
>+ */ >+ for (i=0; i<num_locks; i++) { >+ if ((locks[i].lock_type & G_LOCK_PENDING) == 0) { >+ continue; >+ } >+ >+ /* >+ * Ping all waiters to retry >+ */ >+ status = messaging_send(ctx->msg, locks[i].pid, >+ MSG_DBWRAP_G_LOCK_RETRY, >+ &data_blob_null); >+ if (!NT_STATUS_IS_OK(status)) { >+ DEBUG(1, ("sending retry to %s failed: %s\n", >+ procid_str(talloc_tos(), >+ &locks[i].pid), >+ nt_errstr(status))); >+ } >+ } >+ } >+done: >+ >+ TALLOC_FREE(locks); >+ TALLOC_FREE(rec); >+ return status; >+} >+ >+NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name) >+{ >+ NTSTATUS status; >+ >+ status = g_lock_force_unlock(ctx, name, procid_self()); >+ >+#ifdef CLUSTER_SUPPORT >+ if (lp_clustering()) { >+ ctdb_unwatch(messaging_ctdbd_connection()); >+ } >+#endif >+ return status; >+} >+ >+struct g_lock_locks_state { >+ int (*fn)(const char *name, void *private_data); >+ void *private_data; >+}; >+ >+static int g_lock_locks_fn(struct db_record *rec, void *priv) >+{ >+ struct g_lock_locks_state *state = (struct g_lock_locks_state *)priv; >+ >+ if ((rec->key.dsize == 0) || (rec->key.dptr[rec->key.dsize-1] != 0)) { >+ DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n")); >+ return 0; >+ } >+ return state->fn((char *)rec->key.dptr, state->private_data); >+} >+ >+int g_lock_locks(struct g_lock_ctx *ctx, >+ int (*fn)(const char *name, void *private_data), >+ void *private_data) >+{ >+ struct g_lock_locks_state state; >+ >+ state.fn = fn; >+ state.private_data = private_data; >+ >+ return ctx->db->traverse_read(ctx->db, g_lock_locks_fn, &state); >+} >+ >+NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name, >+ int (*fn)(struct server_id pid, >+ enum g_lock_type lock_type, >+ void *private_data), >+ void *private_data) >+{ >+ TDB_DATA data; >+ int i, num_locks; >+ struct g_lock_rec *locks = NULL; >+ bool ret; >+ >+ if (ctx->db->fetch(ctx->db, talloc_tos(), string_term_tdb_data(name), >+ &data) != 0) { >+ return NT_STATUS_NOT_FOUND; >+ } >+ >+ if ((data.dsize == 0) 
|| (data.dptr == NULL)) { >+ return NT_STATUS_OK; >+ } >+ >+ ret = g_lock_parse(talloc_tos(), data, &num_locks, &locks); >+ >+ TALLOC_FREE(data.dptr); >+ >+ if (!ret) { >+ DEBUG(10, ("g_lock_parse for %s failed\n", name)); >+ return NT_STATUS_INTERNAL_ERROR; >+ } >+ >+ for (i=0; i<num_locks; i++) { >+ if (fn(locks[i].pid, locks[i].lock_type, private_data) != 0) { >+ break; >+ } >+ } >+ TALLOC_FREE(locks); >+ return NT_STATUS_OK; >+} >+ >+struct g_lock_get_state { >+ bool found; >+ struct server_id *pid; >+}; >+ >+static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type, >+ void *priv) >+{ >+ struct g_lock_get_state *state = (struct g_lock_get_state *)priv; >+ >+ if ((lock_type & G_LOCK_PENDING) != 0) { >+ return 0; >+ } >+ >+ state->found = true; >+ *state->pid = pid; >+ return 1; >+} >+ >+NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name, >+ struct server_id *pid) >+{ >+ struct g_lock_get_state state; >+ NTSTATUS status; >+ >+ state.found = false; >+ state.pid = pid; >+ >+ status = g_lock_dump(ctx, name, g_lock_get_fn, &state); >+ if (!NT_STATUS_IS_OK(status)) { >+ return status; >+ } >+ if (!state.found) { >+ return NT_STATUS_NOT_FOUND; >+ } >+ return NT_STATUS_OK; >+} >diff --git a/source3/librpc/gen_ndr/messaging.h b/source3/librpc/gen_ndr/messaging.h >index 225440a..1312c84 100644 >--- a/source3/librpc/gen_ndr/messaging.h >+++ b/source3/librpc/gen_ndr/messaging.h >@@ -62,7 +62,8 @@ enum messaging_type > MSG_WINBIND_VALIDATE_CACHE=(int)(0x0408), > MSG_WINBIND_DUMP_DOMAIN_LIST=(int)(0x0409), > MSG_DUMP_EVENT_LIST=(int)(0x0500), >- MSG_DBWRAP_TDB2_CHANGES=(int)(4001) >+ MSG_DBWRAP_TDB2_CHANGES=(int)(4001), >+ MSG_DBWRAP_G_LOCK_RETRY=(int)(4002) > } > #else > { __donnot_use_enum_messaging_type=0x7FFFFFFF} >@@ -118,6 +119,7 @@ enum messaging_type > #define MSG_WINBIND_DUMP_DOMAIN_LIST ( 0x0409 ) > #define MSG_DUMP_EVENT_LIST ( 0x0500 ) > #define MSG_DBWRAP_TDB2_CHANGES ( 4001 ) >+#define MSG_DBWRAP_G_LOCK_RETRY ( 4002 ) > #endif > ; > 
>diff --git a/source3/librpc/gen_ndr/ndr_messaging.c b/source3/librpc/gen_ndr/ndr_messaging.c >index 3e2aa1f..1452630 100644 >--- a/source3/librpc/gen_ndr/ndr_messaging.c >+++ b/source3/librpc/gen_ndr/ndr_messaging.c >@@ -74,6 +74,7 @@ _PUBLIC_ void ndr_print_messaging_type(struct ndr_print *ndr, const char *name, > case MSG_WINBIND_DUMP_DOMAIN_LIST: val = "MSG_WINBIND_DUMP_DOMAIN_LIST"; break; > case MSG_DUMP_EVENT_LIST: val = "MSG_DUMP_EVENT_LIST"; break; > case MSG_DBWRAP_TDB2_CHANGES: val = "MSG_DBWRAP_TDB2_CHANGES"; break; >+ case MSG_DBWRAP_G_LOCK_RETRY: val = "MSG_DBWRAP_G_LOCK_RETRY"; break; > } > ndr_print_enum(ndr, name, "ENUM", val, r); > } >diff --git a/source3/librpc/idl/messaging.idl b/source3/librpc/idl/messaging.idl >index 0686585..08caa59 100644 >--- a/source3/librpc/idl/messaging.idl >+++ b/source3/librpc/idl/messaging.idl >@@ -88,7 +88,8 @@ interface messaging > MSG_DUMP_EVENT_LIST = 0x0500, > > /* dbwrap messages 4001-4999 */ >- MSG_DBWRAP_TDB2_CHANGES = 4001 >+ MSG_DBWRAP_TDB2_CHANGES = 4001, >+ MSG_DBWRAP_G_LOCK_RETRY = 4002 > } messaging_type; > > /* messaging struct sent across the sockets and stored in the tdb */ >diff --git a/source3/utils/net.c b/source3/utils/net.c >index 85c3c7d..0c5f080 100644 >--- a/source3/utils/net.c >+++ b/source3/utils/net.c >@@ -612,6 +612,13 @@ static struct functable net_func[] = { > N_(" Use 'net help lookup' to get more information about 'net " > "lookup' commands.") > }, >+ { "g_lock", >+ net_g_lock, >+ NET_TRANSPORT_LOCAL, >+ N_("Manipulate the global lock table"), >+ N_(" Use 'net help g_lock' to get more information about " >+ "'net g_lock' commands.") >+ }, > { "join", > net_join, > NET_TRANSPORT_ADS | NET_TRANSPORT_RPC, >diff --git a/source3/utils/net_g_lock.c b/source3/utils/net_g_lock.c >new file mode 100644 >index 0000000..f30ed33 >--- /dev/null >+++ b/source3/utils/net_g_lock.c >@@ -0,0 +1,213 @@ >+/* >+ * Samba Unix/Linux SMB client library >+ * Interface to the g_lock facility >+ * Copyright (C) 
Volker Lendecke 2009 >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation; either version 3 of the License, or >+ * (at your option) any later version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program. If not, see <http://www.gnu.org/licenses/>. >+ */ >+ >+#include "includes.h" >+#include "net.h" >+#include "g_lock.h" >+ >+static bool net_g_lock_init(TALLOC_CTX *mem_ctx, >+ struct tevent_context **pev, >+ struct messaging_context **pmsg, >+ struct g_lock_ctx **pg_ctx) >+{ >+ struct tevent_context *ev = NULL; >+ struct messaging_context *msg = NULL; >+ struct g_lock_ctx *g_ctx = NULL; >+ >+ ev = tevent_context_init(talloc_tos()); >+ if (ev == NULL) { >+ d_fprintf(stderr, "ERROR: could not init event context\n"); >+ goto fail; >+ } >+ msg = messaging_init(talloc_tos(), server_id_self(), ev); >+ if (msg == NULL) { >+ d_fprintf(stderr, "ERROR: could not init messaging context\n"); >+ goto fail; >+ } >+ g_ctx = g_lock_ctx_init(talloc_tos(), msg); >+ if (g_ctx == NULL) { >+ d_fprintf(stderr, "ERROR: could not init g_lock context\n"); >+ goto fail; >+ } >+ >+ *pev = ev; >+ *pmsg = msg; >+ *pg_ctx = g_ctx; >+ return true; >+fail: >+ TALLOC_FREE(g_ctx); >+ TALLOC_FREE(msg); >+ TALLOC_FREE(ev); >+ return false; >+} >+ >+ >+static int net_g_lock_do(struct net_context *c, int argc, const char **argv) >+{ >+ struct tevent_context *ev = NULL; >+ struct messaging_context *msg = NULL; >+ struct g_lock_ctx *g_ctx = NULL; >+ const char *name, *cmd; >+ int timeout, res; >+ bool locked = false; >+ NTSTATUS status; >+ int ret = -1; >+ >+ if (argc 
!= 3) { >+ d_printf("Usage: net g_lock do <lockname> <timeout> " >+ "<command>\n"); >+ return -1; >+ } >+ name = argv[0]; >+ timeout = atoi(argv[1]); >+ cmd = argv[2]; >+ >+ if (!net_g_lock_init(talloc_tos(), &ev, &msg, &g_ctx)) { >+ goto done; >+ } >+ >+ status = g_lock_lock(g_ctx, name, G_LOCK_WRITE, >+ timeval_set(timeout / 1000, timeout % 1000)); >+ if (!NT_STATUS_IS_OK(status)) { >+ d_fprintf(stderr, "ERROR: Could not get lock: %s\n", >+ nt_errstr(status)); >+ goto done; >+ } >+ locked = true; >+ >+ res = system(cmd); >+ >+ if (res == -1) { >+ d_fprintf(stderr, "ERROR: system() returned %s\n", >+ strerror(errno)); >+ goto done; >+ } >+ d_fprintf(stderr, "command returned %d\n", res); >+ >+ ret = 0; >+ >+done: >+ if (locked) { >+ g_lock_unlock(g_ctx, name); >+ } >+ TALLOC_FREE(g_ctx); >+ TALLOC_FREE(msg); >+ TALLOC_FREE(ev); >+ return ret; >+} >+ >+static int net_g_lock_dump_fn(struct server_id pid, enum g_lock_type lock_type, >+ void *private_data) >+{ >+ char *pidstr; >+ >+ pidstr = procid_str(talloc_tos(), &pid); >+ d_printf("%s: %s (%s)\n", pidstr, >+ (lock_type & 1) ? "WRITE" : "READ", >+ (lock_type & G_LOCK_PENDING) ? 
"pending" : "holder"); >+ TALLOC_FREE(pidstr); >+ return 0; >+} >+ >+static int net_g_lock_dump(struct net_context *c, int argc, const char **argv) >+{ >+ struct tevent_context *ev = NULL; >+ struct messaging_context *msg = NULL; >+ struct g_lock_ctx *g_ctx = NULL; >+ NTSTATUS status; >+ int ret = -1; >+ >+ if (argc != 1) { >+ d_printf("Usage: net g_lock dump <lockname>\n"); >+ return -1; >+ } >+ >+ if (!net_g_lock_init(talloc_tos(), &ev, &msg, &g_ctx)) { >+ goto done; >+ } >+ >+ status = g_lock_dump(g_ctx, argv[0], net_g_lock_dump_fn, NULL); >+ >+ ret = 0; >+done: >+ TALLOC_FREE(g_ctx); >+ TALLOC_FREE(msg); >+ TALLOC_FREE(ev); >+ return ret; >+} >+ >+static int net_g_lock_locks_fn(const char *name, void *private_data) >+{ >+ d_printf("%s\n", name); >+ return 0; >+} >+ >+static int net_g_lock_locks(struct net_context *c, int argc, const char **argv) >+{ >+ struct tevent_context *ev = NULL; >+ struct messaging_context *msg = NULL; >+ struct g_lock_ctx *g_ctx = NULL; >+ int ret = -1; >+ >+ if (argc != 0) { >+ d_printf("Usage: net g_lock locks\n"); >+ return -1; >+ } >+ >+ if (!net_g_lock_init(talloc_tos(), &ev, &msg, &g_ctx)) { >+ goto done; >+ } >+ >+ ret = g_lock_locks(g_ctx, net_g_lock_locks_fn, NULL); >+done: >+ TALLOC_FREE(g_ctx); >+ TALLOC_FREE(msg); >+ TALLOC_FREE(ev); >+ return ret; >+} >+ >+int net_g_lock(struct net_context *c, int argc, const char **argv) >+{ >+ struct functable func[] = { >+ { >+ "do", >+ net_g_lock_do, >+ NET_TRANSPORT_LOCAL, >+ N_("Execute a shell command under a lock"), >+ N_("net g_lock do <lock name> <timeout> <command>\n") >+ }, >+ { >+ "locks", >+ net_g_lock_locks, >+ NET_TRANSPORT_LOCAL, >+ N_("List all locknames"), >+ N_("net g_lock locks\n") >+ }, >+ { >+ "dump", >+ net_g_lock_dump, >+ NET_TRANSPORT_LOCAL, >+ N_("Dump a g_lock locking table"), >+ N_("net g_lock dump <lock name>\n") >+ }, >+ {NULL, NULL, 0, NULL, NULL} >+ }; >+ >+ return net_run_function(c, argc, argv, "net g_lock", func); >+} >diff --git 
a/source3/utils/net_proto.h b/source3/utils/net_proto.h >index 098e2a2..540d31e 100644 >--- a/source3/utils/net_proto.h >+++ b/source3/utils/net_proto.h >@@ -497,4 +497,7 @@ NTSTATUS net_lookup_sid_from_name(struct net_context *c, TALLOC_CTX *ctx, > char *stdin_new_passwd( void); > char *get_pass( const char *prompt, bool stdin_get); > >+/* The following definitions come from utils/net_g_lock.c */ >+int net_g_lock(struct net_context *c, int argc, const char **argv); >+ > #endif /* _NET_PROTO_H_ */ >-- >1.6.3.3 > > >From 7baaa858d2e60c7c18a0ced4df6b9fbefa63dcc2 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Fri, 4 Dec 2009 13:22:30 +0100 >Subject: [PATCH 02/32] s3: Add ctdb_conn_msg_ctx() > (cherry picked from commit 12abab711b58237ddccfa1d9bb526f8c7dbb6e9f) > >--- > source3/include/ctdbd_conn.h | 1 + > source3/lib/ctdbd_conn.c | 5 +++++ > 2 files changed, 6 insertions(+), 0 deletions(-) > >diff --git a/source3/include/ctdbd_conn.h b/source3/include/ctdbd_conn.h >index 96e7aa0..71516c7 100644 >--- a/source3/include/ctdbd_conn.h >+++ b/source3/include/ctdbd_conn.h >@@ -31,6 +31,7 @@ uint32 ctdbd_vnn(const struct ctdbd_connection *conn); > > NTSTATUS ctdbd_register_msg_ctx(struct ctdbd_connection *conn, > struct messaging_context *msg_ctx); >+struct messaging_context *ctdb_conn_msg_ctx(struct ctdbd_connection *conn); > > NTSTATUS ctdbd_messaging_send(struct ctdbd_connection *conn, > uint32 dst_vnn, uint64 dst_srvid, >diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c >index 900fa34..743594d 100644 >--- a/source3/lib/ctdbd_conn.c >+++ b/source3/lib/ctdbd_conn.c >@@ -514,6 +514,11 @@ NTSTATUS ctdbd_messaging_connection(TALLOC_CTX *mem_ctx, > return status; > } > >+struct messaging_context *ctdb_conn_msg_ctx(struct ctdbd_connection *conn) >+{ >+ return conn->msg_ctx; >+} >+ > /* > * Packet handler to receive and handle a ctdb message > */ >-- >1.6.3.3 > > >From 3c72b313193b3baeb819f25882b0623919c26e59 Mon Sep 17 00:00:00 2001 >From: 
Volker Lendecke <vl@samba.org> >Date: Mon, 7 Dec 2009 00:36:51 +0100 >Subject: [PATCH 03/32] s3: setup debug for smbtorture > (cherry picked from commit b13dd17840a598ae3441e48b130a2b2a2b940572) > >--- > source3/torture/torture.c | 2 ++ > 1 files changed, 2 insertions(+), 0 deletions(-) > >diff --git a/source3/torture/torture.c b/source3/torture/torture.c >index 9abcabd..b0f5d82 100644 >--- a/source3/torture/torture.c >+++ b/source3/torture/torture.c >@@ -7318,6 +7318,8 @@ static void usage(void) > > load_case_tables(); > >+ setup_logging("smbtorture", true); >+ > if (is_default_dyn_CONFIGFILE()) { > if(getenv("SMB_CONF_PATH")) { > set_dyn_CONFIGFILE(getenv("SMB_CONF_PATH")); >-- >1.6.3.3 > > >From a5dc99f425605ffeb93042c21d361cba55381a96 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Fri, 11 Dec 2009 15:37:52 +0100 >Subject: [PATCH 04/32] s3:torture: add a test LOCAL-DBTRANS to torture dbwrap with transactions. > >--- > source3/torture/torture.c | 130 +++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 130 insertions(+), 0 deletions(-) > >diff --git a/source3/torture/torture.c b/source3/torture/torture.c >index b0f5d82..fe5bbf3 100644 >--- a/source3/torture/torture.c >+++ b/source3/torture/torture.c >@@ -7031,6 +7031,135 @@ fail: > return result; > } > >+static bool dbtrans_inc(struct db_context *db) >+{ >+ struct db_record *rec; >+ uint32_t *val; >+ bool ret = false; >+ NTSTATUS status; >+ >+ rec = db->fetch_locked(db, db, string_term_tdb_data("transtest")); >+ if (rec == NULL) { >+ printf(__location__ "fetch_lock failed\n"); >+ return false; >+ } >+ >+ if (rec->value.dsize != sizeof(uint32_t)) { >+ printf(__location__ "value.dsize = %d\n", >+ (int)rec->value.dsize); >+ goto fail; >+ } >+ >+ val = (uint32_t *)rec->value.dptr; >+ *val += 1; >+ >+ status = rec->store(rec, make_tdb_data((uint8_t *)val, >+ sizeof(uint32_t)), >+ 0); >+ if (!NT_STATUS_IS_OK(status)) { >+ printf(__location__ "store failed: %s\n", >+ 
nt_errstr(status)); >+ goto fail; >+ } >+ >+ ret = true; >+fail: >+ TALLOC_FREE(rec); >+ return ret; >+} >+ >+static bool run_local_dbtrans(int dummy) >+{ >+ struct db_context *db; >+ struct db_record *rec; >+ NTSTATUS status; >+ uint32_t initial; >+ int res; >+ >+ db = db_open(talloc_tos(), "transtest.tdb", 0, TDB_DEFAULT, >+ O_RDWR|O_CREAT, 0600); >+ if (db == NULL) { >+ printf("Could not open transtest.db\n"); >+ return false; >+ } >+ >+ res = db->transaction_start(db); >+ if (res == -1) { >+ printf(__location__ "transaction_start failed\n"); >+ return false; >+ } >+ >+ rec = db->fetch_locked(db, db, string_term_tdb_data("transtest")); >+ if (rec == NULL) { >+ printf(__location__ "fetch_lock failed\n"); >+ return false; >+ } >+ >+ if (rec->value.dptr == NULL) { >+ initial = 0; >+ status = rec->store( >+ rec, make_tdb_data((uint8_t *)&initial, >+ sizeof(initial)), >+ 0); >+ if (!NT_STATUS_IS_OK(status)) { >+ printf(__location__ "store returned %s\n", >+ nt_errstr(status)); >+ return false; >+ } >+ } >+ >+ TALLOC_FREE(rec); >+ >+ res = db->transaction_commit(db); >+ if (res == -1) { >+ printf(__location__ "transaction_commit failed\n"); >+ return false; >+ } >+ >+ while (true) { >+ uint32_t val, val2; >+ int i; >+ >+ res = db->transaction_start(db); >+ if (res == -1) { >+ printf(__location__ "transaction_start failed\n"); >+ break; >+ } >+ >+ if (!dbwrap_fetch_uint32(db, "transtest", &val)) { >+ printf(__location__ "dbwrap_fetch_uint32 failed\n"); >+ break; >+ } >+ >+ for (i=0; i<10; i++) { >+ if (!dbtrans_inc(db)) { >+ return false; >+ } >+ } >+ >+ if (!dbwrap_fetch_uint32(db, "transtest", &val2)) { >+ printf(__location__ "dbwrap_fetch_uint32 failed\n"); >+ break; >+ } >+ >+ if (val2 != val + 10) { >+ printf(__location__ "val=%d, val2=%d\n", >+ (int)val, (int)val2); >+ break; >+ } >+ >+ printf("val2=%d\r", val2); >+ >+ res = db->transaction_commit(db); >+ if (res == -1) { >+ printf(__location__ "transaction_commit failed\n"); >+ break; >+ } >+ } >+ >+ 
TALLOC_FREE(db); >+ return true; >+} > > static double create_procs(bool (*fn)(int), bool *result) > { >@@ -7205,6 +7334,7 @@ static struct { > { "LOCAL-MEMCACHE", run_local_memcache, 0}, > { "LOCAL-STREAM-NAME", run_local_stream_name, 0}, > { "LOCAL-WBCLIENT", run_local_wbclient, 0}, >+ { "LOCAL-DBTRANS", run_local_dbtrans, 0}, > {NULL, NULL, 0}}; > > >-- >1.6.3.3 > > >From 29badc06940827bddd3bfe3554a14b914528f6cb Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Thu, 3 Dec 2009 18:43:49 +0100 >Subject: [PATCH 05/32] s3: Add tdb_data_equal > (cherry picked from commit ebc08b9938a4d266be16ca7e06d27813952cd00f) > >--- > lib/util/util_tdb.c | 8 ++++++++ > lib/util/util_tdb.h | 1 + > 2 files changed, 9 insertions(+), 0 deletions(-) > >diff --git a/lib/util/util_tdb.c b/lib/util/util_tdb.c >index cda8dc7..46dbf6d 100644 >--- a/lib/util/util_tdb.c >+++ b/lib/util/util_tdb.c >@@ -38,6 +38,14 @@ TDB_DATA make_tdb_data(const uint8_t *dptr, size_t dsize) > return ret; > } > >+bool tdb_data_equal(TDB_DATA t1, TDB_DATA t2) >+{ >+ if (t1.dsize != t2.dsize) { >+ return false; >+ } >+ return (memcmp(t1.dptr, t2.dptr, t1.dsize) == 0); >+} >+ > TDB_DATA string_tdb_data(const char *string) > { > return make_tdb_data((const uint8_t *)string, string ? 
strlen(string) : 0 ); >diff --git a/lib/util/util_tdb.h b/lib/util/util_tdb.h >index da6378e..79c4671 100644 >--- a/lib/util/util_tdb.h >+++ b/lib/util/util_tdb.h >@@ -6,6 +6,7 @@ > Make a TDB_DATA and keep the const warning in one place > ****************************************************************/ > TDB_DATA make_tdb_data(const uint8_t *dptr, size_t dsize); >+bool tdb_data_equal(TDB_DATA t1, TDB_DATA t2); > TDB_DATA string_tdb_data(const char *string); > TDB_DATA string_term_tdb_data(const char *string); > >-- >1.6.3.3 > > >From 15cfedd40fe948852c5906269a45715d17b29e2b Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Thu, 3 Dec 2009 17:29:54 +0100 >Subject: [PATCH 06/32] s3:dbwrap_ctdb: start rewrite of transactions using the global lock (g_lock) > >This simplifies the transaction code a lot: > >* transaction_start essentially consists of acquiring a global lock. > >* No write operations at all are performed on the local database > until the transaction is committed: Every store operation is just > going into the marshall buffer. > >* The commit operation calls a new simplified TRANS3_COMMIT control > in ctdb which rolls out the changes to all nodes including the > node that is performing the transaction. 
> >Michael >(cherry picked from commit 16bc6ba2268e3660d026076264de8666356e00bf) >--- > source3/lib/dbwrap_ctdb.c | 480 +++++++++++++++------------------------------ > 1 files changed, 160 insertions(+), 320 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index 8e188d0..c32398a 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -22,10 +22,10 @@ > #include "ctdb.h" > #include "ctdb_private.h" > #include "ctdbd_conn.h" >+#include "g_lock.h" > > struct db_ctdb_transaction_handle { > struct db_ctdb_ctx *ctx; >- bool in_replay; > /* > * we store the reads and writes done under a transaction: > * - one list stores both reads and writes (m_all), >@@ -35,6 +35,7 @@ struct db_ctdb_transaction_handle { > struct ctdb_marshall_buffer *m_write; > uint32_t nesting; > bool nested_cancel; >+ char *lock_name; > }; > > struct db_ctdb_ctx { >@@ -42,6 +43,7 @@ struct db_ctdb_ctx { > struct tdb_wrap *wtdb; > uint32 db_id; > struct db_ctdb_transaction_handle *transaction; >+ struct g_lock_ctx *lock_ctx; > }; > > struct db_ctdb_rec { >@@ -297,145 +299,21 @@ static struct ctdb_rec_data *db_ctdb_marshall_loop_next(struct ctdb_marshall_buf > return r; > } > >- >-static int32_t db_ctdb_transaction_active(uint32_t db_id) >-{ >- int32_t status; >- NTSTATUS ret; >- TDB_DATA indata; >- >- indata.dptr = (uint8_t *)&db_id; >- indata.dsize = sizeof(db_id); >- >- ret = ctdbd_control_local(messaging_ctdbd_connection(), >- CTDB_CONTROL_TRANS2_ACTIVE, 0, 0, >- indata, NULL, NULL, &status); >- >- if (!NT_STATUS_IS_OK(ret)) { >- DEBUG(2, ("ctdb control TRANS2_ACTIVE failed\n")); >- return -1; >- } >- >- return status; >-} >- >- > /** > * CTDB transaction destructor > */ > static int db_ctdb_transaction_destructor(struct db_ctdb_transaction_handle *h) > { >- tdb_transaction_cancel(h->ctx->wtdb->tdb); >- return 0; >-} >- >-/** >- * start a transaction on a ctdb database: >- * - lock the transaction lock key >- * - start the tdb transaction >- */ 
>-static int db_ctdb_transaction_fetch_start(struct db_ctdb_transaction_handle *h) >-{ >- struct db_record *rh; >- struct db_ctdb_rec *crec; >- TDB_DATA key; >- TALLOC_CTX *tmp_ctx; >- const char *keyname = CTDB_TRANSACTION_LOCK_KEY; >- int ret; >- struct db_ctdb_ctx *ctx = h->ctx; >- TDB_DATA data; >- pid_t pid; > NTSTATUS status; >- struct ctdb_ltdb_header header; >- int32_t transaction_status; > >- key.dptr = (uint8_t *)discard_const(keyname); >- key.dsize = strlen(keyname); >- >-again: >- tmp_ctx = talloc_new(h); >- >- rh = fetch_locked_internal(ctx, tmp_ctx, key, true); >- if (rh == NULL) { >- DEBUG(0,(__location__ " Failed to fetch_lock database\n")); >- talloc_free(tmp_ctx); >- return -1; >- } >- crec = talloc_get_type_abort(rh->private_data, struct db_ctdb_rec); >- >- transaction_status = db_ctdb_transaction_active(ctx->db_id); >- if (transaction_status == 1) { >- unsigned long int usec = (1000 + random()) % 100000; >- DEBUG(3, ("Transaction already active on db_id[0x%08x]." >- "Re-trying after %lu microseconds...", >- ctx->db_id, usec)); >- talloc_free(tmp_ctx); >- usleep(usec); >- goto again; >- } >- >- /* >- * store the pid in the database: >- * it is not enought that the node is dmaster... 
>- */ >- pid = getpid(); >- data.dptr = (unsigned char *)&pid; >- data.dsize = sizeof(pid_t); >- crec->header.rsn++; >- crec->header.dmaster = get_my_vnn(); >- status = db_ctdb_ltdb_store(ctx, key, &(crec->header), data); >+ status = g_lock_unlock(h->ctx->lock_ctx, h->lock_name); > if (!NT_STATUS_IS_OK(status)) { >- DEBUG(0, (__location__ " Failed to store pid in transaction " >- "record: %s\n", nt_errstr(status))); >- talloc_free(tmp_ctx); >+ DEBUG(0, ("g_lock_unlock failed: %s\n", nt_errstr(status))); > return -1; > } >- >- talloc_free(rh); >- >- ret = tdb_transaction_start(ctx->wtdb->tdb); >- if (ret != 0) { >- DEBUG(0,(__location__ " Failed to start tdb transaction\n")); >- talloc_free(tmp_ctx); >- return -1; >- } >- >- status = db_ctdb_ltdb_fetch(ctx, key, &header, tmp_ctx, &data); >- if (!NT_STATUS_IS_OK(status)) { >- DEBUG(0, (__location__ " failed to refetch transaction lock " >- "record inside transaction: %s - retrying\n", >- nt_errstr(status))); >- tdb_transaction_cancel(ctx->wtdb->tdb); >- talloc_free(tmp_ctx); >- goto again; >- } >- >- if (header.dmaster != get_my_vnn()) { >- DEBUG(3, (__location__ " refetch transaction lock record : " >- "we are not dmaster any more " >- "(dmaster[%u] != my_vnn[%u]) - retrying\n", >- header.dmaster, get_my_vnn())); >- tdb_transaction_cancel(ctx->wtdb->tdb); >- talloc_free(tmp_ctx); >- goto again; >- } >- >- if ((data.dsize != sizeof(pid_t)) || (*(pid_t *)(data.dptr) != pid)) { >- DEBUG(3, (__location__ " refetch transaction lock record: " >- "another local process has started a transaction " >- "(stored pid [%u] != my pid [%u]) - retrying\n", >- *(pid_t *)(data.dptr), pid)); >- tdb_transaction_cancel(ctx->wtdb->tdb); >- talloc_free(tmp_ctx); >- goto again; >- } >- >- talloc_free(tmp_ctx); >- > return 0; > } > >- > /** > * CTDB dbwrap API: transaction_start function > * starts a transaction on a persistent database >@@ -443,7 +321,7 @@ again: > static int db_ctdb_transaction_start(struct db_context *db) > { > struct 
db_ctdb_transaction_handle *h; >- int ret; >+ NTSTATUS status; > struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data, > struct db_ctdb_ctx); > >@@ -466,9 +344,22 @@ static int db_ctdb_transaction_start(struct db_context *db) > > h->ctx = ctx; > >- ret = db_ctdb_transaction_fetch_start(h); >- if (ret != 0) { >- talloc_free(h); >+ h->lock_name = talloc_asprintf(h, "transaction_db_0x%08x", >+ (unsigned int)ctx->db_id); >+ if (h->lock_name == NULL) { >+ DEBUG(0, ("talloc_asprintf failed\n")); >+ TALLOC_FREE(h); >+ return -1; >+ } >+ >+ /* >+ * Wait a day, i.e. forever... >+ */ >+ status = g_lock_lock(ctx->lock_ctx, h->lock_name, G_LOCK_WRITE, >+ timeval_set(86400, 0)); >+ if (!NT_STATUS_IS_OK(status)) { >+ DEBUG(0, ("g_lock_lock failed: %s\n", nt_errstr(status))); >+ TALLOC_FREE(h); > return -1; > } > >@@ -481,7 +372,65 @@ static int db_ctdb_transaction_start(struct db_context *db) > return 0; > } > >+static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf, >+ TDB_DATA key, >+ struct ctdb_ltdb_header *pheader, >+ TALLOC_CTX *mem_ctx, >+ TDB_DATA *pdata) >+{ >+ struct ctdb_rec_data *rec = NULL; >+ struct ctdb_ltdb_header h; >+ bool found; >+ TDB_DATA data; >+ int i; >+ >+ if (buf == NULL) { >+ return false; >+ } >+ >+ /* >+ * Walk the list of records written during this >+ * transaction. If we want to read one we have already >+ * written, return the last written sample. Thus we do not do >+ * a "break;" for the first hit, this record might have been >+ * overwritten later. 
>+ */ >+ >+ for (i=0; i<buf->count; i++) { >+ TDB_DATA tkey, tdata; >+ uint32_t reqid; >+ >+ rec = db_ctdb_marshall_loop_next(buf, rec, &reqid, &h, &tkey, >+ &tdata); >+ if (rec == NULL) { >+ return false; >+ } >+ >+ if (tdb_data_equal(key, tkey)) { >+ found = true; >+ data = tdata; >+ } >+ } >+ >+ if (!found) { >+ return false; >+ } >+ >+ if (pdata != NULL) { >+ data.dptr = (uint8_t *)talloc_memdup(mem_ctx, data.dptr, >+ data.dsize); >+ if ((data.dsize != 0) && (data.dptr == NULL)) { >+ return false; >+ } >+ *pdata = data; >+ } >+ >+ if (pheader != NULL) { >+ *pheader = h; >+ } > >+ return true; >+} > > /* > fetch a record inside a transaction >@@ -492,6 +441,13 @@ static int db_ctdb_transaction_fetch(struct db_ctdb_ctx *db, > { > struct db_ctdb_transaction_handle *h = db->transaction; > NTSTATUS status; >+ bool found; >+ >+ found = pull_newest_from_marshall_buffer(h->m_write, key, NULL, >+ mem_ctx, data); >+ if (found) { >+ return 0; >+ } > > status = db_ctdb_ltdb_fetch(h->ctx, key, NULL, mem_ctx, data); > >@@ -501,14 +457,14 @@ static int db_ctdb_transaction_fetch(struct db_ctdb_ctx *db, > return -1; > } > >- if (!h->in_replay) { >- h->m_all = db_ctdb_marshall_add(h, h->m_all, h->ctx->db_id, 1, key, NULL, *data); >- if (h->m_all == NULL) { >- DEBUG(0,(__location__ " Failed to add to marshalling record\n")); >- data->dsize = 0; >- talloc_free(data->dptr); >- return -1; >- } >+ h->m_all = db_ctdb_marshall_add(h, h->m_all, h->ctx->db_id, 1, key, >+ NULL, *data); >+ if (h->m_all == NULL) { >+ DEBUG(0,(__location__ " Failed to add to marshalling " >+ "record\n")); >+ data->dsize = 0; >+ talloc_free(data->dptr); >+ return -1; > } > > return 0; >@@ -543,6 +499,11 @@ static struct db_record *db_ctdb_fetch_locked_transaction(struct db_ctdb_ctx *ct > result->store = db_ctdb_store_transaction; > result->delete_rec = db_ctdb_delete_transaction; > >+ if (pull_newest_from_marshall_buffer(ctx->transaction->m_write, key, >+ NULL, result, &result->value)) { >+ return result; >+ 
} >+ > ctdb_data = tdb_fetch(ctx->wtdb->tdb, key); > if (ctdb_data.dptr == NULL) { > /* create the record */ >@@ -619,27 +580,35 @@ static int db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h, > TDB_DATA key, TDB_DATA data) > { > TALLOC_CTX *tmp_ctx = talloc_new(h); >- int ret; > TDB_DATA rec; > struct ctdb_ltdb_header header; >- NTSTATUS status; >+ >+ ZERO_STRUCT(header); > > /* we need the header so we can update the RSN */ >- rec = tdb_fetch(h->ctx->wtdb->tdb, key); >- if (rec.dptr == NULL) { >- /* the record doesn't exist - create one with us as dmaster. >- This is only safe because we are in a transaction and this >- is a persistent database */ >- ZERO_STRUCT(header); >- } else { >- memcpy(&header, rec.dptr, sizeof(struct ctdb_ltdb_header)); >- rec.dsize -= sizeof(struct ctdb_ltdb_header); >- /* a special case, we are writing the same data that is there now */ >- if (data.dsize == rec.dsize && >- memcmp(data.dptr, rec.dptr + sizeof(struct ctdb_ltdb_header), data.dsize) == 0) { >- SAFE_FREE(rec.dptr); >- talloc_free(tmp_ctx); >- return 0; >+ >+ if (!pull_newest_from_marshall_buffer(h->m_write, key, &header, >+ NULL, NULL)) { >+ >+ rec = tdb_fetch(h->ctx->wtdb->tdb, key); >+ >+ if (rec.dptr != NULL) { >+ memcpy(&header, rec.dptr, >+ sizeof(struct ctdb_ltdb_header)); >+ rec.dsize -= sizeof(struct ctdb_ltdb_header); >+ >+ /* >+ * a special case, we are writing the same >+ * data that is there now >+ */ >+ if (data.dsize == rec.dsize && >+ memcmp(data.dptr, >+ rec.dptr + sizeof(struct ctdb_ltdb_header), >+ data.dsize) == 0) { >+ SAFE_FREE(rec.dptr); >+ talloc_free(tmp_ctx); >+ return 0; >+ } > } > SAFE_FREE(rec.dptr); > } >@@ -647,13 +616,13 @@ static int db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h, > header.dmaster = get_my_vnn(); > header.rsn++; > >- if (!h->in_replay) { >- h->m_all = db_ctdb_marshall_add(h, h->m_all, h->ctx->db_id, 0, key, NULL, data); >- if (h->m_all == NULL) { >- DEBUG(0,(__location__ " Failed to add to 
marshalling record\n")); >- talloc_free(tmp_ctx); >- return -1; >- } >+ h->m_all = db_ctdb_marshall_add(h, h->m_all, h->ctx->db_id, 0, key, >+ NULL, data); >+ if (h->m_all == NULL) { >+ DEBUG(0,(__location__ " Failed to add to marshalling " >+ "record\n")); >+ talloc_free(tmp_ctx); >+ return -1; > } > > h->m_write = db_ctdb_marshall_add(h, h->m_write, h->ctx->db_id, 0, key, &header, data); >@@ -663,16 +632,8 @@ static int db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h, > return -1; > } > >- status = db_ctdb_ltdb_store(h->ctx, key, &header, data); >- if (NT_STATUS_IS_OK(status)) { >- ret = 0; >- } else { >- ret = -1; >- } >- > talloc_free(tmp_ctx); >- >- return ret; >+ return 0; > } > > >@@ -708,64 +669,6 @@ static NTSTATUS db_ctdb_delete_transaction(struct db_record *rec) > return NT_STATUS_OK; > } > >- >-/* >- replay a transaction >- */ >-static int ctdb_replay_transaction(struct db_ctdb_transaction_handle *h) >-{ >- int ret, i; >- struct ctdb_rec_data *rec = NULL; >- >- h->in_replay = true; >- talloc_free(h->m_write); >- h->m_write = NULL; >- >- ret = db_ctdb_transaction_fetch_start(h); >- if (ret != 0) { >- return ret; >- } >- >- for (i=0;i<h->m_all->count;i++) { >- TDB_DATA key, data; >- >- rec = db_ctdb_marshall_loop_next(h->m_all, rec, NULL, NULL, &key, &data); >- if (rec == NULL) { >- DEBUG(0, (__location__ " Out of records in ctdb_replay_transaction?\n")); >- goto failed; >- } >- >- if (rec->reqid == 0) { >- /* its a store */ >- if (db_ctdb_transaction_store(h, key, data) != 0) { >- goto failed; >- } >- } else { >- TDB_DATA data2; >- TALLOC_CTX *tmp_ctx = talloc_new(h); >- >- if (db_ctdb_transaction_fetch(h->ctx, tmp_ctx, key, &data2) != 0) { >- talloc_free(tmp_ctx); >- goto failed; >- } >- if (data2.dsize != data.dsize || >- memcmp(data2.dptr, data.dptr, data.dsize) != 0) { >- /* the record has changed on us - we have to give up */ >- talloc_free(tmp_ctx); >- goto failed; >- } >- talloc_free(tmp_ctx); >- } >- } >- >- return 0; >- >-failed: 
>- tdb_transaction_cancel(h->ctx->wtdb->tdb); >- return -1; >-} >- >- > /* > commit a transaction > */ >@@ -774,11 +677,8 @@ static int db_ctdb_transaction_commit(struct db_context *db) > struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data, > struct db_ctdb_ctx); > NTSTATUS rets; >- int ret; > int status; >- int retries = 0; > struct db_ctdb_transaction_handle *h = ctx->transaction; >- enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR; > > if (h == NULL) { > DEBUG(0,(__location__ " transaction commit with no open transaction on db 0x%08x\n", ctx->db_id)); >@@ -798,102 +698,29 @@ static int db_ctdb_transaction_commit(struct db_context *db) > > DEBUG(5,(__location__ " Commit transaction on db 0x%08x\n", ctx->db_id)); > >- talloc_set_destructor(h, NULL); >- >- /* our commit strategy is quite complex. >- >- - we first try to commit the changes to all other nodes >- >- - if that works, then we commit locally and we are done >- >- - if a commit on another node fails, then we need to cancel >- the transaction, then restart the transaction (thus >- opening a window of time for a pending recovery to >- complete), then replay the transaction, checking all the >- reads and writes (checking that reads give the same data, >- and writes succeed). 
Then we retry the transaction to the >- other nodes >- */ >- > again: > if (h->m_write == NULL) { > /* no changes were made, potentially after a retry */ >- tdb_transaction_cancel(h->ctx->wtdb->tdb); >- talloc_free(h); >- ctx->transaction = NULL; >- return 0; >+ goto done; > } > > /* tell ctdbd to commit to the other nodes */ >- rets = ctdbd_control_local(messaging_ctdbd_connection(), >- retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, >+ rets = ctdbd_control_local(messaging_ctdbd_connection(), >+ CTDB_CONTROL_TRANS3_COMMIT, > h->ctx->db_id, 0, >- db_ctdb_marshall_finish(h->m_write), NULL, NULL, &status); >+ db_ctdb_marshall_finish(h->m_write), >+ NULL, NULL, &status); > if (!NT_STATUS_IS_OK(rets) || status != 0) { >- tdb_transaction_cancel(h->ctx->wtdb->tdb); >- sleep(1); >- >- if (!NT_STATUS_IS_OK(rets)) { >- failure_control = CTDB_CONTROL_TRANS2_ERROR; >- } else { >- /* work out what error code we will give if we >- have to fail the operation */ >- switch ((enum ctdb_trans2_commit_error)status) { >- case CTDB_TRANS2_COMMIT_SUCCESS: >- case CTDB_TRANS2_COMMIT_SOMEFAIL: >- case CTDB_TRANS2_COMMIT_TIMEOUT: >- failure_control = CTDB_CONTROL_TRANS2_ERROR; >- break; >- case CTDB_TRANS2_COMMIT_ALLFAIL: >- failure_control = CTDB_CONTROL_TRANS2_FINISHED; >- break; >- } >- } >- >- if (++retries == 100) { >- DEBUG(0,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n", >- h->ctx->db_id, retries, (unsigned)failure_control)); >- ctdbd_control_local(messaging_ctdbd_connection(), failure_control, >- h->ctx->db_id, CTDB_CTRL_FLAG_NOREPLY, >- tdb_null, NULL, NULL, NULL); >- h->ctx->transaction = NULL; >- talloc_free(h); >- ctx->transaction = NULL; >- return -1; >- } >- >- if (ctdb_replay_transaction(h) != 0) { >- DEBUG(0,(__location__ " Failed to replay transaction failure_control=%u\n", >- (unsigned)failure_control)); >- ctdbd_control_local(messaging_ctdbd_connection(), failure_control, >- h->ctx->db_id, 
CTDB_CTRL_FLAG_NOREPLY, >- tdb_null, NULL, NULL, NULL); >- h->ctx->transaction = NULL; >- talloc_free(h); >- ctx->transaction = NULL; >- return -1; >- } >+ /* >+ * TODO: >+ * check the database sequence number and >+ * compare it to the seqnum after applying the >+ * marshall buffer. If it is the same: return success. >+ */ > goto again; >- } else { >- failure_control = CTDB_CONTROL_TRANS2_ERROR; > } > >- /* do the real commit locally */ >- ret = tdb_transaction_commit(h->ctx->wtdb->tdb); >- if (ret != 0) { >- DEBUG(0,(__location__ " Failed to commit transaction failure_control=%u\n", >- (unsigned)failure_control)); >- ctdbd_control_local(messaging_ctdbd_connection(), failure_control, h->ctx->db_id, >- CTDB_CTRL_FLAG_NOREPLY, tdb_null, NULL, NULL, NULL); >- h->ctx->transaction = NULL; >- talloc_free(h); >- return ret; >- } >- >- /* tell ctdbd that we are finished with our local commit */ >- ctdbd_control_local(messaging_ctdbd_connection(), CTDB_CONTROL_TRANS2_FINISHED, >- h->ctx->db_id, CTDB_CTRL_FLAG_NOREPLY, >- tdb_null, NULL, NULL, NULL); >+done: > h->ctx->transaction = NULL; > talloc_free(h); > return 0; >@@ -1314,6 +1141,7 @@ struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx, > struct db_context *result; > struct db_ctdb_ctx *db_ctdb; > char *db_path; >+ struct ctdbd_connection *conn; > > if (!lp_clustering()) { > DEBUG(10, ("Clustering disabled -- no ctdb\n")); >@@ -1335,13 +1163,15 @@ struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx, > db_ctdb->transaction = NULL; > db_ctdb->db = result; > >- if (!NT_STATUS_IS_OK(ctdbd_db_attach(messaging_ctdbd_connection(),name, &db_ctdb->db_id, tdb_flags))) { >+ conn = messaging_ctdbd_connection(); >+ >+ if (!NT_STATUS_IS_OK(ctdbd_db_attach(conn, name, &db_ctdb->db_id, tdb_flags))) { > DEBUG(0, ("ctdbd_db_attach failed for %s\n", name)); > TALLOC_FREE(result); > return NULL; > } > >- db_path = ctdbd_dbpath(messaging_ctdbd_connection(), db_ctdb, db_ctdb->db_id); >+ db_path = ctdbd_dbpath(conn, db_ctdb, db_ctdb->db_id); 
> > result->persistent = ((tdb_flags & TDB_CLEAR_IF_FIRST) == 0); > >@@ -1361,6 +1191,16 @@ struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx, > } > talloc_free(db_path); > >+ if (result->persistent) { >+ db_ctdb->lock_ctx = g_lock_ctx_init(db_ctdb, >+ ctdb_conn_msg_ctx(conn)); >+ if (db_ctdb->lock_ctx == NULL) { >+ DEBUG(0, ("g_lock_ctx_init failed\n")); >+ TALLOC_FREE(result); >+ return NULL; >+ } >+ } >+ > result->private_data = (void *)db_ctdb; > result->fetch_locked = db_ctdb_fetch_locked; > result->fetch = db_ctdb_fetch; >-- >1.6.3.3 > > >From f631e5f328ef4824d98aab3edd54ad14d1913beb Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Fri, 4 Dec 2009 11:49:21 +0100 >Subject: [PATCH 07/32] build: Add a configure check for CTDB_CONTROL_TRANS3_COMMIT. > >This is the new implementation of ctdb transactions using the >global lock feature. It is needed by the current dbwrap_ctdb code. > >Michael >(cherry picked from commit d4c0afa841ecdae1cab955cc73360deae23f5873) >--- > source3/configure.in | 17 +++++++++++++++++ > 1 files changed, 17 insertions(+), 0 deletions(-) > >diff --git a/source3/configure.in b/source3/configure.in >index a9b9e47..99f2448 100644 >--- a/source3/configure.in >+++ b/source3/configure.in >@@ -5267,6 +5267,23 @@ else > ctdb_broken="transaction support too old" > fi > >+AC_HAVE_DECL(CTDB_CONTROL_TRANS3_COMMIT,[ >+#include "confdefs.h" >+#define NO_CONFIG_H >+#include "replace.h" >+#include "system/wait.h" >+#include "system/network.h" >+#include <talloc.h> >+#include <tdb.h> >+#include <ctdb.h> >+#include <ctdb_private.h> >+]) >+if test x"$ac_cv_have_CTDB_CONTROL_TRANS3_COMMIT_decl" = x"yes"; then >+ ctdb_broken=no >+else >+ ctdb_broken="transaction support too old" >+fi >+ > # in ctdb 1.0.57 ctdb_control_tcp was temparary renamed to ctdb_tcp_client > AC_CHECK_TYPE(struct ctdb_tcp_client,[ > AC_DEFINE([ctdb_control_tcp],[ctdb_tcp_client],[ctdb ipv4 support]) >-- >1.6.3.3 > > >From a6681c93cd643a785fbd6f686f0fdffa9f6d1256 Mon 
Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Fri, 11 Dec 2009 10:35:50 +0100 >Subject: [PATCH 08/32] s3:dbwrap_ctdb: update (C) > >Michael >(cherry picked from commit 5a0c42770b349877928a2b3fd8316903dd62e5b7) >--- > source3/lib/dbwrap_ctdb.c | 3 ++- > 1 files changed, 2 insertions(+), 1 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index c32398a..63b63fa 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -1,7 +1,8 @@ > /* > Unix SMB/CIFS implementation. > Database interface wrapper around ctdbd >- Copyright (C) Volker Lendecke 2007 >+ Copyright (C) Volker Lendecke 2007-2009 >+ Copyright (C) Michael Adam 2009 > > This program is free software; you can redistribute it and/or modify > it under the terms of the GNU General Public License as published by >-- >1.6.3.3 > > >From f2f8abf24c89822b6e17d2f4e6077f93eeec7e33 Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Fri, 11 Dec 2009 12:30:57 +0100 >Subject: [PATCH 09/32] s3:dbwrap_ctdb: change db_ctdb_transaction_store() to return NTSTATUS. > >The return values calculated by the callers were wrong anyways since >the new marshalling code does not set the local tdbs tdb error code. 
> >Michael >(cherry picked from commit 26225d3e798892b39b3c238b0bee465bffac6550) >--- > source3/lib/dbwrap_ctdb.c | 30 ++++++++++++------------------ > 1 files changed, 12 insertions(+), 18 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index 63b63fa..0986083 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -577,8 +577,8 @@ static struct db_record *db_ctdb_fetch_locked_persistent(struct db_ctdb_ctx *ctx > /* > stores a record inside a transaction > */ >-static int db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h, >- TDB_DATA key, TDB_DATA data) >+static NTSTATUS db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h, >+ TDB_DATA key, TDB_DATA data) > { > TALLOC_CTX *tmp_ctx = talloc_new(h); > TDB_DATA rec; >@@ -608,7 +608,7 @@ static int db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h, > data.dsize) == 0) { > SAFE_FREE(rec.dptr); > talloc_free(tmp_ctx); >- return 0; >+ return NT_STATUS_OK; > } > } > SAFE_FREE(rec.dptr); >@@ -623,18 +623,18 @@ static int db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h, > DEBUG(0,(__location__ " Failed to add to marshalling " > "record\n")); > talloc_free(tmp_ctx); >- return -1; >+ return NT_STATUS_NO_MEMORY; > } > > h->m_write = db_ctdb_marshall_add(h, h->m_write, h->ctx->db_id, 0, key, &header, data); > if (h->m_write == NULL) { > DEBUG(0,(__location__ " Failed to add to marshalling record\n")); > talloc_free(tmp_ctx); >- return -1; >+ return NT_STATUS_NO_MEMORY; > } > > talloc_free(tmp_ctx); >- return 0; >+ return NT_STATUS_OK; > } > > >@@ -645,13 +645,10 @@ static NTSTATUS db_ctdb_store_transaction(struct db_record *rec, TDB_DATA data, > { > struct db_ctdb_transaction_handle *h = talloc_get_type_abort( > rec->private_data, struct db_ctdb_transaction_handle); >- int ret; >+ NTSTATUS status; > >- ret = db_ctdb_transaction_store(h, rec->key, data); >- if (ret != 0) { >- return tdb_error_to_ntstatus(h->ctx->wtdb->tdb); 
>- } >- return NT_STATUS_OK; >+ status = db_ctdb_transaction_store(h, rec->key, data); >+ return status; > } > > /* >@@ -661,13 +658,10 @@ static NTSTATUS db_ctdb_delete_transaction(struct db_record *rec) > { > struct db_ctdb_transaction_handle *h = talloc_get_type_abort( > rec->private_data, struct db_ctdb_transaction_handle); >- int ret; >+ NTSTATUS status; > >- ret = db_ctdb_transaction_store(h, rec->key, tdb_null); >- if (ret != 0) { >- return tdb_error_to_ntstatus(h->ctx->wtdb->tdb); >- } >- return NT_STATUS_OK; >+ status = db_ctdb_transaction_store(h, rec->key, tdb_null); >+ return status; > } > > /* >-- >1.6.3.3 > > >From a069477de2af1dbc15563eed00b3354fc823e2ca Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Fri, 11 Dec 2009 14:07:28 +0100 >Subject: [PATCH 10/32] s3:dbwrap_ctdb: maintain a database sequence number that bumps in transactions > >For persistent databases, a 64bit integer is kept in a special record >__db_sequence_number__. This record is incremented with each completed >transaction. > >The retry mechanism for failing TRANS3_COMMIT controls inside the >db_ctdb_transaction_commit() function now relies on a modified >behaviour of ctdbd's treatment of persistent databases in recoveries. >Recently, a special treatment for persistent databases had been >introduced in ctdb (1.0.108) to work around the problems with the >original design of persistent transactions. >Now with the rewrite we need to revert to the old behaviour that >ctdb always takes the newest copies of all records. > >This change also paves the way for a next step, which will make >recovery use the db seqnum to tell which node has the newest copy >of a persistent db and use that node's copy. This will greatly >reduce the amount of data transferred with each recovery. 
> >Michael >(cherry picked from commit 3fe7ce141d6afe3825b06c5feb90558911e4df1e) >--- > source3/lib/dbwrap_ctdb.c | 121 +++++++++++++++++++++++++++++++++++++++++++-- > 1 files changed, 116 insertions(+), 5 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index 0986083..fb99e1d 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -664,6 +664,65 @@ static NTSTATUS db_ctdb_delete_transaction(struct db_record *rec) > return status; > } > >+/** >+ * Fetch the db sequence number of a persistent db directly from the db. >+ */ >+static NTSTATUS db_ctdb_fetch_db_seqnum_from_db(struct db_ctdb_ctx *db, >+ uint64_t *seqnum) >+{ >+ NTSTATUS status; >+ const char *keyname = CTDB_DB_SEQNUM_KEY; >+ TDB_DATA key; >+ TDB_DATA data; >+ struct ctdb_ltdb_header header; >+ TALLOC_CTX *mem_ctx = talloc_stackframe(); >+ >+ if (seqnum == NULL) { >+ return NT_STATUS_INVALID_PARAMETER; >+ } >+ >+ key.dptr = (uint8_t *)discard_const(keyname); >+ key.dsize = strlen(keyname) + 1; >+ >+ status = db_ctdb_ltdb_fetch(db, key, &header, mem_ctx, &data); >+ if (!NT_STATUS_IS_OK(status)) { >+ goto done; >+ } >+ >+ if (data.dsize != sizeof(uint64_t)) { >+ *seqnum = 0; >+ goto done; >+ } >+ >+ *seqnum = *(uint64_t *)data.dptr; >+ >+done: >+ TALLOC_FREE(mem_ctx); >+ return status; >+} >+ >+/** >+ * Store the database sequence number inside a transaction. 
>+ */ >+static NTSTATUS db_ctdb_store_db_seqnum(struct db_ctdb_transaction_handle *h, >+ uint64_t seqnum) >+{ >+ NTSTATUS status; >+ const char *keyname = CTDB_DB_SEQNUM_KEY; >+ TDB_DATA key; >+ TDB_DATA data; >+ >+ key.dptr = (uint8_t *)discard_const(keyname); >+ key.dsize = strlen(keyname); >+ >+ data.dptr = (uint8_t *)&seqnum; >+ data.dsize = sizeof(uint64_t); >+ >+ status = db_ctdb_transaction_store(h, key, data); >+ >+ return status; >+} >+ > /* > commit a transaction > */ >@@ -674,6 +733,8 @@ static int db_ctdb_transaction_commit(struct db_context *db) > NTSTATUS rets; > int status; > struct db_ctdb_transaction_handle *h = ctx->transaction; >+ uint64_t old_seqnum, new_seqnum; >+ int ret; > > if (h == NULL) { > DEBUG(0,(__location__ " transaction commit with no open transaction on db 0x%08x\n", ctx->db_id)); >@@ -693,6 +754,30 @@ static int db_ctdb_transaction_commit(struct db_context *db) > > DEBUG(5,(__location__ " Commit transaction on db 0x%08x\n", ctx->db_id)); > >+ /* >+ * As the last db action before committing, bump the database sequence >+ * number. Note that this undoes all changes to the seqnum records >+ * performed under the transaction. This record is not meant to be >+ * modified by user interaction. It is for internal use only... 
>+ */ >+ rets = db_ctdb_fetch_db_seqnum_from_db(ctx, &old_seqnum); >+ if (!NT_STATUS_IS_OK(rets)) { >+ DEBUG(1, (__location__ " failed to fetch the db sequence number " >+ "in transaction commit on db 0x%08x\n", ctx->db_id)); >+ ret = -1; >+ goto done; >+ } >+ >+ new_seqnum = old_seqnum + 1; >+ >+ rets = db_ctdb_store_db_seqnum(h, new_seqnum); >+ if (!NT_STATUS_IS_OK(rets)) { >+ DEBUG(1, (__location__ "failed to store the db sequence number " >+ " in transaction commit on db 0x%08x\n", ctx->db_id)); >+ ret = -1; >+ goto done; >+ } >+ > again: > if (h->m_write == NULL) { > /* no changes were made, potentially after a retry */ >@@ -707,14 +792,40 @@ again: > NULL, NULL, &status); > if (!NT_STATUS_IS_OK(rets) || status != 0) { > /* >- * TODO: >- * check the database sequence number and >- * compare it to the seqnum after applying the >- * marshall buffer. If it is the same: return success. >+ * The TRANS3_COMMIT control should only possibly fail when a >+ * recovery has been running concurrently. In any case, the db >+ * will be the same on all nodes, either the new copy or the >+ * old copy. This can be detected by comparing the old and new >+ * local sequence numbers. >+ */ >+ rets = db_ctdb_fetch_db_seqnum_from_db(ctx, &new_seqnum); >+ if (!NT_STATUS_IS_OK(rets)) { >+ DEBUG(1, (__location__ " failed to refetch db sequence " >+ "number after failed TRANS3_COMMIT\n")); >+ ret = -1; >+ goto done; >+ } >+ >+ if (new_seqnum == old_seqnum) { >+ /* Recovery prevented all our changes: retry. */ >+ goto again; >+ } else if (new_seqnum != (old_seqnum + 1)) { >+ DEBUG(0, (__location__ " ERROR: new_seqnum[%lu] != " >+ "old_seqnum[%lu] + (0 or 1) after failed " >+ "TRANS3_COMMIT - this should not happen!\n", >+ (unsigned long)new_seqnum, >+ (unsigned long)old_seqnum)); >+ ret = -1; >+ goto done; >+ } >+ /* >+ * Recovery propagated our changes to all nodes, completing >+ * our commit for us - succeed. 
> */ >- goto again; > } > >+ ret = 0; >+ > done: > h->ctx->transaction = NULL; > talloc_free(h); >-- >1.6.3.3 > > >From 6ae38ef5660945ff5846612907d57fc65c748ad4 Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Fri, 11 Dec 2009 16:45:38 +0100 >Subject: [PATCH 11/32] s3:build: remove checks for deprecated ctdb controls. > >Michael >(cherry picked from commit 9113ce82b59c718ac709eb01b125e9e6746a96b7) >--- > source3/configure.in | 36 +----------------------------------- > 1 files changed, 1 insertions(+), 35 deletions(-) > >diff --git a/source3/configure.in b/source3/configure.in >index 99f2448..3111d5a 100644 >--- a/source3/configure.in >+++ b/source3/configure.in >@@ -5233,40 +5233,6 @@ AC_CHECK_HEADERS(ctdb.h ctdb_private.h,,,[ > #include <ctdb.h> > ]) > >-AC_HAVE_DECL(CTDB_CONTROL_TRANS2_COMMIT_RETRY,[ >-#include "confdefs.h" >-#define NO_CONFIG_H >-#include "replace.h" >-#include "system/wait.h" >-#include "system/network.h" >-#include <talloc.h> >-#include <tdb.h> >-#include <ctdb.h> >-#include <ctdb_private.h> >-]) >-if test x"$ac_cv_have_CTDB_CONTROL_TRANS2_COMMIT_RETRY_decl" = x"yes"; then >- ctdb_broken=no >-else >- ctdb_broken="missing transaction support" >-fi >- >-AC_HAVE_DECL(CTDB_CONTROL_TRANS2_ACTIVE,[ >-#include "confdefs.h" >-#define NO_CONFIG_H >-#include "replace.h" >-#include "system/wait.h" >-#include "system/network.h" >-#include <talloc.h> >-#include <tdb.h> >-#include <ctdb.h> >-#include <ctdb_private.h> >-]) >-if test x"$ac_cv_have_CTDB_CONTROL_TRANS2_ACTIVE_decl" = x"yes"; then >- ctdb_broken=no >-else >- ctdb_broken="transaction support too old" >-fi >- > AC_HAVE_DECL(CTDB_CONTROL_TRANS3_COMMIT,[ > #include "confdefs.h" > #define NO_CONFIG_H >@@ -5281,7 +5247,7 @@ AC_HAVE_DECL(CTDB_CONTROL_TRANS3_COMMIT,[ > if test x"$ac_cv_have_CTDB_CONTROL_TRANS3_COMMIT_decl" = x"yes"; then > ctdb_broken=no > else >- ctdb_broken="transaction support too old" >+ ctdb_broken="ctdb transaction support missing or too old" > fi > > # in 
ctdb 1.0.57 ctdb_control_tcp was temparary renamed to ctdb_tcp_client >-- >1.6.3.3 > > >From 8a8bd12a9eadd5007d5948a8519709ec835f46ca Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Sat, 12 Dec 2009 00:30:37 +0100 >Subject: [PATCH 12/32] s3:dbwrap_ctdb: fix db_ctdb_fetch_db_seqnum_from_db() when NT_STATUS_NOT_FOUND. > >Don't treat this as an error but return seqnum 0 instead. > >Michael >(cherry picked from commit 10a44ee6930bb51b4b20ce42f35bc455ac1b7293) >--- > source3/lib/dbwrap_ctdb.c | 6 +++++- > 1 files changed, 5 insertions(+), 1 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index fb99e1d..4c4486c 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -685,10 +685,14 @@ static NTSTATUS db_ctdb_fetch_db_seqnum_from_db(struct db_ctdb_ctx *db, > key.dsize = strlen(keyname) + 1; > > status = db_ctdb_ltdb_fetch(db, key, &header, mem_ctx, &data); >- if (!NT_STATUS_IS_OK(status)) { >+ if (!NT_STATUS_IS_OK(status) && >+ !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) >+ { > goto done; > } > >+ status = NT_STATUS_OK; >+ > if (data.dsize != sizeof(uint64_t)) { > *seqnum = 0; > goto done; >-- >1.6.3.3 > > >From f3355db811b47ebf94bf260be5756ca075123f3d Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Sat, 12 Dec 2009 00:38:14 +0100 >Subject: [PATCH 13/32] s3:dbwrap_ctdb: fix two "may be used uninitialized" warnings > >Michael >(cherry picked from commit fb981cdb8282d3b9b46d9ca515a5685add232a72) >--- > source3/lib/dbwrap_ctdb.c | 3 +++ > 1 files changed, 3 insertions(+), 0 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index 4c4486c..d70637e 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -389,6 +389,9 @@ static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf, > return false; > } > >+ ZERO_STRUCT(h); >+ ZERO_STRUCT(data); >+ > /* > * Walk the list of records written during this > * 
transaction. If we want to read one we have already >-- >1.6.3.3 > > >From dfbdef3e2890f2b8e8478eefd20e215edc800e0e Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Tue, 5 Jan 2010 16:17:27 +0100 >Subject: [PATCH 14/32] s3:dbwrap_ctdb: fix an uninitialized variable. > >Michael >(cherry picked from commit 1505b69dea6044a13a59f672e22f5833256cb981) >--- > source3/lib/dbwrap_ctdb.c | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index d70637e..d6fde35 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -381,7 +381,7 @@ static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf, > { > struct ctdb_rec_data *rec = NULL; > struct ctdb_ltdb_header h; >- bool found; >+ bool found = false; > TDB_DATA data; > int i; > >-- >1.6.3.3 > > >From 033d307def08666c6b8eb0a0edd3c31540aa4d97 Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Wed, 6 Jan 2010 00:37:21 +0100 >Subject: [PATCH 15/32] s3:dbwrap_ctdb: fix logic error in pull_newest_from_marshall_buffer(). > >The logic bug was that if a record was found in the marshall buffer, >then always the ctdb header of the last record in the marshall buffer >was returned, and not the ctdb header of the last occurrence of the >requested record. > >This is fixed by introducing an additional temporary variable.
> >Michael >(cherry picked from commit 524072b56bf659002410a817749bf86fe6f51e83) >--- > source3/lib/dbwrap_ctdb.c | 6 +++++- > 1 files changed, 5 insertions(+), 1 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index d6fde35..4e97d26 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -403,8 +403,11 @@ static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf, > for (i=0; i<buf->count; i++) { > TDB_DATA tkey, tdata; > uint32_t reqid; >+ struct ctdb_ltdb_header hdr; > >- rec = db_ctdb_marshall_loop_next(buf, rec, &reqid, &h, &tkey, >+ ZERO_STRUCT(hdr); >+ >+ rec = db_ctdb_marshall_loop_next(buf, rec, &reqid, &hdr, &tkey, > &tdata); > if (rec == NULL) { > return false; >@@ -413,6 +416,7 @@ static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf, > if (tdb_data_equal(key, tkey)) { > found = true; > data = tdata; >+ h = hdr; > } > } > >-- >1.6.3.3 > > >From 59d53e82c1512cc684de8110c1a125c69caecc6c Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Wed, 13 Jan 2010 23:51:34 +0100 >Subject: [PATCH 16/32] s3:dbwrap_ctdb: fix brown paperbag bug in ctdb_transaction_commit. > >I carefully prepared the return value only to "return 0;" at the bottom. :-( >This may well have hit us for instance in the nested cancel case >and produced random errors. 
> >Michael >(cherry picked from commit 1d594bd734a2f7146ed52872456a16c5e41816f1) >--- > source3/lib/dbwrap_ctdb.c | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index 4e97d26..c0b5fd5 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -840,7 +840,7 @@ again: > done: > h->ctx->transaction = NULL; > talloc_free(h); >- return 0; >+ return ret; > } > > >-- >1.6.3.3 > > >From 79bdb10b65395bb5cd03a0422a1697e5e90db22c Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Wed, 13 Jan 2010 23:53:54 +0100 >Subject: [PATCH 17/32] s3:dbwrap_ctdb: exit early when nothing has been written in transaction_commit. > >This skips update of the __db_sequence_number__ record when nothing else has >been written. There are transactions that are just openend and then nothing >is written until transaction_commit is called. This is for instance the case >with registry initialization routines: They start a transaction and only >write somthing when the registry has not been initialized yet. >So this change will skip many db_seqnum bumps and TRANS3_COMMIT roundtrips. > >Michael >(cherry picked from commit c311697aded87ce624d40cbf14e05d6e6377c257) >--- > source3/lib/dbwrap_ctdb.c | 14 +++++++++----- > 1 files changed, 9 insertions(+), 5 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index c0b5fd5..79c4c0c 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -763,6 +763,15 @@ static int db_ctdb_transaction_commit(struct db_context *db) > return 0; > } > >+ if (h->m_write == NULL) { >+ /* >+ * No changes were made, so don't change the seqnum, >+ * don't push to other node, just exit with success. 
>+ */ >+ ret = 0; >+ goto done; >+ } >+ > DEBUG(5,(__location__ " Commit transaction on db 0x%08x\n", ctx->db_id)); > > /* >@@ -790,11 +799,6 @@ static int db_ctdb_transaction_commit(struct db_context *db) > } > > again: >- if (h->m_write == NULL) { >- /* no changes were made, potentially after a retry */ >- goto done; >- } >- > /* tell ctdbd to commit to the other nodes */ > rets = ctdbd_control_local(messaging_ctdbd_connection(), > CTDB_CONTROL_TRANS3_COMMIT, >-- >1.6.3.3 > > >From 88c6be332667dda4d322f2a795fbad2df835d33e Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Mon, 18 Jan 2010 17:26:04 +0100 >Subject: [PATCH 18/32] s3:dbwrap_ctdb: fix reading/storing of special key __db_sequence_number__ > >The key for reading and writing was inconsistent due to a >off by one data length. > >Michael >(cherry picked from commit 1933214108d1a71bc6473a696ce35020a427d8f4) >--- > source3/lib/dbwrap_ctdb.c | 6 ++---- > 1 files changed, 2 insertions(+), 4 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index 79c4c0c..ddc8868 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -688,8 +688,7 @@ static NTSTATUS db_ctdb_fetch_db_seqnum_from_db(struct db_ctdb_ctx *db, > return NT_STATUS_INVALID_PARAMETER; > } > >- key.dptr = (uint8_t *)discard_const(keyname); >- key.dsize = strlen(keyname) + 1; >+ key = string_term_tdb_data(keyname); > > status = db_ctdb_ltdb_fetch(db, key, &header, mem_ctx, &data); > if (!NT_STATUS_IS_OK(status) && >@@ -723,8 +722,7 @@ static NTSTATUS db_ctdb_store_db_seqnum(struct db_ctdb_transaction_handle *h, > TDB_DATA key; > TDB_DATA data; > >- key.dptr = (uint8_t *)discard_const(keyname); >- key.dsize = strlen(keyname); >+ key = string_term_tdb_data(keyname); > > data.dptr = (uint8_t *)&seqnum; > data.dsize = sizeof(uint64_t); >-- >1.6.3.3 > > >From 05afe8843cf656376f61b3f4af887fe4eac3e5ee Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Fri, 22 Jan 2010 
15:56:28 +0100 >Subject: [PATCH 19/32] s3:g_lock: remove an unreached code path. > >Michael >(cherry picked from commit 8e306b51b79d3dacd68be9f13aa8455e2eb4c03f) >--- > source3/lib/g_lock.c | 4 ---- > 1 files changed, 0 insertions(+), 4 deletions(-) > >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index 6508b39..0eae3f2 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -309,10 +309,6 @@ again: > goto done; > } > >- if (retry) { >- goto again; >- } >- > DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n")); > > if (te == NULL) { >-- >1.6.3.3 > > >From 88430eacb4203ce02f592db2ab87255119ace70f Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Sat, 23 Jan 2010 00:05:15 +0100 >Subject: [PATCH 20/32] s3:ctdb_conn: add ctdbd_conn_get_fd() to get the fd out of the ctdb connection > >Michael >(cherry picked from commit e4af0bc5af2c3ee025ca7fac251c3672ba2c8dd5) >--- > source3/include/ctdbd_conn.h | 2 ++ > source3/lib/ctdbd_conn.c | 5 +++++ > 2 files changed, 7 insertions(+), 0 deletions(-) > >diff --git a/source3/include/ctdbd_conn.h b/source3/include/ctdbd_conn.h >index 71516c7..c5ba572 100644 >--- a/source3/include/ctdbd_conn.h >+++ b/source3/include/ctdbd_conn.h >@@ -33,6 +33,8 @@ NTSTATUS ctdbd_register_msg_ctx(struct ctdbd_connection *conn, > struct messaging_context *msg_ctx); > struct messaging_context *ctdb_conn_msg_ctx(struct ctdbd_connection *conn); > >+int ctdbd_conn_get_fd(struct ctdbd_connection *conn); >+ > NTSTATUS ctdbd_messaging_send(struct ctdbd_connection *conn, > uint32 dst_vnn, uint64 dst_srvid, > struct messaging_rec *msg); >diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c >index 743594d..6b50009 100644 >--- a/source3/lib/ctdbd_conn.c >+++ b/source3/lib/ctdbd_conn.c >@@ -519,6 +519,11 @@ struct messaging_context *ctdb_conn_msg_ctx(struct ctdbd_connection *conn) > return conn->msg_ctx; > } > >+int ctdbd_conn_get_fd(struct ctdbd_connection *conn) >+{ >+ return 
packet_get_fd(conn->pkt); >+} >+ > /* > * Packet handler to receive and handle a ctdb message > */ >-- >1.6.3.3 > > >From e787b6e8c18bc8c60cdbe432cf6c88cc0c6dc413 Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Sat, 23 Jan 2010 01:17:06 +0100 >Subject: [PATCH 21/32] s3:g_lock: remove a nested event loop, replacing the inner loop by select > >This made smbd crash in g_lock_lock() when trying to start a >transaction on a db with an already started transaction, >e.g. in a tcon_and_X where the share_info.tdb was not yet >initialized but share_info.tdb was already locked by another >process or writing access to the winreg rpc pipe where the >registry tdb was already locked by another process. > >What we really _want_ to do here by design is to react to >MSG_DBWRAP_G_LOCK_RETRY messages that are either sent >by a client doing g_lock_unlock or by ourselves when >we receive a CTDB_SRVID_SAMBA_NOTIFY or >CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when >either a client holding a lock or a complete node >has died. > >Doing this properly involves calling tevent_loop_once(), >but doing this here with the main ctdbd messaging context >creates a nested event loop when g_lock_lock() is called >from the main event loop. > >So as a quick fix, we act a little coarsely here: we do >a select on the ctdb connection fd and when it is readable >or we get EINTR, then we retry without actually parsing >any ctdb packets or dispatching messages. This means that >we retry more often than necessary and intended by design, >but this does no harm and it is unobtrusive. When we have >finished, the main loop will pick up all the messages and >ctdb packets. The only extra twist is that we cannot use >timed events here but have to handcode a timeout for select.
> >Michael >(cherry picked from commit 83fffbeb44441a87569e543054af21d975eb20ae) >--- > source3/lib/g_lock.c | 139 ++++++++++++++++++++++++++++++++++++-------------- > 1 files changed, 101 insertions(+), 38 deletions(-) > >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index 0eae3f2..8a44b22 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -266,7 +266,9 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > struct tevent_timer *te = NULL; > NTSTATUS status; > bool retry = false; >- bool timedout = false; >+ struct timeval timeout_end; >+ struct timeval timeout_remaining; >+ struct timeval time_now; > > DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type, > name)); >@@ -295,52 +297,113 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > nt_errstr(status))); > return status; > } >-again: >- retry = false; > >- status = g_lock_trylock(ctx, name, lock_type); >- if (NT_STATUS_IS_OK(status)) { >- DEBUG(10, ("Got lock %s\n", name)); >- goto done; >- } >- if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) { >- DEBUG(10, ("g_lock_trylock failed: %s\n", >- nt_errstr(status))); >- goto done; >- } >- >- DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n")); >- >- if (te == NULL) { >- te = tevent_add_timer( >- ctx->msg->event_ctx, talloc_tos(), >- timeval_current_ofs(timeout.tv_sec, timeout.tv_usec), >- g_lock_timedout, &timedout); >- if (te == NULL) { >- DEBUG(10, ("tevent_add_timer failed\n")); >- status = NT_STATUS_NO_MEMORY; >- goto done; >- } >- } >+ time_now = timeval_current(); >+ timeout_end = timeval_sum(&time_now, &timeout); > > while (true) { >- if (tevent_loop_once(ctx->msg->event_ctx) == -1) { >- DEBUG(1, ("tevent_loop_once failed\n")); >- status = NT_STATUS_INTERNAL_ERROR; >- goto done; >+ fd_set _r_fds; >+ fd_set *r_fds = NULL; >+ int max_fd = 0; >+ int ret; >+ >+ status = g_lock_trylock(ctx, name, lock_type); >+ if (NT_STATUS_IS_OK(status)) { >+ DEBUG(10, ("Got lock %s\n", name)); >+ break; 
>+ } >+ if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) { >+ DEBUG(10, ("g_lock_trylock failed: %s\n", >+ nt_errstr(status))); >+ break; > } >- if (retry) { >- goto again; >+ >+ DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n")); >+ >+ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >+ * !!! HACK ALERT --- FIX ME !!! >+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >+ * What we really want to do here is to react to >+ * MSG_DBWRAP_G_LOCK_RETRY messages that are either sent >+ * by a client doing g_lock_unlock or by ourselves when >+ * we receive a CTDB_SRVID_SAMBA_NOTIFY or >+ * CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when >+ * either a client holding a lock or a complete node >+ * has died. >+ * >+ * Doing this properly involves calling tevent_loop_once(), >+ * but doing this here with the main ctdbd messaging context >+ * creates a nested event loop when g_lock_lock() is called >+ * from the main event loop, e.g. in a tcon_and_X where the >+ * share_info.tdb needs to be initialized and is locked by >+ * another process, or when the remore registry is accessed >+ * for writing and some other process already holds a lock >+ * on the registry.tdb. >+ * >+ * So as a quick fix, we act a little corasely here: we do >+ * a select on the ctdb connection fd and when it is readable >+ * or we get EINTR, then we retry without actually parsing >+ * any ctdb packages or dispatching messages. This means that >+ * we retry more often than intended by design, but this does >+ * not harm and it is unobtrusive. When we have finished, >+ * the main loop will pick up all the messages and ctdb >+ * packets. The only extra twist is that we cannot use timed >+ * events here but have to handcode a timeout. 
>+ */ >+ >+#ifdef CLUSTER_SUPPORT >+ if (lp_clustering()) { >+ struct ctdbd_connection *conn = messaging_ctdbd_connection(); >+ >+ r_fds = &_r_fds; >+ FD_ZERO(r_fds); >+ max_fd = ctdbd_conn_get_fd(conn); >+ FD_SET(max_fd, r_fds); > } >- if (timedout) { >- DEBUG(10, ("g_lock_lock timed out\n")); >+#endif > >- te = NULL; >+ time_now = timeval_current(); >+ timeout_remaining = timeval_until(&time_now, &timeout_end); > >- status = NT_STATUS_LOCK_NOT_GRANTED; >- goto done; >+ ret = sys_select(max_fd + 1, r_fds, NULL, NULL, >+ &timeout_remaining); >+ >+ if (ret == -1) { >+ if (errno != EINTR) { >+ DEBUG(1, ("error calling select: %s\n", >+ strerror(errno))); >+ status = NT_STATUS_INTERNAL_ERROR; >+ break; >+ } >+ /* >+ * errno == EINTR: >+ * This means a signal was received. >+ * It might have been a MSG_DBWRAP_G_LOCK_RETRY message. >+ * ==> retry >+ */ >+ } else if (ret == 0) { >+ if (timeval_expired(&timeout_end)) { >+ DEBUG(10, ("g_lock_lock timed out\n")); >+ status = NT_STATUS_LOCK_NOT_GRANTED; >+ break; >+ } else { >+ DEBUG(10, ("select returned 0 but timeout not " >+ "not expired: strange - retrying\n")); >+ } >+ } else if (ret != 1) { >+ DEBUG(1, ("invalid return code of select: %d\n", ret)); >+ status = NT_STATUS_INTERNAL_ERROR; >+ break; > } >+ /* >+ * ret == 1: >+ * This means ctdbd has sent us some data. >+ * Might be a CTDB_SRVID_RECONFIGURE or a >+ * CTDB_SRVID_SAMBA_NOTIFY message. >+ * ==> retry >+ */ > } >+ > done: > > if (!NT_STATUS_IS_OK(status)) { >-- >1.6.3.3 > > >From bdf4071ddb2db94969a6d1bf0d9e5d29daa60945 Mon Sep 17 00:00:00 2001 >From: Jeremy Allison <jra@samba.org> >Date: Fri, 12 Feb 2010 22:21:19 -0800 >Subject: [PATCH 22/32] Fix warning messages on compile in g_lock.c Volker & Michael please check. > >Jeremy. 
>(cherry picked from commit 10e54fb422d9f1ae6d33e5fabbf8c651b0e57a8c) >--- > source3/lib/g_lock.c | 18 ++++-------------- > 1 files changed, 4 insertions(+), 14 deletions(-) > >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index 8a44b22..37fb7ce 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -149,10 +149,6 @@ static void g_lock_got_retry(struct messaging_context *msg, > uint32_t msg_type, > struct server_id server_id, > DATA_BLOB *data); >-static void g_lock_timedout(struct tevent_context *ev, >- struct tevent_timer *te, >- struct timeval current_time, >- void *private_data); > > static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name, > enum g_lock_type lock_type) >@@ -302,7 +298,9 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > timeout_end = timeval_sum(&time_now, &timeout); > > while (true) { >+#ifdef CLUSTER_SUPPORT > fd_set _r_fds; >+#endif > fd_set *r_fds = NULL; > int max_fd = 0; > int ret; >@@ -404,7 +402,9 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > */ > } > >+#ifdef CLUSTER_SUPPORT > done: >+#endif > > if (!NT_STATUS_IS_OK(status)) { > NTSTATUS unlock_status; >@@ -437,16 +437,6 @@ static void g_lock_got_retry(struct messaging_context *msg, > *pretry = true; > } > >-static void g_lock_timedout(struct tevent_context *ev, >- struct tevent_timer *te, >- struct timeval current_time, >- void *private_data) >-{ >- bool *ptimedout = (bool *)private_data; >- *ptimedout = true; >- TALLOC_FREE(te); >-} >- > static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, > struct server_id pid) > { >-- >1.6.3.3 > > >From 7732a4128e41ca8379211ad16109e88095d941a0 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Mon, 15 Feb 2010 16:35:06 +0100 >Subject: [PATCH 23/32] s3: Fix a typo > (cherry picked from commit bac235dd302570850bb25194ff4bd39b6d653f0d) > >--- > source3/lib/g_lock.c | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > >diff 
--git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index 37fb7ce..26b079d 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -338,7 +338,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > * for writing and some other process already holds a lock > * on the registry.tdb. > * >- * So as a quick fix, we act a little corasely here: we do >+ * So as a quick fix, we act a little coarsely here: we do > * a select on the ctdb connection fd and when it is readable > * or we get EINTR, then we retry without actually parsing > * any ctdb packages or dispatching messages. This means that >-- >1.6.3.3 > > >From 80bd6c73d5008682bc8ad148436020ac4a495da2 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Mon, 15 Feb 2010 16:49:46 +0100 >Subject: [PATCH 24/32] s3: Fix handling of processes that died in g_lock > >g_lock_parse might have thrown away entries from the locks array because the >processes were not around anymore. Don't store the orphaned entries. 
>(cherry picked from commit f3bdb163f461175c50b4930fa3464beaee30f4a8) >--- > source3/lib/g_lock.c | 8 +++++--- > 1 files changed, 5 insertions(+), 3 deletions(-) > >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index 26b079d..42c0397 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -127,11 +127,12 @@ static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data, > > static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx, > struct g_lock_rec *locks, >- int num_locks, >+ int *pnum_locks, > const struct server_id pid, > enum g_lock_type lock_type) > { > struct g_lock_rec *result; >+ int num_locks = *pnum_locks; > > result = talloc_realloc(mem_ctx, locks, struct g_lock_rec, > num_locks+1); >@@ -141,6 +142,7 @@ static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx, > > result[num_locks].pid = pid; > result[num_locks].lock_type = lock_type; >+ *pnum_locks += 1; > return result; > } > >@@ -221,7 +223,7 @@ again: > if (our_index == -1) { > /* First round, add ourself */ > >- locks = g_lock_addrec(talloc_tos(), locks, num_locks, >+ locks = g_lock_addrec(talloc_tos(), locks, &num_locks, > self, lock_type); > if (locks == NULL) { > DEBUG(10, ("g_lock_addrec failed\n")); >@@ -237,7 +239,7 @@ again: > locks[our_index].lock_type = lock_type; > } > >- data = make_tdb_data((uint8_t *)locks, talloc_get_size(locks)); >+ data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks)); > store_status = rec->store(rec, data, 0); > if (!NT_STATUS_IS_OK(store_status)) { > DEBUG(1, ("rec->store failed: %s\n", >-- >1.6.3.3 > > >From 0b6177bb8f683034916084b1fd3da669563a138b Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Mon, 15 Feb 2010 16:57:16 +0100 >Subject: [PATCH 25/32] s3: Optimize g_lock_lock for a heavily contended case > >Only check the existence of the lock owner in g_lock_parse, check the rest of >the records only when we got the lock successfully. 
This reduces the load on >process_exists which can involve a network roundtrip in the clustered case. >(cherry picked from commit 07978bd175395e0dc770f68fff5b8bd8b0fdeb51) >--- > source3/lib/g_lock.c | 39 ++++++++++++++++++++++++++++++++++++--- > 1 files changed, 36 insertions(+), 3 deletions(-) > >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index 42c0397..e262025 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -108,6 +108,34 @@ static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data, > (locks[i].lock_type & G_LOCK_PENDING) ? > "(pending)" : "(owner)")); > >+ if (((locks[i].lock_type & G_LOCK_PENDING) == 0) >+ && !process_exists(locks[i].pid)) { >+ >+ DEBUGADD(10, ("lock owner %s died -- discarding\n", >+ procid_str(talloc_tos(), >+ &locks[i].pid))); >+ >+ if (i < (num_locks-1)) { >+ locks[i] = locks[num_locks-1]; >+ } >+ num_locks -= 1; >+ } >+ } >+ >+ *plocks = locks; >+ *pnum_locks = num_locks; >+ return true; >+} >+ >+static void g_lock_cleanup(int *pnum_locks, struct g_lock_rec *locks) >+{ >+ int i, num_locks; >+ >+ num_locks = *pnum_locks; >+ >+ DEBUG(10, ("g_lock_cleanup: %d locks\n", num_locks)); >+ >+ for (i=0; i<num_locks; i++) { > if (process_exists(locks[i].pid)) { > continue; > } >@@ -119,10 +147,8 @@ static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data, > } > num_locks -= 1; > } >- >- *plocks = locks; > *pnum_locks = num_locks; >- return true; >+ return; > } > > static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx, >@@ -239,6 +265,13 @@ again: > locks[our_index].lock_type = lock_type; > } > >+ if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) { >+ /* >+ * Walk through the list of locks, search for dead entries >+ */ >+ g_lock_cleanup(&num_locks, locks); >+ } >+ > data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks)); > store_status = rec->store(rec, data, 0); > if (!NT_STATUS_IS_OK(store_status)) { >-- >1.6.3.3 > > >From 1914a31880ac136cfadf9e74cefd6e3caf468cc7 Mon Sep 
17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Tue, 16 Feb 2010 12:22:08 +0100 >Subject: [PATCH 26/32] s3: Avoid a thundering herd in g_lock_unlock > >Only notify the first 5 pending lock waiters. This avoids a thundering herd >problem that is really nasty in a cluster. It also makes acquiring a lock a bit >more FIFO, lock waiters are added to the end of the array. >(cherry picked from commit 725b3654f831fbe0388cc09f46269903c9eef1d7) >--- > source3/lib/g_lock.c | 17 ++++++++++++++++- > 1 files changed, 16 insertions(+), 1 deletions(-) > >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index e262025..512c068 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -530,13 +530,23 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, > } > > if ((lock_type & G_LOCK_PENDING) == 0) { >+ int num_wakeups = 0; >+ > /* >- * We've been the lock holder. Tell all others to retry. >+ * We've been the lock holder. Others to retry. Don't >+ * tell all others to avoid a thundering herd. In case >+ * this leads to a complete stall because we miss some >+ * processes, the loop in g_lock_lock tries at least >+ * once a minute. 
> */ >+ > for (i=0; i<num_locks; i++) { > if ((locks[i].lock_type & G_LOCK_PENDING) == 0) { > continue; > } >+ if (!process_exists(locks[i].pid)) { >+ continue; >+ } > > /* > * Ping all waiters to retry >@@ -549,6 +559,11 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, > procid_str(talloc_tos(), > &locks[i].pid), > nt_errstr(status))); >+ } else { >+ num_wakeups += 1; >+ } >+ if (num_wakeups > 5) { >+ break; > } > } > } >-- >1.6.3.3 > > >From fe5db728b6eb7e647458e0b9921ce7ce29c1a1d7 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Tue, 16 Feb 2010 12:28:53 +0100 >Subject: [PATCH 27/32] s3: Avoid starving locks when many processes die at the same time > >In g_lock_unlock we have a little race between the process_exists and >messaging_send call: We only send to 5 waiters now, they all might have died >between us checking their existence and sending the message. This change makes >g_lock_lock retry at least once every minute. >(cherry picked from commit be919d6faed198cdc29322a4d9491946c0b044b3) >--- > source3/lib/g_lock.c | 10 ++++------ > 1 files changed, 4 insertions(+), 6 deletions(-) > >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index 512c068..33ebe94 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -298,7 +298,6 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > NTSTATUS status; > bool retry = false; > struct timeval timeout_end; >- struct timeval timeout_remaining; > struct timeval time_now; > > DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type, >@@ -339,6 +338,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > fd_set *r_fds = NULL; > int max_fd = 0; > int ret; >+ struct timeval select_timeout; > > status = g_lock_trylock(ctx, name, lock_type); > if (NT_STATUS_IS_OK(status)) { >@@ -395,12 +395,10 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > } > #endif > >- time_now = timeval_current(); >- 
timeout_remaining = timeval_until(&time_now, &timeout_end); >+ select_timeout = timeval_set(60, 0); > > ret = sys_select(max_fd + 1, r_fds, NULL, NULL, >- &timeout_remaining); >- >+ &select_timeout); > if (ret == -1) { > if (errno != EINTR) { > DEBUG(1, ("error calling select: %s\n", >@@ -421,7 +419,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > break; > } else { > DEBUG(10, ("select returned 0 but timeout not " >- "not expired: strange - retrying\n")); >+ "not expired, retrying\n")); > } > } else if (ret != 1) { > DEBUG(1, ("invalid return code of select: %d\n", ret)); >-- >1.6.3.3 > > >From 16965425a0aa2d213e7ddadc898b09b2399617de Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Tue, 16 Feb 2010 12:31:58 +0100 >Subject: [PATCH 28/32] s3: Slightly increase parallelism in g_lock > >There's no need to still hold the g_lock tdb-level lock while telling the >waiters to retry >(cherry picked from commit 83542d973ca771353109c7da4b0391d6ba910f53) >--- > source3/lib/g_lock.c | 8 +++++++- > 1 files changed, 7 insertions(+), 1 deletions(-) > >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index 33ebe94..add670c 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -527,6 +527,8 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, > goto done; > } > >+ TALLOC_FREE(rec); >+ > if ((lock_type & G_LOCK_PENDING) == 0) { > int num_wakeups = 0; > >@@ -566,9 +568,13 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, > } > } > done: >+ /* >+ * For the error path, TALLOC_FREE(rec) as well. In the good >+ * path we have already freed it. 
>+ */ >+ TALLOC_FREE(rec); > > TALLOC_FREE(locks); >- TALLOC_FREE(rec); > return status; > } > >-- >1.6.3.3 > > >From a6c06b24ba0730b41240d1db652012f628ff1a73 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Tue, 16 Feb 2010 15:21:25 +0100 >Subject: [PATCH 29/32] s3: Fix timeout calculation if g_lock_lock is given a timeout < 60s > >Detected while showing this code to obnox :-) >(cherry picked from commit f8b246e44c819b909b23b4b98ef0999c84d2f4ff) >--- > source3/lib/g_lock.c | 7 ++++++- > 1 files changed, 6 insertions(+), 1 deletions(-) > >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index add670c..e4c6d7c 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -338,7 +338,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > fd_set *r_fds = NULL; > int max_fd = 0; > int ret; >- struct timeval select_timeout; >+ struct timeval timeout_remaining, select_timeout; > > status = g_lock_trylock(ctx, name, lock_type); > if (NT_STATUS_IS_OK(status)) { >@@ -395,8 +395,13 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, > } > #endif > >+ time_now = timeval_current(); >+ timeout_remaining = timeval_until(&time_now, &timeout_end); > select_timeout = timeval_set(60, 0); > >+ select_timeout = timeval_min(&select_timeout, >+ &timeout_remaining); >+ > ret = sys_select(max_fd + 1, r_fds, NULL, NULL, > &select_timeout); > if (ret == -1) { >-- >1.6.3.3 > > >From aee580c4722377e8d1260467ad652937cee2de29 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Fri, 12 Mar 2010 14:22:54 +0100 >Subject: [PATCH 30/32] s3: Add "g_lock_do" as a convenience wrapper function > (cherry picked from commit 79100c242153ea174a4405afd45cbf635da313aa) > >--- > source3/include/g_lock.h | 4 +++ > source3/lib/g_lock.c | 64 ++++++++++++++++++++++++++++++++++++++++++++ > source3/utils/net_g_lock.c | 49 +++++++++++++++------------------ > 3 files changed, 90 insertions(+), 27 deletions(-) > >diff --git 
a/source3/include/g_lock.h b/source3/include/g_lock.h >index c0eed38..b2d2c2d 100644 >--- a/source3/include/g_lock.h >+++ b/source3/include/g_lock.h >@@ -43,6 +43,10 @@ NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name); > NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name, > struct server_id *pid); > >+NTSTATUS g_lock_do(const char *name, enum g_lock_type lock_type, >+ struct timeval timeout, >+ void (*fn)(void *private_data), void *private_data); >+ > int g_lock_locks(struct g_lock_ctx *ctx, > int (*fn)(const char *name, void *private_data), > void *private_data); >diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c >index e4c6d7c..1726047 100644 >--- a/source3/lib/g_lock.c >+++ b/source3/lib/g_lock.c >@@ -700,3 +700,67 @@ NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name, > } > return NT_STATUS_OK; > } >+ >+static bool g_lock_init_all(TALLOC_CTX *mem_ctx, >+ struct tevent_context **pev, >+ struct messaging_context **pmsg, >+ struct g_lock_ctx **pg_ctx) >+{ >+ struct tevent_context *ev = NULL; >+ struct messaging_context *msg = NULL; >+ struct g_lock_ctx *g_ctx = NULL; >+ >+ ev = tevent_context_init(mem_ctx); >+ if (ev == NULL) { >+ d_fprintf(stderr, "ERROR: could not init event context\n"); >+ goto fail; >+ } >+ msg = messaging_init(mem_ctx, procid_self(), ev); >+ if (msg == NULL) { >+ d_fprintf(stderr, "ERROR: could not init messaging context\n"); >+ goto fail; >+ } >+ g_ctx = g_lock_ctx_init(mem_ctx, msg); >+ if (g_ctx == NULL) { >+ d_fprintf(stderr, "ERROR: could not init g_lock context\n"); >+ goto fail; >+ } >+ >+ *pev = ev; >+ *pmsg = msg; >+ *pg_ctx = g_ctx; >+ return true; >+fail: >+ TALLOC_FREE(g_ctx); >+ TALLOC_FREE(msg); >+ TALLOC_FREE(ev); >+ return false; >+} >+ >+NTSTATUS g_lock_do(const char *name, enum g_lock_type lock_type, >+ struct timeval timeout, >+ void (*fn)(void *private_data), void *private_data) >+{ >+ struct tevent_context *ev = NULL; >+ struct messaging_context *msg = NULL; >+ struct g_lock_ctx 
*g_ctx = NULL; >+ NTSTATUS status; >+ >+ if (!g_lock_init_all(talloc_tos(), &ev, &msg, &g_ctx)) { >+ status = NT_STATUS_ACCESS_DENIED; >+ goto done; >+ } >+ >+ status = g_lock_lock(g_ctx, name, lock_type, timeout); >+ if (!NT_STATUS_IS_OK(status)) { >+ goto done; >+ } >+ fn(private_data); >+ g_lock_unlock(g_ctx, name); >+ >+done: >+ TALLOC_FREE(g_ctx); >+ TALLOC_FREE(msg); >+ TALLOC_FREE(ev); >+ return status; >+} >diff --git a/source3/utils/net_g_lock.c b/source3/utils/net_g_lock.c >index f30ed33..a070510 100644 >--- a/source3/utils/net_g_lock.c >+++ b/source3/utils/net_g_lock.c >@@ -57,17 +57,24 @@ fail: > return false; > } > >+struct net_g_lock_do_state { >+ const char *cmd; >+ int result; >+}; >+ >+static void net_g_lock_do_fn(void *private_data) >+{ >+ struct net_g_lock_do_state *state = >+ (struct net_g_lock_do_state *)private_data; >+ state->result = system(state->cmd); >+} > > static int net_g_lock_do(struct net_context *c, int argc, const char **argv) > { >- struct tevent_context *ev = NULL; >- struct messaging_context *msg = NULL; >- struct g_lock_ctx *g_ctx = NULL; >+ struct net_g_lock_do_state state; > const char *name, *cmd; >- int timeout, res; >- bool locked = false; >+ int timeout; > NTSTATUS status; >- int ret = -1; > > if (argc != 3) { > d_printf("Usage: net g_lock do <lockname> <timeout> " >@@ -78,38 +85,26 @@ static int net_g_lock_do(struct net_context *c, int argc, const char **argv) > timeout = atoi(argv[1]); > cmd = argv[2]; > >- if (!net_g_lock_init(talloc_tos(), &ev, &msg, &g_ctx)) { >- goto done; >- } >+ state.cmd = cmd; >+ state.result = -1; > >- status = g_lock_lock(g_ctx, name, G_LOCK_WRITE, >- timeval_set(timeout / 1000, timeout % 1000)); >+ status = g_lock_do(name, G_LOCK_WRITE, >+ timeval_set(timeout / 1000, timeout % 1000), >+ net_g_lock_do_fn, &state); > if (!NT_STATUS_IS_OK(status)) { >- d_fprintf(stderr, "ERROR: Could not get lock: %s\n", >+ d_fprintf(stderr, "ERROR: g_lock_do failed: %s\n", > nt_errstr(status)); > goto done; > } 
>- locked = true; >- >- res = system(cmd); >- >- if (res == -1) { >+ if (state.result == -1) { > d_fprintf(stderr, "ERROR: system() returned %s\n", > strerror(errno)); > goto done; > } >- d_fprintf(stderr, "command returned %d\n", res); >- >- ret = 0; >+ d_fprintf(stderr, "command returned %d\n", state.result); > > done: >- if (locked) { >- g_lock_unlock(g_ctx, name); >- } >- TALLOC_FREE(g_ctx); >- TALLOC_FREE(msg); >- TALLOC_FREE(ev); >- return ret; >+ return state.result; > } > > static int net_g_lock_dump_fn(struct server_id pid, enum g_lock_type lock_type, >-- >1.6.3.3 > > >From 3b6283cce22113f0f46dbebdc8c58bce4c0f8cb0 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Fri, 5 Mar 2010 15:28:39 +0100 >Subject: [PATCH 31/32] s3: db->persistent==true was handled earlier, make this more obvious > (cherry picked from commit c7835a4845bbc7e4d340a75229866b2d4946f6eb) > >--- > source3/lib/dbwrap_ctdb.c | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index ddc8868..05ac777 100644 >--- a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -1050,7 +1050,7 @@ static struct db_record *db_ctdb_fetch_locked(struct db_context *db, > return db_ctdb_fetch_locked_persistent(ctx, mem_ctx, key); > } > >- return fetch_locked_internal(ctx, mem_ctx, key, db->persistent); >+ return fetch_locked_internal(ctx, mem_ctx, key, false); > } > > /* >-- >1.6.3.3 > > >From 7b200cd8809c22aa1978fa8919b002a26053c483 Mon Sep 17 00:00:00 2001 >From: Volker Lendecke <vl@samba.org> >Date: Fri, 5 Mar 2010 15:30:22 +0100 >Subject: [PATCH 32/32] s3: Remove the unused parameter "persistent" from fetch_locked_internal > (cherry picked from commit a5db27936e9c6aad99300ea46808481803f57e08) > >--- > source3/lib/dbwrap_ctdb.c | 10 ++-------- > 1 files changed, 2 insertions(+), 8 deletions(-) > >diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c >index 05ac777..938a312 100644 >--- 
a/source3/lib/dbwrap_ctdb.c >+++ b/source3/lib/dbwrap_ctdb.c >@@ -52,11 +52,6 @@ struct db_ctdb_rec { > struct ctdb_ltdb_header header; > }; > >-static struct db_record *fetch_locked_internal(struct db_ctdb_ctx *ctx, >- TALLOC_CTX *mem_ctx, >- TDB_DATA key, >- bool persistent); >- > static NTSTATUS tdb_error_to_ntstatus(struct tdb_context *tdb) > { > NTSTATUS status; >@@ -921,8 +916,7 @@ static int db_ctdb_record_destr(struct db_record* data) > > static struct db_record *fetch_locked_internal(struct db_ctdb_ctx *ctx, > TALLOC_CTX *mem_ctx, >- TDB_DATA key, >- bool persistent) >+ TDB_DATA key) > { > struct db_record *result; > struct db_ctdb_rec *crec; >@@ -1050,7 +1044,7 @@ static struct db_record *db_ctdb_fetch_locked(struct db_context *db, > return db_ctdb_fetch_locked_persistent(ctx, mem_ctx, key); > } > >- return fetch_locked_internal(ctx, mem_ctx, key, false); >+ return fetch_locked_internal(ctx, mem_ctx, key); > } > > /* >-- >1.6.3.3 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Flags:
vl
:
review+
Actions:
View
Attachments on
bug 7232
: 5581