From dc4a5b3fb6d0281da524f01cddfd14febf40c10c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 25 Nov 2015 10:17:34 +0100 Subject: [PATCH 1/4] dbwrap_rbt: use talloc_zero_size() instead of a partial ZERO_STRUCT() BUG: https://bugzilla.samba.org/show_bug.cgi?id=11375 BUG: https://bugzilla.samba.org/show_bug.cgi?id=11394 Signed-off-by: Stefan Metzmacher --- lib/dbwrap/dbwrap_rbt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/dbwrap/dbwrap_rbt.c b/lib/dbwrap/dbwrap_rbt.c index 0764a2c..37d9649 100644 --- a/lib/dbwrap/dbwrap_rbt.c +++ b/lib/dbwrap/dbwrap_rbt.c @@ -153,7 +153,7 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag) return NT_STATUS_INSUFFICIENT_RESOURCES; } - node = talloc_size(db_ctx, reclen); + node = talloc_zero_size(db_ctx, reclen); if (node == NULL) { return NT_STATUS_NO_MEMORY; } @@ -172,8 +172,6 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag) */ } - ZERO_STRUCT(node->rb_node); - node->keysize = rec->key.dsize; node->valuesize = data.dsize; -- 1.9.1 From a467a9c9dff5eb6c26e23da3dc11d4923b93cf58 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 25 Nov 2015 09:22:08 +0100 Subject: [PATCH 2/4] dbwrap_rbt: add nested traverse protection Multiple dbwrap_traverse_read() calls are possible. store() and delete() on a fetch locked record are rejected during dbwrap_traverse_read(). A dbwrap_traverse() within a dbwrap_traverse_read() behaves like a dbwrap_traverse_read(). Nested dbwrap_traverse() calls are not possible. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11375 BUG: https://bugzilla.samba.org/show_bug.cgi?id=11394 Signed-off-by: Stefan Metzmacher --- lib/dbwrap/dbwrap_rbt.c | 71 ++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/lib/dbwrap/dbwrap_rbt.c b/lib/dbwrap/dbwrap_rbt.c index 37d9649..622ab0d 100644 --- a/lib/dbwrap/dbwrap_rbt.c +++ b/lib/dbwrap/dbwrap_rbt.c @@ -27,6 +27,8 @@ struct db_rbt_ctx { struct rb_root tree; + size_t traverse_read; + bool traverse_write; }; struct db_rbt_rec { @@ -126,6 +128,10 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag) ssize_t reclen; TDB_DATA this_key, this_val; + if (db_ctx->traverse_read > 0) { + return NT_STATUS_MEDIA_WRITE_PROTECTED; + } + if (rec_priv->node != NULL) { /* @@ -222,6 +228,10 @@ static NTSTATUS db_rbt_delete(struct db_record *rec) rec->db->private_data, struct db_rbt_ctx); struct db_rbt_rec *rec_priv = (struct db_rbt_rec *)rec->private_data; + if (db_ctx->traverse_read > 0) { + return NT_STATUS_MEDIA_WRITE_PROTECTED; + } + if (rec_priv->node == NULL) { return NT_STATUS_OK; } @@ -232,16 +242,6 @@ static NTSTATUS db_rbt_delete(struct db_record *rec) return NT_STATUS_OK; } -static NTSTATUS db_rbt_store_deny(struct db_record *rec, TDB_DATA data, int flag) -{ - return NT_STATUS_MEDIA_WRITE_PROTECTED; -} - -static NTSTATUS db_rbt_delete_deny(struct db_record *rec) -{ - return NT_STATUS_MEDIA_WRITE_PROTECTED; -} - struct db_rbt_search_result { TDB_DATA key; TDB_DATA val; @@ -414,13 +414,8 @@ static int db_rbt_traverse_internal(struct db_context *db, ZERO_STRUCT(rec); rec.db = db; rec.private_data = &rec_priv; - if (rw) { - rec.store = db_rbt_store; - rec.delete_rec = db_rbt_delete; - } else { - rec.store = db_rbt_store_deny; - rec.delete_rec = db_rbt_delete_deny; - } + rec.store = db_rbt_store; + rec.delete_rec = db_rbt_delete; db_rbt_parse_node(rec_priv.node, &rec.key, &rec.value); ret = f(&rec, private_data); @@ -440,18 +435,21 @@ static int db_rbt_traverse_internal(struct db_context *db, return db_rbt_traverse_internal(db, rb_right, f, private_data, count, rw); } -static int db_rbt_traverse(struct db_context *db, - int (*f)(struct db_record *db, - void *private_data), - void *private_data) +static int db_rbt_traverse_read(struct db_context *db, + int (*f)(struct db_record *db, + void *private_data), + void *private_data) { struct db_rbt_ctx *ctx = talloc_get_type_abort( db->private_data, struct db_rbt_ctx); uint32_t count = 0; + int ret; - int ret = db_rbt_traverse_internal(db, ctx->tree.rb_node, - f, private_data, &count, - true /* rw */); + ctx->traverse_read++; + ret = db_rbt_traverse_internal(db, ctx->tree.rb_node, + f, private_data, &count, + false /* rw */); + ctx->traverse_read--; if (ret != 0) { return -1; } @@ -461,18 +459,29 @@ static int db_rbt_traverse(struct db_context *db, return count; } -static int db_rbt_traverse_read(struct db_context *db, - int (*f)(struct db_record *db, - void *private_data), - void *private_data) +static int db_rbt_traverse(struct db_context *db, + int (*f)(struct db_record *db, + void *private_data), + void *private_data) { struct db_rbt_ctx *ctx = talloc_get_type_abort( db->private_data, struct db_rbt_ctx); uint32_t count = 0; + int ret; + + if (ctx->traverse_write) { + return -1; + }; + + if (ctx->traverse_read > 0) { + return db_rbt_traverse_read(db, f, private_data); + } - int ret = db_rbt_traverse_internal(db, ctx->tree.rb_node, - f, private_data, &count, - false /* rw */); + ctx->traverse_write = true; + ret = db_rbt_traverse_internal(db, ctx->tree.rb_node, + f, private_data, &count, + true /* rw */); + ctx->traverse_write = false; if (ret != 0) { return -1; } -- 1.9.1 From 43dbea33ecc00c48c2529ada666fc61ec4fb8da4 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 25 Nov 2015 09:22:08 +0100 Subject: [PATCH 3/4] dbwrap_rbt: fix modifying the db during traverse We delete and add of records rebalace the tree, but our traverse code doesn't handle that and skips records randomly. We maintain records in a linked list for now in addition to the rbtree and use that list during traverse. This add a bit overhead, but at least it works reliable. If someone finds a way to do reliable traverse with the rebalanced tree, we can replace this commit. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11375 BUG: https://bugzilla.samba.org/show_bug.cgi?id=11394 Signed-off-by: Stefan Metzmacher --- lib/dbwrap/dbwrap_rbt.c | 104 ++++++++++++++++++++++++++---------------------- 1 file changed, 57 insertions(+), 47 deletions(-) diff --git a/lib/dbwrap/dbwrap_rbt.c b/lib/dbwrap/dbwrap_rbt.c index 622ab0d..3b5e589 100644 --- a/lib/dbwrap/dbwrap_rbt.c +++ b/lib/dbwrap/dbwrap_rbt.c @@ -22,13 +22,15 @@ #include "dbwrap/dbwrap_private.h" #include "dbwrap/dbwrap_rbt.h" #include "../lib/util/rbtree.h" +#include "../lib/util/dlinklist.h" #define DBWRAP_RBT_ALIGN(_size_) (((_size_)+15)&~15) struct db_rbt_ctx { struct rb_root tree; + struct db_rbt_node *nodes; size_t traverse_read; - bool traverse_write; + struct db_rbt_node **traverse_nextp; }; struct db_rbt_rec { @@ -40,6 +42,7 @@ struct db_rbt_rec { struct db_rbt_node { struct rb_node rb_node; size_t keysize, valuesize; + struct db_rbt_node *prev, *next; }; /* @@ -123,7 +126,8 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag) struct db_rbt_node *node; struct rb_node ** p; - struct rb_node * parent; + struct rb_node *parent = NULL; + struct db_rbt_node *parent_node = NULL; ssize_t reclen; TDB_DATA this_key, this_val; @@ -165,12 +169,19 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag) } if (rec_priv->node != NULL) { + if (db_ctx->traverse_nextp != NULL) { + if (*db_ctx->traverse_nextp == rec_priv->node) { + *db_ctx->traverse_nextp = node; + } + } + /* * We need to delete the key from the tree and start fresh, * there's not enough space in the existing record */ rb_erase(&rec_priv->node->rb_node, &db_ctx->tree); + DLIST_REMOVE(db_ctx->nodes, rec_priv->node); /* * Keep the existing node around for a while: If the record @@ -197,10 +208,11 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag) TDB_DATA search_key, search_val; int res; - parent = (*p); - r = db_rbt2node(*p); + parent = (*p); + parent_node = r; + db_rbt_parse_node(r, &search_key, &search_val); res = db_rbt_compare(this_key, search_key); @@ -217,6 +229,7 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag) } rb_link_node(&node->rb_node, parent, p); + DLIST_ADD_AFTER(db_ctx->nodes, node, parent_node); rb_insert_color(&node->rb_node, &db_ctx->tree); return NT_STATUS_OK; @@ -236,7 +249,14 @@ static NTSTATUS db_rbt_delete(struct db_record *rec) return NT_STATUS_OK; } + if (db_ctx->traverse_nextp != NULL) { + if (*db_ctx->traverse_nextp == rec_priv->node) { + *db_ctx->traverse_nextp = rec_priv->node->next; + } + } + rb_erase(&rec_priv->node->rb_node, &db_ctx->tree); + DLIST_REMOVE(db_ctx->nodes, rec_priv->node); TALLOC_FREE(rec_priv->node); return NT_STATUS_OK; @@ -383,56 +403,48 @@ static NTSTATUS db_rbt_parse_record(struct db_context *db, TDB_DATA key, } static int db_rbt_traverse_internal(struct db_context *db, - struct rb_node *n, int (*f)(struct db_record *db, void *private_data), void *private_data, uint32_t* count, bool rw) { - struct rb_node *rb_right; - struct rb_node *rb_left; - struct db_record rec; - struct db_rbt_rec rec_priv; + struct db_rbt_ctx *ctx = talloc_get_type_abort( + db->private_data, struct db_rbt_ctx); + struct db_rbt_node *cur = NULL; + struct db_rbt_node *next = NULL; int ret; - if (n == NULL) { - return 0; - } - - rb_left = n->rb_left; - rb_right = n->rb_right; + for (cur = ctx->nodes; cur != NULL; cur = next) { + struct db_record rec; + struct db_rbt_rec rec_priv; - ret = db_rbt_traverse_internal(db, rb_left, f, private_data, count, rw); - if (ret != 0) { - return ret; - } + rec_priv.node = cur; + next = rec_priv.node->next; - rec_priv.node = db_rbt2node(n); - /* n might be altered by the callback function */ - n = NULL; + ZERO_STRUCT(rec); + rec.db = db; + rec.private_data = &rec_priv; + rec.store = db_rbt_store; + rec.delete_rec = db_rbt_delete; + db_rbt_parse_node(rec_priv.node, &rec.key, &rec.value); - ZERO_STRUCT(rec); - rec.db = db; - rec.private_data = &rec_priv; - rec.store = db_rbt_store; - rec.delete_rec = db_rbt_delete; - db_rbt_parse_node(rec_priv.node, &rec.key, &rec.value); - - ret = f(&rec, private_data); - (*count) ++; - if (ret != 0) { - return ret; - } - - if (rec_priv.node != NULL) { - /* - * If the current record is still there - * we should take the current rb_right. - */ - rb_right = rec_priv.node->rb_node.rb_right; + if (rw) { + ctx->traverse_nextp = &next; + } + ret = f(&rec, private_data); + (*count) ++; + if (rw) { + ctx->traverse_nextp = NULL; + } + if (ret != 0) { + return ret; + } + if (rec_priv.node != NULL) { + next = rec_priv.node->next; + } } - return db_rbt_traverse_internal(db, rb_right, f, private_data, count, rw); + return 0; } static int db_rbt_traverse_read(struct db_context *db, @@ -446,7 +458,7 @@ static int db_rbt_traverse_read(struct db_context *db, int ret; ctx->traverse_read++; - ret = db_rbt_traverse_internal(db, ctx->tree.rb_node, + ret = db_rbt_traverse_internal(db, f, private_data, &count, false /* rw */); ctx->traverse_read--; @@ -469,7 +481,7 @@ static int db_rbt_traverse(struct db_context *db, uint32_t count = 0; int ret; - if (ctx->traverse_write) { + if (ctx->traverse_nextp != NULL) { return -1; }; @@ -477,11 +489,9 @@ static int db_rbt_traverse(struct db_context *db, return db_rbt_traverse_read(db, f, private_data); } - ctx->traverse_write = true; - ret = db_rbt_traverse_internal(db, ctx->tree.rb_node, + ret = db_rbt_traverse_internal(db, f, private_data, &count, true /* rw */); - ctx->traverse_write = false; if (ret != 0) { return -1; } -- 1.9.1 From 1d0acd956a3fd886d92c22e61f8793a987043fec Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 25 Nov 2015 00:13:17 +0100 Subject: [PATCH 4/4] s3:torture: add traverse testing to LOCAL-RBTREE BUG: https://bugzilla.samba.org/show_bug.cgi?id=11375 BUG: https://bugzilla.samba.org/show_bug.cgi?id=11394 Signed-off-by: Stefan Metzmacher --- source3/torture/torture.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/source3/torture/torture.c b/source3/torture/torture.c index ef75d214..6c34e4c 100644 --- a/source3/torture/torture.c +++ b/source3/torture/torture.c @@ -8347,11 +8347,29 @@ static bool rbt_testval(struct db_context *db, const char *key, return ret; } +static int local_rbtree_traverse_read(struct db_record *rec, void *private_data) +{ + int *count2 = (int *)private_data; + (*count2)++; + return 0; +} + +static int local_rbtree_traverse_delete(struct db_record *rec, void *private_data) +{ + int *count2 = (int *)private_data; + (*count2)++; + dbwrap_record_delete(rec); + return 0; +} + static bool run_local_rbtree(int dummy) { struct db_context *db; bool ret = false; int i; + NTSTATUS status; + int count = 0; + int count2 = 0; db = db_open_rbt(NULL); @@ -8394,6 +8412,27 @@ static bool run_local_rbtree(int dummy) } ret = true; + count = 0; count2 = 0; + status = dbwrap_traverse_read(db, local_rbtree_traverse_read, + &count2, &count); + printf("%s: read1: %d %d, %s\n", __func__, count, count2, nt_errstr(status)); + if ((count != count2) || (count != 1000)) { + ret = false; + } + count = 0; count2 = 0; + status = dbwrap_traverse(db, local_rbtree_traverse_delete, + &count2, &count); + printf("%s: delete: %d %d, %s\n", __func__, count, count2, nt_errstr(status)); + if ((count != count2) || (count != 1000)) { + ret = false; + } + count = 0; count2 = 0; + status = dbwrap_traverse_read(db, local_rbtree_traverse_read, + &count2, &count); + printf("%s: read2: %d %d, %s\n", __func__, count, count2, nt_errstr(status)); + if ((count != count2) || (count != 0)) { + ret = false; + } done: TALLOC_FREE(db); -- 1.9.1