The Samba-Bugzilla – Attachment 12287 Details for Bug 5324
Reduce the performance penalty of --xattrs on Mac OS X
[patch] Possible patch for master

Description: Possible patch for master
Filename: rsync-xattr-hashtable-try-03.patches.txt
MIME Type: text/plain
Creator: Stefan Metzmacher
Created: 2016-07-25 15:19:28 UTC
Size: 27.82 KB
Flags: patch, obsolete
>From da8218c4ec5dfc31fb640951ad9de7b76841b2bf Mon Sep 17 00:00:00 2001
>From: Stefan Metzmacher <metze@samba.org>
>Date: Fri, 22 Jul 2016 14:43:27 +0200
>Subject: [PATCH 1/5] xattrs: add some const to empty_xattr
>
>BUG: https://bugzilla.samba.org/show_bug.cgi?id=5324
>
>Signed-off-by: Stefan Metzmacher <metze@samba.org>
>---
> xattrs.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
>diff --git a/xattrs.c b/xattrs.c
>index 0658afb..ed8c81b 100644
>--- a/xattrs.c
>+++ b/xattrs.c
>@@ -82,7 +82,7 @@ typedef struct {
> static size_t namebuf_len = 0;
> static char *namebuf = NULL;
>
>-static item_list empty_xattr = EMPTY_ITEM_LIST;
>+static const item_list empty_xattr = EMPTY_ITEM_LIST;
> static item_list rsync_xal_l = EMPTY_ITEM_LIST;
>
> static size_t prior_xattr_count = (size_t)-1;
>@@ -466,7 +466,7 @@ int send_xattr(int f, stat_x *sxp)
> * need so that send_xattr_request() can tell the sender about them. */
> int xattr_diff(struct file_struct *file, stat_x *sxp, int find_all)
> {
>- item_list *lst = rsync_xal_l.items;
>+ const item_list *lst = rsync_xal_l.items;
> rsync_xa *snd_rxa, *rec_rxa;
> int snd_cnt, rec_cnt;
> int cmp, same, xattrs_equal = 1;
>--
>1.9.1
>
>
>From e72d18eaaf5ac3686eb160c1956f460f09b5c8c7 Mon Sep 17 00:00:00 2001
>From: Stefan Metzmacher <metze@samba.org>
>Date: Fri, 22 Jul 2016 18:14:40 +0200
>Subject: [PATCH 2/5] xattrs: let rsync_xal_store() return ndx.
>
>BUG: https://bugzilla.samba.org/show_bug.cgi?id=5324
>
>Signed-off-by: Stefan Metzmacher <metze@samba.org>
>---
> xattrs.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
>diff --git a/xattrs.c b/xattrs.c
>index ed8c81b..88da5ee 100644
>--- a/xattrs.c
>+++ b/xattrs.c
>@@ -398,8 +398,9 @@ static int find_matching_xattr(item_list *xalp)
> }
>
> /* Store *xalp on the end of rsync_xal_l */
>-static void rsync_xal_store(item_list *xalp)
>+static int rsync_xal_store(item_list *xalp)
> {
>+ int ndx = rsync_xal_l.count; /* pre-incremented count */
> item_list *new_lst = EXPAND_ITEM_LIST(&rsync_xal_l, item_list, RSYNC_XAL_LIST_INITIAL);
> /* Since the following call starts a new list, we know it will hold the
> * entire initial-count, not just enough space for one new item. */
>@@ -408,6 +409,7 @@ static void rsync_xal_store(item_list *xalp)
> memcpy(new_lst->items, xalp->items, xalp->count * sizeof (rsync_xa));
> new_lst->count = xalp->count;
> xalp->count = 0;
>+ return ndx;
> }
>
> /* Send the make_xattr()-generated xattr list for this flist entry. */
>@@ -454,8 +456,7 @@ int send_xattr(int f, stat_x *sxp)
> else
> write_bigbuf(f, rxa->datum, rxa->datum_len);
> }
>- ndx = rsync_xal_l.count; /* pre-incremented count */
>- rsync_xal_store(sxp->xattr); /* adds item to rsync_xal_l */
>+ ndx = rsync_xal_store(sxp->xattr); /* adds item to rsync_xal_l */
> }
>
> return ndx;
>@@ -769,8 +770,7 @@ void receive_xattr(int f, struct file_struct *file)
> if (need_sort && count > 1)
> qsort(temp_xattr.items, count, sizeof (rsync_xa), rsync_xal_compare_names);
>
>- ndx = rsync_xal_l.count; /* pre-incremented count */
>- rsync_xal_store(&temp_xattr); /* adds item to rsync_xal_l */
>+ ndx = rsync_xal_store(&temp_xattr); /* adds item to rsync_xal_l */
>
> F_XATTR(file) = ndx;
> }
>--
>1.9.1
>
>
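Patch 2 is a small API cleanup: instead of every caller reading the global count immediately before the call, rsync_xal_store() itself returns the index of the entry it just appended. A minimal standalone sketch of the same pattern follows; the item_list and list_store() names here are illustrative stand-ins, not rsync's real EXPAND_ITEM_LIST machinery.

/* Standalone sketch of the patch-2 refactor: the store function returns
 * the index of the element it appends, so callers no longer read the
 * global count themselves. Hypothetical types, not rsync's real code. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    void *items;     /* malloc'ed array of fixed-size elements */
    size_t count;    /* elements in use */
    size_t malloced; /* elements allocated */
} item_list;

/* Append one element of 'size' bytes; return its index, i.e. the value
 * of list->count before the append (the "pre-incremented count"). */
static int list_store(item_list *list, const void *elem, size_t size)
{
    int ndx = (int)list->count; /* pre-incremented count */
    if (list->count == list->malloced) {
        list->malloced = list->malloced ? list->malloced * 2 : 16;
        list->items = realloc(list->items, list->malloced * size);
        if (!list->items) {
            fprintf(stderr, "out of memory\n");
            exit(1);
        }
    }
    memcpy((char *)list->items + list->count * size, elem, size);
    list->count++;
    return ndx;
}

int main(void)
{
    item_list l = { NULL, 0, 0 };
    int v = 42;
    /* Before patch 2 a caller had to do:
     *     ndx = l.count;              // pre-incremented count
     *     list_store(&l, &v, sizeof v);
     * Afterwards the two lines collapse into one: */
    int ndx = list_store(&l, &v, sizeof v);
    printf("stored at index %d\n", ndx);
    free(l.items);
    return 0;
}

Keeping the "pre-incremented count" logic in one place also pays off in patch 5, which records the same index in the new hashtable.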
>From 1693992b5eb26c6791f847b6c2ff15fc1af3f0c5 Mon Sep 17 00:00:00 2001
>From: Stefan Metzmacher <metze@samba.org>
>Date: Fri, 22 Jul 2016 18:32:04 +0200
>Subject: [PATCH 3/5] xattrs: introduce a rsync_xa_list struct as layer between
> two nested item_lists
>
>We have the global 'item_list rsync_xal_l', which maintains an array
>of rsync_xa_list structures, one per file.
>
>Each rsync_xa_list structure maintains an array of rsync_xa structures,
>each of which represents a single xattr with name and value.
>
>BUG: https://bugzilla.samba.org/show_bug.cgi?id=5324
>
>Signed-off-by: Stefan Metzmacher <metze@samba.org>
>---
> xattrs.c | 73 +++++++++++++++++++++++++++++++++++++++++-----------------------
> 1 file changed, 47 insertions(+), 26 deletions(-)
>
>diff --git a/xattrs.c b/xattrs.c
>index 88da5ee..fa4128d 100644
>--- a/xattrs.c
>+++ b/xattrs.c
>@@ -79,9 +79,16 @@ typedef struct {
> int num;
> } rsync_xa;
>
>+typedef struct {
>+ item_list xa_items;
>+} rsync_xa_list;
>+
> static size_t namebuf_len = 0;
> static char *namebuf = NULL;
>
>+static const rsync_xa_list empty_xa_list = {
>+ .xa_items = EMPTY_ITEM_LIST,
>+};
> static const item_list empty_xattr = EMPTY_ITEM_LIST;
> static item_list rsync_xal_l = EMPTY_ITEM_LIST;
>
>@@ -360,17 +367,19 @@ int copy_xattrs(const char *source, const char *dest)
> return 0;
> }
>
>-static int find_matching_xattr(item_list *xalp)
>+static int find_matching_xattr(const item_list *xalp)
> {
>- size_t i, j;
>- item_list *lst = rsync_xal_l.items;
>+ const rsync_xa_list *glst = rsync_xal_l.items;
>+ size_t i;
>
> for (i = 0; i < rsync_xal_l.count; i++) {
>- rsync_xa *rxas1 = lst[i].items;
>- rsync_xa *rxas2 = xalp->items;
>+ const item_list *lst = &glst[i].xa_items;
>+ const rsync_xa *rxas1 = lst->items;
>+ const rsync_xa *rxas2 = xalp->items;
>+ size_t j;
>
> /* Wrong number of elements? */
>- if (lst[i].count != xalp->count)
>+ if (lst->count != xalp->count)
> continue;
> /* any elements different? */
> for (j = 0; j < xalp->count; j++) {
>@@ -401,13 +410,13 @@ static int find_matching_xattr(item_list *xalp)
> static int rsync_xal_store(item_list *xalp)
> {
> int ndx = rsync_xal_l.count; /* pre-incremented count */
>- item_list *new_lst = EXPAND_ITEM_LIST(&rsync_xal_l, item_list, RSYNC_XAL_LIST_INITIAL);
>+ rsync_xa_list *new_list = EXPAND_ITEM_LIST(&rsync_xal_l, rsync_xa_list, RSYNC_XAL_LIST_INITIAL);
> /* Since the following call starts a new list, we know it will hold the
> * entire initial-count, not just enough space for one new item. */
>- *new_lst = empty_xattr;
>- (void)EXPAND_ITEM_LIST(new_lst, rsync_xa, xalp->count);
>- memcpy(new_lst->items, xalp->items, xalp->count * sizeof (rsync_xa));
>- new_lst->count = xalp->count;
>+ *new_list = empty_xa_list;
>+ (void)EXPAND_ITEM_LIST(&new_list->xa_items, rsync_xa, xalp->count);
>+ memcpy(new_list->xa_items.items, xalp->items, xalp->count * sizeof (rsync_xa));
>+ new_list->xa_items.count = xalp->count;
> xalp->count = 0;
> return ndx;
> }
>@@ -467,7 +476,8 @@ int send_xattr(int f, stat_x *sxp)
> * need so that send_xattr_request() can tell the sender about them. */
> int xattr_diff(struct file_struct *file, stat_x *sxp, int find_all)
> {
>- const item_list *lst = rsync_xal_l.items;
>+ const rsync_xa_list *glst = rsync_xal_l.items;
>+ const item_list *lst = NULL;
> rsync_xa *snd_rxa, *rec_rxa;
> int snd_cnt, rec_cnt;
> int cmp, same, xattrs_equal = 1;
>@@ -480,10 +490,12 @@ int xattr_diff(struct file_struct *file, stat_x *sxp, int find_all)
> rec_cnt = 0;
> }
>
>- if (F_XATTR(file) >= 0)
>- lst += F_XATTR(file);
>- else
>+ if (F_XATTR(file) >= 0) {
>+ glst += F_XATTR(file);
>+ lst = &glst->xa_items;
>+ } else {
> lst = &empty_xattr;
>+ }
>
> snd_rxa = lst->items;
> snd_cnt = lst->count;
>@@ -541,11 +553,14 @@ int xattr_diff(struct file_struct *file, stat_x *sxp, int find_all)
> * XSTATE_ABBREV states into XSTATE_DONE. */
> void send_xattr_request(const char *fname, struct file_struct *file, int f_out)
> {
>- item_list *lst = rsync_xal_l.items;
>+ const rsync_xa_list *glst = rsync_xal_l.items;
>+ const item_list *lst = NULL;
> int cnt, prior_req = 0;
> rsync_xa *rxa;
>
>- lst += F_XATTR(file);
>+ glst += F_XATTR(file);
>+ lst = &glst->xa_items;
>+
> for (rxa = lst->items, cnt = lst->count; cnt--; rxa++) {
> if (rxa->datum_len <= MAX_FULL_DATUM)
> continue;
>@@ -596,7 +611,8 @@ void send_xattr_request(const char *fname, struct file_struct *file, int f_out)
> * stores it in place of its checksum. */
> int recv_xattr_request(struct file_struct *file, int f_in)
> {
>- item_list *lst = rsync_xal_l.items;
>+ const rsync_xa_list *glst = rsync_xal_l.items;
>+ const item_list *lst = NULL;
> char *old_datum, *name;
> rsync_xa *rxa;
> int rel_pos, cnt, num, got_xattr_data = 0;
>@@ -605,7 +621,8 @@ int recv_xattr_request(struct file_struct *file, int f_in)
> rprintf(FERROR, "recv_xattr_request: internal data error!\n");
> exit_cleanup(RERR_PROTOCOL);
> }
>- lst += F_XATTR(file);
>+ glst += F_XATTR(file);
>+ lst = &glst->xa_items;
>
> cnt = lst->count;
> rxa = lst->items;
>@@ -796,12 +813,13 @@ void cache_tmp_xattr(struct file_struct *file, stat_x *sxp)
> void uncache_tmp_xattrs(void)
> {
> if (prior_xattr_count != (size_t)-1) {
>- item_list *xattr_item = rsync_xal_l.items;
>- item_list *xattr_start = xattr_item + prior_xattr_count;
>- xattr_item += rsync_xal_l.count;
>+ rsync_xa_list *xa_list_item = rsync_xal_l.items;
>+ rsync_xa_list *xa_list_start = xa_list_item + prior_xattr_count;
>+ xa_list_item += rsync_xal_l.count;
> rsync_xal_l.count = prior_xattr_count;
>- while (xattr_item-- > xattr_start)
>- rsync_xal_free(xattr_item);
>+ while (xa_list_item-- > xa_list_start) {
>+ rsync_xal_free(&xa_list_item->xa_items);
>+ }
> prior_xattr_count = (size_t)-1;
> }
> }
>@@ -921,8 +939,9 @@ static int rsync_xal_set(const char *fname, item_list *xalp,
> int set_xattr(const char *fname, const struct file_struct *file,
> const char *fnamecmp, stat_x *sxp)
> {
>+ rsync_xa_list *glst = rsync_xal_l.items;
>+ item_list *lst = NULL;
> int ndx;
>- item_list *lst = rsync_xal_l.items;
>
> if (dry_run)
> return 1; /* FIXME: --dry-run needs to compute this value */
>@@ -952,7 +971,9 @@ int set_xattr(const char *fname, const struct file_struct *file,
> #endif
>
> ndx = F_XATTR(file);
>- return rsync_xal_set(fname, lst + ndx, fnamecmp, sxp);
>+ glst += ndx;
>+ lst = &glst->xa_items;
>+ return rsync_xal_set(fname, lst, fnamecmp, sxp);
> }
>
> #ifdef SUPPORT_ACLS
>--
>1.9.1
>
>
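To make the layering of patch 3 concrete, here is a simplified sketch with illustrative types (rsync's real rsync_xa and item_list carry more fields): the global list holds one rsync_xa_list per file, and each rsync_xa_list wraps that file's array of name/value pairs.

/* Simplified sketch of the patch-3 layering; hypothetical field sets,
 * not rsync's real definitions. */
#include <stdio.h>

typedef struct {
    const char *name;
    const char *value;
} rsync_xa;                 /* one xattr */

typedef struct {
    rsync_xa *items;        /* per-file xattr array */
    size_t count;
} rsync_xa_list;            /* the new wrapper layer */

int main(void)
{
    rsync_xa file0_xattrs[] = { { "user.name0", "value0" } };
    rsync_xa file1_xattrs[] = { { "user.name1", "value1" },
                                { "user.name2", "value2" } };
    /* the global rsync_xal_l now holds rsync_xa_list entries, one per file */
    rsync_xa_list global[] = {
        { file0_xattrs, 1 },
        { file1_xattrs, 2 },
    };

    for (size_t i = 0; i < 2; i++)
        for (size_t j = 0; j < global[i].count; j++)
            printf("file %zu: %s=%s\n", i,
                   global[i].items[j].name, global[i].items[j].value);
    return 0;
}

By itself this patch changes no behavior; it gives patch 5 a per-file struct on which the ndx and hash key can later be recorded.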
>From 8284473f43caabeeb5f0184671338e1313fd86d6 Mon Sep 17 00:00:00 2001
>From: Stefan Metzmacher <metze@samba.org>
>Date: Fri, 22 Jul 2016 18:35:18 +0200
>Subject: [PATCH 4/5] hashtable: add hashlittle() from lookup3.c, by Bob
> Jenkins
>
>BUG: https://bugzilla.samba.org/show_bug.cgi?id=5324
>
>Signed-off-by: Stefan Metzmacher <metze@samba.org>
>---
> hashtable.c | 302 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 302 insertions(+)
>
>diff --git a/hashtable.c b/hashtable.c
>index f0fbe51..238db08 100644
>--- a/hashtable.c
>+++ b/hashtable.c
>@@ -170,3 +170,305 @@ void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing)
> tbl->entries++;
> return node;
> }
>+
>+#ifndef WORDS_BIGENDIAN
>+# define HASH_LITTLE_ENDIAN 1
>+# define HASH_BIG_ENDIAN 0
>+#else
>+# define HASH_LITTLE_ENDIAN 0
>+# define HASH_BIG_ENDIAN 1
>+#endif
>+
>+/*
>+ -------------------------------------------------------------------------------
>+ lookup3.c, by Bob Jenkins, May 2006, Public Domain.
>+
>+ These are functions for producing 32-bit hashes for hash table lookup.
>+ hash_word(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
>+ are externally useful functions. Routines to test the hash are included
>+ if SELF_TEST is defined. You can use this free for any purpose. It's in
>+ the public domain. It has no warranty.
>+
>+ You probably want to use hashlittle(). hashlittle() and hashbig()
>+ hash byte arrays. hashlittle() is faster than hashbig() on
>+ little-endian machines. Intel and AMD are little-endian machines.
>+ On second thought, you probably want hashlittle2(), which is identical to
>+ hashlittle() except it returns two 32-bit hashes for the price of one.
>+ You could implement hashbig2() if you wanted but I haven't bothered here.
>+
>+ If you want to find a hash of, say, exactly 7 integers, do
>+ a = i1; b = i2; c = i3;
>+ mix(a,b,c);
>+ a += i4; b += i5; c += i6;
>+ mix(a,b,c);
>+ a += i7;
>+ final(a,b,c);
>+ then use c as the hash value. If you have a variable length array of
>+ 4-byte integers to hash, use hash_word(). If you have a byte array (like
>+ a character string), use hashlittle(). If you have several byte arrays, or
>+ a mix of things, see the comments above hashlittle().
>+
>+ Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
>+ then mix those integers. This is fast (you can do a lot more thorough
>+ mixing with 12*3 instructions on 3 integers than you can with 3 instructions
>+ on 1 byte), but shoehorning those bytes into integers efficiently is messy.
>+*/
>+
>+#define hashsize(n) ((uint32_t)1<<(n))
>+#define hashmask(n) (hashsize(n)-1)
>+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
>+
>+/*
>+ -------------------------------------------------------------------------------
>+ mix -- mix 3 32-bit values reversibly.
>+
>+ This is reversible, so any information in (a,b,c) before mix() is
>+ still in (a,b,c) after mix().
>+
>+ If four pairs of (a,b,c) inputs are run through mix(), or through
>+ mix() in reverse, there are at least 32 bits of the output that
>+ are sometimes the same for one pair and different for another pair.
>+ This was tested for:
>+ * pairs that differed by one bit, by two bits, in any combination
>+ of top bits of (a,b,c), or in any combination of bottom bits of
>+ (a,b,c).
>+ * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
>+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
>+ is commonly produced by subtraction) look like a single 1-bit
>+ difference.
>+ * the base values were pseudorandom, all zero but one bit set, or
>+ all zero plus a counter that starts at zero.
>+
>+ Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
>+ satisfy this are
>+ 4 6 8 16 19 4
>+ 9 15 3 18 27 15
>+ 14 9 3 7 17 3
>+ Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
>+ for "differ" defined as + with a one-bit base and a two-bit delta. I
>+ used http://burtleburtle.net/bob/hash/avalanche.html to choose
>+ the operations, constants, and arrangements of the variables.
>+
>+ This does not achieve avalanche. There are input bits of (a,b,c)
>+ that fail to affect some output bits of (a,b,c), especially of a. The
>+ most thoroughly mixed value is c, but it doesn't really even achieve
>+ avalanche in c.
>+
>+ This allows some parallelism. Read-after-writes are good at doubling
>+ the number of bits affected, so the goal of mixing pulls in the opposite
>+ direction as the goal of parallelism. I did what I could. Rotates
>+ seem to cost as much as shifts on every machine I could lay my hands
>+ on, and rotates are much kinder to the top and bottom bits, so I used
>+ rotates.
>+ -------------------------------------------------------------------------------
>+*/
>+#define mix(a,b,c) \
>+{ \
>+ a -= c; a ^= rot(c, 4); c += b; \
>+ b -= a; b ^= rot(a, 6); a += c; \
>+ c -= b; c ^= rot(b, 8); b += a; \
>+ a -= c; a ^= rot(c,16); c += b; \
>+ b -= a; b ^= rot(a,19); a += c; \
>+ c -= b; c ^= rot(b, 4); b += a; \
>+}
>+
>+/*
>+ -------------------------------------------------------------------------------
>+ final -- final mixing of 3 32-bit values (a,b,c) into c
>+
>+ Pairs of (a,b,c) values differing in only a few bits will usually
>+ produce values of c that look totally different. This was tested for
>+ * pairs that differed by one bit, by two bits, in any combination
>+ of top bits of (a,b,c), or in any combination of bottom bits of
>+ (a,b,c).
>+ * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
>+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
>+ is commonly produced by subtraction) look like a single 1-bit
>+ difference.
>+ * the base values were pseudorandom, all zero but one bit set, or
>+ all zero plus a counter that starts at zero.
>+
>+ These constants passed:
>+ 14 11 25 16 4 14 24
>+ 12 14 25 16 4 14 24
>+ and these came close:
>+ 4 8 15 26 3 22 24
>+ 10 8 15 26 3 22 24
>+ 11 8 15 26 3 22 24
>+ -------------------------------------------------------------------------------
>+*/
>+#define final(a,b,c) \
>+{ \
>+ c ^= b; c -= rot(b,14); \
>+ a ^= c; a -= rot(c,11); \
>+ b ^= a; b -= rot(a,25); \
>+ c ^= b; c -= rot(b,16); \
>+ a ^= c; a -= rot(c,4); \
>+ b ^= a; b -= rot(a,14); \
>+ c ^= b; c -= rot(b,24); \
>+}
>+
>+
>+/*
>+ -------------------------------------------------------------------------------
>+ hashlittle() -- hash a variable-length key into a 32-bit value
>+ k : the key (the unaligned variable-length array of bytes)
>+ length : the length of the key, counting by bytes
>+ val2 : IN: can be any 4-byte value OUT: second 32 bit hash.
>+ Returns a 32-bit value. Every bit of the key affects every bit of
>+ the return value. Two keys differing by one or two bits will have
>+ totally different hash values. Note that the return value is better
>+ mixed than val2, so use that first.
>+
>+ The best hash table sizes are powers of 2. There is no need to do
>+ mod a prime (mod is sooo slow!). If you need less than 32 bits,
>+ use a bitmask. For example, if you need only 10 bits, do
>+ h = (h & hashmask(10));
>+ In which case, the hash table should have hashsize(10) elements.
>+
>+ If you are hashing n strings (uint8_t **)k, do it like this:
>+ for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
>+
>+ By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
>+ code any way you wish, private, educational, or commercial. It's free.
>+
>+ Use for hash table lookup, or anything where one collision in 2^^32 is
>+ acceptable. Do NOT use for cryptographic purposes.
>+ -------------------------------------------------------------------------------
>+*/
>+
>+uint32_t hashlittle(const void *key, size_t length)
>+{
>+ uint32_t a,b,c; /* internal state */
>+ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
>+
>+ /* Set up the internal state */
>+ a = b = c = 0xdeadbeef + ((uint32_t)length);
>+
>+ u.ptr = key;
>+ if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
>+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
>+ const uint8_t *k8;
>+
>+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
>+ while (length > 12)
>+ {
>+ a += k[0];
>+ b += k[1];
>+ c += k[2];
>+ mix(a,b,c);
>+ length -= 12;
>+ k += 3;
>+ }
>+
>+ /*----------------------------- handle the last (probably partial) block */
>+ k8 = (const uint8_t *)k;
>+ switch(length)
>+ {
>+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
>+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
>+ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
>+ case 9 : c+=k8[8]; /* fall through */
>+ case 8 : b+=k[1]; a+=k[0]; break;
>+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
>+ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
>+ case 5 : b+=k8[4]; /* fall through */
>+ case 4 : a+=k[0]; break;
>+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
>+ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
>+ case 1 : a+=k8[0]; break;
>+ case 0 : return c;
>+ }
>+ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
>+ const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
>+ const uint8_t *k8;
>+
>+ /*--------------- all but last block: aligned reads and different mixing */
>+ while (length > 12)
>+ {
>+ a += k[0] + (((uint32_t)k[1])<<16);
>+ b += k[2] + (((uint32_t)k[3])<<16);
>+ c += k[4] + (((uint32_t)k[5])<<16);
>+ mix(a,b,c);
>+ length -= 12;
>+ k += 6;
>+ }
>+
>+ /*----------------------------- handle the last (probably partial) block */
>+ k8 = (const uint8_t *)k;
>+ switch(length)
>+ {
>+ case 12: c+=k[4]+(((uint32_t)k[5])<<16);
>+ b+=k[2]+(((uint32_t)k[3])<<16);
>+ a+=k[0]+(((uint32_t)k[1])<<16);
>+ break;
>+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
>+ case 10: c+=k[4];
>+ b+=k[2]+(((uint32_t)k[3])<<16);
>+ a+=k[0]+(((uint32_t)k[1])<<16);
>+ break;
>+ case 9 : c+=k8[8]; /* fall through */
>+ case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
>+ a+=k[0]+(((uint32_t)k[1])<<16);
>+ break;
>+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
>+ case 6 : b+=k[2];
>+ a+=k[0]+(((uint32_t)k[1])<<16);
>+ break;
>+ case 5 : b+=k8[4]; /* fall through */
>+ case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
>+ break;
>+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
>+ case 2 : a+=k[0];
>+ break;
>+ case 1 : a+=k8[0];
>+ break;
>+ case 0 : return c; /* zero length requires no mixing */
>+ }
>+
>+ } else { /* need to read the key one byte at a time */
>+ const uint8_t *k = (const uint8_t *)key;
>+
>+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
>+ while (length > 12)
>+ {
>+ a += k[0];
>+ a += ((uint32_t)k[1])<<8;
>+ a += ((uint32_t)k[2])<<16;
>+ a += ((uint32_t)k[3])<<24;
>+ b += k[4];
>+ b += ((uint32_t)k[5])<<8;
>+ b += ((uint32_t)k[6])<<16;
>+ b += ((uint32_t)k[7])<<24;
>+ c += k[8];
>+ c += ((uint32_t)k[9])<<8;
>+ c += ((uint32_t)k[10])<<16;
>+ c += ((uint32_t)k[11])<<24;
>+ mix(a,b,c);
>+ length -= 12;
>+ k += 12;
>+ }
>+
>+ /*-------------------------------- last block: affect all 32 bits of (c) */
>+ switch(length) /* all the case statements fall through */
>+ {
>+ case 12: c+=((uint32_t)k[11])<<24;
>+ case 11: c+=((uint32_t)k[10])<<16;
>+ case 10: c+=((uint32_t)k[9])<<8;
>+ case 9 : c+=k[8];
>+ case 8 : b+=((uint32_t)k[7])<<24;
>+ case 7 : b+=((uint32_t)k[6])<<16;
>+ case 6 : b+=((uint32_t)k[5])<<8;
>+ case 5 : b+=k[4];
>+ case 4 : a+=((uint32_t)k[3])<<24;
>+ case 3 : a+=((uint32_t)k[2])<<16;
>+ case 2 : a+=((uint32_t)k[1])<<8;
>+ case 1 : a+=k[0];
>+ break;
>+ case 0 : return c;
>+ }
>+ }
>+
>+ final(a,b,c);
>+ return c;
>+}
>--
>1.9.1
>
>
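A short usage sketch for the hashlittle() added above, following the lookup3 comment's advice to reduce the 32-bit hash to a power-of-two table size with a bitmask. It assumes linking against the patched hashtable.c (the macros are redefined locally since they are not exported via a header); the key string and the 10-bit table size are arbitrary examples.

/* Usage sketch: hash a byte-array key and pick a bucket in a
 * 1024-slot (power-of-two) table with a bitmask. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

uint32_t hashlittle(const void *key, size_t length); /* from hashtable.c */

#define hashsize(n) ((uint32_t)1 << (n))
#define hashmask(n) (hashsize(n) - 1)

int main(void)
{
    const char *key = "user.example";              /* arbitrary example key */
    uint32_t h = hashlittle(key, strlen(key));
    uint32_t bucket = h & hashmask(10);            /* index into hashsize(10) slots */
    printf("hash=0x%08x bucket=%u\n", (unsigned)h, (unsigned)bucket);
    return 0;
}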
>From a9be6928c5ebf9dae24a8de86c0e3d2d86bdc591 Mon Sep 17 00:00:00 2001
>From: Stefan Metzmacher <metze@samba.org>
>Date: Fri, 22 Jul 2016 19:46:46 +0200
>Subject: [PATCH 5/5] xattrs: maintain a hashtable in order to speed up
> find_matching_xattr()
>
>As a testcase I used one directory on gpfs with 1000000 files,
>each with an xattr called 'name$i' having a value of 'value$i',
>so we also have 1000000 unique xattrs. The source and dest directories
>are already in sync beforehand, so the rsync command is basically a
>no-op that just verifies everything is already in sync.
>
>The results before this patchset are:
>
> [gpfs]# time rsync -a -P -X -q source-xattr/ dest-with-xattr/
>
> real 8m46.191s
> user 6m29.016s
> sys 0m24.883s
>
> [gpfs]# time rsync -a -P -q source-xattr/ dest-without-xattr/
>
> real 1m58.462s
> user 0m0.957s
> sys 0m11.801s
>
>With the patchset I got:
>
> [gpfs]# time /gpfs/rsync.install/bin/rsync -a -P -X -q source-xattr/ dest-with-xattr/
>
> real 2m4.150s
> user 0m1.917s
> sys 0m17.077s
>
> [gpfs]# time /gpfs/rsync.install/bin/rsync -a -P -q source-xattr/ dest-without-xattr/
>
> real 1m59.534s
> user 0m0.924s
> sys 0m11.599s
>
>This means the time spent in userspace dropped from 6m29.016s down to 0m1.917s!
>Without -X, user time is ~0m0.9s with or without the patch.
>
>BUG: https://bugzilla.samba.org/show_bug.cgi?id=5324
>
>Signed-off-by: Stefan Metzmacher <metze@samba.org>
>---
> xattrs.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 143 insertions(+), 8 deletions(-)
>
>diff --git a/xattrs.c b/xattrs.c
>index fa4128d..f3c6350 100644
>--- a/xattrs.c
>+++ b/xattrs.c
>@@ -79,7 +79,16 @@ typedef struct {
> int num;
> } rsync_xa;
>
>-typedef struct {
>+struct _rsync_xa_list;
>+
>+typedef struct _rsync_xa_list_ref {
>+ struct _rsync_xa_list_ref *next;
>+ int ndx;
>+} rsync_xa_list_ref;
>+
>+typedef struct _rsync_xa_list {
>+ int ndx;
>+ int64 key;
> item_list xa_items;
> } rsync_xa_list;
>
>@@ -91,6 +100,7 @@ static const rsync_xa_list empty_xa_list = {
> };
> static const item_list empty_xattr = EMPTY_ITEM_LIST;
> static item_list rsync_xal_l = EMPTY_ITEM_LIST;
>+static struct hashtable *rsync_xal_h = NULL;
>
> static size_t prior_xattr_count = (size_t)-1;
>
>@@ -367,19 +377,64 @@ int copy_xattrs(const char *source, const char *dest)
> return 0;
> }
>
>-static int find_matching_xattr(const item_list *xalp)
>+static int64 xattr_lookup_hash(const item_list *xalp)
> {
>- const rsync_xa_list *glst = rsync_xal_l.items;
>+ const rsync_xa *rxas = xalp->items;
> size_t i;
>+ int64 key = hashlittle(&xalp->count, sizeof(xalp->count));
>+
>+ for (i = 0; i < xalp->count; i++) {
>+ key += hashlittle(rxas[i].name, rxas[i].name_len);
>+ if (rxas[i].datum_len > MAX_FULL_DATUM) {
>+ key += hashlittle(rxas[i].datum, MAX_DIGEST_LEN);
>+ } else {
>+ key += hashlittle(rxas[i].datum, rxas[i].datum_len);
>+ }
>+ }
>+
>+ if (key == 0) {
>+ /*
>+ * very unlikely, but we should never return 0
>+ * as hashtable_find() doesn't like it.
>+ */
>+ return 1;
>+ }
>+
>+ return key;
>+}
>+
>+static int find_matching_xattr(const item_list *xalp)
>+{
>+ const struct ht_int64_node *node = NULL;
>+ const rsync_xa_list_ref *ref = NULL;
>+ int64 key;
>+
>+ if (rsync_xal_h == NULL) {
>+ return -1;
>+ }
>+
>+ key = xattr_lookup_hash(xalp);
>+
>+ node = hashtable_find(rsync_xal_h, key, 0);
>+ if (node == NULL) {
>+ return -1;
>+ }
>
>- for (i = 0; i < rsync_xal_l.count; i++) {
>- const item_list *lst = &glst[i].xa_items;
>- const rsync_xa *rxas1 = lst->items;
>+ if (node->data == NULL) {
>+ return -1;
>+ }
>+
>+ for (ref = node->data; ref != NULL; ref = ref->next) {
>+ const rsync_xa_list *ptr = rsync_xal_l.items;
>+ const rsync_xa *rxas1 = NULL;
> const rsync_xa *rxas2 = xalp->items;
> size_t j;
>
>+ ptr += ref->ndx;
>+ rxas1 = ptr->xa_items.items;
>+
> /* Wrong number of elements? */
>- if (lst->count != xalp->count)
>+ if (ptr->xa_items.count != xalp->count)
> continue;
> /* any elements different? */
> for (j = 0; j < xalp->count; j++) {
>@@ -400,7 +455,7 @@ static int find_matching_xattr(const item_list *xalp)
> }
> /* no differences found. This is The One! */
> if (j == xalp->count)
>- return i;
>+ return ref->ndx;
> }
>
> return -1;
>@@ -409,8 +464,10 @@ static int find_matching_xattr(const item_list *xalp)
> /* Store *xalp on the end of rsync_xal_l */
> static int rsync_xal_store(item_list *xalp)
> {
>+ struct ht_int64_node *node = NULL;
> int ndx = rsync_xal_l.count; /* pre-incremented count */
> rsync_xa_list *new_list = EXPAND_ITEM_LIST(&rsync_xal_l, rsync_xa_list, RSYNC_XAL_LIST_INITIAL);
>+ rsync_xa_list_ref *new_ref = NULL;
> /* Since the following call starts a new list, we know it will hold the
> * entire initial-count, not just enough space for one new item. */
> *new_list = empty_xa_list;
>@@ -418,6 +475,45 @@ static int rsync_xal_store(item_list *xalp)
> memcpy(new_list->xa_items.items, xalp->items, xalp->count * sizeof (rsync_xa));
> new_list->xa_items.count = xalp->count;
> xalp->count = 0;
>+
>+ new_list->ndx = ndx;
>+ new_list->key = xattr_lookup_hash(&new_list->xa_items);
>+
>+ if (rsync_xal_h == NULL) {
>+ rsync_xal_h = hashtable_create(512, 1);
>+ }
>+ if (rsync_xal_h == NULL) {
>+ out_of_memory("rsync_xal_h hashtable_create()");
>+ }
>+
>+ node = hashtable_find(rsync_xal_h, new_list->key, 1);
>+ if (node == NULL) {
>+ out_of_memory("rsync_xal_h hashtable_find()");
>+ }
>+
>+ new_ref = new0(rsync_xa_list_ref);
>+ if (new_ref == NULL) {
>+ out_of_memory("new0(rsync_xa_list_ref)");
>+ }
>+
>+ new_ref->ndx = ndx;
>+
>+ if (node->data != NULL) {
>+ rsync_xa_list_ref *ref = node->data;
>+
>+ while (ref != NULL) {
>+ if (ref->next != NULL) {
>+ ref = ref->next;
>+ continue;
>+ }
>+
>+ ref->next = new_ref;
>+ break;
>+ }
>+ } else {
>+ node->data = new_ref;
>+ }
>+
> return ndx;
> }
>
>@@ -818,7 +914,46 @@ void uncache_tmp_xattrs(void)
> xa_list_item += rsync_xal_l.count;
> rsync_xal_l.count = prior_xattr_count;
> while (xa_list_item-- > xa_list_start) {
>+ struct ht_int64_node *node = NULL;
>+ rsync_xa_list_ref *ref = NULL;
>+
> rsync_xal_free(&xa_list_item->xa_items);
>+
>+ if (rsync_xal_h == NULL) {
>+ continue;
>+ }
>+
>+ node = hashtable_find(rsync_xal_h, xa_list_item->key, 0);
>+ if (node == NULL) {
>+ continue;
>+ }
>+
>+ if (node->data == NULL) {
>+ continue;
>+ }
>+
>+ ref = node->data;
>+ if (xa_list_item->ndx == ref->ndx) {
>+ /*
>+ * xa_list_item is the first in the list
>+ */
>+ node->data = ref->next;
>+ free(ref);
>+ continue;
>+ }
>+
>+ while (ref != NULL) {
>+ if (ref->next == NULL) {
>+ ref = NULL;
>+ break;
>+ }
>+ if (xa_list_item->ndx == ref->next->ndx) {
>+ rsync_xa_list_ref *dead = ref->next;
>+ ref->next = dead->next;
>+ free(dead); /* free the unlinked ref, not its predecessor */
>+ break;
>+ }
>+ ref = ref->next;
>+ }
> }
> prior_xattr_count = (size_t)-1;
> }
>--
>1.9.1
>
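The chained-bucket bookkeeping in patch 5 can be hard to follow inside the diff, so here is a minimal standalone sketch of the same idea (simplified; rsync's real code goes through hashtable_find() and ht_int64_node): each key's bucket holds a singly linked list of index refs, appended on store and unlinked again when temporary entries are uncached.

/* Minimal sketch of the patch-5 chaining scheme with hypothetical names;
 * the refs carry indices into the global rsync_xal_l-style array. */
#include <stdio.h>
#include <stdlib.h>

typedef struct ref {
    struct ref *next;
    int ndx;              /* index into the global per-file list */
} ref;

/* append ndx to one bucket's chain (tail insertion, as in rsync_xal_store()) */
static void chain_append(ref **head, int ndx)
{
    ref *new_ref = calloc(1, sizeof *new_ref);
    if (!new_ref)
        exit(1);
    new_ref->ndx = ndx;
    while (*head)
        head = &(*head)->next;
    *head = new_ref;
}

/* unlink ndx from the chain (as uncache_tmp_xattrs() does when it drops
 * temporary xattr entries) */
static void chain_remove(ref **head, int ndx)
{
    while (*head) {
        if ((*head)->ndx == ndx) {
            ref *dead = *head;
            *head = dead->next;  /* bypass the matched node */
            free(dead);
            return;
        }
        head = &(*head)->next;
    }
}

int main(void)
{
    ref *bucket = NULL; /* one hashtable slot whose keys collided */
    chain_append(&bucket, 7);
    chain_append(&bucket, 42);
    chain_remove(&bucket, 7);
    for (ref *r = bucket; r; r = r->next)
        printf("ndx %d still chained\n", r->ndx);
    chain_remove(&bucket, 42);
    return 0;
}

Chaining on the full 64-bit key means a hash collision only costs a short list walk plus the exact element-by-element comparison that find_matching_xattr() performs anyway, which is what turns the former O(n) scan over all stored xattr lists into a near-constant lookup.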