diff --git a/checksum.c b/checksum.c index a1c2aa2..229c156 100644 --- a/checksum.c +++ b/checksum.c @@ -23,6 +23,9 @@ extern int checksum_seed; extern int protocol_version; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif /* a simple 32 bit checksum that can be upadted from either end diff --git a/cleanup.c b/cleanup.c index cd023aa..a9d315c 100644 --- a/cleanup.c +++ b/cleanup.c @@ -52,7 +52,11 @@ void close_all(void) int fd; int ret; STRUCT_STAT st; - +#endif +#ifdef WITH_DROP_CACHE + fadv_close_all(); +#endif +#ifdef SHUTDOWN_ALL_SOCKETS max_fd = sysconf(_SC_OPEN_MAX) - 1; for (fd = max_fd; fd >= 0; fd--) { if ((ret = do_fstat(fd, &st)) == 0) { diff --git a/configure.ac b/configure.ac index cf588ce..0a084bf 100644 --- a/configure.ac +++ b/configure.ac @@ -602,6 +602,7 @@ AC_CHECK_FUNCS(waitpid wait4 getcwd strdup chown chmod lchmod mknod mkfifo \ setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \ seteuid strerror putenv iconv_open locale_charset nl_langinfo getxattr \ extattr_get_link sigaction sigprocmask setattrlist getgrouplist \ + mmap mincore posix_fadvise64 \ initgroups utimensat posix_fallocate attropen setvbuf) dnl cygwin iconv.h defines iconv_open as libiconv_open diff --git a/fileio.c b/fileio.c index abef46d..00d6ad9 100644 --- a/fileio.c +++ b/fileio.c @@ -51,7 +51,7 @@ int sparse_end(int f, OFF_T size) ret = -1; else { do { - ret = write(f, "", 1); + ret = fadv_write(f, "", 1); } while (ret < 0 && errno == EINTR); ret = ret <= 0 ? 
-1 : 0; @@ -81,7 +81,7 @@ static int write_sparse(int f, char *buf, int len) do_lseek(f, sparse_seek, SEEK_CUR); sparse_seek = l2; - while ((ret = write(f, buf + l1, len - (l1+l2))) <= 0) { + while ((ret = fadv_write(f, buf + l1, len - (l1+l2))) <= 0) { if (ret < 0 && errno == EINTR) continue; sparse_seek = 0; @@ -107,7 +107,7 @@ int flush_write_file(int f) char *bp = wf_writeBuf; while (wf_writeBufCnt > 0) { - if ((ret = write(f, bp, wf_writeBufCnt)) < 0) { + if ((ret = fadv_write(f, bp, wf_writeBufCnt)) < 0) { if (errno == EINTR) continue; return ret; @@ -254,7 +254,7 @@ char *map_ptr(struct map_struct *map, OFF_T offset, int32 len) map->p_len = window_size; while (read_size > 0) { - int32 nread = read(map->fd, map->p + read_offset, read_size); + int32 nread = fadv_read(map->fd, map->p + read_offset, read_size); if (nread <= 0) { if (!map->status) map->status = nread ? errno : ENODATA; diff --git a/generator.c b/generator.c index 91009a5..611eb45 100644 --- a/generator.c +++ b/generator.c @@ -111,6 +111,10 @@ static int need_retouch_dir_times; static int need_retouch_dir_perms; static const char *solo_file = NULL; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif + enum nonregtype { TYPE_DIR, TYPE_SPECIAL, TYPE_DEVICE, TYPE_SYMLINK }; diff --git a/options.c b/options.c index 62dfe4f..b086275 100644 --- a/options.c +++ b/options.c @@ -62,6 +62,10 @@ int preserve_uid = 0; int preserve_gid = 0; int preserve_times = 0; int update_only = 0; +#ifdef WITH_DROP_CACHE +int drop_cache = 0; +int remote_drop_cache = 0; +#endif int cvs_exclude = 0; int dry_run = 0; int do_xfers = 1; @@ -682,6 +686,10 @@ void usage(enum logcode F) rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n"); rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX); rprintf(F," -u, --update skip files that are newer on the receiver\n"); +#ifdef WITH_DROP_CACHE + rprintf(F," --drop-cache do not cache rsync files 
(POSIX_FADV_DONTNEED)\n"); + rprintf(F," --remote-drop-cache do not cache rsync files (POSIX_FADV_DONTNEED)\n"); +#endif rprintf(F," --inplace update destination files in-place (SEE MAN PAGE)\n"); rprintf(F," --append append data onto shorter files\n"); rprintf(F," --append-verify like --append, but with old data in file checksum\n"); @@ -916,6 +924,10 @@ static struct poptOption long_options[] = { {"no-one-file-system",0, POPT_ARG_VAL, &one_file_system, 0, 0, 0 }, {"no-x", 0, POPT_ARG_VAL, &one_file_system, 0, 0, 0 }, {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 }, +#ifdef WITH_DROP_CACHE + {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 }, + {"remote-drop-cache",0, POPT_ARG_NONE, &remote_drop_cache, 0, 0, 0 }, +#endif {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 }, @@ -1067,6 +1079,10 @@ static void daemon_usage(enum logcode F) rprintf(F," --log-file=FILE override the \"log file\" setting\n"); rprintf(F," --log-file-format=FMT override the \"log format\" setting\n"); rprintf(F," --sockopts=OPTIONS specify custom TCP options\n"); +#ifdef WITH_DROP_CACHE + rprintf(F," --drop-cache do not cache rsync files (POSIX_FADV_DONTNEED)\n"); + rprintf(F," --remote-drop-cache do not cache rsync files (POSIX_FADV_DONTNEED)\n"); +#endif rprintf(F," -v, --verbose increase verbosity\n"); rprintf(F," -4, --ipv4 prefer IPv4\n"); rprintf(F," -6, --ipv6 prefer IPv6\n"); @@ -1091,6 +1107,9 @@ static struct poptOption long_daemon_options[] = { {"log-file", 0, POPT_ARG_STRING, &logfile_name, 0, 0, 0 }, {"log-file-format", 0, POPT_ARG_STRING, &logfile_format, 0, 0, 0 }, {"port", 0, POPT_ARG_INT, &rsync_port, 0, 0, 0 }, +#ifdef WITH_DROP_CACHE + {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 }, +#endif {"sockopts", 0, POPT_ARG_STRING, &sockopts, 0, 0, 0 }, {"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 }, 
{"server", 0, POPT_ARG_NONE, &am_server, 0, 0, 0 }, @@ -2378,6 +2397,11 @@ void server_options(char **args, int *argc_p) if (!am_sender) args[ac++] = "--sender"; +#ifdef WITH_DROP_CACHE + if (remote_drop_cache) + args[ac++] = "--drop-cache"; +#endif + x = 1; argstr[0] = '-'; diff --git a/receiver.c b/receiver.c index 571b7da..b82cdf5 100644 --- a/receiver.c +++ b/receiver.c @@ -62,6 +62,10 @@ extern char sender_file_sum[MAX_DIGEST_LEN]; extern struct file_list *cur_flist, *first_flist, *dir_flist; extern filter_rule_list daemon_filter_list; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif + static struct bitbag *delayed_bits = NULL; static int phase = 0, redoing = 0; static flist_ndx_list batch_redo_list; diff --git a/rsync.h b/rsync.h index 4fef882..a9fa172 100644 --- a/rsync.h +++ b/rsync.h @@ -1292,3 +1292,13 @@ char *getpass(const char *prompt); #ifdef MAINTAINER_MODE const char *get_panic_action(void); #endif + +#if defined HAVE_POSIX_FADVISE64 && defined HAVE_MINCORE && defined HAVE_MMAP +#define WITH_DROP_CACHE 1 +#include +int fadv_close(int fd); +void fadv_close_all(void); +#endif + +ssize_t fadv_write(int fd, const void *buf, size_t count); +ssize_t fadv_read(int fd, void *buf, size_t count); diff --git a/rsync.yo b/rsync.yo index 87028ca..1c1853d 100644 --- a/rsync.yo +++ b/rsync.yo @@ -1237,6 +1237,19 @@ dit(bf(-S, --sparse)) Try to handle sparse files efficiently so they take up less space on the destination. Conflicts with bf(--inplace) because it's not possible to overwrite data in a sparse fashion. +dit(bf(--drop-cache)) Stop rsync from disturbing the file system cache with +the data from the files it copies. Without this option, other processes that +had been crunching along happily using cached data will suddenly become +slow as they find their favorite data blocks being evicted from the +cache by the files read and written by rsync. 
Since rsync has to wait until +the data is written to disk before it can drop the cache, this option will +slow rsync down considerably, especially with small files and short copy +jobs. The bf(--drop-cache) function uses posix_fadvise64 and mincore to do +its work. It is only compiled in if configure can find posix_fadvise64, +mincore and mmap. + +dit(bf(--remote-drop-cache)) Works like bf(--drop-cache), but on the server side. + dit(bf(--preallocate)) This tells the receiver to allocate each destination file to its eventual size before writing data to the file. Rsync will only use the real filesystem-level preallocation support provided by Linux's diff --git a/sender.c b/sender.c index 5adc2fd..1a62476 100644 --- a/sender.c +++ b/sender.c @@ -46,6 +46,9 @@ extern int write_batch; extern int file_old_total; extern struct stats stats; extern struct file_list *cur_flist, *first_flist, *dir_flist; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif BOOL extra_flist_sending_enabled; diff --git a/t_unsafe.c b/t_unsafe.c index 72339d0..6cf023a 100644 --- a/t_unsafe.c +++ b/t_unsafe.c @@ -24,6 +24,7 @@ #include "rsync.h" int dry_run = 0; +int drop_cache = 0; int am_root = 0; int am_sender = 1; int read_only = 0; diff --git a/util.c b/util.c index 05aa86a..dcc729d 100644 --- a/util.c +++ b/util.c @@ -37,6 +37,10 @@ extern char *module_dir; extern unsigned int module_dirlen; extern char *partial_dir; extern filter_rule_list daemon_filter_list; +#ifdef WITH_DROP_CACHE +#include +extern int drop_cache; +#endif int sanitize_paths = 0; @@ -44,6 +48,218 @@ char curr_dir[MAXPATHLEN]; unsigned int curr_dir_len; int curr_dir_depth; /* This is only set for a sanitizing daemon. 
*/ +#ifdef WITH_DROP_CACHE +#define FADV_BUFFER_SIZE 1024*1024*16 + +static struct stat fadv_fd_stat[1024]; +static off_t fadv_fd_pos[1024]; +static unsigned char *fadv_core_ptr[1024]; +static int fadv_max_fd = 0; +static int fadv_close_ring_tail = 0; +static int fadv_close_ring_head = 0; +static int fadv_close_ring_size = 0; +static int fadv_close_ring[1024]; +static int fadv_close_buffer_size = 0; +static size_t fadv_pagesize; + +static void fadv_fd_init_func(void) +{ + static int fadv_fd_init = 0; + if (fadv_fd_init == 0){ + int i; + fadv_fd_init = 1; + fadv_pagesize = getpagesize(); + if (fadv_max_fd == 0){ + fadv_max_fd = sysconf(_SC_OPEN_MAX) - 20; + if (fadv_max_fd < 0) + fadv_max_fd = 1; + if (fadv_max_fd > 1000) + fadv_max_fd = 1000; + } + for (i=0;i 99) { + } else { + rprintf(FINFO,"%d: ",fd); + for (pi = 0; pi <= stat.st_size/fadv_pagesize; pi++) { + if ((fadv_core_ptr[fd])[pi]&1) { + rprintf(FINFO,"%lu ", (unsigned long)pi); + } + } + rprintf(FINFO,"\n"); + } + munmap(pa, stat.st_size); + } + } +} + +static void fadv_drop(int fd, int sync) +{ + /* trail 1 MB behind in dropping. we do this to make + sure that the same block or stripe does not have + to be written twice */ + off_t pos = lseek(fd,0,SEEK_CUR) - 1024*1024; + if (fd > fadv_max_fd){ + return; + } + if ( fadv_fd_pos[fd] < pos - FADV_BUFFER_SIZE ) { + if (sync) { + /* if the file is not flushed to disk before calling fadvise, + then the Cache will not be freed and the advise gets ignored + this does give a severe hit on performance. If only there + was a way to mark cache so that it gets release once the data + is written to disk. */ + fdatasync(fd); + } + if (fadv_core_ptr[fd] != NULL) { + size_t pi; + if (pos < fadv_fd_stat[fd].st_size){ + for (pi = fadv_fd_pos[fd]/fadv_pagesize; pi <= pos/fadv_pagesize; pi++) { + if (! 
(fadv_core_ptr[fd][pi]&1)) {
+						posix_fadvise64(fd, pi*fadv_pagesize, fadv_pagesize, POSIX_FADV_DONTNEED);
+					}
+				}
+			} else {
+				posix_fadvise64(fd, fadv_fd_stat[fd].st_size, pos-fadv_fd_stat[fd].st_size, POSIX_FADV_DONTNEED);
+			}
+		} else {
+			posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED);
+		}
+		fadv_fd_pos[fd] = pos;
+	}
+}
+
+#endif
+
+/* write() wrapper; returns write()'s result.  With --drop-cache it also calls
+ * fadv_drop(), which syncs and drops cache behind the file position. */
+ssize_t fadv_write(int fd, const void *buf, size_t count)
+{
+	ssize_t ret = write(fd, buf, count); /* ssize_t: an int could truncate */
+#ifdef WITH_DROP_CACHE
+	if (drop_cache && ret > 0) { /* on failure leave errno for the caller */
+		fadv_fd_init_func(); /* as fadv_read() does; sets fadv_max_fd */
+		fadv_drop(fd,1);
+	}
+#endif
+	return ret;
+}
+
+/* read() wrapper; returns read()'s result.  With --drop-cache it calls
+ * fadv_get_core() before the read and fadv_drop() after a successful one. */
+ssize_t fadv_read(int fd, void *buf, size_t count)
+{
+	ssize_t ret; /* ssize_t: an int could truncate read()'s result */
+#ifdef WITH_DROP_CACHE
+	if (drop_cache) {
+		fadv_fd_init_func();
+		fadv_get_core(fd);
+	}
+#endif
+	ret = read(fd, buf, count);
+#ifdef WITH_DROP_CACHE
+	if (drop_cache && ret > 0) /* on failure leave errno for the caller */
+		fadv_drop(fd,0);
+#endif
+	return ret;
+}
+
+#ifdef WITH_DROP_CACHE
+/* Sync, drop the cache of, and really close every descriptor that fadv_close()
+ * parked in the close ring.  The ring is empty when this returns. */
+void fadv_close_all(void)
+{
+	while (fadv_close_ring_size > 0) {
+		int cfd = fadv_close_ring[fadv_close_ring_tail];
+		fdatasync(cfd); /* fadvise is ignored for data not yet on disk */
+		if (fadv_core_ptr[cfd]) {
+			size_t pi;
+			for (pi = 0; pi <= fadv_fd_stat[cfd].st_size/fadv_pagesize; pi++) {
+				if (!(fadv_core_ptr[cfd][pi]&1)) /* flagged pages stay cached */
+					posix_fadvise64(cfd, pi*fadv_pagesize, fadv_pagesize, POSIX_FADV_DONTNEED);
+			}
+			free(fadv_core_ptr[cfd]);
+			fadv_core_ptr[cfd] = NULL;
+			fadv_fd_stat[cfd].st_size = 0;
+			fadv_fd_stat[cfd].st_ino = 0;
+			fadv_fd_stat[cfd].st_dev = 0;
+		} else {
+			posix_fadvise64(cfd, 0, 0, POSIX_FADV_DONTNEED); /* len 0: whole file */
+		}
+		fadv_close_ring_size--;
+		close(cfd);
+		fadv_close_ring_tail = (fadv_close_ring_tail + 1) % fadv_max_fd;
+		fadv_close_buffer_size = 0;
+	}
+}
+
+/* close() replacement; returns close(fd)'s result.  With --drop-cache a dup() of
+ * fd is parked in the close ring so fadv_close_all() can sync+drop in groups. */
+int fadv_close(int fd)
+{
+	const int slots = (int)(sizeof fadv_core_ptr / sizeof fadv_core_ptr[0]);
+	if (drop_cache && fd >= 0 && fd < slots) {
+		int newfd = dup(fd); /* -1 when we are out of descriptors */
+		if (newfd >= 0 && newfd < slots) {
+			off_t pos = lseek(fd,0,SEEK_CUR);
+			fadv_fd_init_func();
+			fadv_core_ptr[newfd] = fadv_core_ptr[fd];
+			fadv_fd_stat[newfd].st_size = fadv_fd_stat[fd].st_size;
+			fadv_core_ptr[fd] = NULL;
+			fadv_close_buffer_size += pos - fadv_fd_pos[fd];
+			fadv_fd_pos[fd] = 0; /* this fd number will be reused by a later open */
+			fadv_close_ring[fadv_close_ring_head] = newfd;
+			fadv_close_ring_head = (fadv_close_ring_head + 1) % fadv_max_fd;
+			fadv_close_ring_size++;
+			if (fadv_close_ring_size == fadv_max_fd || fadv_close_buffer_size > 1024*1024)
+				fadv_close_all(); /* it seems fastest to drop things 'in groups' */
+		} else if (newfd >= 0) {
+			close(newfd); /* beyond the bookkeeping tables: no drop for this file */
+		}
+	}
+	return close(fd); /* still the real close(): the macro is defined below */
+}
+
+#define close(fd) fadv_close(fd)
+#endif
+
 /* Set a fd into nonblocking mode. */
 void set_nonblocking(int fd)
 {
@@ -271,7 +487,7 @@ int full_write(int desc, const char *ptr, size_t len)
 
 	total_written = 0;
 	while (len > 0) {
-		int written = write(desc, ptr, len);
+		int written = fadv_write(desc, ptr, len);
 		if (written < 0) {
 			if (errno == EINTR)
 				continue;
@@ -303,7 +519,7 @@ static int safe_read(int desc, char *ptr, size_t len)
 		return len;
 
 	do {
-		n_chars = read(desc, ptr, len);
+		n_chars = fadv_read(desc, ptr, len);
 	} while (n_chars < 0 && errno == EINTR);
 
 	return n_chars;