From d7ab59f80f535e960780ec3a643101dfe96627ac Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 11 May 2020 12:37:41 +0200 Subject: [PATCH 01/31] s4:torture: add tests to test the SMB2 read/write offset/length boundaries [MS-FSA] 2.1.5.2 Server Requests a Read and 2.1.5.3 Server Requests a Write define some contraints. These tests demonstrate that ((int64_t)offset) < 0) is not allowed for both reads and writes for SMB. Also the special case for writes at offset -2 is not possible nor the append mode with offset < 0. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 54de0e4a3e46a53db5262963e64b109c567554a1) --- selftest/knownfail.d/rw-invalid | 2 + source4/torture/smb2/read_write.c | 189 ++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 selftest/knownfail.d/rw-invalid diff --git a/selftest/knownfail.d/rw-invalid b/selftest/knownfail.d/rw-invalid new file mode 100644 index 00000000000..c6f11e03d20 --- /dev/null +++ b/selftest/knownfail.d/rw-invalid @@ -0,0 +1,2 @@ +samba3.smb2.rw.invalid +samba4.smb2.rw.invalid diff --git a/source4/torture/smb2/read_write.c b/source4/torture/smb2/read_write.c index bc8898cec31..b0eea55d7f1 100644 --- a/source4/torture/smb2/read_write.c +++ b/source4/torture/smb2/read_write.c @@ -23,8 +23,19 @@ #include "libcli/smb2/smb2.h" #include "libcli/smb2/smb2_calls.h" #include "torture/torture.h" +#include "torture/util.h" #include "torture/smb2/proto.h" +#define CHECK_STATUS(_status, _expected) \ + torture_assert_ntstatus_equal_goto(torture, _status, _expected, \ + ret, done, "Incorrect status") + +#define CHECK_VALUE(v, correct) \ + torture_assert_int_equal_goto(torture, v, correct, \ + ret, done, "Incorrect value") + +#define FNAME "smb2_writetest.dat" + static bool run_smb2_readwritetest(struct torture_context *tctx, struct smb2_tree *t1, struct smb2_tree *t2) { @@ -150,12 +161,190 @@ static bool run_smb2_wrap_readwritetest(struct torture_context *tctx, return run_smb2_readwritetest(tctx, tree1, tree1); } +static bool test_rw_invalid(struct torture_context *torture, struct smb2_tree *tree) +{ + bool ret = true; + NTSTATUS status; + struct smb2_handle h; + uint8_t buf[64*1024]; + struct smb2_read rd; + struct smb2_write w = {0}; + TALLOC_CTX *tmp_ctx = talloc_new(tree); + + ZERO_STRUCT(buf); + + smb2_util_unlink(tree, FNAME); + + status = torture_smb2_testfile(tree, FNAME, &h); + CHECK_STATUS(status, NT_STATUS_OK); + + status = smb2_util_write(tree, h, buf, 0, ARRAY_SIZE(buf)); + CHECK_STATUS(status, NT_STATUS_OK); + + ZERO_STRUCT(rd); + rd.in.file.handle = h; + rd.in.length = 10; + rd.in.offset = 0; + rd.in.min_count = 1; + + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_OK); + CHECK_VALUE(rd.out.data.length, 10); + + rd.in.min_count = 0; + rd.in.length = 10; + rd.in.offset = sizeof(buf); + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_END_OF_FILE); + + rd.in.min_count = 0; + rd.in.length = 0; + rd.in.offset = sizeof(buf); + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_OK); + CHECK_VALUE(rd.out.data.length, 0); + + rd.in.min_count = 0; + rd.in.length = 1; + rd.in.offset = INT64_MAX - 1; + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_END_OF_FILE); + + rd.in.min_count = 0; + rd.in.length = 0; + rd.in.offset = INT64_MAX; + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_OK); + CHECK_VALUE(rd.out.data.length, 0); + + rd.in.min_count = 0; + rd.in.length = 1; + rd.in.offset = INT64_MAX; + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + rd.in.min_count = 0; + rd.in.length = 0; + rd.in.offset = (uint64_t)INT64_MAX + 1; + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + rd.in.min_count = 0; + rd.in.length = 0; + rd.in.offset = (uint64_t)INT64_MIN; + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + rd.in.min_count = 0; + rd.in.length = 0; + rd.in.offset = (uint64_t)(int64_t)-1; + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + rd.in.min_count = 0; + rd.in.length = 0; + rd.in.offset = (uint64_t)(int64_t)-2; + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + rd.in.min_count = 0; + rd.in.length = 0; + rd.in.offset = (uint64_t)(int64_t)-3; + status = smb2_read(tree, tmp_ctx, &rd); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + w.in.file.handle = h; + w.in.offset = (int64_t)-1; + w.in.data.data = buf; + w.in.data.length = ARRAY_SIZE(buf); + + status = smb2_write(tree, &w); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + w.in.file.handle = h; + w.in.offset = (int64_t)-2; + w.in.data.data = buf; + w.in.data.length = ARRAY_SIZE(buf); + + status = smb2_write(tree, &w); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + w.in.file.handle = h; + w.in.offset = INT64_MIN; + w.in.data.data = buf; + w.in.data.length = 1; + + status = smb2_write(tree, &w); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + w.in.file.handle = h; + w.in.offset = INT64_MIN; + w.in.data.data = buf; + w.in.data.length = 0; + status = smb2_write(tree, &w); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + w.in.file.handle = h; + w.in.offset = INT64_MAX; + w.in.data.data = buf; + w.in.data.length = 0; + status = smb2_write(tree, &w); + CHECK_STATUS(status, NT_STATUS_OK); + CHECK_VALUE(w.out.nwritten, 0); + + w.in.file.handle = h; + w.in.offset = INT64_MAX; + w.in.data.data = buf; + w.in.data.length = 1; + status = smb2_write(tree, &w); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + w.in.file.handle = h; + w.in.offset = (uint64_t)INT64_MAX + 1; + w.in.data.data = buf; + w.in.data.length = 0; + status = smb2_write(tree, &w); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + w.in.file.handle = h; + w.in.offset = 0xfffffff0000; /* MAXFILESIZE */ + w.in.data.data = buf; + w.in.data.length = 1; + status = smb2_write(tree, &w); + CHECK_STATUS(status, NT_STATUS_INVALID_PARAMETER); + + w.in.file.handle = h; + w.in.offset = 0xfffffff0000 - 1; /* MAXFILESIZE - 1 */ + w.in.data.data = buf; + w.in.data.length = 1; + status = smb2_write(tree, &w); + if (TARGET_IS_SAMBA3(torture) || TARGET_IS_SAMBA4(torture)) { + CHECK_STATUS(status, NT_STATUS_OK); + CHECK_VALUE(w.out.nwritten, 1); + } else { + CHECK_STATUS(status, NT_STATUS_DISK_FULL); + } + + w.in.file.handle = h; + w.in.offset = 0xfffffff0000; /* MAXFILESIZE */ + w.in.data.data = buf; + w.in.data.length = 0; + status = smb2_write(tree, &w); + CHECK_STATUS(status, NT_STATUS_OK); + CHECK_VALUE(w.out.nwritten, 0); + +done: + talloc_free(tmp_ctx); + return ret; +} + struct torture_suite *torture_smb2_readwrite_init(TALLOC_CTX *ctx) { struct torture_suite *suite = torture_suite_create(ctx, "rw"); torture_suite_add_2smb2_test(suite, "rw1", run_smb2_readwritetest); torture_suite_add_2smb2_test(suite, "rw2", run_smb2_wrap_readwritetest); + torture_suite_add_1smb2_test(suite, "invalid", test_rw_invalid); suite->description = talloc_strdup(suite, "SMB2 Samba4 Read/Write"); -- 2.20.1 From cb8661162b9d344288a776a9e4ff29a47f2cc37d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:06:54 +0200 Subject: [PATCH 02/31] lib: util: Add sys_valid_io_range() This implements the contraints of [MS-FSA] 2.1.5.2 Server Requests a Read. The special handling of [MS-FSA] 2.1.5.3 Server Requests a Write with offset < 0, should be handled by higher layers! Which means the check can also be used for writes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit e02cbd5c3ea6903d2b7b43c3193b8662d029ecdd) --- lib/util/sys_rw.c | 24 ++++++++++++++++++++++++ lib/util/sys_rw.h | 1 + 2 files changed, 25 insertions(+) diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c index 9a6cdcaa606..6fa7ca57365 100644 --- a/lib/util/sys_rw.c +++ b/lib/util/sys_rw.c @@ -24,6 +24,30 @@ #include "system/filesys.h" #include "lib/util/sys_rw.h" +bool sys_valid_io_range(off_t offset, size_t length) +{ + uint64_t last_byte_ofs; + + if (offset < 0) { + return false; + } + + if (offset > INT64_MAX) { + return false; + } + + if (length > UINT32_MAX) { + return false; + } + + last_byte_ofs = (uint64_t)offset + (uint64_t)length; + if (last_byte_ofs > INT64_MAX) { + return false; + } + + return true; +} + /******************************************************************* A read wrapper that will deal with EINTR/EWOULDBLOCK ********************************************************************/ diff --git a/lib/util/sys_rw.h b/lib/util/sys_rw.h index ab456d87b22..70864cb2b74 100644 --- a/lib/util/sys_rw.h +++ b/lib/util/sys_rw.h @@ -27,6 +27,7 @@ struct iovec; +bool sys_valid_io_range(off_t offset, size_t length); ssize_t sys_read(int fd, void *buf, size_t count); void sys_read_v(int fd, void *buf, size_t count); ssize_t sys_write(int fd, const void *buf, size_t count); -- 2.20.1 From 640a6715dacfaf7ec9a2b4e4b36fa22b625f571f Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:32:48 -0700 Subject: [PATCH 03/31] lib: util: Add sys_pread_full(). A pread wrapper that will deal with EINTR and never return a short read unless pread returns zero meaning EOF. Thread-safe so may be used as a replacement for pread inside pread_do() thread functions. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Pair-Programmed-With: Stefan Metzmacher Signed-off-by: Jeremy Allison Signed-off-by: Stefan Metzmacher (cherry picked from commit 36af33bf9fcdf93fce5ef1520fcb7ddbb07b355e) --- lib/util/sys_rw.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++ lib/util/sys_rw.h | 1 + 2 files changed, 49 insertions(+) diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c index 6fa7ca57365..bfeb2e6b466 100644 --- a/lib/util/sys_rw.c +++ b/lib/util/sys_rw.c @@ -143,6 +143,54 @@ ssize_t sys_pread(int fd, void *buf, size_t count, off_t off) return ret; } +/******************************************************************* + A pread wrapper that will deal with EINTR and never return a short + read unless pread returns zero meaning EOF. +********************************************************************/ + +ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off) +{ + ssize_t total_read = 0; + uint8_t *curr_buf = (uint8_t *)buf; + size_t curr_count = count; + off_t curr_off = off; + bool ok; + + ok = sys_valid_io_range(off, count); + if (!ok) { + errno = EINVAL; + return -1; + } + + while (curr_count != 0) { + ssize_t ret = sys_pread(fd, + curr_buf, + curr_count, + curr_off); + + if (ret == -1) { + return -1; + } + if (ret == 0) { + /* EOF */ + break; + } + + if (ret > curr_count) { + errno = EIO; + return -1; + } + + curr_buf += ret; + curr_count -= ret; + curr_off += ret; + + total_read += ret; + } + + return total_read; +} + /******************************************************************* A write wrapper that will deal with EINTR ********************************************************************/ diff --git a/lib/util/sys_rw.h b/lib/util/sys_rw.h index 70864cb2b74..1e0dd3730a6 100644 --- a/lib/util/sys_rw.h +++ b/lib/util/sys_rw.h @@ -34,6 +34,7 @@ ssize_t sys_write(int fd, const void *buf, size_t count); void sys_write_v(int fd, const void *buf, size_t count); ssize_t sys_writev(int fd, const struct iovec *iov, int iovcnt); ssize_t sys_pread(int fd, void *buf, size_t count, off_t off); +ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off); ssize_t sys_pwrite(int fd, const void *buf, size_t count, off_t off); #endif -- 2.20.1 From c1f14bb9bb4dc2601814dda398e361a9ef097b12 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:34:32 -0700 Subject: [PATCH 04/31] lib: util: Add sys_pwrite_full(). A pwrite wrapper that will deal with EINTR and never return a short write unless the file system returns an error. Copes with the unspecified edge condition of pwrite returning zero by changing the return to -1, errno = ENOSPC. Thread-safe so may be used as a replacement for pwrite inside pwrite_do() thread functions. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Pair-Programmed-With: Stefan Metzmacher Signed-off-by: Jeremy Allison Signed-off-by: Stefan Metzmacher (cherry picked from commit 3ba7a89cea85d134eacf1e624e011fe6f66146fc) --- lib/util/sys_rw.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ lib/util/sys_rw.h | 1 + 2 files changed, 50 insertions(+) diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c index bfeb2e6b466..d74395fc409 100644 --- a/lib/util/sys_rw.c +++ b/lib/util/sys_rw.c @@ -204,3 +204,52 @@ ssize_t sys_pwrite(int fd, const void *buf, size_t count, off_t off) } while (ret == -1 && errno == EINTR); return ret; } + +/******************************************************************* + A pwrite wrapper that will deal with EINTR and never allow a short + write unless the file system returns an error. +********************************************************************/ + +ssize_t sys_pwrite_full(int fd, const void *buf, size_t count, off_t off) +{ + ssize_t total_written = 0; + const uint8_t *curr_buf = (const uint8_t *)buf; + size_t curr_count = count; + off_t curr_off = off; + bool ok; + + ok = sys_valid_io_range(off, count); + if (!ok) { + errno = EINVAL; + return -1; + } + + while (curr_count != 0) { + ssize_t ret = sys_pwrite(fd, + curr_buf, + curr_count, + curr_off); + + if (ret == -1) { + return -1; + } + if (ret == 0) { + /* Ensure we can never spin. */ + errno = ENOSPC; + return -1; + } + + if (ret > curr_count) { + errno = EIO; + return -1; + } + + curr_buf += ret; + curr_count -= ret; + curr_off += ret; + + total_written += ret; + } + + return total_written; +} diff --git a/lib/util/sys_rw.h b/lib/util/sys_rw.h index 1e0dd3730a6..b224ecb30ac 100644 --- a/lib/util/sys_rw.h +++ b/lib/util/sys_rw.h @@ -36,5 +36,6 @@ ssize_t sys_writev(int fd, const struct iovec *iov, int iovcnt); ssize_t sys_pread(int fd, void *buf, size_t count, off_t off); ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off); ssize_t sys_pwrite(int fd, const void *buf, size_t count, off_t off); +ssize_t sys_pwrite_full(int fd, const void *buf, size_t count, off_t off); #endif -- 2.20.1 From c4a08bd0b1ecebdbbbbbb257696e0f9b93eebeaa Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 11 May 2020 18:18:24 +0200 Subject: [PATCH 05/31] smb2_server: fix smbd_smb2_request_verify_sizes() for SMB2_OP_WRITE Writes with a length of 0 are allowed. The readfile related check we had before was not really useful as min_dyn_len can only every be 0 or 1 (and for SMB2_OP_WRITE it's always 1). So we checked if (unread_bytes > 0) { if (unread_bytes < 1) { return error; } } BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit a6eee38ba2f89280676f0a32d26745afd95b551c) --- source3/smbd/smb2_server.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/source3/smbd/smb2_server.c b/source3/smbd/smb2_server.c index 8c66d74c8de..88937ac19b0 100644 --- a/source3/smbd/smb2_server.c +++ b/source3/smbd/smb2_server.c @@ -2073,16 +2073,8 @@ NTSTATUS smbd_smb2_request_verify_sizes(struct smbd_smb2_request *req, switch (opcode) { case SMB2_OP_IOCTL: case SMB2_OP_GETINFO: - min_dyn_size = 0; - break; case SMB2_OP_WRITE: - if (req->smb1req != NULL && req->smb1req->unread_bytes > 0) { - if (req->smb1req->unread_bytes < min_dyn_size) { - return NT_STATUS_INVALID_PARAMETER; - } - - min_dyn_size = 0; - } + min_dyn_size = 0; break; } -- 2.20.1 From 74fadf2f083daaf4358a9b64c1092586595894d5 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 11 May 2020 18:18:24 +0200 Subject: [PATCH 06/31] s3:smbd: handle 0 length writes as no-op. They should never touch the SMB_VFS layer and they never trigger an DISK_FULL error. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit ba68f21286c2c2f1fef8bf8c9cd500a622077887) --- source3/smbd/aio.c | 5 +++++ source3/smbd/fileio.c | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/source3/smbd/aio.c b/source3/smbd/aio.c index cf35f3297ec..088ea603788 100644 --- a/source3/smbd/aio.c +++ b/source3/smbd/aio.c @@ -339,6 +339,11 @@ static struct tevent_req *pwrite_fsync_send(TALLOC_CTX *mem_ctx, state->fsp = fsp; state->write_through = write_through; + if (n == 0) { + tevent_req_done(req); + return tevent_req_post(req, ev); + } + subreq = SMB_VFS_PWRITE_SEND(state, ev, fsp, data, n, offset); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); diff --git a/source3/smbd/fileio.c b/source3/smbd/fileio.c index 31d5b7510b7..f4a3dde66b7 100644 --- a/source3/smbd/fileio.c +++ b/source3/smbd/fileio.c @@ -70,6 +70,10 @@ static ssize_t real_write_file(struct smb_request *req, { ssize_t ret; + if (n == 0) { + return 0; + } + fsp->fh->pos = pos; if (pos && lp_strict_allocate(SNUM(fsp->conn) && !fsp->is_sparse)) { -- 2.20.1 From c593b105e646792cf19901e94ed764dc3b50d752 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 11 May 2020 22:00:37 +0200 Subject: [PATCH 07/31] s3:smbd: add vfs_valid_{pread,pwrite}_range() helper functions These implement the SMB2 visible behavior of the [MS-FSA] 2.1.5.2 Server Requests a Read and 2.1.5.3 Server Requests a Write constraints. Note that offset < 0 is not allowed over SMB. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 70fa4b884d2c22669984c25fe757c2fc528f7331) --- source3/smbd/proto.h | 2 ++ source3/smbd/vfs.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/source3/smbd/proto.h b/source3/smbd/proto.h index 8e33c473d44..60941ce6c1b 100644 --- a/source3/smbd/proto.h +++ b/source3/smbd/proto.h @@ -1246,6 +1246,8 @@ void sys_utmp_yield(const char *username, const char *hostname, bool vfs_init_custom(connection_struct *conn, const char *vfs_object); bool smbd_vfs_init(connection_struct *conn); NTSTATUS vfs_file_exist(connection_struct *conn, struct smb_filename *smb_fname); +bool vfs_valid_pread_range(off_t offset, size_t length); +bool vfs_valid_pwrite_range(off_t offset, size_t length); ssize_t vfs_pwrite_data(struct smb_request *req, files_struct *fsp, const char *buffer, diff --git a/source3/smbd/vfs.c b/source3/smbd/vfs.c index 7dc15158ccb..566eed35d9b 100644 --- a/source3/smbd/vfs.c +++ b/source3/smbd/vfs.c @@ -32,6 +32,7 @@ #include "ntioctl.h" #include "lib/util/tevent_unix.h" #include "lib/util/tevent_ntstatus.h" +#include "lib/util/sys_rw.h" #undef DBGC_CLASS #define DBGC_CLASS DBGC_VFS @@ -399,6 +400,37 @@ NTSTATUS vfs_file_exist(connection_struct *conn, struct smb_filename *smb_fname) return NT_STATUS_OBJECT_NAME_NOT_FOUND; } +bool vfs_valid_pread_range(off_t offset, size_t length) +{ + return sys_valid_io_range(offset, length); +} + +bool vfs_valid_pwrite_range(off_t offset, size_t length) +{ + /* + * See MAXFILESIZE in [MS-FSA] 2.1.5.3 Server Requests a Write + */ + static const uint64_t maxfilesize = 0xfffffff0000; + uint64_t last_byte_ofs; + bool ok; + + ok = sys_valid_io_range(offset, length); + if (!ok) { + return false; + } + + if (length == 0) { + return true; + } + + last_byte_ofs = offset + length; + if (last_byte_ofs > maxfilesize) { + return false; + } + + return true; +} + ssize_t vfs_pwrite_data(struct smb_request *req, files_struct *fsp, const char *buffer, -- 2.20.1 From 9e1f6bb08d5e814e9894d643c46ff1aac6a8da0b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 11 May 2020 18:18:24 +0200 Subject: [PATCH 08/31] smbd: add vfs_valid_{pread,pwrite}_range() checks where needed I checked all callers of SMB_VFS_PWRITE[_SEND](), all callers of SMB_VFS_PREAD[_SEND]() and also places where we append to the file and allocate more space. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 6fa753a1a67d563cd22d0cad73ae15ee267512fc) --- selftest/knownfail.d/rw-invalid | 3 +-- source3/modules/vfs_default.c | 7 +++++++ source3/smbd/aio.c | 19 ++++++++++++++++++ source3/smbd/fileio.c | 14 ++++++++++++++ source3/smbd/vfs.c | 34 +++++++++++++++++++++++++++++++-- 5 files changed, 73 insertions(+), 4 deletions(-) diff --git a/selftest/knownfail.d/rw-invalid b/selftest/knownfail.d/rw-invalid index c6f11e03d20..ac5fe573239 100644 --- a/selftest/knownfail.d/rw-invalid +++ b/selftest/knownfail.d/rw-invalid @@ -1,2 +1 @@ -samba3.smb2.rw.invalid -samba4.smb2.rw.invalid +samba4.smb2.rw.invalid.ad_dc_ntvfs diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c index a30f3ba1d31..3511c548166 100644 --- a/source3/modules/vfs_default.c +++ b/source3/modules/vfs_default.c @@ -2582,6 +2582,13 @@ static int strict_allocate_ftruncate(vfs_handle_struct *handle, files_struct *fs int ret; NTSTATUS status; SMB_STRUCT_STAT *pst; + bool ok; + + ok = vfs_valid_pwrite_range(len, 0); + if (!ok) { + errno = EINVAL; + return -1; + } status = vfs_stat_fsp(fsp); if (!NT_STATUS_IS_OK(status)) { diff --git a/source3/smbd/aio.c b/source3/smbd/aio.c index 088ea603788..af8a01461a7 100644 --- a/source3/smbd/aio.c +++ b/source3/smbd/aio.c @@ -166,6 +166,12 @@ NTSTATUS schedule_aio_read_and_X(connection_struct *conn, size_t bufsize; size_t min_aio_read_size = lp_aio_read_size(SNUM(conn)); struct tevent_req *req; + bool ok; + + ok = vfs_valid_pread_range(startpos, smb_maxcnt); + if (!ok) { + return NT_STATUS_INVALID_PARAMETER; + } if (fsp->base_fsp != NULL) { /* No AIO on streams yet */ @@ -330,6 +336,7 @@ static struct tevent_req *pwrite_fsync_send(TALLOC_CTX *mem_ctx, { struct tevent_req *req, *subreq; struct pwrite_fsync_state *state; + bool ok; req = tevent_req_create(mem_ctx, &state, struct pwrite_fsync_state); if (req == NULL) { @@ -339,6 +346,12 @@ static struct tevent_req *pwrite_fsync_send(TALLOC_CTX *mem_ctx, state->fsp = fsp; state->write_through = write_through; + ok = vfs_valid_pwrite_range(offset, n); + if (!ok) { + tevent_req_error(req, EINVAL); + return tevent_req_post(req, ev); + } + if (n == 0) { tevent_req_done(req); return tevent_req_post(req, ev); @@ -665,6 +678,12 @@ NTSTATUS schedule_smb2_aio_read(connection_struct *conn, struct aio_extra *aio_ex; size_t min_aio_read_size = lp_aio_read_size(SNUM(conn)); struct tevent_req *req; + bool ok; + + ok = vfs_valid_pread_range(startpos, smb_maxcnt); + if (!ok) { + return NT_STATUS_INVALID_PARAMETER; + } if (fsp->base_fsp != NULL) { /* No AIO on streams yet */ diff --git a/source3/smbd/fileio.c b/source3/smbd/fileio.c index f4a3dde66b7..000a01e2c0f 100644 --- a/source3/smbd/fileio.c +++ b/source3/smbd/fileio.c @@ -32,6 +32,7 @@ ssize_t read_file(files_struct *fsp,char *data,off_t pos,size_t n) { ssize_t ret = 0; + bool ok; /* you can't read from print files */ if (fsp->print_file) { @@ -39,6 +40,12 @@ ssize_t read_file(files_struct *fsp,char *data,off_t pos,size_t n) return -1; } + ok = vfs_valid_pread_range(pos, n); + if (!ok) { + errno = EINVAL; + return -1; + } + fsp->fh->pos = pos; if (n > 0) { @@ -69,6 +76,13 @@ static ssize_t real_write_file(struct smb_request *req, size_t n) { ssize_t ret; + bool ok; + + ok = vfs_valid_pwrite_range(pos, n); + if (!ok) { + errno = EINVAL; + return -1; + } if (n == 0) { return 0; diff --git a/source3/smbd/vfs.c b/source3/smbd/vfs.c index 566eed35d9b..96067e45005 100644 --- a/source3/smbd/vfs.c +++ b/source3/smbd/vfs.c @@ -439,6 +439,13 @@ ssize_t vfs_pwrite_data(struct smb_request *req, { size_t total=0; ssize_t ret; + bool ok; + + ok = vfs_valid_pwrite_range(offset, N); + if (!ok) { + errno = EINVAL; + return -1; + } if (req && req->unread_bytes) { int sockfd = req->xconn->transport.sock; @@ -515,6 +522,7 @@ int vfs_allocate_file_space(files_struct *fsp, uint64_t len) uint64_t space_avail; uint64_t bsize,dfree,dsize; NTSTATUS status; + bool ok; /* * Actually try and commit the space on disk.... @@ -523,8 +531,9 @@ int vfs_allocate_file_space(files_struct *fsp, uint64_t len) DEBUG(10,("vfs_allocate_file_space: file %s, len %.0f\n", fsp_str_dbg(fsp), (double)len)); - if (((off_t)len) < 0) { - DEBUG(0,("vfs_allocate_file_space: %s negative len " + ok = vfs_valid_pwrite_range((off_t)len, 0); + if (!ok) { + DEBUG(0,("vfs_allocate_file_space: %s negative/invalid len " "requested.\n", fsp_str_dbg(fsp))); errno = EINVAL; return -1; @@ -609,6 +618,13 @@ int vfs_allocate_file_space(files_struct *fsp, uint64_t len) int vfs_set_filelen(files_struct *fsp, off_t len) { int ret; + bool ok; + + ok = vfs_valid_pwrite_range(len, 0); + if (!ok) { + errno = EINVAL; + return -1; + } contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_SET_FILE_LEN); @@ -640,6 +656,13 @@ int vfs_slow_fallocate(files_struct *fsp, off_t offset, off_t len) { ssize_t pwrite_ret; size_t total = 0; + bool ok; + + ok = vfs_valid_pwrite_range(offset, len); + if (!ok) { + errno = EINVAL; + return -1; + } if (!sparse_buf) { sparse_buf = SMB_CALLOC_ARRAY(char, SPARSE_BUF_WRITE_SIZE); @@ -680,6 +703,13 @@ int vfs_fill_sparse(files_struct *fsp, off_t len) NTSTATUS status; off_t offset; size_t num_to_write; + bool ok; + + ok = vfs_valid_pwrite_range(len, 0); + if (!ok) { + errno = EINVAL; + return -1; + } status = vfs_stat_fsp(fsp); if (!NT_STATUS_IS_OK(status)) { -- 2.20.1 From af989582036a929eb0e2594b1e2ee54145d41286 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:42:10 -0700 Subject: [PATCH 09/31] s3: VFS: aio_fork: Change sys_pread() -> sys_pread_full() to protect against short reads. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison Reviewed-by: Stefan Metzmacher (cherry picked from commit 60f590000d545292760018694deb34a7cc4ded6d) --- source3/modules/vfs_aio_fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source3/modules/vfs_aio_fork.c b/source3/modules/vfs_aio_fork.c index a1fed5c0655..285b331ff9c 100644 --- a/source3/modules/vfs_aio_fork.c +++ b/source3/modules/vfs_aio_fork.c @@ -342,7 +342,7 @@ static void aio_child_loop(int sockfd, struct mmap_area *map) switch (cmd_struct.cmd) { case READ_CMD: - ret_struct.size = sys_pread( + ret_struct.size = sys_pread_full( fd, discard_const(map->ptr), cmd_struct.n, cmd_struct.offset); #if 0 -- 2.20.1 From 52b13fda0509f3bb28026a3d0f7d304c3bf28b08 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:42:53 -0700 Subject: [PATCH 10/31] s3: VFS: aio_fork: Change sys_pwrite() -> sys_pwrite_full() to protect against short writes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison Reviewed-by: Stefan Metzmacher (cherry picked from commit 20ee8b03bbe5bef4ea968170808e3c4c9d22318e) --- source3/modules/vfs_aio_fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source3/modules/vfs_aio_fork.c b/source3/modules/vfs_aio_fork.c index 285b331ff9c..7c6f4b00fd0 100644 --- a/source3/modules/vfs_aio_fork.c +++ b/source3/modules/vfs_aio_fork.c @@ -353,7 +353,7 @@ static void aio_child_loop(int sockfd, struct mmap_area *map) #endif break; case WRITE_CMD: - ret_struct.size = sys_pwrite( + ret_struct.size = sys_pwrite_full( fd, discard_const(map->ptr), cmd_struct.n, cmd_struct.offset); break; -- 2.20.1 From bd99244e029ef1df6a60a276d794bcd465a7bb5b Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:43:34 -0700 Subject: [PATCH 11/31] s3: VFS: default. Change sys_pread() -> sys_pread_full() in SMB_VFS_PREAD() to protect against short reads. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison Reviewed-by: Stefan Metzmacher (cherry picked from commit 7daa79908b6a0362db30276b3b6f0db176b6ae3c) --- source3/modules/vfs_default.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c index 3511c548166..1170629b084 100644 --- a/source3/modules/vfs_default.c +++ b/source3/modules/vfs_default.c @@ -739,7 +739,7 @@ static ssize_t vfswrap_pread(vfs_handle_struct *handle, files_struct *fsp, void #if defined(HAVE_PREAD) || defined(HAVE_PREAD64) START_PROFILE_BYTES(syscall_pread, n); - result = sys_pread(fsp->fh->fd, data, n, offset); + result = sys_pread_full(fsp->fh->fd, data, n, offset); END_PROFILE_BYTES(syscall_pread); if (result == -1 && errno == ESPIPE) { -- 2.20.1 From 26ec59040879427166e04cdadbff175a40018c87 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:44:26 -0700 Subject: [PATCH 12/31] s3: VFS: default. Change sys_pwrite() -> sys_pwrite_full() in SMB_VFS_PWRITE() to protect against short writes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison Reviewed-by: Stefan Metzmacher (cherry picked from commit ca8c3619f657dc38db7cb248f1a657f5bfe20757) --- source3/modules/vfs_default.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c index 1170629b084..e516c733e69 100644 --- a/source3/modules/vfs_default.c +++ b/source3/modules/vfs_default.c @@ -763,7 +763,7 @@ static ssize_t vfswrap_pwrite(vfs_handle_struct *handle, files_struct *fsp, cons #if defined(HAVE_PWRITE) || defined(HAVE_PRWITE64) START_PROFILE_BYTES(syscall_pwrite, n); - result = sys_pwrite(fsp->fh->fd, data, n, offset); + result = sys_pwrite_full(fsp->fh->fd, data, n, offset); END_PROFILE_BYTES(syscall_pwrite); if (result == -1 && errno == ESPIPE) { -- 2.20.1 From fa25bd26d1fe02c7480f2af7e22af8a03127157e Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:45:10 -0700 Subject: [PATCH 13/31] s3: VFS: default. Change pread() -> sys_pread_full() in SMB_VFS_PREAD_SEND() to protect against short reads. Note that as sys_pread_full() deals with the EINTR case we can remove the do {} while loop here. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison Reviewed-by: Stefan Metzmacher (cherry picked from commit bf2e546be38abfc77cf40e0b0fef42937696dcde) --- source3/modules/vfs_default.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c index e516c733e69..809c9b3d92f 100644 --- a/source3/modules/vfs_default.c +++ b/source3/modules/vfs_default.c @@ -843,10 +843,10 @@ static void vfs_pread_do(void *private_data) PROFILE_TIMESTAMP(&start_time); - do { - state->ret = pread(state->fd, state->buf, state->count, - state->offset); - } while ((state->ret == -1) && (errno == EINTR)); + state->ret = sys_pread_full(state->fd, + state->buf, + state->count, + state->offset); if (state->ret == -1) { state->vfs_aio_state.error = errno; -- 2.20.1 From 9a4639d6500900e18dfb685fbf4dc597f98cc63c Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:48:49 -0700 Subject: [PATCH 14/31] s3: VFS: default. Change pwrite() -> sys_pwrite_full() in SMB_VFS_PWRITE_SEND() to protect against short writes. Note that as sys_pwrite_full() deals with the EINTR case we can remove the do {} while loop here. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison Reviewed-by: Stefan Metzmacher (cherry picked from commit 801c06f4c9400343b72cad998086288931f7c6b3) --- source3/modules/vfs_default.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c index 809c9b3d92f..4cf553411cb 100644 --- a/source3/modules/vfs_default.c +++ b/source3/modules/vfs_default.c @@ -971,10 +971,10 @@ static void vfs_pwrite_do(void *private_data) PROFILE_TIMESTAMP(&start_time); - do { - state->ret = pwrite(state->fd, state->buf, state->count, - state->offset); - } while ((state->ret == -1) && (errno == EINTR)); + state->ret = sys_pwrite_full(state->fd, + state->buf, + state->count, + state->offset); if (state->ret == -1) { state->vfs_aio_state.error = errno; -- 2.20.1 From 9ecf0e23c6b5f40cf73df600b4ce2e6469d32065 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 May 2020 03:05:47 -0700 Subject: [PATCH 15/31] vfs_io_uring: fix the prefix for parametric options from 'vfs_io_uring' to 'io_uring' This is what the manpage describes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit fadc7043a71b409ad60a1a4076a7f88f379d2056) --- source3/modules/vfs_io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 378e48d112f..b409d075337 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -172,13 +172,13 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service, } num_entries = lp_parm_ulong(SNUM(handle->conn), - "vfs_io_uring", + "io_uring", "num_entries", 128); num_entries = MAX(num_entries, 1); sqpoll = lp_parm_bool(SNUM(handle->conn), - "vfs_io_uring", + "io_uring", "sqpoll", false); if (sqpoll) { -- 2.20.1 From 0f22fb3e27bdda29e4969e4ddb64f0287deb5599 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 10:39:52 +0200 Subject: [PATCH 16/31] vfs_io_uring: replace vfs_io_uring_request->state with _tevent_req_data() We don't need a direct pointer to the state... BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit f78e98e0226fe70899b613e0aa5c804d8458bdb0) --- source3/modules/vfs_io_uring.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index b409d075337..988b309da52 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -42,7 +42,6 @@ struct vfs_io_uring_request { struct vfs_io_uring_request **list_head; struct vfs_io_uring_config *config; struct tevent_req *req; - void *state; struct io_uring_sqe sqe; struct io_uring_cqe cqe; struct timespec start_time; @@ -58,8 +57,9 @@ static void vfs_io_uring_finish_req(struct vfs_io_uring_request *cur, struct tevent_req *req = talloc_get_type_abort(cur->req, struct tevent_req); + void *state = _tevent_req_data(req); - talloc_set_destructor(cur->state, NULL); + talloc_set_destructor(state, NULL); if (cur->list_head != NULL) { DLIST_REMOVE((*cur->list_head), cur); cur->list_head = NULL; @@ -238,6 +238,7 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config) for (cur = config->queue; cur != NULL; cur = next) { struct io_uring_sqe *sqe = NULL; + void *state = _tevent_req_data(cur->req); next = cur->next; @@ -246,7 +247,7 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config) break; } - talloc_set_destructor(cur->state, + talloc_set_destructor(state, vfs_io_uring_request_state_deny_destructor); DLIST_REMOVE(config->queue, cur); *sqe = cur->sqe; @@ -318,7 +319,6 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand } state->ur.config = config; state->ur.req = req; - state->ur.state = state; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pread, profile_p, state->ur.profile_bytes, n); @@ -398,7 +398,6 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han } state->ur.config = config; state->ur.req = req; - state->ur.state = state; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pwrite, profile_p, state->ur.profile_bytes, n); @@ -475,7 +474,6 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand } state->ur.config = config; state->ur.req = req; - state->ur.state = state; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_fsync, profile_p, state->ur.profile_bytes, 0); -- 2.20.1 From 61d1e13c927a8fb8439d576026dc56a2379f127d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 10:42:59 +0200 Subject: [PATCH 17/31] vfs_io_uring: introduce vfs_io_uring_request->completion_fn() We'll need to add more logic than a simple _tevent_req_done() BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 456533c9cfc332d3a83ea03a6f969b0d64ccbeb6) --- source3/modules/vfs_io_uring.c | 49 +++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 988b309da52..abdd4d16e9f 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -44,6 +44,8 @@ struct vfs_io_uring_request { struct tevent_req *req; struct io_uring_sqe sqe; struct io_uring_cqe cqe; + void (*completion_fn)(struct vfs_io_uring_request *cur, + const char *location); struct timespec start_time; struct timespec end_time; SMBPROFILE_BYTES_ASYNC_STATE(profile_bytes); @@ -74,7 +76,7 @@ static void vfs_io_uring_finish_req(struct vfs_io_uring_request *cur, * or tevent_req_defer_callback() being called * already. */ - _tevent_req_done(req, location); + cur->completion_fn(cur, location); } static void vfs_io_uring_config_destroy(struct vfs_io_uring_config *config, @@ -297,6 +299,9 @@ struct vfs_io_uring_pread_state { struct iovec iov; }; +static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, + const char *location); + static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx, struct tevent_context *ev, @@ -319,6 +324,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand } state->ur.config = config; state->ur.req = req; + state->ur.completion_fn = vfs_io_uring_pread_completion; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pread, profile_p, state->ur.profile_bytes, n); @@ -344,6 +350,17 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand return req; } +static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, + const char *location) +{ + /* + * We rely on being inside the _send() function + * or tevent_req_defer_callback() being called + * already. + */ + _tevent_req_done(cur->req, location); +} + static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, struct vfs_aio_state *vfs_aio_state) { @@ -376,6 +393,9 @@ struct vfs_io_uring_pwrite_state { struct iovec iov; }; +static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, + const char *location); + static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx, struct tevent_context *ev, @@ -398,6 +418,7 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han } state->ur.config = config; state->ur.req = req; + state->ur.completion_fn = vfs_io_uring_pwrite_completion; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pwrite, profile_p, state->ur.profile_bytes, n); @@ -423,6 +444,17 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han return req; } +static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, + const char *location) +{ + /* + * We rely on being inside the _send() function + * or tevent_req_defer_callback() being called + * already. + */ + _tevent_req_done(cur->req, location); +} + static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req, struct vfs_aio_state *vfs_aio_state) { @@ -454,6 +486,9 @@ struct vfs_io_uring_fsync_state { struct vfs_io_uring_request ur; }; +static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur, + const char *location); + static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx, struct tevent_context *ev, @@ -474,6 +509,7 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand } state->ur.config = config; state->ur.req = req; + state->ur.completion_fn = vfs_io_uring_fsync_completion; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_fsync, profile_p, state->ur.profile_bytes, 0); @@ -496,6 +532,17 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand return req; } +static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur, + const char *location) +{ + /* + * We rely on being inside the _send() function + * or tevent_req_defer_callback() being called + * already. + */ + _tevent_req_done(cur->req, location); +} + static int vfs_io_uring_fsync_recv(struct tevent_req *req, struct vfs_aio_state *vfs_aio_state) { -- 2.20.1 From 1d670bd14381a00c872ec56033f602b9e18f11b0 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 10:52:52 +0200 Subject: [PATCH 18/31] vfs_io_uring: move error handling out of vfs_io_uring_pread_recv() We should do that as early as possible and that's in vfs_io_uring_pread_completion(). BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit a1487067a6c9df3136fd5d4d16dda4c0f63cb662) --- source3/modules/vfs_io_uring.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index abdd4d16e9f..0d8e1833009 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -297,6 +297,7 @@ static void vfs_io_uring_fd_handler(struct tevent_context *ev, struct vfs_io_uring_pread_state { struct vfs_io_uring_request ur; struct iovec iov; + size_t nread; }; static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, @@ -353,12 +354,23 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, const char *location) { + struct vfs_io_uring_pread_state *state = tevent_req_data( + cur->req, struct vfs_io_uring_pread_state); + /* * We rely on being inside the _send() function * or tevent_req_defer_callback() being called * already. */ - _tevent_req_done(cur->req, location); + + if (cur->cqe.res < 0) { + int err = -cur->cqe.res; + _tevent_req_error(cur->req, err, location); + return; + } + + state->nread = state->ur.cqe.res; + tevent_req_done(cur->req); } static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, @@ -366,23 +378,19 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, { struct vfs_io_uring_pread_state *state = tevent_req_data( req, struct vfs_io_uring_pread_state); - int ret; + ssize_t ret; SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes); vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time, &state->ur.start_time); if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) { + tevent_req_received(req); return -1; } - if (state->ur.cqe.res < 0) { - vfs_aio_state->error = -state->ur.cqe.res; - ret = -1; - } else { - vfs_aio_state->error = 0; - ret = state->ur.cqe.res; - } + vfs_aio_state->error = 0; + ret = state->nread; tevent_req_received(req); return ret; -- 2.20.1 From d340f961466be94d1467b4953389b721421fb86a Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 10:52:52 +0200 Subject: [PATCH 19/31] vfs_io_uring: move error handling out of vfs_io_uring_pwrite_recv() We should do that as early as possible and that's in vfs_io_uring_pwrite_completion(). BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit a51969b8c7e6e49c0d3b776d897aea4f309f8678) --- source3/modules/vfs_io_uring.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 0d8e1833009..a8da341e7b7 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -399,6 +399,7 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, struct vfs_io_uring_pwrite_state { struct vfs_io_uring_request ur; struct iovec iov; + size_t nwritten; }; static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, @@ -455,12 +456,23 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, const char *location) { + struct vfs_io_uring_pwrite_state *state = tevent_req_data( + cur->req, struct vfs_io_uring_pwrite_state); + /* * We rely on being inside the _send() function * or tevent_req_defer_callback() being called * already. */ - _tevent_req_done(cur->req, location); + + if (cur->cqe.res < 0) { + int err = -cur->cqe.res; + _tevent_req_error(cur->req, err, location); + return; + } + + state->nwritten = state->ur.cqe.res; + tevent_req_done(cur->req); } static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req, @@ -468,23 +480,19 @@ static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req, { struct vfs_io_uring_pwrite_state *state = tevent_req_data( req, struct vfs_io_uring_pwrite_state); - int ret; + ssize_t ret; SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes); vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time, &state->ur.start_time); if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) { + tevent_req_received(req); return -1; } - if (state->ur.cqe.res < 0) { - vfs_aio_state->error = -state->ur.cqe.res; - ret = -1; - } else { - vfs_aio_state->error = 0; - ret = state->ur.cqe.res; - } + vfs_aio_state->error = 0; + ret = state->nwritten; tevent_req_received(req); return ret; -- 2.20.1 From 3da1624f4f6a674dd1e83d2b327e7a38cca7c252 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 10:52:52 +0200 Subject: [PATCH 20/31] vfs_io_uring: move error handling out of vfs_io_uring_fsync_recv() We should do that as early as possible and that's in vfs_io_uring_fsync_completion(). BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 40be2232a44a86cb5dfdda330801e615826408ba) --- source3/modules/vfs_io_uring.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index a8da341e7b7..0f560c95b67 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -556,7 +556,14 @@ static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur, * or tevent_req_defer_callback() being called * already. */ - _tevent_req_done(cur->req, location); + + if (cur->cqe.res < 0) { + int err = -cur->cqe.res; + _tevent_req_error(cur->req, err, location); + return; + } + + tevent_req_done(cur->req); } static int vfs_io_uring_fsync_recv(struct tevent_req *req, @@ -564,26 +571,20 @@ static int vfs_io_uring_fsync_recv(struct tevent_req *req, { struct vfs_io_uring_fsync_state *state = tevent_req_data( req, struct vfs_io_uring_fsync_state); - int ret; SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes); vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time, &state->ur.start_time); if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) { + tevent_req_received(req); return -1; } - if (state->ur.cqe.res < 0) { - vfs_aio_state->error = -state->ur.cqe.res; - ret = -1; - } else { - vfs_aio_state->error = 0; - ret = state->ur.cqe.res; - } + vfs_aio_state->error = 0; tevent_req_received(req); - return ret; + return 0; } static struct vfs_fn_pointers vfs_io_uring_fns = { -- 2.20.1 From 8bac62120864dc40285ae680f4493116dd2abe6d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:17:05 +0200 Subject: [PATCH 21/31] vfs_io_uring: make use of sys_valid_io_range() in vfs_io_uring_pread_send() This makes the follow up commits easier as we don't have to care about overflows. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 5005ae3fb24018e370ae60cc23c5e9cfe8357bc9) --- source3/modules/vfs_io_uring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 0f560c95b67..c7565b8c39d 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -25,6 +25,7 @@ #include "smbd/smbd.h" #include "smbd/globals.h" #include "lib/util/tevent_unix.h" +#include "lib/util/sys_rw.h" #include "smbprofile.h" #include @@ -313,6 +314,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand struct tevent_req *req = NULL; struct vfs_io_uring_pread_state *state = NULL; struct vfs_io_uring_config *config = NULL; + bool ok; SMB_VFS_HANDLE_GET_DATA(handle, config, struct vfs_io_uring_config, @@ -331,6 +333,12 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand state->ur.profile_bytes, n); SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->ur.profile_bytes); + ok = sys_valid_io_range(offset, n); + if (!ok) { + tevent_req_error(req, EINVAL); + return tevent_req_post(req, ev); + } + state->iov.iov_base = (void *)data; state->iov.iov_len = n; io_uring_prep_readv(&state->ur.sqe, -- 2.20.1 From 371195310fc9510692e0ee7f63361094bd7a4911 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:17:05 +0200 Subject: [PATCH 22/31] vfs_io_uring: make use of sys_valid_io_range() in vfs_io_uring_pwrite_send() This makes the follow up commits easier as we don't have to care about overflows. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 388bc2e6e44470ea4043ecb22750e241145355d2) --- source3/modules/vfs_io_uring.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index c7565b8c39d..ee23449c63c 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -423,6 +423,7 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han struct tevent_req *req = NULL; struct vfs_io_uring_pwrite_state *state = NULL; struct vfs_io_uring_config *config = NULL; + bool ok; SMB_VFS_HANDLE_GET_DATA(handle, config, struct vfs_io_uring_config, @@ -441,6 +442,12 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han state->ur.profile_bytes, n); SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->ur.profile_bytes); + ok = sys_valid_io_range(offset, n); + if (!ok) { + tevent_req_error(req, EINVAL); + return tevent_req_post(req, ev); + } + state->iov.iov_base = discard_const(data); state->iov.iov_len = n; io_uring_prep_writev(&state->ur.sqe, -- 2.20.1 From fec90a44a6eb56a883e19268c399b9475a20220d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 21:29:53 +0200 Subject: [PATCH 23/31] vfs_io_uring: avoid stack recursion of vfs_io_uring_queue_run() Instead we remember if recursion was triggered and jump to the start of the function again from the end. This should make it safe to be called from the completion_fn(). This is hideously complex stuff, so document the hell out of it. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit f96f45c9ba8d4c8fa4026c22ac4201d66335e5c4) --- source3/modules/vfs_io_uring.c | 93 +++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index ee23449c63c..f94453d9995 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -34,6 +34,10 @@ struct vfs_io_uring_request; struct vfs_io_uring_config { struct io_uring uring; struct tevent_fd *fde; + /* recursion guard. See comment above vfs_io_uring_queue_run() */ + bool busy; + /* recursion guard. See comment above vfs_io_uring_queue_run() */ + bool need_retry; struct vfs_io_uring_request *queue; struct vfs_io_uring_request *pending; }; @@ -222,7 +226,7 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service, return 0; } -static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config) +static void _vfs_io_uring_queue_run(struct vfs_io_uring_config *config) { struct vfs_io_uring_request *cur = NULL, *next = NULL; struct io_uring_cqe *cqe = NULL; @@ -280,6 +284,93 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config) io_uring_cq_advance(&config->uring, nr); } +/* + * Wrapper function to prevent recursion which could happen + * if we called _vfs_io_uring_queue_run() directly without + * recursion checks. + * + * Looking at the pread call, we can have: + * + * vfs_io_uring_pread_send() + * ->vfs_io_uring_pread_submit() <----------------------------------- + * ->vfs_io_uring_request_submit() | + * ->vfs_io_uring_queue_run() | + * ->_vfs_io_uring_queue_run() | + * | + * But inside _vfs_io_uring_queue_run() looks like: | + * | + * _vfs_io_uring_queue_run() { | + * if (THIS_IO_COMPLETED) { | + * ->vfs_io_uring_finish_req() | + * ->cur->completion_fn() | + * } | + * } | + * | + * cur->completion_fn() for pread is set to vfs_io_uring_pread_completion() | + * | + * vfs_io_uring_pread_completion() { | + * if (READ_TERMINATED) { | + * -> tevent_req_done() - We're done, go back up the stack. | + * return; | + * } | + * | + * We have a short read - adjust the io vectors | + * | + * ->vfs_io_uring_pread_submit() --------------------------------------- + * } + * + * So before calling _vfs_io_uring_queue_run() we backet it with setting + * a flag config->busy, and unset it once _vfs_io_uring_queue_run() finally + * exits the retry loop. + * + * If we end up back into vfs_io_uring_queue_run() we notice we've done so + * as config->busy is set and don't recurse into _vfs_io_uring_queue_run(). + * + * We set the second flag config->need_retry that tells us to loop in the + * vfs_io_uring_queue_run() call above us in the stack and return. + * + * When the outer call to _vfs_io_uring_queue_run() returns we are in + * a loop checking if config->need_retry was set. That happens if + * the short read case occurs and _vfs_io_uring_queue_run() ended up + * recursing into vfs_io_uring_queue_run(). + * + * Once vfs_io_uring_pread_completion() finishes without a short + * read (the READ_TERMINATED case, tevent_req_done() is called) + * then config->need_retry is left as false, we exit the loop, + * set config->busy to false so the next top level call into + * vfs_io_uring_queue_run() won't think it's a recursed call + * and return. + * + */ + +static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config) +{ + if (config->busy) { + /* + * We've recursed due to short read/write. + * Set need_retry to ensure we retry the + * io_uring_submit(). + */ + config->need_retry = true; + return; + } + + /* + * Bracket the loop calling _vfs_io_uring_queue_run() + * with busy = true / busy = false. + * so we can detect recursion above. + */ + + config->busy = true; + + do { + config->need_retry = false; + _vfs_io_uring_queue_run(config); + } while (config->need_retry); + + config->busy = false; +} + static void vfs_io_uring_fd_handler(struct tevent_context *ev, struct tevent_fd *fde, uint16_t flags, -- 2.20.1 From f7baf726bbb2c048c243857347731628ae1f112b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:17:51 +0200 Subject: [PATCH 24/31] vfs_io_uring: split out a vfs_io_uring_request_submit() function BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit ab89b8e75354c5fd571985e924e1ccbec99de990) --- source3/modules/vfs_io_uring.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index f94453d9995..1d48bd192fe 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -371,6 +371,17 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config) config->busy = false; } +static void vfs_io_uring_request_submit(struct vfs_io_uring_request *cur) +{ + struct vfs_io_uring_config *config = cur->config; + + io_uring_sqe_set_data(&cur->sqe, cur); + DLIST_ADD_END(config->queue, cur); + cur->list_head = &config->queue; + + vfs_io_uring_queue_run(config); +} + static void vfs_io_uring_fd_handler(struct tevent_context *ev, struct tevent_fd *fde, uint16_t flags, @@ -436,11 +447,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand fsp->fh->fd, &state->iov, 1, offset); - io_uring_sqe_set_data(&state->ur.sqe, &state->ur); - DLIST_ADD_END(config->queue, &state->ur); - state->ur.list_head = &config->queue; - - vfs_io_uring_queue_run(config); + vfs_io_uring_request_submit(&state->ur); if (!tevent_req_is_in_progress(req)) { return tevent_req_post(req, ev); @@ -545,11 +552,7 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han fsp->fh->fd, &state->iov, 1, offset); - io_uring_sqe_set_data(&state->ur.sqe, &state->ur); - DLIST_ADD_END(config->queue, &state->ur); - state->ur.list_head = &config->queue; - - vfs_io_uring_queue_run(config); + vfs_io_uring_request_submit(&state->ur); if (!tevent_req_is_in_progress(req)) { return tevent_req_post(req, ev); @@ -640,11 +643,7 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand io_uring_prep_fsync(&state->ur.sqe, fsp->fh->fd, 0); /* fsync_flags */ - io_uring_sqe_set_data(&state->ur.sqe, &state->ur); - DLIST_ADD_END(config->queue, &state->ur); - state->ur.list_head = &config->queue; - - vfs_io_uring_queue_run(config); + vfs_io_uring_request_submit(&state->ur); if (!tevent_req_is_in_progress(req)) { return tevent_req_post(req, ev); -- 2.20.1 From ccda7ef339d743ee90df6739ddcdce22c169b70e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:17:51 +0200 Subject: [PATCH 25/31] vfs_io_uring: split out a vfs_io_uring_pread_submit() function This can be reused when we add handling for short reads. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 9de4f8be1dc8b4274891016191a5ca1f724e08b3) --- source3/modules/vfs_io_uring.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 1d48bd192fe..19e268e63db 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -399,10 +399,13 @@ static void vfs_io_uring_fd_handler(struct tevent_context *ev, struct vfs_io_uring_pread_state { struct vfs_io_uring_request ur; + struct files_struct *fsp; + off_t offset; struct iovec iov; size_t nread; }; +static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state); static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, const char *location); @@ -441,13 +444,11 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand return tevent_req_post(req, ev); } + state->fsp = fsp; + state->offset = offset; state->iov.iov_base = (void *)data; state->iov.iov_len = n; - io_uring_prep_readv(&state->ur.sqe, - fsp->fh->fd, - &state->iov, 1, - offset); - vfs_io_uring_request_submit(&state->ur); + vfs_io_uring_pread_submit(state); if (!tevent_req_is_in_progress(req)) { return tevent_req_post(req, ev); @@ -457,6 +458,15 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand return req; } +static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state) +{ + io_uring_prep_readv(&state->ur.sqe, + state->fsp->fh->fd, + &state->iov, 1, + state->offset); + vfs_io_uring_request_submit(&state->ur); +} + static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, const char *location) { -- 2.20.1 From def543bf2c96db54475b3a48a77f68a500f7a248 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:17:51 +0200 Subject: [PATCH 26/31] vfs_io_uring: split out a vfs_io_uring_pwrite_submit() function This can be reused when we add handling for short writes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 2f6abb00b0daeb4de9ad0aea1b5c56559391aef9) --- source3/modules/vfs_io_uring.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 19e268e63db..3e004f48aa0 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -514,10 +514,13 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, struct vfs_io_uring_pwrite_state { struct vfs_io_uring_request ur; + struct files_struct *fsp; + off_t offset; struct iovec iov; size_t nwritten; }; +static void vfs_io_uring_pwrite_submit(struct vfs_io_uring_pwrite_state *state); static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, const char *location); @@ -556,13 +559,11 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han return tevent_req_post(req, ev); } + state->fsp = fsp; + state->offset = offset; state->iov.iov_base = discard_const(data); state->iov.iov_len = n; - io_uring_prep_writev(&state->ur.sqe, - fsp->fh->fd, - &state->iov, 1, - offset); - vfs_io_uring_request_submit(&state->ur); + vfs_io_uring_pwrite_submit(state); if (!tevent_req_is_in_progress(req)) { return tevent_req_post(req, ev); @@ -572,6 +573,15 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han return req; } +static void vfs_io_uring_pwrite_submit(struct vfs_io_uring_pwrite_state *state) +{ + io_uring_prep_writev(&state->ur.sqe, + state->fsp->fh->fd, + &state->iov, 1, + state->offset); + vfs_io_uring_request_submit(&state->ur); +} + static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, const char *location) { -- 2.20.1 From 92a105453c9479ff70efc9a1cf5b4fd3b36764e0 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:38:56 +0200 Subject: [PATCH 27/31] vfs_io_uring: protect vfs_io_uring_pread_completion() against invalid results We should never get back more than we asked for. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit f085dbf8b2bed2695e0065a5bf4523232cb532c7) --- source3/modules/vfs_io_uring.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 3e004f48aa0..46fab116e9d 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -26,6 +26,7 @@ #include "smbd/globals.h" #include "lib/util/tevent_unix.h" #include "lib/util/sys_rw.h" +#include "lib/util/iov_buf.h" #include "smbprofile.h" #include @@ -472,6 +473,9 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, { struct vfs_io_uring_pread_state *state = tevent_req_data( cur->req, struct vfs_io_uring_pread_state); + struct iovec *iov = &state->iov; + int num_iov = 1; + bool ok; /* * We rely on being inside the _send() function @@ -485,6 +489,16 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, return; } + ok = iov_advance(&iov, &num_iov, cur->cqe.res); + if (!ok) { + /* This is not expected! */ + DBG_ERR("iov_advance() failed cur->cqe.res=%d > iov_len=%d\n", + (int)cur->cqe.res, + (int)state->iov.iov_len); + tevent_req_error(cur->req, EIO); + return; + } + state->nread = state->ur.cqe.res; tevent_req_done(cur->req); } -- 2.20.1 From 2153f89d521d61f303a1ab55abb4b3e65825c7bd Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:38:56 +0200 Subject: [PATCH 28/31] vfs_io_uring: protect vfs_io_uring_pwrite_completion() against invalid results We should never get more acked than we asked for. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 283f96872237517f0b3bc4e63e8d3c482ecd5fa4) --- source3/modules/vfs_io_uring.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 46fab116e9d..0ea785aae85 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -601,6 +601,9 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, { struct vfs_io_uring_pwrite_state *state = tevent_req_data( cur->req, struct vfs_io_uring_pwrite_state); + struct iovec *iov = &state->iov; + int num_iov = 1; + bool ok; /* * We rely on being inside the _send() function @@ -614,6 +617,16 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, return; } + ok = iov_advance(&iov, &num_iov, cur->cqe.res); + if (!ok) { + /* This is not expected! */ + DBG_ERR("iov_advance() failed cur->cqe.res=%d > iov_len=%d\n", + (int)cur->cqe.res, + (int)state->iov.iov_len); + tevent_req_error(cur->req, EIO); + return; + } + state->nwritten = state->ur.cqe.res; tevent_req_done(cur->req); } -- 2.20.1 From 16fe7c511d019990318db5b44278ed9f6ae18a00 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:38:56 +0200 Subject: [PATCH 29/31] vfs_io_uring: protect vfs_io_uring_fsync_completion() against invalid results We should never get back a value > 0. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit c57a731c4ce395fd710f0b066cd6f1b72223ae07) --- source3/modules/vfs_io_uring.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 0ea785aae85..0b1583f962a 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -715,6 +715,13 @@ static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur, return; } + if (cur->cqe.res > 0) { + /* This is not expected! */ + DBG_ERR("got cur->cqe.res=%d\n", (int)cur->cqe.res); + tevent_req_error(cur->req, EIO); + return; + } + tevent_req_done(cur->req); } -- 2.20.1 From 81deb9fe0b2991c7da3f0165c24fb449c5ce879b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:30:17 +0200 Subject: [PATCH 30/31] vfs_io_uring: retry after a short read in vfs_io_uring_pread_completion() We need to be prepared for short reads from the kernel depending on the state of the page cache. Windows and Mac clients don't expect short reads for files, so we need to retry ourself. For the future we may be able to play with some io_uring flags in order to avoid the retries in userspace, but for now we just fix the data corruption bug... BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 42e77c4cf245d8420641d216d1abefe81f7a3b79) --- source3/modules/vfs_io_uring.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 0b1583f962a..f16c9ae56d3 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -489,6 +489,14 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, return; } + if (cur->cqe.res == 0) { + /* + * We reached EOF, we're done + */ + tevent_req_done(cur->req); + return; + } + ok = iov_advance(&iov, &num_iov, cur->cqe.res); if (!ok) { /* This is not expected! */ @@ -499,8 +507,20 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, return; } - state->nread = state->ur.cqe.res; - tevent_req_done(cur->req); + /* sys_valid_io_range() already checked the boundaries */ + state->nread += state->ur.cqe.res; + if (num_iov == 0) { + /* We're done */ + tevent_req_done(cur->req); + return; + } + + /* + * sys_valid_io_range() already checked the boundaries + * now try to get the rest. + */ + state->offset += state->ur.cqe.res; + vfs_io_uring_pread_submit(state); } static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, -- 2.20.1 From 382b5a8b7263ff1dfb428cdebef0c17d622e2066 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:30:17 +0200 Subject: [PATCH 31/31] vfs_io_uring: retry after a short writes in vfs_io_uring_pwrite_completion() We need to be prepared for short writes from the kernel depending on the state of the page cache. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher Reviewed-by: Jeremy Allison (cherry picked from commit 0f01b10679c06dbd28da72ca6c6280ddf81672ba) --- source3/modules/vfs_io_uring.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index f16c9ae56d3..4625e16c37e 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -637,6 +637,14 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, return; } + if (cur->cqe.res == 0) { + /* + * Ensure we can never spin. + */ + tevent_req_error(cur->req, ENOSPC); + return; + } + ok = iov_advance(&iov, &num_iov, cur->cqe.res); if (!ok) { /* This is not expected! */ @@ -647,8 +655,20 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, return; } - state->nwritten = state->ur.cqe.res; - tevent_req_done(cur->req); + /* sys_valid_io_range() already checked the boundaries */ + state->nwritten += state->ur.cqe.res; + if (num_iov == 0) { + /* We're done */ + tevent_req_done(cur->req); + return; + } + + /* + * sys_valid_io_range() already checked the boundaries + * now try to write the rest. + */ + state->offset += state->ur.cqe.res; + vfs_io_uring_pwrite_submit(state); } static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req, -- 2.20.1