On Mon, Dec 5, 2022 at 9:24 PM Greg Kroah-Hartman gregkh@linuxfoundation.org wrote:
From: Amir Goldstein amir73il@gmail.com
[ Upstream commit 10bc8e4af65946b727728d7479c028742321b60a ]
Commit 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs copies") removed fallback to generic_copy_file_range() for cross-fs cases inside vfs_copy_file_range().
Hi Greg,
The regressing commit is in v5.15.53. Please apply this fix to 5.15.y.
I will test and post backports of cross-fs copy_file_range() fixes to pre 5.15 kernels. See: https://bugzilla.kernel.org/show_bug.cgi?id=216800
Thanks, Amir.
To preserve behavior of nfsd and ksmbd server-side-copy, the fallback to generic_copy_file_range() was added in nfsd and ksmbd code, but that call is missing sb_start_write(), fsnotify hooks and more.
Ideally, nfsd and ksmbd would pass a flag to vfs_copy_file_range() that will take care of the fallback, but that code would be subtle and we got vfs_copy_file_range() logic wrong too many times already.
Instead, add a flag to explicitly request vfs_copy_file_range() to perform only generic_copy_file_range() and let nfsd and ksmbd use this flag only in the fallback path.
This choise keeps the logic changes to minimum in the non-nfsd/ksmbd code paths to reduce the risk of further regressions.
Fixes: 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs copies") Tested-by: Namjae Jeon linkinjeon@kernel.org Tested-by: Luis Henriques lhenriques@suse.de Signed-off-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org
fs/ksmbd/vfs.c | 6 +++--- fs/nfsd/vfs.c | 4 ++-- fs/read_write.c | 19 +++++++++++++++---- include/linux/fs.h | 8 ++++++++ 4 files changed, 28 insertions(+), 9 deletions(-)
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 78d01033604c..c5c801e38b63 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1784,9 +1784,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = generic_copy_file_range(src_fp->filp, src_off,
dst_fp->filp, dst_off,
len, 0);
ret = vfs_copy_file_range(src_fp->filp, src_off,
dst_fp->filp, dst_off, len,
COPY_FILE_SPLICE); if (ret < 0) return ret;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f3cd614e1f1e..dc24d67d0ca4 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -572,8 +572,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = generic_copy_file_range(src, src_pos, dst, dst_pos,
count, 0);
ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
COPY_FILE_SPLICE); return ret;
}
diff --git a/fs/read_write.c b/fs/read_write.c index 328ce8cf9a85..24b9668d6377 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) {
lockdep_assert(sb_write_started(file_inode(file_out)->i_sb));
return do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
} @@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, * and several different sets of file_operations, but they all end up * using the same ->copy_file_range() function pointer. */
if (file_out->f_op->copy_file_range) {
if (flags & COPY_FILE_SPLICE) {
/* cross sb splice is allowed */
} else if (file_out->f_op->copy_file_range) { if (file_in->f_op->copy_file_range != file_out->f_op->copy_file_range) return -EXDEV;
@@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, size_t len, unsigned int flags) { ssize_t ret;
bool splice = flags & COPY_FILE_SPLICE;
if (flags != 0)
if (flags & ~COPY_FILE_SPLICE) return -EINVAL; ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
@@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * same sb using clone, but for filesystems where both clone and copy * are supported (e.g. nfs,cifs), we only call the copy method. */
if (file_out->f_op->copy_file_range) {
if (!splice && file_out->f_op->copy_file_range) { ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); goto done; }
if (file_in->f_op->remap_file_range &&
if (!splice && file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out,
@@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * consistent story about which filesystems support copy_file_range() * and which filesystems do not, that will allow userspace tools to * make consistent desicions w.r.t using copy_file_range().
*
* We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE. */ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, flags);
@@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, pos_out = f_out.file->f_pos; }
ret = -EINVAL;
if (flags != 0)
goto out;
ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, flags); if (ret > 0) {
diff --git a/include/linux/fs.h b/include/linux/fs.h index 7203f5582fd4..be074b6895b9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2087,6 +2087,14 @@ struct dir_context { */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
+/*
- These flags control the behavior of vfs_copy_file_range().
- They are not available to the user via syscall.
- COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
- */
+#define COPY_FILE_SPLICE (1 << 0)
struct iov_iter; struct io_uring_cmd;
-- 2.35.1