The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3f965021c8bc38965ecb1924f570c4842b33d408 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Mon, 24 Jan 2022 15:50:31 -0500
Subject: [PATCH] NFSD: COMMIT operations must not return NFS?ERR_INVAL
Since, well, forever, the Linux NFS server's nfsd_commit() function
has returned nfserr_inval when the passed-in byte range arguments
were non-sensical.
However, according to RFC 1813 section 3.3.21, NFSv3 COMMIT requests
are permitted to return only the following non-zero status codes:
NFS3ERR_IO
NFS3ERR_STALE
NFS3ERR_BADHANDLE
NFS3ERR_SERVERFAULT
NFS3ERR_INVAL is not included in that list. Likewise, NFS4ERR_INVAL
is not listed in the COMMIT row of Table 6 in RFC 8881.
RFC 7530 does permit COMMIT to return NFS4ERR_INVAL, but does not
specify when it can or should be used.
Instead of dropping or failing a COMMIT request in a byte range that
is not supported, turn it into a valid request by treating one or
both arguments as zero. Offset zero means start-of-file, count zero
means until-end-of-file, so we only ever extend the commit range.
NFS servers are always allowed to commit more and sooner than
requested.
The range check is no longer bounded by NFS_OFFSET_MAX, but rather
by the value that is returned in the maxfilesize field of the NFSv3
FSINFO procedure or the NFSv4 maxfilesize file attribute.
Note that this change results in a new pynfs failure:
CMT4 st_commit.testCommitOverflow : RUNNING
CMT4 st_commit.testCommitOverflow : FAILURE
COMMIT with offset + count overflow should return
NFS4ERR_INVAL, instead got NFS4_OK
IMO the test is not correct as written: RFC 8881 does not allow the
COMMIT operation to return NFS4ERR_INVAL.
Reported-by: Dan Aloni <dan.aloni(a)vastdata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
Reviewed-by: Bruce Fields <bfields(a)fieldses.org>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index aca38ed1526e..52ad1972cc33 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -663,15 +663,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
argp->count,
(unsigned long long) argp->offset);
- if (argp->offset > NFS_OFFSET_MAX) {
- resp->status = nfserr_inval;
- goto out;
- }
-
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset,
argp->count, resp->verf);
-out:
return rpc_success;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0cccceb105e7..91600e71be19 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1114,42 +1114,61 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
}
#ifdef CONFIG_NFSD_V3
-/*
- * Commit all pending writes to stable storage.
+/**
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+ * @fhp: NFS filehandle
+ * @offset: raw offset from beginning of file
+ * @count: raw count of bytes to sync
+ * @verf: filled in with the server's current write verifier
*
- * Note: we only guarantee that data that lies within the range specified
- * by the 'offset' and 'count' parameters will be synced.
+ * Note: we guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced. The server
+ * is permitted to sync data that lies outside this range at the
+ * same time.
*
* Unfortunately we cannot lock the file to make sure we return full WCC
* data to the client, as locking happens lower down in the filesystem.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
*/
__be32
-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, unsigned long count, __be32 *verf)
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ u32 count, __be32 *verf)
{
+ u64 maxbytes;
+ loff_t start, end;
struct nfsd_net *nn;
struct nfsd_file *nf;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
-
- if (offset < 0)
- goto out;
- if (count != 0) {
- end = offset + (loff_t)count - 1;
- if (end < offset)
- goto out;
- }
+ __be32 err;
err = nfsd_file_acquire(rqstp, fhp,
NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
+
+ /*
+ * Convert the client-provided (offset, count) range to a
+ * (start, end) range. If the client-provided range falls
+ * outside the maximum file size of the underlying FS,
+ * clamp the sync range appropriately.
+ */
+ start = 0;
+ end = LLONG_MAX;
+ maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
+ if (offset < maxbytes) {
+ start = offset;
+ if (count && (offset + count - 1 < maxbytes))
+ end = offset + count - 1;
+ }
+
nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
int err2;
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
switch (err2) {
case 0:
nfsd_copy_write_verifier(verf, nn);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 9f56dcb22ff7..2c43d10e3cab 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -74,8 +74,8 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
struct svc_fh *res, int createmode,
u32 *verifier, bool *truncp, bool *created);
-__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
- loff_t, unsigned long, __be32 *verf);
+__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+ u64 offset, u32 count, __be32 *verf);
#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
__be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3f965021c8bc38965ecb1924f570c4842b33d408 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Mon, 24 Jan 2022 15:50:31 -0500
Subject: [PATCH] NFSD: COMMIT operations must not return NFS?ERR_INVAL
Since, well, forever, the Linux NFS server's nfsd_commit() function
has returned nfserr_inval when the passed-in byte range arguments
were non-sensical.
However, according to RFC 1813 section 3.3.21, NFSv3 COMMIT requests
are permitted to return only the following non-zero status codes:
NFS3ERR_IO
NFS3ERR_STALE
NFS3ERR_BADHANDLE
NFS3ERR_SERVERFAULT
NFS3ERR_INVAL is not included in that list. Likewise, NFS4ERR_INVAL
is not listed in the COMMIT row of Table 6 in RFC 8881.
RFC 7530 does permit COMMIT to return NFS4ERR_INVAL, but does not
specify when it can or should be used.
Instead of dropping or failing a COMMIT request in a byte range that
is not supported, turn it into a valid request by treating one or
both arguments as zero. Offset zero means start-of-file, count zero
means until-end-of-file, so we only ever extend the commit range.
NFS servers are always allowed to commit more and sooner than
requested.
The range check is no longer bounded by NFS_OFFSET_MAX, but rather
by the value that is returned in the maxfilesize field of the NFSv3
FSINFO procedure or the NFSv4 maxfilesize file attribute.
Note that this change results in a new pynfs failure:
CMT4 st_commit.testCommitOverflow : RUNNING
CMT4 st_commit.testCommitOverflow : FAILURE
COMMIT with offset + count overflow should return
NFS4ERR_INVAL, instead got NFS4_OK
IMO the test is not correct as written: RFC 8881 does not allow the
COMMIT operation to return NFS4ERR_INVAL.
Reported-by: Dan Aloni <dan.aloni(a)vastdata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
Reviewed-by: Bruce Fields <bfields(a)fieldses.org>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index aca38ed1526e..52ad1972cc33 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -663,15 +663,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
argp->count,
(unsigned long long) argp->offset);
- if (argp->offset > NFS_OFFSET_MAX) {
- resp->status = nfserr_inval;
- goto out;
- }
-
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset,
argp->count, resp->verf);
-out:
return rpc_success;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0cccceb105e7..91600e71be19 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1114,42 +1114,61 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
}
#ifdef CONFIG_NFSD_V3
-/*
- * Commit all pending writes to stable storage.
+/**
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+ * @fhp: NFS filehandle
+ * @offset: raw offset from beginning of file
+ * @count: raw count of bytes to sync
+ * @verf: filled in with the server's current write verifier
*
- * Note: we only guarantee that data that lies within the range specified
- * by the 'offset' and 'count' parameters will be synced.
+ * Note: we guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced. The server
+ * is permitted to sync data that lies outside this range at the
+ * same time.
*
* Unfortunately we cannot lock the file to make sure we return full WCC
* data to the client, as locking happens lower down in the filesystem.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
*/
__be32
-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, unsigned long count, __be32 *verf)
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ u32 count, __be32 *verf)
{
+ u64 maxbytes;
+ loff_t start, end;
struct nfsd_net *nn;
struct nfsd_file *nf;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
-
- if (offset < 0)
- goto out;
- if (count != 0) {
- end = offset + (loff_t)count - 1;
- if (end < offset)
- goto out;
- }
+ __be32 err;
err = nfsd_file_acquire(rqstp, fhp,
NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
+
+ /*
+ * Convert the client-provided (offset, count) range to a
+ * (start, end) range. If the client-provided range falls
+ * outside the maximum file size of the underlying FS,
+ * clamp the sync range appropriately.
+ */
+ start = 0;
+ end = LLONG_MAX;
+ maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
+ if (offset < maxbytes) {
+ start = offset;
+ if (count && (offset + count - 1 < maxbytes))
+ end = offset + count - 1;
+ }
+
nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
int err2;
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
switch (err2) {
case 0:
nfsd_copy_write_verifier(verf, nn);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 9f56dcb22ff7..2c43d10e3cab 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -74,8 +74,8 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
struct svc_fh *res, int createmode,
u32 *verifier, bool *truncp, bool *created);
-__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
- loff_t, unsigned long, __be32 *verf);
+__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+ u64 offset, u32 count, __be32 *verf);
#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
__be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3f965021c8bc38965ecb1924f570c4842b33d408 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Mon, 24 Jan 2022 15:50:31 -0500
Subject: [PATCH] NFSD: COMMIT operations must not return NFS?ERR_INVAL
Since, well, forever, the Linux NFS server's nfsd_commit() function
has returned nfserr_inval when the passed-in byte range arguments
were non-sensical.
However, according to RFC 1813 section 3.3.21, NFSv3 COMMIT requests
are permitted to return only the following non-zero status codes:
NFS3ERR_IO
NFS3ERR_STALE
NFS3ERR_BADHANDLE
NFS3ERR_SERVERFAULT
NFS3ERR_INVAL is not included in that list. Likewise, NFS4ERR_INVAL
is not listed in the COMMIT row of Table 6 in RFC 8881.
RFC 7530 does permit COMMIT to return NFS4ERR_INVAL, but does not
specify when it can or should be used.
Instead of dropping or failing a COMMIT request in a byte range that
is not supported, turn it into a valid request by treating one or
both arguments as zero. Offset zero means start-of-file, count zero
means until-end-of-file, so we only ever extend the commit range.
NFS servers are always allowed to commit more and sooner than
requested.
The range check is no longer bounded by NFS_OFFSET_MAX, but rather
by the value that is returned in the maxfilesize field of the NFSv3
FSINFO procedure or the NFSv4 maxfilesize file attribute.
Note that this change results in a new pynfs failure:
CMT4 st_commit.testCommitOverflow : RUNNING
CMT4 st_commit.testCommitOverflow : FAILURE
COMMIT with offset + count overflow should return
NFS4ERR_INVAL, instead got NFS4_OK
IMO the test is not correct as written: RFC 8881 does not allow the
COMMIT operation to return NFS4ERR_INVAL.
Reported-by: Dan Aloni <dan.aloni(a)vastdata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
Reviewed-by: Bruce Fields <bfields(a)fieldses.org>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index aca38ed1526e..52ad1972cc33 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -663,15 +663,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
argp->count,
(unsigned long long) argp->offset);
- if (argp->offset > NFS_OFFSET_MAX) {
- resp->status = nfserr_inval;
- goto out;
- }
-
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset,
argp->count, resp->verf);
-out:
return rpc_success;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0cccceb105e7..91600e71be19 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1114,42 +1114,61 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
}
#ifdef CONFIG_NFSD_V3
-/*
- * Commit all pending writes to stable storage.
+/**
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+ * @fhp: NFS filehandle
+ * @offset: raw offset from beginning of file
+ * @count: raw count of bytes to sync
+ * @verf: filled in with the server's current write verifier
*
- * Note: we only guarantee that data that lies within the range specified
- * by the 'offset' and 'count' parameters will be synced.
+ * Note: we guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced. The server
+ * is permitted to sync data that lies outside this range at the
+ * same time.
*
* Unfortunately we cannot lock the file to make sure we return full WCC
* data to the client, as locking happens lower down in the filesystem.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
*/
__be32
-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, unsigned long count, __be32 *verf)
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ u32 count, __be32 *verf)
{
+ u64 maxbytes;
+ loff_t start, end;
struct nfsd_net *nn;
struct nfsd_file *nf;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
-
- if (offset < 0)
- goto out;
- if (count != 0) {
- end = offset + (loff_t)count - 1;
- if (end < offset)
- goto out;
- }
+ __be32 err;
err = nfsd_file_acquire(rqstp, fhp,
NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
+
+ /*
+ * Convert the client-provided (offset, count) range to a
+ * (start, end) range. If the client-provided range falls
+ * outside the maximum file size of the underlying FS,
+ * clamp the sync range appropriately.
+ */
+ start = 0;
+ end = LLONG_MAX;
+ maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
+ if (offset < maxbytes) {
+ start = offset;
+ if (count && (offset + count - 1 < maxbytes))
+ end = offset + count - 1;
+ }
+
nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
int err2;
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
switch (err2) {
case 0:
nfsd_copy_write_verifier(verf, nn);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 9f56dcb22ff7..2c43d10e3cab 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -74,8 +74,8 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
struct svc_fh *res, int createmode,
u32 *verifier, bool *truncp, bool *created);
-__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
- loff_t, unsigned long, __be32 *verf);
+__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+ u64 offset, u32 count, __be32 *verf);
#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
__be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3f965021c8bc38965ecb1924f570c4842b33d408 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Mon, 24 Jan 2022 15:50:31 -0500
Subject: [PATCH] NFSD: COMMIT operations must not return NFS?ERR_INVAL
Since, well, forever, the Linux NFS server's nfsd_commit() function
has returned nfserr_inval when the passed-in byte range arguments
were non-sensical.
However, according to RFC 1813 section 3.3.21, NFSv3 COMMIT requests
are permitted to return only the following non-zero status codes:
NFS3ERR_IO
NFS3ERR_STALE
NFS3ERR_BADHANDLE
NFS3ERR_SERVERFAULT
NFS3ERR_INVAL is not included in that list. Likewise, NFS4ERR_INVAL
is not listed in the COMMIT row of Table 6 in RFC 8881.
RFC 7530 does permit COMMIT to return NFS4ERR_INVAL, but does not
specify when it can or should be used.
Instead of dropping or failing a COMMIT request in a byte range that
is not supported, turn it into a valid request by treating one or
both arguments as zero. Offset zero means start-of-file, count zero
means until-end-of-file, so we only ever extend the commit range.
NFS servers are always allowed to commit more and sooner than
requested.
The range check is no longer bounded by NFS_OFFSET_MAX, but rather
by the value that is returned in the maxfilesize field of the NFSv3
FSINFO procedure or the NFSv4 maxfilesize file attribute.
Note that this change results in a new pynfs failure:
CMT4 st_commit.testCommitOverflow : RUNNING
CMT4 st_commit.testCommitOverflow : FAILURE
COMMIT with offset + count overflow should return
NFS4ERR_INVAL, instead got NFS4_OK
IMO the test is not correct as written: RFC 8881 does not allow the
COMMIT operation to return NFS4ERR_INVAL.
Reported-by: Dan Aloni <dan.aloni(a)vastdata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
Reviewed-by: Bruce Fields <bfields(a)fieldses.org>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index aca38ed1526e..52ad1972cc33 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -663,15 +663,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
argp->count,
(unsigned long long) argp->offset);
- if (argp->offset > NFS_OFFSET_MAX) {
- resp->status = nfserr_inval;
- goto out;
- }
-
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset,
argp->count, resp->verf);
-out:
return rpc_success;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0cccceb105e7..91600e71be19 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1114,42 +1114,61 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
}
#ifdef CONFIG_NFSD_V3
-/*
- * Commit all pending writes to stable storage.
+/**
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+ * @fhp: NFS filehandle
+ * @offset: raw offset from beginning of file
+ * @count: raw count of bytes to sync
+ * @verf: filled in with the server's current write verifier
*
- * Note: we only guarantee that data that lies within the range specified
- * by the 'offset' and 'count' parameters will be synced.
+ * Note: we guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced. The server
+ * is permitted to sync data that lies outside this range at the
+ * same time.
*
* Unfortunately we cannot lock the file to make sure we return full WCC
* data to the client, as locking happens lower down in the filesystem.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
*/
__be32
-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, unsigned long count, __be32 *verf)
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ u32 count, __be32 *verf)
{
+ u64 maxbytes;
+ loff_t start, end;
struct nfsd_net *nn;
struct nfsd_file *nf;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
-
- if (offset < 0)
- goto out;
- if (count != 0) {
- end = offset + (loff_t)count - 1;
- if (end < offset)
- goto out;
- }
+ __be32 err;
err = nfsd_file_acquire(rqstp, fhp,
NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
+
+ /*
+ * Convert the client-provided (offset, count) range to a
+ * (start, end) range. If the client-provided range falls
+ * outside the maximum file size of the underlying FS,
+ * clamp the sync range appropriately.
+ */
+ start = 0;
+ end = LLONG_MAX;
+ maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
+ if (offset < maxbytes) {
+ start = offset;
+ if (count && (offset + count - 1 < maxbytes))
+ end = offset + count - 1;
+ }
+
nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
int err2;
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
switch (err2) {
case 0:
nfsd_copy_write_verifier(verf, nn);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 9f56dcb22ff7..2c43d10e3cab 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -74,8 +74,8 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
struct svc_fh *res, int createmode,
u32 *verifier, bool *truncp, bool *created);
-__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
- loff_t, unsigned long, __be32 *verf);
+__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+ u64 offset, u32 count, __be32 *verf);
#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
__be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3f965021c8bc38965ecb1924f570c4842b33d408 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Mon, 24 Jan 2022 15:50:31 -0500
Subject: [PATCH] NFSD: COMMIT operations must not return NFS?ERR_INVAL
Since, well, forever, the Linux NFS server's nfsd_commit() function
has returned nfserr_inval when the passed-in byte range arguments
were non-sensical.
However, according to RFC 1813 section 3.3.21, NFSv3 COMMIT requests
are permitted to return only the following non-zero status codes:
NFS3ERR_IO
NFS3ERR_STALE
NFS3ERR_BADHANDLE
NFS3ERR_SERVERFAULT
NFS3ERR_INVAL is not included in that list. Likewise, NFS4ERR_INVAL
is not listed in the COMMIT row of Table 6 in RFC 8881.
RFC 7530 does permit COMMIT to return NFS4ERR_INVAL, but does not
specify when it can or should be used.
Instead of dropping or failing a COMMIT request in a byte range that
is not supported, turn it into a valid request by treating one or
both arguments as zero. Offset zero means start-of-file, count zero
means until-end-of-file, so we only ever extend the commit range.
NFS servers are always allowed to commit more and sooner than
requested.
The range check is no longer bounded by NFS_OFFSET_MAX, but rather
by the value that is returned in the maxfilesize field of the NFSv3
FSINFO procedure or the NFSv4 maxfilesize file attribute.
Note that this change results in a new pynfs failure:
CMT4 st_commit.testCommitOverflow : RUNNING
CMT4 st_commit.testCommitOverflow : FAILURE
COMMIT with offset + count overflow should return
NFS4ERR_INVAL, instead got NFS4_OK
IMO the test is not correct as written: RFC 8881 does not allow the
COMMIT operation to return NFS4ERR_INVAL.
Reported-by: Dan Aloni <dan.aloni(a)vastdata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
Reviewed-by: Bruce Fields <bfields(a)fieldses.org>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index aca38ed1526e..52ad1972cc33 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -663,15 +663,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
argp->count,
(unsigned long long) argp->offset);
- if (argp->offset > NFS_OFFSET_MAX) {
- resp->status = nfserr_inval;
- goto out;
- }
-
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset,
argp->count, resp->verf);
-out:
return rpc_success;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0cccceb105e7..91600e71be19 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1114,42 +1114,61 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
}
#ifdef CONFIG_NFSD_V3
-/*
- * Commit all pending writes to stable storage.
+/**
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+ * @fhp: NFS filehandle
+ * @offset: raw offset from beginning of file
+ * @count: raw count of bytes to sync
+ * @verf: filled in with the server's current write verifier
*
- * Note: we only guarantee that data that lies within the range specified
- * by the 'offset' and 'count' parameters will be synced.
+ * Note: we guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced. The server
+ * is permitted to sync data that lies outside this range at the
+ * same time.
*
* Unfortunately we cannot lock the file to make sure we return full WCC
* data to the client, as locking happens lower down in the filesystem.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
*/
__be32
-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, unsigned long count, __be32 *verf)
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ u32 count, __be32 *verf)
{
+ u64 maxbytes;
+ loff_t start, end;
struct nfsd_net *nn;
struct nfsd_file *nf;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
-
- if (offset < 0)
- goto out;
- if (count != 0) {
- end = offset + (loff_t)count - 1;
- if (end < offset)
- goto out;
- }
+ __be32 err;
err = nfsd_file_acquire(rqstp, fhp,
NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
+
+ /*
+ * Convert the client-provided (offset, count) range to a
+ * (start, end) range. If the client-provided range falls
+ * outside the maximum file size of the underlying FS,
+ * clamp the sync range appropriately.
+ */
+ start = 0;
+ end = LLONG_MAX;
+ maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
+ if (offset < maxbytes) {
+ start = offset;
+ if (count && (offset + count - 1 < maxbytes))
+ end = offset + count - 1;
+ }
+
nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
int err2;
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
switch (err2) {
case 0:
nfsd_copy_write_verifier(verf, nn);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 9f56dcb22ff7..2c43d10e3cab 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -74,8 +74,8 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
struct svc_fh *res, int createmode,
u32 *verifier, bool *truncp, bool *created);
-__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
- loff_t, unsigned long, __be32 *verf);
+__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+ u64 offset, u32 count, __be32 *verf);
#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
__be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0cb4d23ae08c48f6bf3c29a8e5c4a74b8388b960 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Fri, 4 Feb 2022 15:19:34 -0500
Subject: [PATCH] NFSD: Fix the behavior of READ near OFFSET_MAX
Dan Aloni reports:
> Due to commit 8cfb9015280d ("NFS: Always provide aligned buffers to
> the RPC read layers") on the client, a read of 0xfff is aligned up
> to server rsize of 0x1000.
>
> As a result, in a test where the server has a file of size
> 0x7fffffffffffffff, and the client tries to read from the offset
> 0x7ffffffffffff000, the read causes loff_t overflow in the server
> and it returns an NFS code of EINVAL to the client. The client as
> a result indefinitely retries the request.
The Linux NFS client does not handle NFS?ERR_INVAL, even though all
NFS specifications permit servers to return that status code for a
READ.
Instead of NFS?ERR_INVAL, have out-of-range READ requests succeed
and return a short result. Set the EOF flag in the result to prevent
the client from retrying the READ request. This behavior appears to
be consistent with Solaris NFS servers.
Note that NFSv3 and NFSv4 use u64 offset values on the wire. These
must be converted to loff_t internally before use -- an implicit
type cast is not adequate for this purpose. Otherwise VFS checks
against sb->s_maxbytes do not work properly.
Reported-by: Dan Aloni <dan.aloni(a)vastdata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 2c681785186f..b5a52528f19f 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -150,13 +150,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
unsigned int len;
int v;
- argp->count = min_t(u32, argp->count, max_blocksize);
-
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
(unsigned long) argp->count,
(unsigned long long) argp->offset);
+ argp->count = min_t(u32, argp->count, max_blocksize);
+ if (argp->offset > (u64)OFFSET_MAX)
+ argp->offset = (u64)OFFSET_MAX;
+ if (argp->offset + argp->count > (u64)OFFSET_MAX)
+ argp->count = (u64)OFFSET_MAX - argp->offset;
+
v = 0;
len = argp->count;
resp->pages = rqstp->rq_next_page;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ed1ee25647be..71d735b125a0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
read->rd_nf = NULL;
- if (read->rd_offset >= OFFSET_MAX)
- return nfserr_inval;
trace_nfsd_read_start(rqstp, &cstate->current_fh,
read->rd_offset, read->rd_length);
+ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
+ if (read->rd_offset > (u64)OFFSET_MAX)
+ read->rd_offset = (u64)OFFSET_MAX;
+ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
+ read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
+
/*
* If we do a zero copy read, then a client will see read data
* that reflects the state of the file *after* performing the
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 899de438e529..f5e3430bb6ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3986,10 +3986,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
}
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
@@ -4826,10 +4824,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr_resource;
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
count = maxcount;
eof = read->rd_offset >= i_size_read(file_inode(file));
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0cb4d23ae08c48f6bf3c29a8e5c4a74b8388b960 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Fri, 4 Feb 2022 15:19:34 -0500
Subject: [PATCH] NFSD: Fix the behavior of READ near OFFSET_MAX
Dan Aloni reports:
> Due to commit 8cfb9015280d ("NFS: Always provide aligned buffers to
> the RPC read layers") on the client, a read of 0xfff is aligned up
> to server rsize of 0x1000.
>
> As a result, in a test where the server has a file of size
> 0x7fffffffffffffff, and the client tries to read from the offset
> 0x7ffffffffffff000, the read causes loff_t overflow in the server
> and it returns an NFS code of EINVAL to the client. The client as
> a result indefinitely retries the request.
The Linux NFS client does not handle NFS?ERR_INVAL, even though all
NFS specifications permit servers to return that status code for a
READ.
Instead of NFS?ERR_INVAL, have out-of-range READ requests succeed
and return a short result. Set the EOF flag in the result to prevent
the client from retrying the READ request. This behavior appears to
be consistent with Solaris NFS servers.
Note that NFSv3 and NFSv4 use u64 offset values on the wire. These
must be converted to loff_t internally before use -- an implicit
type cast is not adequate for this purpose. Otherwise VFS checks
against sb->s_maxbytes do not work properly.
Reported-by: Dan Aloni <dan.aloni(a)vastdata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 2c681785186f..b5a52528f19f 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -150,13 +150,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
unsigned int len;
int v;
- argp->count = min_t(u32, argp->count, max_blocksize);
-
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
(unsigned long) argp->count,
(unsigned long long) argp->offset);
+ argp->count = min_t(u32, argp->count, max_blocksize);
+ if (argp->offset > (u64)OFFSET_MAX)
+ argp->offset = (u64)OFFSET_MAX;
+ if (argp->offset + argp->count > (u64)OFFSET_MAX)
+ argp->count = (u64)OFFSET_MAX - argp->offset;
+
v = 0;
len = argp->count;
resp->pages = rqstp->rq_next_page;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ed1ee25647be..71d735b125a0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
read->rd_nf = NULL;
- if (read->rd_offset >= OFFSET_MAX)
- return nfserr_inval;
trace_nfsd_read_start(rqstp, &cstate->current_fh,
read->rd_offset, read->rd_length);
+ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
+ if (read->rd_offset > (u64)OFFSET_MAX)
+ read->rd_offset = (u64)OFFSET_MAX;
+ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
+ read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
+
/*
* If we do a zero copy read, then a client will see read data
* that reflects the state of the file *after* performing the
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 899de438e529..f5e3430bb6ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3986,10 +3986,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
}
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
@@ -4826,10 +4824,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr_resource;
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
count = maxcount;
eof = read->rd_offset >= i_size_read(file_inode(file));
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0cb4d23ae08c48f6bf3c29a8e5c4a74b8388b960 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Fri, 4 Feb 2022 15:19:34 -0500
Subject: [PATCH] NFSD: Fix the behavior of READ near OFFSET_MAX
Dan Aloni reports:
> Due to commit 8cfb9015280d ("NFS: Always provide aligned buffers to
> the RPC read layers") on the client, a read of 0xfff is aligned up
> to server rsize of 0x1000.
>
> As a result, in a test where the server has a file of size
> 0x7fffffffffffffff, and the client tries to read from the offset
> 0x7ffffffffffff000, the read causes loff_t overflow in the server
> and it returns an NFS code of EINVAL to the client. The client as
> a result indefinitely retries the request.
The Linux NFS client does not handle NFS?ERR_INVAL, even though all
NFS specifications permit servers to return that status code for a
READ.
Instead of NFS?ERR_INVAL, have out-of-range READ requests succeed
and return a short result. Set the EOF flag in the result to prevent
the client from retrying the READ request. This behavior appears to
be consistent with Solaris NFS servers.
Note that NFSv3 and NFSv4 use u64 offset values on the wire. These
must be converted to loff_t internally before use -- an implicit
type cast is not adequate for this purpose. Otherwise VFS checks
against sb->s_maxbytes do not work properly.
Reported-by: Dan Aloni <dan.aloni(a)vastdata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 2c681785186f..b5a52528f19f 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -150,13 +150,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
unsigned int len;
int v;
- argp->count = min_t(u32, argp->count, max_blocksize);
-
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
(unsigned long) argp->count,
(unsigned long long) argp->offset);
+ argp->count = min_t(u32, argp->count, max_blocksize);
+ if (argp->offset > (u64)OFFSET_MAX)
+ argp->offset = (u64)OFFSET_MAX;
+ if (argp->offset + argp->count > (u64)OFFSET_MAX)
+ argp->count = (u64)OFFSET_MAX - argp->offset;
+
v = 0;
len = argp->count;
resp->pages = rqstp->rq_next_page;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ed1ee25647be..71d735b125a0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
read->rd_nf = NULL;
- if (read->rd_offset >= OFFSET_MAX)
- return nfserr_inval;
trace_nfsd_read_start(rqstp, &cstate->current_fh,
read->rd_offset, read->rd_length);
+ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
+ if (read->rd_offset > (u64)OFFSET_MAX)
+ read->rd_offset = (u64)OFFSET_MAX;
+ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
+ read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
+
/*
* If we do a zero copy read, then a client will see read data
* that reflects the state of the file *after* performing the
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 899de438e529..f5e3430bb6ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3986,10 +3986,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
}
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
@@ -4826,10 +4824,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr_resource;
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
count = maxcount;
eof = read->rd_offset >= i_size_read(file_inode(file));
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0cb4d23ae08c48f6bf3c29a8e5c4a74b8388b960 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Fri, 4 Feb 2022 15:19:34 -0500
Subject: [PATCH] NFSD: Fix the behavior of READ near OFFSET_MAX
Dan Aloni reports:
> Due to commit 8cfb9015280d ("NFS: Always provide aligned buffers to
> the RPC read layers") on the client, a read of 0xfff is aligned up
> to server rsize of 0x1000.
>
> As a result, in a test where the server has a file of size
> 0x7fffffffffffffff, and the client tries to read from the offset
> 0x7ffffffffffff000, the read causes loff_t overflow in the server
> and it returns an NFS code of EINVAL to the client. The client as
> a result indefinitely retries the request.
The Linux NFS client does not handle NFS?ERR_INVAL, even though all
NFS specifications permit servers to return that status code for a
READ.
Instead of NFS?ERR_INVAL, have out-of-range READ requests succeed
and return a short result. Set the EOF flag in the result to prevent
the client from retrying the READ request. This behavior appears to
be consistent with Solaris NFS servers.
Note that NFSv3 and NFSv4 use u64 offset values on the wire. These
must be converted to loff_t internally before use -- an implicit
type cast is not adequate for this purpose. Otherwise VFS checks
against sb->s_maxbytes do not work properly.
Reported-by: Dan Aloni <dan.aloni(a)vastdata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 2c681785186f..b5a52528f19f 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -150,13 +150,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
unsigned int len;
int v;
- argp->count = min_t(u32, argp->count, max_blocksize);
-
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
(unsigned long) argp->count,
(unsigned long long) argp->offset);
+ argp->count = min_t(u32, argp->count, max_blocksize);
+ if (argp->offset > (u64)OFFSET_MAX)
+ argp->offset = (u64)OFFSET_MAX;
+ if (argp->offset + argp->count > (u64)OFFSET_MAX)
+ argp->count = (u64)OFFSET_MAX - argp->offset;
+
v = 0;
len = argp->count;
resp->pages = rqstp->rq_next_page;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ed1ee25647be..71d735b125a0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
read->rd_nf = NULL;
- if (read->rd_offset >= OFFSET_MAX)
- return nfserr_inval;
trace_nfsd_read_start(rqstp, &cstate->current_fh,
read->rd_offset, read->rd_length);
+ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
+ if (read->rd_offset > (u64)OFFSET_MAX)
+ read->rd_offset = (u64)OFFSET_MAX;
+ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
+ read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
+
/*
* If we do a zero copy read, then a client will see read data
* that reflects the state of the file *after* performing the
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 899de438e529..f5e3430bb6ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3986,10 +3986,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
}
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
@@ -4826,10 +4824,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr_resource;
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
count = maxcount;
eof = read->rd_offset >= i_size_read(file_inode(file));
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0cb4d23ae08c48f6bf3c29a8e5c4a74b8388b960 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Fri, 4 Feb 2022 15:19:34 -0500
Subject: [PATCH] NFSD: Fix the behavior of READ near OFFSET_MAX
Dan Aloni reports:
> Due to commit 8cfb9015280d ("NFS: Always provide aligned buffers to
> the RPC read layers") on the client, a read of 0xfff is aligned up
> to server rsize of 0x1000.
>
> As a result, in a test where the server has a file of size
> 0x7fffffffffffffff, and the client tries to read from the offset
> 0x7ffffffffffff000, the read causes loff_t overflow in the server
> and it returns an NFS code of EINVAL to the client. The client as
> a result indefinitely retries the request.
The Linux NFS client does not handle NFS?ERR_INVAL, even though all
NFS specifications permit servers to return that status code for a
READ.
Instead of NFS?ERR_INVAL, have out-of-range READ requests succeed
and return a short result. Set the EOF flag in the result to prevent
the client from retrying the READ request. This behavior appears to
be consistent with Solaris NFS servers.
Note that NFSv3 and NFSv4 use u64 offset values on the wire. These
must be converted to loff_t internally before use -- an implicit
type cast is not adequate for this purpose. Otherwise VFS checks
against sb->s_maxbytes do not work properly.
Reported-by: Dan Aloni <dan.aloni(a)vastdata.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 2c681785186f..b5a52528f19f 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -150,13 +150,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
unsigned int len;
int v;
- argp->count = min_t(u32, argp->count, max_blocksize);
-
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
(unsigned long) argp->count,
(unsigned long long) argp->offset);
+ argp->count = min_t(u32, argp->count, max_blocksize);
+ if (argp->offset > (u64)OFFSET_MAX)
+ argp->offset = (u64)OFFSET_MAX;
+ if (argp->offset + argp->count > (u64)OFFSET_MAX)
+ argp->count = (u64)OFFSET_MAX - argp->offset;
+
v = 0;
len = argp->count;
resp->pages = rqstp->rq_next_page;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ed1ee25647be..71d735b125a0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
read->rd_nf = NULL;
- if (read->rd_offset >= OFFSET_MAX)
- return nfserr_inval;
trace_nfsd_read_start(rqstp, &cstate->current_fh,
read->rd_offset, read->rd_length);
+ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
+ if (read->rd_offset > (u64)OFFSET_MAX)
+ read->rd_offset = (u64)OFFSET_MAX;
+ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
+ read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
+
/*
* If we do a zero copy read, then a client will see read data
* that reflects the state of the file *after* performing the
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 899de438e529..f5e3430bb6ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3986,10 +3986,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
}
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
@@ -4826,10 +4824,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr_resource;
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
count = maxcount;
eof = read->rd_offset >= i_size_read(file_inode(file));