The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath")
ac3a2585f018 ("nfsd: rework refcounting in filecache")
d7064eaf688c ("NFSD: Add an nfsd_file_fsync tracepoint")
821411858988 ("nfsd: reorganize filecache.c")
1f696e230ea5 ("nfsd: remove the pages_flushed statistic from filecache")
4d1ea8455716 ("NFSD: Add an NFSD_FILE_GC flag to enable nfsd_file garbage collection")
dcf3f80965ca ("NFSD: Revert "NFSD: NFSv4 CLOSE should release an nfsd_file immediately"")
c252849082ff ("NFSD: Pass the target nfsd_file to nfsd_commit()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0b3a551fa58b4da941efeb209b3770868e2eddd7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton(a)kernel.org>
Date: Thu, 5 Jan 2023 14:55:56 -0500
Subject: [PATCH] nfsd: fix handling of cached open files in nfsd4_open
codepath
Commit fb70bf124b05 ("NFSD: Instantiate a struct file when creating a
regular NFSv4 file") added the ability to cache an open fd over a
compound. There are a couple of problems with the way this currently
works:
It's racy, as a newly-created nfsd_file can end up with its PENDING bit
cleared while the nf is hashed, and the nf_file pointer is still zeroed
out. Other tasks can find it in this state and they expect to see a
valid nf_file, and can oops if nf_file is NULL.
Also, there is no guarantee that we'll end up creating a new nfsd_file
if one is already in the hash. If an extant entry is in the hash with a
valid nf_file, nfs4_get_vfs_file will clobber its nf_file pointer with
the value of op_file and the old nf_file will leak.
Fix both issues by making a new nfsd_file_acquirei_opened variant that
takes an optional file pointer. If one is present when this is called,
we'll take a new reference to it instead of trying to open the file. If
the nfsd_file already has a valid nf_file, we'll just ignore the
optional file and pass the nfsd_file back as-is.
Also rework the tracepoints a bit to allow for an "opened" variant and
don't try to avoid counting acquisitions in the case where we already
have a cached open file.
Fixes: fb70bf124b05 ("NFSD: Instantiate a struct file when creating a regular NFSv4 file")
Cc: Trond Myklebust <trondmy(a)hammerspace.com>
Reported-by: Stanislav Saner <ssaner(a)redhat.com>
Reported-and-Tested-by: Ruben Vestergaard <rubenv(a)drcmr.dk>
Reported-and-Tested-by: Torkil Svensgaard <torkil(a)drcmr.dk>
Signed-off-by: Jeff Layton <jlayton(a)kernel.org>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 45b2c9e3f636..0ef070349014 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -1071,8 +1071,8 @@ nfsd_file_is_cached(struct inode *inode)
static __be32
nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **pnf,
- bool open, bool want_gc)
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf, bool want_gc)
{
struct nfsd_file_lookup_key key = {
.type = NFSD_FILE_KEY_FULL,
@@ -1147,8 +1147,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
if (status == nfs_ok) {
- if (open)
- this_cpu_inc(nfsd_file_acquisitions);
+ this_cpu_inc(nfsd_file_acquisitions);
*pnf = nf;
} else {
if (refcount_dec_and_test(&nf->nf_ref))
@@ -1158,20 +1157,23 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
out_status:
put_cred(key.cred);
- if (open)
- trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+ trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
return status;
open_file:
trace_nfsd_file_alloc(nf);
nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
if (nf->nf_mark) {
- if (open) {
+ if (file) {
+ get_file(file);
+ nf->nf_file = file;
+ status = nfs_ok;
+ trace_nfsd_file_opened(nf, status);
+ } else {
status = nfsd_open_verified(rqstp, fhp, may_flags,
&nf->nf_file);
trace_nfsd_file_open(nf, status);
- } else
- status = nfs_ok;
+ }
} else
status = nfserr_jukebox;
/*
@@ -1207,7 +1209,7 @@ __be32
nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true);
}
/**
@@ -1228,28 +1230,30 @@ __be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false);
}
/**
- * nfsd_file_create - Get a struct nfsd_file, do not open
+ * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file
* @rqstp: the RPC transaction being executed
* @fhp: the NFS filehandle of the file just created
* @may_flags: NFSD_MAY_ settings for the file
+ * @file: cached, already-open file (may be NULL)
* @pnf: OUT: new or found "struct nfsd_file" object
*
- * The nfsd_file_object returned by this API is reference-counted
- * but not garbage-collected. The object is released immediately
- * one RCU grace period after the final nfsd_file_put().
+ * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist,
+ * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
+ * opening a new one.
*
* Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
* network byte order is returned.
*/
__be32
-nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **pnf)
+nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false);
}
/*
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index b7efb2c3ddb1..41516a4263ea 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
-__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **nfp);
+__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **nfp);
int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e1e85c21f12b..313f666d5357 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5262,18 +5262,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
- if (!open->op_filp) {
- status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
- if (status != nfs_ok)
- goto out_put_access;
- } else {
- status = nfsd_file_create(rqstp, cur_fh, access, &nf);
- if (status != nfs_ok)
- goto out_put_access;
- nf->nf_file = open->op_filp;
- open->op_filp = NULL;
- trace_nfsd_file_create(rqstp, access, nf);
- }
+ status = nfsd_file_acquire_opened(rqstp, cur_fh, access,
+ open->op_filp, &nf);
+ if (status != nfs_ok)
+ goto out_put_access;
spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index c852ae8eaf37..8f9c82d9e075 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire,
)
);
-TRACE_EVENT(nfsd_file_create,
- TP_PROTO(
- const struct svc_rqst *rqstp,
- unsigned int may_flags,
- const struct nfsd_file *nf
- ),
-
- TP_ARGS(rqstp, may_flags, nf),
-
- TP_STRUCT__entry(
- __field(const void *, nf_inode)
- __field(const void *, nf_file)
- __field(unsigned long, may_flags)
- __field(unsigned long, nf_flags)
- __field(unsigned long, nf_may)
- __field(unsigned int, nf_ref)
- __field(u32, xid)
- ),
-
- TP_fast_assign(
- __entry->nf_inode = nf->nf_inode;
- __entry->nf_file = nf->nf_file;
- __entry->may_flags = may_flags;
- __entry->nf_flags = nf->nf_flags;
- __entry->nf_may = nf->nf_may;
- __entry->nf_ref = refcount_read(&nf->nf_ref);
- __entry->xid = be32_to_cpu(rqstp->rq_xid);
- ),
-
- TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
- __entry->xid, __entry->nf_inode,
- show_nfsd_may_flags(__entry->may_flags),
- __entry->nf_ref, show_nf_flags(__entry->nf_flags),
- show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
- )
-);
-
TRACE_EVENT(nfsd_file_insert_err,
TP_PROTO(
const struct svc_rqst *rqstp,
@@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err,
)
);
-TRACE_EVENT(nfsd_file_open,
- TP_PROTO(struct nfsd_file *nf, __be32 status),
+DECLARE_EVENT_CLASS(nfsd_file_open_class,
+ TP_PROTO(const struct nfsd_file *nf, __be32 status),
TP_ARGS(nf, status),
TP_STRUCT__entry(
__field(void *, nf_inode) /* cannot be dereferenced */
@@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open,
__entry->nf_file)
)
+#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \
+DEFINE_EVENT(nfsd_file_open_class, name, \
+ TP_PROTO( \
+ const struct nfsd_file *nf, \
+ __be32 status \
+ ), \
+ TP_ARGS(nf, status))
+
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open);
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened);
+
TRACE_EVENT(nfsd_file_is_cached,
TP_PROTO(
const struct inode *inode,
Hi Greg,
The following commits fix a regression introduced into the kernel in
5.14 (in e84ba47e313d). Please cherry-pick them for both the 6.1.y and
the 6.0.y branches.
6a877d2450ac x86/fpu: Take task_struct* in copy_sigframe_from_user_to_xstate()
1c813ce03055 x86/fpu: Add a pkru argument to copy_uabi_from_kernel_to_xstate().
2c87767c35ee x86/fpu: Add a pkru argument to copy_uabi_to_xstate()
4a804c4f8356 x86/fpu: Allow PKRU to be (once again) written by ptrace.
d7e5aceace51 x86/fpu: Emulate XRSTOR's behavior if the xfeatures PKRU
bit is not set
6ea25770b043 selftests/vm/pkeys: Add a regression test for setting
PKRU through ptrace
5.15.y requires adjusted patches and will be sent separately.
- Kyle
Hi Greg,
can you please consider adding this patch to the 6.0-stable series
(actually kernels 5.18 up to and including 6.0)?
It's a backport of upstream commit 71bdea6f798b425bc0003780b13e3fdecb16a010
Thanks!
Helge
From: Helge Deller <deller(a)gmx.de>
Subject: [PATCH] parisc: Align parisc MADV_XXX constants with all other architectures
Adjust some MADV_XXX constants to be in sync what their values are on
all other platforms. There is currently no reason to have an own
numbering on parisc, but it requires workarounds in many userspace
sources (e.g. glibc, qemu, ...) - which are often forgotten and thus
introduce bugs and different behaviour on parisc.
A wrapper avoids an ABI breakage for existing userspace applications by
translating any old values to the new ones, so this change allows us to
move over all programs to the new ABI over time.
Signed-off-by: Helge Deller <deller(a)gmx.de>
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index a7ea3204a5fa..3fff360ae4a0 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -49,6 +49,19 @@
#define MADV_DONTFORK 10 /* don't inherit across fork */
#define MADV_DOFORK 11 /* do inherit across fork */
+#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
+
+#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */
+#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */
+
+#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump,
+ overrides the coredump filter bits */
+#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */
+
+#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
+
#define MADV_COLD 20 /* deactivate these pages */
#define MADV_PAGEOUT 21 /* reclaim these pages */
@@ -57,25 +70,11 @@
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
-#define MADV_MERGEABLE 65 /* KSM may merge identical pages */
-#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */
-
-#define MADV_HUGEPAGE 67 /* Worth backing with hugepages */
-#define MADV_NOHUGEPAGE 68 /* Not worth backing with hugepages */
-
-#define MADV_DONTDUMP 69 /* Explicity exclude from the core dump,
- overrides the coredump filter bits */
-#define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */
-
-#define MADV_WIPEONFORK 71 /* Zero memory on fork, child only */
-#define MADV_KEEPONFORK 72 /* Undo MADV_WIPEONFORK */
-
#define MADV_HWPOISON 100 /* poison a page for testing */
#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */
/* compatibility flags */
#define MAP_FILE 0
-#define MAP_VARIABLE 0
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 2b34294517a1..306dde7f0d3a 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -465,3 +465,30 @@ asmlinkage long parisc_inotify_init1(int flags)
flags = FIX_O_NONBLOCK(flags);
return sys_inotify_init1(flags);
}
+
+/*
+ * madvise() wrapper
+ *
+ * Up to kernel v6.1 parisc has different values than all other
+ * platforms for the MADV_xxx flags listed below.
+ * To keep binary compatibility with existing userspace programs
+ * translate the former values to the new values.
+ *
+ * XXX: Remove this wrapper in year 2025 (or later)
+ */
+
+asmlinkage notrace long parisc_madvise(unsigned long start, size_t len_in, int behavior)
+{
+ switch (behavior) {
+ case 65: behavior = MADV_MERGEABLE; break;
+ case 66: behavior = MADV_UNMERGEABLE; break;
+ case 67: behavior = MADV_HUGEPAGE; break;
+ case 68: behavior = MADV_NOHUGEPAGE; break;
+ case 69: behavior = MADV_DONTDUMP; break;
+ case 70: behavior = MADV_DODUMP; break;
+ case 71: behavior = MADV_WIPEONFORK; break;
+ case 72: behavior = MADV_KEEPONFORK; break;
+ }
+
+ return sys_madvise(start, len_in, behavior);
+}
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 8a99c998da9b..0e42fceb2d5e 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -131,7 +131,7 @@
116 common sysinfo sys_sysinfo compat_sys_sysinfo
117 common shutdown sys_shutdown
118 common fsync sys_fsync
-119 common madvise sys_madvise
+119 common madvise parisc_madvise
120 common clone sys_clone_wrapper
121 common setdomainname sys_setdomainname
122 common sendfile sys_sendfile compat_sys_sendfile
diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h
index 506c06a6536f..4cc88a642e10 100644
--- a/tools/arch/parisc/include/uapi/asm/mman.h
+++ b/tools/arch/parisc/include/uapi/asm/mman.h
@@ -1,20 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
#define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
-#define MADV_DODUMP 70
+#define MADV_DODUMP 17
#define MADV_DOFORK 11
-#define MADV_DONTDUMP 69
+#define MADV_DONTDUMP 16
#define MADV_DONTFORK 10
#define MADV_DONTNEED 4
#define MADV_FREE 8
-#define MADV_HUGEPAGE 67
-#define MADV_MERGEABLE 65
-#define MADV_NOHUGEPAGE 68
+#define MADV_HUGEPAGE 14
+#define MADV_MERGEABLE 12
+#define MADV_NOHUGEPAGE 15
#define MADV_NORMAL 0
#define MADV_RANDOM 1
#define MADV_REMOVE 9
#define MADV_SEQUENTIAL 2
-#define MADV_UNMERGEABLE 66
+#define MADV_UNMERGEABLE 13
#define MADV_WILLNEED 3
#define MAP_ANONYMOUS 0x10
#define MAP_DENYWRITE 0x0800
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 6cefb4315d75..a5d49b3b6a09 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -10,25 +10,13 @@ extern struct timeval bench__start, bench__end, bench__runtime;
* The madvise transparent hugepage constants were added in glibc
* 2.13. For compatibility with older versions of glibc, define these
* tokens if they are not already defined.
- *
- * PA-RISC uses different madvise values from other architectures and
- * needs to be special-cased.
*/
-#ifdef __hppa__
-# ifndef MADV_HUGEPAGE
-# define MADV_HUGEPAGE 67
-# endif
-# ifndef MADV_NOHUGEPAGE
-# define MADV_NOHUGEPAGE 68
-# endif
-#else
# ifndef MADV_HUGEPAGE
# define MADV_HUGEPAGE 14
# endif
# ifndef MADV_NOHUGEPAGE
# define MADV_NOHUGEPAGE 15
# endif
-#endif
int bench_numa(int argc, const char **argv);
int bench_sched_messaging(int argc, const char **argv);