The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 56d5f3eba3f5de0efdd556de4ef381e109b973a9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025022414-grumble-cahoots-e4b7@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 56d5f3eba3f5de0efdd556de4ef381e109b973a9 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner(a)kernel.org>
Date: Tue, 11 Feb 2025 18:15:59 +0100
Subject: [PATCH] acct: perform last write from workqueue
In [1] it was reported that the acct(2) system call can be used to
trigger NULL deref in cases where it is set to write to a file that
triggers an internal lookup. This can e.g., happen when pointing acc(2)
to /sys/power/resume. At the point the where the write to this file
happens the calling task has already exited and called exit_fs(). A
lookup will thus trigger a NULL-deref when accessing current->fs.
Reorganize the code so that the the final write happens from the
workqueue but with the caller's credentials. This preserves the
(strange) permission model and has almost no regression risk.
This api should stop to exist though.
Link: https://lore.kernel.org/r/20250127091811.3183623-1-quzicheng@huawei.com [1]
Link: https://lore.kernel.org/r/20250211-work-acct-v1-1-1c16aecab8b3@kernel.org
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Zicheng Qu <quzicheng(a)huawei.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/kernel/acct.c b/kernel/acct.c
index 31222e8cd534..48283efe8a12 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -103,48 +103,50 @@ struct bsd_acct_struct {
atomic_long_t count;
struct rcu_head rcu;
struct mutex lock;
- int active;
+ bool active;
+ bool check_space;
unsigned long needcheck;
struct file *file;
struct pid_namespace *ns;
struct work_struct work;
struct completion done;
+ acct_t ac;
};
-static void do_acct_process(struct bsd_acct_struct *acct);
+static void fill_ac(struct bsd_acct_struct *acct);
+static void acct_write_process(struct bsd_acct_struct *acct);
/*
* Check the amount of free space and suspend/resume accordingly.
*/
-static int check_free_space(struct bsd_acct_struct *acct)
+static bool check_free_space(struct bsd_acct_struct *acct)
{
struct kstatfs sbuf;
- if (time_is_after_jiffies(acct->needcheck))
- goto out;
+ if (!acct->check_space)
+ return acct->active;
/* May block */
if (vfs_statfs(&acct->file->f_path, &sbuf))
- goto out;
+ return acct->active;
if (acct->active) {
u64 suspend = sbuf.f_blocks * SUSPEND;
do_div(suspend, 100);
if (sbuf.f_bavail <= suspend) {
- acct->active = 0;
+ acct->active = false;
pr_info("Process accounting paused\n");
}
} else {
u64 resume = sbuf.f_blocks * RESUME;
do_div(resume, 100);
if (sbuf.f_bavail >= resume) {
- acct->active = 1;
+ acct->active = true;
pr_info("Process accounting resumed\n");
}
}
acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
-out:
return acct->active;
}
@@ -189,7 +191,11 @@ static void acct_pin_kill(struct fs_pin *pin)
{
struct bsd_acct_struct *acct = to_acct(pin);
mutex_lock(&acct->lock);
- do_acct_process(acct);
+ /*
+ * Fill the accounting struct with the exiting task's info
+ * before punting to the workqueue.
+ */
+ fill_ac(acct);
schedule_work(&acct->work);
wait_for_completion(&acct->done);
cmpxchg(&acct->ns->bacct, pin, NULL);
@@ -202,6 +208,9 @@ static void close_work(struct work_struct *work)
{
struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
struct file *file = acct->file;
+
+ /* We were fired by acct_pin_kill() which holds acct->lock. */
+ acct_write_process(acct);
if (file->f_op->flush)
file->f_op->flush(file, NULL);
__fput_sync(file);
@@ -430,13 +439,27 @@ static u32 encode_float(u64 value)
* do_exit() or when switching to a different output file.
*/
-static void fill_ac(acct_t *ac)
+static void fill_ac(struct bsd_acct_struct *acct)
{
struct pacct_struct *pacct = ¤t->signal->pacct;
+ struct file *file = acct->file;
+ acct_t *ac = &acct->ac;
u64 elapsed, run_time;
time64_t btime;
struct tty_struct *tty;
+ lockdep_assert_held(&acct->lock);
+
+ if (time_is_after_jiffies(acct->needcheck)) {
+ acct->check_space = false;
+
+ /* Don't fill in @ac if nothing will be written. */
+ if (!acct->active)
+ return;
+ } else {
+ acct->check_space = true;
+ }
+
/*
* Fill the accounting struct with the needed info as recorded
* by the different kernel functions.
@@ -484,64 +507,61 @@ static void fill_ac(acct_t *ac)
ac->ac_majflt = encode_comp_t(pacct->ac_majflt);
ac->ac_exitcode = pacct->ac_exitcode;
spin_unlock_irq(¤t->sighand->siglock);
-}
-/*
- * do_acct_process does all actual work. Caller holds the reference to file.
- */
-static void do_acct_process(struct bsd_acct_struct *acct)
-{
- acct_t ac;
- unsigned long flim;
- const struct cred *orig_cred;
- struct file *file = acct->file;
- /*
- * Accounting records are not subject to resource limits.
- */
- flim = rlimit(RLIMIT_FSIZE);
- current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
- /* Perform file operations on behalf of whoever enabled accounting */
- orig_cred = override_creds(file->f_cred);
-
- /*
- * First check to see if there is enough free_space to continue
- * the process accounting system.
- */
- if (!check_free_space(acct))
- goto out;
-
- fill_ac(&ac);
/* we really need to bite the bullet and change layout */
- ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
- ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
+ ac->ac_uid = from_kuid_munged(file->f_cred->user_ns, current_uid());
+ ac->ac_gid = from_kgid_munged(file->f_cred->user_ns, current_gid());
#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/* backward-compatible 16 bit fields */
- ac.ac_uid16 = ac.ac_uid;
- ac.ac_gid16 = ac.ac_gid;
+ ac->ac_uid16 = ac->ac_uid;
+ ac->ac_gid16 = ac->ac_gid;
#elif ACCT_VERSION == 3
{
struct pid_namespace *ns = acct->ns;
- ac.ac_pid = task_tgid_nr_ns(current, ns);
+ ac->ac_pid = task_tgid_nr_ns(current, ns);
rcu_read_lock();
- ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent),
- ns);
+ ac->ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
rcu_read_unlock();
}
#endif
+}
+
+static void acct_write_process(struct bsd_acct_struct *acct)
+{
+ struct file *file = acct->file;
+ const struct cred *cred;
+ acct_t *ac = &acct->ac;
+
+ /* Perform file operations on behalf of whoever enabled accounting */
+ cred = override_creds(file->f_cred);
+
/*
- * Get freeze protection. If the fs is frozen, just skip the write
- * as we could deadlock the system otherwise.
+ * First check to see if there is enough free_space to continue
+ * the process accounting system. Then get freeze protection. If
+ * the fs is frozen, just skip the write as we could deadlock
+ * the system otherwise.
*/
- if (file_start_write_trylock(file)) {
+ if (check_free_space(acct) && file_start_write_trylock(file)) {
/* it's been opened O_APPEND, so position is irrelevant */
loff_t pos = 0;
- __kernel_write(file, &ac, sizeof(acct_t), &pos);
+ __kernel_write(file, ac, sizeof(acct_t), &pos);
file_end_write(file);
}
-out:
+
+ revert_creds(cred);
+}
+
+static void do_acct_process(struct bsd_acct_struct *acct)
+{
+ unsigned long flim;
+
+ /* Accounting records are not subject to resource limits. */
+ flim = rlimit(RLIMIT_FSIZE);
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+ fill_ac(acct);
+ acct_write_process(acct);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
- revert_creds(orig_cred);
}
/**
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 654292a0b264e9b8c51b98394146218a21612aa1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025022408-skincare-aide-8932@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 654292a0b264e9b8c51b98394146218a21612aa1 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc(a)manguebit.com>
Date: Sun, 16 Feb 2025 18:02:47 -0300
Subject: [PATCH] smb: client: fix chmod(2) regression with ATTR_READONLY
When the user sets a file or directory as read-only (e.g. ~S_IWUGO),
the client will set the ATTR_READONLY attribute by sending an
SMB2_SET_INFO request to the server in cifs_setattr_{,nounix}(), but
cifsInodeInfo::cifsAttrs will be left unchanged as the client will
only update the new file attributes in the next call to
{smb311_posix,cifs}_get_inode_info() with the new metadata filled in
@data parameter.
Commit a18280e7fdea ("smb: cilent: set reparse mount points as
automounts") mistakenly removed the @data NULL check when calling
is_inode_cache_good(), which broke the above case as the new
ATTR_READONLY attribute would end up not being updated on files with a
read lease.
Fix this by updating the inode whenever we have cached metadata in
@data parameter.
Reported-by: Horst Reiterer <horst.reiterer(a)fabasoft.com>
Closes: https://lore.kernel.org/r/85a16504e09147a195ac0aac1c801280@fabasoft.com
Fixes: a18280e7fdea ("smb: cilent: set reparse mount points as automounts")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paulo Alcantara (Red Hat) <pc(a)manguebit.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 9cc31cf6ebd0..3261190e6f90 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1408,7 +1408,7 @@ int cifs_get_inode_info(struct inode **inode,
struct cifs_fattr fattr = {};
int rc;
- if (is_inode_cache_good(*inode)) {
+ if (!data && is_inode_cache_good(*inode)) {
cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
return 0;
}
@@ -1507,7 +1507,7 @@ int smb311_posix_get_inode_info(struct inode **inode,
struct cifs_fattr fattr = {};
int rc;
- if (is_inode_cache_good(*inode)) {
+ if (!data && is_inode_cache_good(*inode)) {
cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
return 0;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 66314e9a57a050f95cb0ebac904f5ab047a8926e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025022446-gratify-muster-5e00@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 66314e9a57a050f95cb0ebac904f5ab047a8926e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong(a)kernel.org>
Date: Sun, 2 Feb 2025 16:50:14 -0800
Subject: [PATCH] xfs: fix online repair probing when
CONFIG_XFS_ONLINE_REPAIR=n
I received a report from the release engineering side of the house that
xfs_scrub without the -n flag (aka fix it mode) would try to fix a
broken filesystem even on a kernel that doesn't have online repair built
into it:
# xfs_scrub -dTvn /mnt/test
EXPERIMENTAL xfs_scrub program in use! Use at your own risk!
Phase 1: Find filesystem geometry.
/mnt/test: using 1 threads to scrub.
Phase 1: Memory used: 132k/0k (108k/25k), time: 0.00/ 0.00/ 0.00s
<snip>
Phase 4: Repair filesystem.
<snip>
Info: /mnt/test/some/victimdir directory entries: Attempting repair. (repair.c line 351)
Corruption: /mnt/test/some/victimdir directory entries: Repair unsuccessful; offline repair required. (repair.c line 204)
Source: https://blogs.oracle.com/linux/post/xfs-online-filesystem-repair
It is strange that xfs_scrub doesn't refuse to run, because the kernel
is supposed to return EOPNOTSUPP if we actually needed to run a repair,
and xfs_io's repair subcommand will perror that. And yet:
# xfs_io -x -c 'repair probe' /mnt/test
#
The first problem is commit dcb660f9222fd9 (4.15) which should have had
xchk_probe set the CORRUPT OFLAG so that any of the repair machinery
will get called at all.
It turns out that some refactoring that happened in the 6.6-6.8 era
broke the operation of this corner case. What we *really* want to
happen is that all the predicates that would steer xfs_scrub_metadata()
towards calling xrep_attempt() should function the same way that they do
when repair is compiled in; and then xrep_attempt gets to return the
fatal EOPNOTSUPP error code that causes the probe to fail.
Instead, commit 8336a64eb75cba (6.6) started the failwhale swimming by
hoisting OFLAG checking logic into a helper whose non-repair stub always
returns false, causing scrub to return "repair not needed" when in fact
the repair is not supported. Prior to that commit, the oflag checking
that was open-coded in scrub.c worked correctly.
Similarly, in commit 4bdfd7d15747b1 (6.8) we hoisted the IFLAG_REPAIR
and ALREADY_FIXED logic into a helper whose non-repair stub always
returns false, so we never enter the if test body that would have called
xrep_attempt, let alone fail to decode the OFLAGs correctly.
The final insult (yes, we're doing The Naked Gun now) is commit
48a72f60861f79 (6.8) in which we hoisted the "are we going to try a
repair?" predicate into yet another function with a non-repair stub
always returns false.
Fix xchk_probe to trigger xrep_probe if repair is enabled, or return
EOPNOTSUPP directly if it is not. For all the other scrub types, we
need to fix the header predicates so that the ->repair functions (which
are all xrep_notsupported) get called to return EOPNOTSUPP. Commit
48a72 is tagged here because the scrub code prior to LTS 6.12 are
incomplete and not worth patching.
Reported-by: David Flynn <david.flynn(a)oracle.com>
Cc: <stable(a)vger.kernel.org> # v6.8
Fixes: 8336a64eb75c ("xfs: don't complain about unfixed metadata when repairs were injected")
Signed-off-by: "Darrick J. Wong" <djwong(a)kernel.org>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index bdcd40f0ec74..19877d99f255 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -224,7 +224,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
bool xchk_dir_looks_zapped(struct xfs_inode *dp);
bool xchk_pptr_looks_zapped(struct xfs_inode *ip);
-#ifdef CONFIG_XFS_ONLINE_REPAIR
/* Decide if a repair is required. */
static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm)
{
@@ -244,10 +243,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc)
return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
!(sc->flags & XREP_ALREADY_FIXED);
}
-#else
-# define xchk_needs_repair(sc) (false)
-# define xchk_could_repair(sc) (false)
-#endif /* CONFIG_XFS_ONLINE_REPAIR */
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 823c00d1a502..af0a3a9e5ed9 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -191,7 +191,16 @@ int xrep_reset_metafile_resv(struct xfs_scrub *sc);
#else
#define xrep_ino_dqattach(sc) (0)
-#define xrep_will_attempt(sc) (false)
+
+/*
+ * When online repair is not built into the kernel, we still want to attempt
+ * the repair so that the stub xrep_attempt below will return EOPNOTSUPP.
+ */
+static inline bool xrep_will_attempt(const struct xfs_scrub *sc)
+{
+ return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
+ xchk_needs_repair(sc->sm);
+}
static inline int
xrep_attempt(
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 7567dd5cad14..6fa9e3e5bab7 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -149,6 +149,18 @@ xchk_probe(
if (xchk_should_terminate(sc, &error))
return error;
+ /*
+ * If the caller is probing to see if repair works but repair isn't
+ * built into the kernel, return EOPNOTSUPP because that's the signal
+ * that userspace expects. If online repair is built in, set the
+ * CORRUPT flag (without any of the usual tracing/logging) to force us
+ * into xrep_probe.
+ */
+ if (xchk_could_repair(sc)) {
+ if (!IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR))
+ return -EOPNOTSUPP;
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ }
return 0;
}