Commit a1f9b1c0439db ("integrity/ima: switch to using __kernel_read")
replaced the __vfs_read() call in integrity_kernel_read() with
__kernel_read(), a new helper introduced by commit 61a707c543e2a ("fs: add
a __kernel_read helper").
Since the new helper also requires the FMODE_CAN_READ flag to be set in
file->f_mode, this patch saves the original f_mode and sets the flag if
the file descriptor has the necessary file operation (read or read_iter).
Lastly, it restores the original f_mode at the end of ima_calc_file_hash().
Cc: stable(a)vger.kernel.org # 5.8.x
Fixes: a1f9b1c0439db ("integrity/ima: switch to using __kernel_read")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
security/integrity/ima/ima_crypto.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 21989fa0c107..22ed86a0c964 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -537,6 +537,7 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
loff_t i_size;
int rc;
struct file *f = file;
+ fmode_t saved_mode;
bool new_file_instance = false, modified_mode = false;
/*
@@ -550,7 +551,7 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
}
/* Open a new file instance in O_RDONLY if we cannot read */
- if (!(file->f_mode & FMODE_READ)) {
+ if (!(file->f_mode & FMODE_READ) || !(file->f_mode & FMODE_CAN_READ)) {
int flags = file->f_flags & ~(O_WRONLY | O_APPEND |
O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL);
flags |= O_RDONLY;
@@ -562,7 +563,10 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
*/
pr_info_ratelimited("Unable to reopen file for reading.\n");
f = file;
+ saved_mode = f->f_mode;
f->f_mode |= FMODE_READ;
+ if (likely(file->f_op->read || file->f_op->read_iter))
+ f->f_mode |= FMODE_CAN_READ;
modified_mode = true;
} else {
new_file_instance = true;
@@ -582,7 +586,7 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
if (new_file_instance)
fput(f);
else if (modified_mode)
- f->f_mode &= ~FMODE_READ;
+ f->f_mode = saved_mode;
return rc;
}
--
2.27.GIT
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 406100f3da08066c00105165db8520bbc7694a36
Gitweb: https://git.kernel.org/tip/406100f3da08066c00105165db8520bbc7694a36
Author: Daniel Jordan <daniel.m.jordan(a)oracle.com>
AuthorDate: Thu, 12 Nov 2020 12:17:11 -05:00
Committer: Peter Zijlstra <peterz(a)infradead.org>
CommitterDate: Thu, 19 Nov 2020 11:25:45 +01:00
cpuset: fix race between hotplug work and later CPU offline
One of our machines keeled over trying to rebuild the scheduler domains.
Mainline produces the same splat:
BUG: unable to handle page fault for address: 0000607f820054db
CPU: 2 PID: 149 Comm: kworker/1:1 Not tainted 5.10.0-rc1-master+ #6
Workqueue: events cpuset_hotplug_workfn
RIP: build_sched_domains
Call Trace:
partition_sched_domains_locked
rebuild_sched_domains_locked
cpuset_hotplug_workfn
It happens with cgroup2 and exclusive cpusets only. This reproducer
triggers it on an 8-cpu vm and works most effectively with no
preexisting child cgroups:
cd $UNIFIED_ROOT
mkdir cg1
echo 4-7 > cg1/cpuset.cpus
echo root > cg1/cpuset.cpus.partition
# with smt/control reading 'on',
echo off > /sys/devices/system/cpu/smt/control
RIP maps to
sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
from sd_init(). sd_id is calculated earlier in the same function:
cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
sd_id = cpumask_first(sched_domain_span(sd));
tl->mask(cpu), which reads cpu_sibling_map on x86, returns an empty mask
and so cpumask_first() returns >= nr_cpu_ids, which leads to the bogus
value from per_cpu_ptr() above.
The problem is a race between cpuset_hotplug_workfn() and a later
offline of CPU N. cpuset_hotplug_workfn() updates the effective masks
when N is still online, the offline clears N from cpu_sibling_map, and
then the worker uses the stale effective masks that still have N to
generate the scheduling domains, leading the worker to read
N's empty cpu_sibling_map in sd_init().
rebuild_sched_domains_locked() prevented the race during the cgroup2
cpuset series up until the Fixes commit changed its check. Make the
check more robust so that it can detect an offline CPU in any exclusive
cpuset's effective mask, not just the top one.
Fixes: 0ccea8feb980 ("cpuset: Make generate_sched_domains() work with partition")
Signed-off-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Acked-by: Tejun Heo <tj(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/20201112171711.639541-1-daniel.m.jordan@oracle.com
---
kernel/cgroup/cpuset.c | 33 ++++++++++++++++++++++++++++-----
1 file changed, 28 insertions(+), 5 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 57b5b5d..53c70c4 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -983,25 +983,48 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
*/
static void rebuild_sched_domains_locked(void)
{
+ struct cgroup_subsys_state *pos_css;
struct sched_domain_attr *attr;
cpumask_var_t *doms;
+ struct cpuset *cs;
int ndoms;
lockdep_assert_cpus_held();
percpu_rwsem_assert_held(&cpuset_rwsem);
/*
- * We have raced with CPU hotplug. Don't do anything to avoid
+ * If we have raced with CPU hotplug, return early to avoid
* passing doms with offlined cpu to partition_sched_domains().
- * Anyways, hotplug work item will rebuild sched domains.
+ * Anyways, cpuset_hotplug_workfn() will rebuild sched domains.
+ *
+ * With no CPUs in any subpartitions, top_cpuset's effective CPUs
+ * should be the same as the active CPUs, so checking only top_cpuset
+ * is enough to detect racing CPU offlines.
*/
if (!top_cpuset.nr_subparts_cpus &&
!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
return;
- if (top_cpuset.nr_subparts_cpus &&
- !cpumask_subset(top_cpuset.effective_cpus, cpu_active_mask))
- return;
+ /*
+ * With subpartition CPUs, however, the effective CPUs of a partition
+ * root should be only a subset of the active CPUs. Since a CPU in any
+ * partition root could be offlined, all must be checked.
+ */
+ if (top_cpuset.nr_subparts_cpus) {
+ rcu_read_lock();
+ cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
+ if (!is_partition_root(cs)) {
+ pos_css = css_rightmost_descendant(pos_css);
+ continue;
+ }
+ if (!cpumask_subset(cs->effective_cpus,
+ cpu_active_mask)) {
+ rcu_read_unlock();
+ return;
+ }
+ }
+ rcu_read_unlock();
+ }
/* Generate domain masks and attrs */
ndoms = generate_sched_domains(&doms, &attr);