Hi
[This is an automated email]
This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: 4.4+
The bot has tested the following trees: v5.6.13, v5.4.41, v4.19.123, v4.14.180, v4.9.223, v4.4.223.
v5.6.13: Failed to apply! Possible dependencies:
0024652895e3 ("btrfs: rename btrfs_put_fs_root and btrfs_grab_fs_root")
02162a0265eb ("btrfs: hold a ref on the root in __btrfs_run_defrag_inode")
04734e844894 ("btrfs: hold a ref on the root in btrfs_ioctl_get_subvol_info")
0b530bc5e11f ("btrfs: hold a ref on the root in build_backref_tree")
0d4b0463011d ("btrfs: export and rename free_fs_info")
2a2b5d620266 ("btrfs: hold ref on root in btrfs_ioctl_default_subvol")
3ca35e839e94 ("btrfs: hold a ref on the root in search_ioctl")
3d7babdcf2cc ("btrfs: hold a ref on the root in find_data_references")
41a2ee75aab0 ("btrfs: introduce per-inode file extent tree")
442b1ac5244e ("btrfs: hold a ref on the root in record_reloc_root_in_trans")
4c78e9f59632 ("btrfs: hold a ref on the root in open_ctree")
76deacf02387 ("btrfs: hold a ref on the root in create_reloc_inode")
81f096edf047 ("btrfs: use btrfs_put_fs_root to free roots always")
8727002f7909 ("btrfs: hold a ref on the root in fixup_tree_root_location")
88234012beaa ("btrfs: hold a ref on the root in btrfs_search_path_in_tree")
9326f76f4bc4 ("btrfs: hold a ref on the root in resolve_indirect_ref")
9f583209f20a ("btrfs: push grab_fs_root into read_fs_root")
ab9737bd7597 ("btrfs: hold a ref on the root in merge_reloc_roots")
b8a49ae1913f ("btrfs: hold a ref on the root in btrfs_search_path_in_tree_user")
bc44d7c4b2b1 ("btrfs: push btrfs_grab_fs_root into btrfs_get_fs_root")
bdf70b9e75f5 ("btrfs: hold a root ref in btrfs_get_dentry")
db2c2ca2db44 ("btrfs: hold a ref on the root in prepare_to_merge")
fc92f79856aa ("btrfs: hold a ref on the root in create_subvol")
v5.4.41: Failed to apply! Possible dependencies:
0024652895e3 ("btrfs: rename btrfs_put_fs_root and btrfs_grab_fs_root")
0d4b0463011d ("btrfs: export and rename free_fs_info")
33ca832fefa5 ("btrfs: separate out the extent leak code")
41a2ee75aab0 ("btrfs: introduce per-inode file extent tree")
6f0d04f8e72e ("btrfs: separate out the extent io init function")
81f096edf047 ("btrfs: use btrfs_put_fs_root to free roots always")
9326f76f4bc4 ("btrfs: hold a ref on the root in resolve_indirect_ref")
9c7d3a548331 ("btrfs: move extent_io_tree defs to their own header")
v4.19.123: Failed to apply! Possible dependencies:
370a11b8114b ("btrfs: qgroup: Introduce per-root swapped blocks infrastructure")
43eb5f297584 ("btrfs: Introduce extent_io_tree::owner to distinguish different io_trees")
57ec5fb478a3 ("btrfs: tests: move testing members of struct btrfs_root to the end")
7b4397386fbd ("btrfs: switch extent_io_tree::track_uptodate to bool")
c258d6e36442 ("btrfs: Introduce fs_info to extent_io_tree")
e06a1fc99cc7 ("btrfs: Remove extent_io_ops::set_bit_hook extent_io callback")
eede2bf34f4f ("Btrfs: prevent ioctls from interfering with a swap file")
v4.14.180: Failed to apply! Possible dependencies:
370a11b8114b ("btrfs: qgroup: Introduce per-root swapped blocks infrastructure")
429d6275d501 ("btrfs: qgroup: Fix wrong qgroup reservation update for relationship modification")
57ec5fb478a3 ("btrfs: tests: move testing members of struct btrfs_root to the end")
64cfaef6362f ("btrfs: qgroup: Introduce function to convert META_PREALLOC into META_PERTRANS")
64ee4e751a1c ("btrfs: qgroup: Update trace events to use new separate rsv types")
733e03a0b26a ("btrfs: qgroup: Split meta rsv type into meta_prealloc and meta_pertrans")
8287475a2055 ("btrfs: qgroup: Use root::qgroup_meta_rsv_* to record qgroup meta reserved space")
d4e5c92055d8 ("btrfs: qgroup: Skeleton to support separate qgroup reservation type")
dba213242fbc ("btrfs: qgroup: Make qgroup_reserve and its callers to use separate reservation type")
e1211d0e896b ("btrfs: qgroup: Don't use root->qgroup_meta_rsv for qgroup")
eede2bf34f4f ("Btrfs: prevent ioctls from interfering with a swap file")
f59c0347d4be ("btrfs: qgroup: Introduce helpers to update and access new qgroup rsv")
fd708b81d972 ("Btrfs: add a extent ref verify tool")
v4.9.223: Failed to apply! Possible dependencies:
0c476a5d7f63 ("btrfs: Ensure proper sector alignment for btrfs_free_reserved_data_space")
370a11b8114b ("btrfs: qgroup: Introduce per-root swapped blocks infrastructure")
4989d277eb4b ("btrfs: refactor __btrfs_lookup_bio_sums to use bio_for_each_segment_all")
62d1f9fe97dd ("btrfs: remove trivial helper btrfs_find_tree_block")
da17066c4047 ("btrfs: pull node/sector/stripe sizes out of root and into fs_info")
eede2bf34f4f ("Btrfs: prevent ioctls from interfering with a swap file")
fd708b81d972 ("Btrfs: add a extent ref verify tool")
v4.4.223: Failed to apply! Possible dependencies:
2f3165ecf103 ("btrfs: don't force mounts to wait for cleaner_kthread to delete one or more subvolumes")
370a11b8114b ("btrfs: qgroup: Introduce per-root swapped blocks infrastructure")
511711af91f2 ("btrfs: don't run delayed references while we are creating the free space tree")
70f6d82ec73c ("Btrfs: add free space tree mount option")
87241c2e6845 ("Btrfs: use root when checking need_async_flush")
90c711ab380d ("btrfs: avoid blocking open_ctree from cleaner_kthread")
9e7cc91a6d18 ("btrfs: fix fsfreeze hang caused by delayed iputs deal")
a5ed91828518 ("Btrfs: implement the free space B-tree")
afcdd129e05a ("Btrfs: add a flags field to btrfs_fs_info")
d38b349c39a9 ("Btrfs: don't bother kicking async if there's nothing to reclaim")
eede2bf34f4f ("Btrfs: prevent ioctls from interfering with a swap file")
f376df2b7da3 ("Btrfs: add tracepoints for flush events")
NOTE: The patch will not be queued to stable trees until it is upstream.
How should we proceed with this patch?
--
Thanks
Sasha
From: Herbert Xu <herbert(a)gondor.apana.org.au>
[ Upstream commit 6fc4dbcf0276279d488c5fbbfabe94734134f4fa ]
The function padata_reorder will use a timer when it cannot progress
while completed jobs are outstanding (pd->reorder_objects > 0). This
is suboptimal as if we do end up using the timer then it would have
introduced a gratuitous delay of one second.
In fact we can easily distinguish between whether completed jobs
are outstanding and whether we can make progress. All we have to
do is look at the next pqueue list.
This patch does that by replacing pd->processed with pd->cpu so
that the next pqueue is more accessible.
A work queue is used instead of the original try_again to avoid
hogging the CPU.
Note that we don't bother removing the work queue in
padata_flush_queues because the whole premise is broken. You
cannot flush async crypto requests so it makes no sense to even
try. A subsequent patch will fix it by replacing it with a ref
counting scheme.
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
[dj: - adjust context
- corrected setup_timer -> timer_setup to delete hunk
- skip padata_flush_queues() hunk, function already removed
in 4.19]
Signed-off-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
---
include/linux/padata.h | 13 ++----
kernel/padata.c | 95 ++++++++----------------------------------
2 files changed, 22 insertions(+), 86 deletions(-)
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 5d13d25da2c8..d803397a28f7 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -24,7 +24,6 @@
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/list.h>
-#include <linux/timer.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
@@ -85,18 +84,14 @@ struct padata_serial_queue {
* @serial: List to wait for serialization after reordering.
* @pwork: work struct for parallelization.
* @swork: work struct for serialization.
- * @pd: Backpointer to the internal control structure.
* @work: work struct for parallelization.
- * @reorder_work: work struct for reordering.
* @num_obj: Number of objects that are processed by this cpu.
* @cpu_index: Index of the cpu.
*/
struct padata_parallel_queue {
struct padata_list parallel;
struct padata_list reorder;
- struct parallel_data *pd;
struct work_struct work;
- struct work_struct reorder_work;
atomic_t num_obj;
int cpu_index;
};
@@ -122,10 +117,10 @@ struct padata_cpumask {
* @reorder_objects: Number of objects waiting in the reorder queues.
* @refcnt: Number of objects holding a reference on this parallel_data.
* @max_seq_nr: Maximal used sequence number.
+ * @cpu: Next CPU to be processed.
* @cpumask: The cpumasks in use for parallel and serial workers.
+ * @reorder_work: work struct for reordering.
* @lock: Reorder lock.
- * @processed: Number of already processed objects.
- * @timer: Reorder timer.
*/
struct parallel_data {
struct padata_instance *pinst;
@@ -134,10 +129,10 @@ struct parallel_data {
atomic_t reorder_objects;
atomic_t refcnt;
atomic_t seq_nr;
+ int cpu;
struct padata_cpumask cpumask;
+ struct work_struct reorder_work;
spinlock_t lock ____cacheline_aligned;
- unsigned int processed;
- struct timer_list timer;
};
/**
diff --git a/kernel/padata.c b/kernel/padata.c
index c280cb153915..47dc31ce15ac 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -167,23 +167,12 @@ EXPORT_SYMBOL(padata_do_parallel);
*/
static struct padata_priv *padata_get_next(struct parallel_data *pd)
{
- int cpu, num_cpus;
- unsigned int next_nr, next_index;
struct padata_parallel_queue *next_queue;
struct padata_priv *padata;
struct padata_list *reorder;
+ int cpu = pd->cpu;
- num_cpus = cpumask_weight(pd->cpumask.pcpu);
-
- /*
- * Calculate the percpu reorder queue and the sequence
- * number of the next object.
- */
- next_nr = pd->processed;
- next_index = next_nr % num_cpus;
- cpu = padata_index_to_cpu(pd, next_index);
next_queue = per_cpu_ptr(pd->pqueue, cpu);
-
reorder = &next_queue->reorder;
spin_lock(&reorder->lock);
@@ -194,7 +183,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
list_del_init(&padata->list);
atomic_dec(&pd->reorder_objects);
- pd->processed++;
+ pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
+ false);
spin_unlock(&reorder->lock);
goto out;
@@ -217,6 +207,7 @@ static void padata_reorder(struct parallel_data *pd)
struct padata_priv *padata;
struct padata_serial_queue *squeue;
struct padata_instance *pinst = pd->pinst;
+ struct padata_parallel_queue *next_queue;
/*
* We need to ensure that only one cpu can work on dequeueing of
@@ -248,7 +239,6 @@ static void padata_reorder(struct parallel_data *pd)
* so exit immediately.
*/
if (PTR_ERR(padata) == -ENODATA) {
- del_timer(&pd->timer);
spin_unlock_bh(&pd->lock);
return;
}
@@ -267,70 +257,29 @@ static void padata_reorder(struct parallel_data *pd)
/*
* The next object that needs serialization might have arrived to
- * the reorder queues in the meantime, we will be called again
- * from the timer function if no one else cares for it.
+ * the reorder queues in the meantime.
*
- * Ensure reorder_objects is read after pd->lock is dropped so we see
- * an increment from another task in padata_do_serial. Pairs with
+ * Ensure reorder queue is read after pd->lock is dropped so we see
+ * new objects from another task in padata_do_serial. Pairs with
* smp_mb__after_atomic in padata_do_serial.
*/
smp_mb();
- if (atomic_read(&pd->reorder_objects)
- && !(pinst->flags & PADATA_RESET))
- mod_timer(&pd->timer, jiffies + HZ);
- else
- del_timer(&pd->timer);
- return;
+ next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
+ if (!list_empty(&next_queue->reorder.list))
+ queue_work(pinst->wq, &pd->reorder_work);
}
static void invoke_padata_reorder(struct work_struct *work)
{
- struct padata_parallel_queue *pqueue;
struct parallel_data *pd;
local_bh_disable();
- pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
- pd = pqueue->pd;
+ pd = container_of(work, struct parallel_data, reorder_work);
padata_reorder(pd);
local_bh_enable();
}
-static void padata_reorder_timer(struct timer_list *t)
-{
- struct parallel_data *pd = from_timer(pd, t, timer);
- unsigned int weight;
- int target_cpu, cpu;
-
- cpu = get_cpu();
-
- /* We don't lock pd here to not interfere with parallel processing
- * padata_reorder() calls on other CPUs. We just need any CPU out of
- * the cpumask.pcpu set. It would be nice if it's the right one but
- * it doesn't matter if we're off to the next one by using an outdated
- * pd->processed value.
- */
- weight = cpumask_weight(pd->cpumask.pcpu);
- target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
-
- /* ensure to call the reorder callback on the correct CPU */
- if (cpu != target_cpu) {
- struct padata_parallel_queue *pqueue;
- struct padata_instance *pinst;
-
- /* The timer function is serialized wrt itself -- no locking
- * needed.
- */
- pinst = pd->pinst;
- pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
- queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
- } else {
- padata_reorder(pd);
- }
-
- put_cpu();
-}
-
static void padata_serial_worker(struct work_struct *serial_work)
{
struct padata_serial_queue *squeue;
@@ -384,9 +333,8 @@ void padata_do_serial(struct padata_priv *padata)
cpu = get_cpu();
- /* We need to run on the same CPU padata_do_parallel(.., padata, ..)
- * was called on -- or, at least, enqueue the padata object into the
- * correct per-cpu queue.
+ /* We need to enqueue the padata object into the correct
+ * per-cpu queue.
*/
if (cpu != padata->cpu) {
reorder_via_wq = 1;
@@ -396,12 +344,12 @@ void padata_do_serial(struct padata_priv *padata)
pqueue = per_cpu_ptr(pd->pqueue, cpu);
spin_lock(&pqueue->reorder.lock);
- atomic_inc(&pd->reorder_objects);
list_add_tail(&padata->list, &pqueue->reorder.list);
+ atomic_inc(&pd->reorder_objects);
spin_unlock(&pqueue->reorder.lock);
/*
- * Ensure the atomic_inc of reorder_objects above is ordered correctly
+ * Ensure the addition to the reorder list is ordered correctly
* with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
* in padata_reorder.
*/
@@ -409,13 +357,7 @@ void padata_do_serial(struct padata_priv *padata)
put_cpu();
- /* If we're running on the wrong CPU, call padata_reorder() via a
- * kernel worker.
- */
- if (reorder_via_wq)
- queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
- else
- padata_reorder(pd);
+ padata_reorder(pd);
}
EXPORT_SYMBOL(padata_do_serial);
@@ -471,14 +413,12 @@ static void padata_init_pqueues(struct parallel_data *pd)
continue;
}
- pqueue->pd = pd;
pqueue->cpu_index = cpu_index;
cpu_index++;
__padata_list_init(&pqueue->reorder);
__padata_list_init(&pqueue->parallel);
INIT_WORK(&pqueue->work, padata_parallel_worker);
- INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
atomic_set(&pqueue->num_obj, 0);
}
}
@@ -506,12 +446,13 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
padata_init_pqueues(pd);
padata_init_squeues(pd);
- timer_setup(&pd->timer, padata_reorder_timer, 0);
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
atomic_set(&pd->refcnt, 1);
pd->pinst = pinst;
spin_lock_init(&pd->lock);
+ pd->cpu = cpumask_first(pcpumask);
+ INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
return pd;
--
2.26.2
I've backported fixes for I²C and media controller devices, dealing
with the lifetime of related cdev and struct device instances and some
similar race conditions. Fixing the lifetime issue for watchdog
devices looks impractical for 4.4, as it depends on a big refactoring
in 4.5.
All but one of these are already included in or queued for the later
stable branches. You dropped the I²C lifetime fix for 4.9, but I hope
my previous replies persuaded you that it is valid.
Ben.
--
Ben Hutchings, Software Developer Codethink Ltd
https://www.codethink.co.uk/ Dale House, 35 Dale Street
Manchester, M1 2HF, United Kingdom
From: Mathias Krause <minipli(a)googlemail.com>
[ Upstream commit 1bd845bcb41d5b7f83745e0cb99273eb376f2ec5 ]
The parallel queue per-cpu data structure gets initialized only for CPUs
in the 'pcpu' CPU mask set. This is not sufficient as the reorder timer
may run on a different CPU and might wrongly decide it's the target CPU
for the next reorder item as per-cpu memory gets memset(0) and we might
be waiting for the first CPU in cpumask.pcpu, i.e. cpu_index 0.
Make the '__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index'
compare in padata_get_next() fail in this case by initializing the
cpu_index member of all per-cpu parallel queues. Use -1 for unused ones.
Signed-off-by: Mathias Krause <minipli(a)googlemail.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
Signed-off-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
---
kernel/padata.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/kernel/padata.c b/kernel/padata.c
index 693536efccf9..52a1d3fd13b5 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -462,8 +462,14 @@ static void padata_init_pqueues(struct parallel_data *pd)
struct padata_parallel_queue *pqueue;
cpu_index = 0;
- for_each_cpu(cpu, pd->cpumask.pcpu) {
+ for_each_possible_cpu(cpu) {
pqueue = per_cpu_ptr(pd->pqueue, cpu);
+
+ if (!cpumask_test_cpu(cpu, pd->cpumask.pcpu)) {
+ pqueue->cpu_index = -1;
+ continue;
+ }
+
pqueue->pd = pd;
pqueue->cpu_index = cpu_index;
cpu_index++;
--
2.26.2
From: Mathias Krause <minipli(a)googlemail.com>
[ Upstream commit 1bd845bcb41d5b7f83745e0cb99273eb376f2ec5 ]
The parallel queue per-cpu data structure gets initialized only for CPUs
in the 'pcpu' CPU mask set. This is not sufficient as the reorder timer
may run on a different CPU and might wrongly decide it's the target CPU
for the next reorder item as per-cpu memory gets memset(0) and we might
be waiting for the first CPU in cpumask.pcpu, i.e. cpu_index 0.
Make the '__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index'
compare in padata_get_next() fail in this case by initializing the
cpu_index member of all per-cpu parallel queues. Use -1 for unused ones.
Signed-off-by: Mathias Krause <minipli(a)googlemail.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
Signed-off-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
---
kernel/padata.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/kernel/padata.c b/kernel/padata.c
index 40a0ebb8ea51..858e82179744 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -462,8 +462,14 @@ static void padata_init_pqueues(struct parallel_data *pd)
struct padata_parallel_queue *pqueue;
cpu_index = 0;
- for_each_cpu(cpu, pd->cpumask.pcpu) {
+ for_each_possible_cpu(cpu) {
pqueue = per_cpu_ptr(pd->pqueue, cpu);
+
+ if (!cpumask_test_cpu(cpu, pd->cpumask.pcpu)) {
+ pqueue->cpu_index = -1;
+ continue;
+ }
+
pqueue->pd = pd;
pqueue->cpu_index = cpu_index;
cpu_index++;
--
2.26.2
The patch titled
Subject: mm: ptdump: expand type of 'val' in note_page()
has been added to the -mm tree. Its filename is
mm-ptdump-expand-type-of-val-in-note_page.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-ptdump-expand-type-of-val-in-no…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-ptdump-expand-type-of-val-in-no…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Steven Price <steven.price(a)arm.com>
Subject: mm: ptdump: expand type of 'val' in note_page()
The page table entry is passed in the 'val' argument to note_page(),
however this was previously an "unsigned long" which is fine on 64-bit
platforms. But for 32 bit x86 it is not always big enough to contain a
page table entry which may be 64 bits.
Change the type to u64 to ensure that it is always big enough.
Link: http://lkml.kernel.org/r/20200521152308.33096-3-steven.price@arm.com
Signed-off-by: Steven Price <steven.price(a)arm.com>
Reported-by: Jan Beulich <jbeulich(a)suse.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/arm64/mm/dump.c | 2 +-
arch/x86/mm/dump_pagetables.c | 2 +-
include/linux/ptdump.h | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
--- a/arch/arm64/mm/dump.c~mm-ptdump-expand-type-of-val-in-note_page
+++ a/arch/arm64/mm/dump.c
@@ -247,7 +247,7 @@ static void note_prot_wx(struct pg_state
}
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- unsigned long val)
+ u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
static const char units[] = "KMGTPE";
--- a/arch/x86/mm/dump_pagetables.c~mm-ptdump-expand-type-of-val-in-note_page
+++ a/arch/x86/mm/dump_pagetables.c
@@ -273,7 +273,7 @@ static void effective_prot(struct ptdump
* print what we collected so far.
*/
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- unsigned long val)
+ u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
pgprotval_t new_prot, new_eff;
--- a/include/linux/ptdump.h~mm-ptdump-expand-type-of-val-in-note_page
+++ a/include/linux/ptdump.h
@@ -13,7 +13,7 @@ struct ptdump_range {
struct ptdump_state {
/* level is 0:PGD to 4:PTE, or -1 if unknown */
void (*note_page)(struct ptdump_state *st, unsigned long addr,
- int level, unsigned long val);
+ int level, u64 val);
void (*effective_prot)(struct ptdump_state *st, int level, u64 val);
const struct ptdump_range *range;
};
_
Patches currently in -mm which might be from steven.price(a)arm.com are
x86-mm-ptdump-calculate-effective-permissions-correctly.patch
mm-ptdump-expand-type-of-val-in-note_page.patch
The patch titled
Subject: x86: mm: ptdump: calculate effective permissions correctly
has been added to the -mm tree. Its filename is
x86-mm-ptdump-calculate-effective-permissions-correctly.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/x86-mm-ptdump-calculate-effective-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/x86-mm-ptdump-calculate-effective-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Steven Price <steven.price(a)arm.com>
Subject: x86: mm: ptdump: calculate effective permissions correctly
Patch series "Fix W+X debug feature on x86"
Jan alerted me[1] that the W+X detection debug feature was broken in x86
by my change[2] to switch x86 to use the generic ptdump infrastructure.
Fundamentally the approach of trying to move the calculation of effective
permissions into note_page() was broken because note_page() is only called
for 'leaf' entries and the effective permissions are passed down via the
internal nodes of the page tree. The solution I've taken here is to
create a new (optional) callback which is called for all nodes of the page
tree and therefore can calculate the effective permissions.
Secondly on some configurations (32 bit with PAE) "unsigned long" is not
large enough to store the table entries. The fix here is simple - let's
just use a u64.
[1] https://lore.kernel.org/lkml/d573dc7e-e742-84de-473d-f971142fa319@suse.com/
[2] 2ae27137b2db ("x86: mm: convert dump_pagetables to use walk_page_range")
This patch (of 2):
By switching the x86 page table dump code to use the generic code the
effective permissions are no longer calculated correctly because the
note_page() function is only called for *leaf* entries. To calculate the
actual effective permissions it is necessary to observe the full hierarchy
of the page tree.
Introduce a new callback for ptdump which is called for every entry and
can therefore update the prot_levels array correctly. note_page() can
then simply access the appropriate element in the array.
Link: http://lkml.kernel.org/r/20200521152308.33096-1-steven.price@arm.com
Link: http://lkml.kernel.org/r/20200521152308.33096-2-steven.price@arm.com
Fixes: 2ae27137b2db ("x86: mm: convert dump_pagetables to use walk_page_range")
Signed-off-by: Steven Price <steven.price(a)arm.com>
Reported-by: Jan Beulich <jbeulich(a)suse.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/x86/mm/dump_pagetables.c | 31 +++++++++++++++++++------------
include/linux/ptdump.h | 1 +
mm/ptdump.c | 17 ++++++++++++++++-
3 files changed, 36 insertions(+), 13 deletions(-)
--- a/arch/x86/mm/dump_pagetables.c~x86-mm-ptdump-calculate-effective-permissions-correctly
+++ a/arch/x86/mm/dump_pagetables.c
@@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st,
(void *)st->start_address);
}
-static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
{
- return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
- ((prot1 | prot2) & _PAGE_NX);
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ pgprotval_t prot = val & PTE_FLAGS_MASK;
+ pgprotval_t effective;
+
+ if (level > 0) {
+ pgprotval_t higher_prot = st->prot_levels[level - 1];
+
+ effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) |
+ ((higher_prot | prot) & _PAGE_NX);
+ } else {
+ effective = prot;
+ }
+
+ st->prot_levels[level] = effective;
}
/*
@@ -270,16 +282,10 @@ static void note_page(struct ptdump_stat
struct seq_file *m = st->seq;
new_prot = val & PTE_FLAGS_MASK;
+ new_eff = st->prot_levels[level];
- if (level > 0) {
- new_eff = effective_prot(st->prot_levels[level - 1],
- new_prot);
- } else {
- new_eff = new_prot;
- }
-
- if (level >= 0)
- st->prot_levels[level] = new_eff;
+ if (!val)
+ new_eff = 0;
/*
* If we have a "break" in the series, we need to flush the state that
@@ -374,6 +380,7 @@ static void ptdump_walk_pgd_level_core(s
struct pg_state st = {
.ptdump = {
.note_page = note_page,
+ .effective_prot = effective_prot,
.range = ptdump_ranges
},
.level = -1,
--- a/include/linux/ptdump.h~x86-mm-ptdump-calculate-effective-permissions-correctly
+++ a/include/linux/ptdump.h
@@ -14,6 +14,7 @@ struct ptdump_state {
/* level is 0:PGD to 4:PTE, or -1 if unknown */
void (*note_page)(struct ptdump_state *st, unsigned long addr,
int level, unsigned long val);
+ void (*effective_prot)(struct ptdump_state *st, int level, u64 val);
const struct ptdump_range *range;
};
--- a/mm/ptdump.c~x86-mm-ptdump-calculate-effective-permissions-correctly
+++ a/mm/ptdump.c
@@ -36,6 +36,9 @@ static int ptdump_pgd_entry(pgd_t *pgd,
return note_kasan_page_table(walk, addr);
#endif
+ if (st->effective_prot)
+ st->effective_prot(st, 0, pgd_val(val));
+
if (pgd_leaf(val))
st->note_page(st, addr, 0, pgd_val(val));
@@ -53,6 +56,9 @@ static int ptdump_p4d_entry(p4d_t *p4d,
return note_kasan_page_table(walk, addr);
#endif
+ if (st->effective_prot)
+ st->effective_prot(st, 1, p4d_val(val));
+
if (p4d_leaf(val))
st->note_page(st, addr, 1, p4d_val(val));
@@ -70,6 +76,9 @@ static int ptdump_pud_entry(pud_t *pud,
return note_kasan_page_table(walk, addr);
#endif
+ if (st->effective_prot)
+ st->effective_prot(st, 2, pud_val(val));
+
if (pud_leaf(val))
st->note_page(st, addr, 2, pud_val(val));
@@ -87,6 +96,8 @@ static int ptdump_pmd_entry(pmd_t *pmd,
return note_kasan_page_table(walk, addr);
#endif
+ if (st->effective_prot)
+ st->effective_prot(st, 3, pmd_val(val));
if (pmd_leaf(val))
st->note_page(st, addr, 3, pmd_val(val));
@@ -97,8 +108,12 @@ static int ptdump_pte_entry(pte_t *pte,
unsigned long next, struct mm_walk *walk)
{
struct ptdump_state *st = walk->private;
+ pte_t val = READ_ONCE(*pte);
+
+ if (st->effective_prot)
+ st->effective_prot(st, 4, pte_val(val));
- st->note_page(st, addr, 4, pte_val(READ_ONCE(*pte)));
+ st->note_page(st, addr, 4, pte_val(val));
return 0;
}
_
Patches currently in -mm which might be from steven.price(a)arm.com are
x86-mm-ptdump-calculate-effective-permissions-correctly.patch
mm-ptdump-expand-type-of-val-in-note_page.patch
I hope you are doing great?
This is Felix from Toronto-Canada. I have a lucrative business
offer that will benefit us both immensely within a very short
period of time. However, I need your initial approval of interest
prior to further and complete details regarding the deal.
Thanks,
Felix.