ioremap() calls pud_free_pmd_page() / pmd_free_pte_page() when it creates
a pud / pmd map. The following preconditions are met at their entry.
- All pte entries for a target pud/pmd address range have been cleared.
- System-wide TLB purges have been peformed for a target pud/pmd address
range.
The preconditions assure that there is no stale TLB entry for the range.
Speculation may not cache TLB entries since it requires all levels of page
entries, including ptes, to have P & A-bits set for an associated address.
However, speculation may cache pud/pmd entries (paging-structure caches)
when they have P-bit set.
Add a system-wide TLB purge (INVLPG) to a single page after clearing
pud/pmd entry's P-bit.
SDM 4.10.4.1, Operation that Invalidate TLBs and Paging-Structure Caches,
states that:
INVLPG invalidates all paging-structure caches associated with the
current PCID regardless of the liner addresses to which they correspond.
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Joerg Roedel <joro(a)8bytes.org>
Cc: <stable(a)vger.kernel.org>
---
arch/x86/mm/pgtable.c | 36 ++++++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index fbd14e506758..e3deefb891da 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -725,24 +725,44 @@ int pmd_clear_huge(pmd_t *pmd)
* @pud: Pointer to a PUD.
* @addr: Virtual address associated with pud.
*
- * Context: The pud range has been unmaped and TLB purged.
+ * Context: The pud range has been unmapped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
+ *
+ * NOTE: Callers must allow a single page allocation.
*/
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
- pmd_t *pmd;
+ pmd_t *pmd, *pmd_sv;
+ pte_t *pte;
int i;
if (pud_none(*pud))
return 1;
pmd = (pmd_t *)pud_page_vaddr(*pud);
+ pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
+ if (!pmd_sv)
+ return 0;
- for (i = 0; i < PTRS_PER_PMD; i++)
- if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
- return 0;
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ pmd_sv[i] = pmd[i];
+ if (!pmd_none(pmd[i]))
+ pmd_clear(&pmd[i]);
+ }
pud_clear(pud);
+
+ /* INVLPG to clear all paging-structure caches */
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (!pmd_none(pmd_sv[i])) {
+ pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
+ free_page((unsigned long)pte);
+ }
+ }
+
+ free_page((unsigned long)pmd_sv);
free_page((unsigned long)pmd);
return 1;
@@ -753,7 +773,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
* @pmd: Pointer to a PMD.
* @addr: Virtual address associated with pmd.
*
- * Context: The pmd range has been unmaped and TLB purged.
+ * Context: The pmd range has been unmapped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
@@ -765,6 +785,10 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
pte = (pte_t *)pmd_page_vaddr(*pmd);
pmd_clear(pmd);
+
+ /* INVLPG to clear all paging-structure caches */
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
free_page((unsigned long)pte);
return 1;
From: Jens Axboe <axboe(a)kernel.dk>
commit cd4a4ae4683dc2e09380118e205e057896dcda2b upstream
If we end up splitting a bio and the queue goes away between
the initial submission and the later split submission, then we
can block forever in blk_queue_enter() waiting for the reference
to drop to zero. This will never happen, since we already hold
a reference.
Mark a split bio as already having entered the queue, so we can
just use the live non-blocking queue enter variant.
Thanks to Tetsuo Handa for the analysis.
We're running fio tests and the tasks get stuck in a D state forever
when systemd-udevd tries to read the partition table. This patch solves
it. Please apply to 4.17 stable.
Reported-by: syzbot+c4f9cebf9d651f6e54de(a)syzkaller.appspotmail.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Alexandru Moise <00moses.alexander00(a)gmail.com>
---
v2: Fixed "From:"
v3: Added "From: Jens Axboe" above commit sha
block/blk-core.c | 4 +++-
block/blk-merge.c | 10 ++++++++++
include/linux/blk_types.h | 2 ++
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index b559b9d4f1a2..47ab2d9d02d9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2392,7 +2392,9 @@ blk_qc_t generic_make_request(struct bio *bio)
if (bio->bi_opf & REQ_NOWAIT)
flags = BLK_MQ_REQ_NOWAIT;
- if (blk_queue_enter(q, flags) < 0) {
+ if (bio_flagged(bio, BIO_QUEUE_ENTERED))
+ blk_queue_enter_live(q);
+ else if (blk_queue_enter(q, flags) < 0) {
if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
bio_wouldblock_error(bio);
else
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 782940c65d8a..481dc02668f9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -210,6 +210,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
/* there isn't chance to merge the splitted bio */
split->bi_opf |= REQ_NOMERGE;
+ /*
+ * Since we're recursing into make_request here, ensure
+ * that we mark this bio as already having entered the queue.
+ * If not, and the queue is going away, we can get stuck
+ * forever on waiting for the queue reference to drop. But
+ * that will never happen, as we're already holding a
+ * reference to it.
+ */
+ bio_set_flag(*bio, BIO_QUEUE_ENTERED);
+
bio_chain(split, *bio);
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
generic_make_request(*bio);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 17b18b91ebac..1602bf4ab4cd 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -186,6 +186,8 @@ struct bio {
* throttling rules. Don't do it again. */
#define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion
* of this bio. */
+#define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */
+
/* See BVEC_POOL_OFFSET below before adding new flags */
/*
--
2.18.0
commit cd4a4ae4683dc2e09380118e205e057896dcda2b upstream.
If we end up splitting a bio and the queue goes away between
the initial submission and the later split submission, then we
can block forever in blk_queue_enter() waiting for the reference
to drop to zero. This will never happen, since we already hold
a reference.
Mark a split bio as already having entered the queue, so we can
just use the live non-blocking queue enter variant.
Thanks to Tetsuo Handa for the analysis.
We're running fio tests and the tasks get stuck in a D state forever
when systemd-udevd tries to read the partition table. This patch solves
it. Please apply to 4.17 stable.
Reported-by: syzbot+c4f9cebf9d651f6e54de(a)syzkaller.appspotmail.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Alexandru Moise <00moses.alexander00(a)gmail.com>
---
v2: Changed "From:" The email sent as being from Jens instead of me,
sorry again Jens.
block/blk-core.c | 4 +++-
block/blk-merge.c | 10 ++++++++++
include/linux/blk_types.h | 2 ++
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index b559b9d4f1a2..47ab2d9d02d9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2392,7 +2392,9 @@ blk_qc_t generic_make_request(struct bio *bio)
if (bio->bi_opf & REQ_NOWAIT)
flags = BLK_MQ_REQ_NOWAIT;
- if (blk_queue_enter(q, flags) < 0) {
+ if (bio_flagged(bio, BIO_QUEUE_ENTERED))
+ blk_queue_enter_live(q);
+ else if (blk_queue_enter(q, flags) < 0) {
if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
bio_wouldblock_error(bio);
else
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 782940c65d8a..481dc02668f9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -210,6 +210,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
/* there isn't chance to merge the splitted bio */
split->bi_opf |= REQ_NOMERGE;
+ /*
+ * Since we're recursing into make_request here, ensure
+ * that we mark this bio as already having entered the queue.
+ * If not, and the queue is going away, we can get stuck
+ * forever on waiting for the queue reference to drop. But
+ * that will never happen, as we're already holding a
+ * reference to it.
+ */
+ bio_set_flag(*bio, BIO_QUEUE_ENTERED);
+
bio_chain(split, *bio);
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
generic_make_request(*bio);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 17b18b91ebac..1602bf4ab4cd 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -186,6 +186,8 @@ struct bio {
* throttling rules. Don't do it again. */
#define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion
* of this bio. */
+#define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */
+
/* See BVEC_POOL_OFFSET below before adding new flags */
/*
--
2.18.0
ubifs_log_start_commit() allocates a buffer with kmalloc(),
this buffer is used to build UBIFS CS and REF nodes, all structure
attributes get set, except for the padding field in the ubifs_ref_node.
That way we leak 28 bytes of kernel memory to the MTD.
Fix it by using kzalloc().
Cc: stable(a)vger.kernel.org
Fixes: 1e51764a3c2a ("UBIFS: add new flash file system")
Signed-off-by: Richard Weinberger <richard(a)nod.at>
---
fs/ubifs/log.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 7cffa120a750..60d49c6dd470 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -369,7 +369,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
max_len = ALIGN(max_len, c->min_io_size);
- buf = cs = kmalloc(max_len, GFP_NOFS);
+ buf = cs = kzalloc(max_len, GFP_NOFS);
if (!buf)
return -ENOMEM;
--
2.18.0