From: Ard Biesheuvel <ardb(a)kernel.org>
Subject: kmap_local: don't assume kmap PTEs are linear arrays in memory
The kmap_local conversion broke the ARM architecture, because the new code
assumes that all PTEs used for creating kmaps form a linear array in
memory, and uses array indexing to look up the kmap PTE belonging to a
certain kmap index.
On ARM, this cannot work, not only because the PTE pages may be
non-adjacent in memory, but also because ARM/!LPAE interleaves hardware
entries and extended entries (carrying software-only bits) in a way that
is not compatible with array indexing.
Fortunately, this only seems to affect configurations with more than 8
CPUs, due to the way the per-CPU kmap slots are organized in memory.
Work around this by permitting an architecture to set a Kconfig symbol
that signifies that the kmap PTEs do not form a lineary array in memory,
and so the only way to locate the appropriate one is to walk the page
tables.
Link: https://lore.kernel.org/linux-arm-kernel/20211026131249.3731275-1-ardb@kern…
Link: https://lkml.kernel.org/r/20211116094737.7391-1-ardb@kernel.org
Fixes: 2a15ba82fa6c ("ARM: highmem: Switch to generic kmap atomic")
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
Reported-by: Quanyang Wang <quanyang.wang(a)windriver.com>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Acked-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/arm/Kconfig | 1 +
mm/Kconfig | 3 +++
mm/highmem.c | 32 +++++++++++++++++++++-----------
3 files changed, 25 insertions(+), 11 deletions(-)
--- a/arch/arm/Kconfig~kmap_local-dont-assume-kmap-ptes-are-linear-arrays-in-memory
+++ a/arch/arm/Kconfig
@@ -1463,6 +1463,7 @@ config HIGHMEM
bool "High Memory Support"
depends on MMU
select KMAP_LOCAL
+ select KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
help
The address space of ARM processors is only 4 Gigabytes large
and it has to accommodate user address space, kernel address
--- a/mm/highmem.c~kmap_local-dont-assume-kmap-ptes-are-linear-arrays-in-memory
+++ a/mm/highmem.c
@@ -503,16 +503,22 @@ static inline int kmap_local_calc_idx(in
static pte_t *__kmap_pte;
-static pte_t *kmap_get_pte(void)
+static pte_t *kmap_get_pte(unsigned long vaddr, int idx)
{
+ if (IS_ENABLED(CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY))
+ /*
+ * Set by the arch if __kmap_pte[-idx] does not produce
+ * the correct entry.
+ */
+ return virt_to_kpte(vaddr);
if (!__kmap_pte)
__kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
- return __kmap_pte;
+ return &__kmap_pte[-idx];
}
void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
{
- pte_t pteval, *kmap_pte = kmap_get_pte();
+ pte_t pteval, *kmap_pte;
unsigned long vaddr;
int idx;
@@ -524,9 +530,10 @@ void *__kmap_local_pfn_prot(unsigned lon
preempt_disable();
idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn);
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- BUG_ON(!pte_none(*(kmap_pte - idx)));
+ kmap_pte = kmap_get_pte(vaddr, idx);
+ BUG_ON(!pte_none(*kmap_pte));
pteval = pfn_pte(pfn, prot);
- arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte - idx, pteval);
+ arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte, pteval);
arch_kmap_local_post_map(vaddr, pteval);
current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
preempt_enable();
@@ -559,7 +566,7 @@ EXPORT_SYMBOL(__kmap_local_page_prot);
void kunmap_local_indexed(void *vaddr)
{
unsigned long addr = (unsigned long) vaddr & PAGE_MASK;
- pte_t *kmap_pte = kmap_get_pte();
+ pte_t *kmap_pte;
int idx;
if (addr < __fix_to_virt(FIX_KMAP_END) ||
@@ -584,8 +591,9 @@ void kunmap_local_indexed(void *vaddr)
idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr);
WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ kmap_pte = kmap_get_pte(addr, idx);
arch_kmap_local_pre_unmap(addr);
- pte_clear(&init_mm, addr, kmap_pte - idx);
+ pte_clear(&init_mm, addr, kmap_pte);
arch_kmap_local_post_unmap(addr);
current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
kmap_local_idx_pop();
@@ -607,7 +615,7 @@ EXPORT_SYMBOL(kunmap_local_indexed);
void __kmap_local_sched_out(void)
{
struct task_struct *tsk = current;
- pte_t *kmap_pte = kmap_get_pte();
+ pte_t *kmap_pte;
int i;
/* Clear kmaps */
@@ -634,8 +642,9 @@ void __kmap_local_sched_out(void)
idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ kmap_pte = kmap_get_pte(addr, idx);
arch_kmap_local_pre_unmap(addr);
- pte_clear(&init_mm, addr, kmap_pte - idx);
+ pte_clear(&init_mm, addr, kmap_pte);
arch_kmap_local_post_unmap(addr);
}
}
@@ -643,7 +652,7 @@ void __kmap_local_sched_out(void)
void __kmap_local_sched_in(void)
{
struct task_struct *tsk = current;
- pte_t *kmap_pte = kmap_get_pte();
+ pte_t *kmap_pte;
int i;
/* Restore kmaps */
@@ -663,7 +672,8 @@ void __kmap_local_sched_in(void)
/* See comment in __kmap_local_sched_out() */
idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
+ kmap_pte = kmap_get_pte(addr, idx);
+ set_pte_at(&init_mm, addr, kmap_pte, pteval);
arch_kmap_local_post_map(addr, pteval);
}
}
--- a/mm/Kconfig~kmap_local-dont-assume-kmap-ptes-are-linear-arrays-in-memory
+++ a/mm/Kconfig
@@ -890,6 +890,9 @@ config MAPPING_DIRTY_HELPERS
config KMAP_LOCAL
bool
+config KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
+ bool
+
# struct io_mapping based helper. Selected by drivers that need them
config IO_MAPPING
bool
_
From: Mina Almasry <almasrymina(a)google.com>
Subject: hugetlb, userfaultfd: fix reservation restore on userfaultfd error
Currently in the is_continue case in hugetlb_mcopy_atomic_pte(), if we
bail out using "goto out_release_unlock;" in the cases where idx >= size,
or !huge_pte_none(), the code will detect that new_pagecache_page ==
false, and so call restore_reserve_on_error(). In this case I see
restore_reserve_on_error() delete the reservation, and the following call
to remove_inode_hugepages() will increment h->resv_hugepages causing a
100% reproducible leak.
We should treat the is_continue case similar to adding a page into the
pagecache and set new_pagecache_page to true, to indicate that there is no
reservation to restore on the error path, and we need not call
restore_reserve_on_error(). Rename new_pagecache_page to
page_in_pagecache to make that clear.
Link: https://lkml.kernel.org/r/20211117193825.378528-1-almasrymina@google.com
Fixes: c7b1850dfb41 ("hugetlb: don't pass page cache pages to restore_reserve_on_error")
Signed-off-by: Mina Almasry <almasrymina(a)google.com>
Reported-by: James Houghton <jthoughton(a)google.com>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Wei Xu <weixugc(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
--- a/mm/hugetlb.c~hugetlb-userfaultfd-fix-reservation-restore-on-userfaultfd-error
+++ a/mm/hugetlb.c
@@ -5736,13 +5736,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
int ret = -ENOMEM;
struct page *page;
int writable;
- bool new_pagecache_page = false;
+ bool page_in_pagecache = false;
if (is_continue) {
ret = -EFAULT;
page = find_lock_page(mapping, idx);
if (!page)
goto out;
+ page_in_pagecache = true;
} else if (!*pagep) {
/* If a page already exists, then it's UFFDIO_COPY for
* a non-missing case. Return -EEXIST.
@@ -5830,7 +5831,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
ret = huge_add_to_page_cache(page, mapping, idx);
if (ret)
goto out_release_nounlock;
- new_pagecache_page = true;
+ page_in_pagecache = true;
}
ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
@@ -5894,7 +5895,7 @@ out_release_unlock:
if (vm_shared || is_continue)
unlock_page(page);
out_release_nounlock:
- if (!new_pagecache_page)
+ if (!page_in_pagecache)
restore_reserve_on_error(h, dst_vma, dst_addr, page);
put_page(page);
goto out;
_
From: Rustam Kovhaev <rkovhaev(a)gmail.com>
Subject: mm: kmemleak: slob: respect SLAB_NOLEAKTRACE flag
When kmemleak is enabled for SLOB, system does not boot and does not print
anything to the console. At the very early stage in the boot process we
hit infinite recursion from kmemleak_init() and eventually kernel crashes.
kmemleak_init() specifies SLAB_NOLEAKTRACE for KMEM_CACHE(), but
kmem_cache_create_usercopy() removes it because CACHE_CREATE_MASK is not
valid for SLOB.
Let's fix CACHE_CREATE_MASK and make kmemleak work with SLOB
Link: https://lkml.kernel.org/r/20211115020850.3154366-1-rkovhaev@gmail.com
Fixes: d8843922fba4 ("slab: Ignore internal flags in cache creation")
Signed-off-by: Rustam Kovhaev <rkovhaev(a)gmail.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: Muchun Song <songmuchun(a)bytedance.com>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Glauber Costa <glommer(a)parallels.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slab.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/slab.h~mm-kmemleak-slob-respect-slab_noleaktrace-flag
+++ a/mm/slab.h
@@ -147,7 +147,7 @@ static inline slab_flags_t kmem_cache_fl
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | SLAB_ACCOUNT)
#else
-#define SLAB_CACHE_FLAGS (0)
+#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
#endif
/* Common flags available with current configuration */
_
From: Nathan Chancellor <nathan(a)kernel.org>
Subject: hexagon: clean up timer-regs.h
When building allmodconfig, there is a warning about TIMER_ENABLE being
redefined:
drivers/clocksource/timer-oxnas-rps.c:39:9: error: 'TIMER_ENABLE' macro redefined [-Werror,-Wmacro-redefined]
#define TIMER_ENABLE BIT(7)
^
./arch/hexagon/include/asm/timer-regs.h:13:9: note: previous definition is here
#define TIMER_ENABLE 0
^
1 error generated.
The values in this header are only used in one file each, if they are
used at all. Remove the header and sink all of the constants into their
respective files.
TCX0_CLK_RATE is only used in arch/hexagon/include/asm/timex.h
TIMER_ENABLE, RTOS_TIMER_INT, RTOS_TIMER_REGS_ADDR are only used in
arch/hexagon/kernel/time.c.
SLEEP_CLK_RATE and TIMER_CLR_ON_MATCH have both been unused since the
file's introduction in commit 71e4a47f32f4 ("Hexagon: Add time and timer
functions").
TIMER_ENABLE is redefined as BIT(0) so the shift is moved into the
definition, rather than its use.
Link: https://lkml.kernel.org/r/20211115174250.1994179-3-nathan@kernel.org
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Acked-by: Brian Cain <bcain(a)codeaurora.org>
Cc: Nick Desaulniers <ndesaulniers(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/hexagon/include/asm/timer-regs.h | 26 ------------------------
arch/hexagon/include/asm/timex.h | 3 --
arch/hexagon/kernel/time.c | 12 +++++++++--
3 files changed, 11 insertions(+), 30 deletions(-)
--- a/arch/hexagon/include/asm/timer-regs.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Timer support for Hexagon
- *
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- */
-
-#ifndef _ASM_TIMER_REGS_H
-#define _ASM_TIMER_REGS_H
-
-/* This stuff should go into a platform specific file */
-#define TCX0_CLK_RATE 19200
-#define TIMER_ENABLE 0
-#define TIMER_CLR_ON_MATCH 1
-
-/*
- * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until
- * release 1.1, and then it's "adjustable" and probably not defaulted.
- */
-#define RTOS_TIMER_INT 3
-#ifdef CONFIG_HEXAGON_COMET
-#define RTOS_TIMER_REGS_ADDR 0xAB000000UL
-#endif
-#define SLEEP_CLK_RATE 32000
-
-#endif
--- a/arch/hexagon/include/asm/timex.h~hexagon-clean-up-timer-regsh
+++ a/arch/hexagon/include/asm/timex.h
@@ -7,11 +7,10 @@
#define _ASM_TIMEX_H
#include <asm-generic/timex.h>
-#include <asm/timer-regs.h>
#include <asm/hexagon_vm.h>
/* Using TCX0 as our clock. CLOCK_TICK_RATE scheduled to be removed. */
-#define CLOCK_TICK_RATE TCX0_CLK_RATE
+#define CLOCK_TICK_RATE 19200
#define ARCH_HAS_READ_CURRENT_TIMER
--- a/arch/hexagon/kernel/time.c~hexagon-clean-up-timer-regsh
+++ a/arch/hexagon/kernel/time.c
@@ -17,9 +17,10 @@
#include <linux/of_irq.h>
#include <linux/module.h>
-#include <asm/timer-regs.h>
#include <asm/hexagon_vm.h>
+#define TIMER_ENABLE BIT(0)
+
/*
* For the clocksource we need:
* pcycle frequency (600MHz)
@@ -33,6 +34,13 @@ cycles_t pcycle_freq_mhz;
cycles_t thread_freq_mhz;
cycles_t sleep_clk_freq;
+/*
+ * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until
+ * release 1.1, and then it's "adjustable" and probably not defaulted.
+ */
+#define RTOS_TIMER_INT 3
+#define RTOS_TIMER_REGS_ADDR 0xAB000000UL
+
static struct resource rtos_timer_resources[] = {
{
.start = RTOS_TIMER_REGS_ADDR,
@@ -80,7 +88,7 @@ static int set_next_event(unsigned long
iowrite32(0, &rtos_timer->clear);
iowrite32(delta, &rtos_timer->match);
- iowrite32(1 << TIMER_ENABLE, &rtos_timer->enable);
+ iowrite32(TIMER_ENABLE, &rtos_timer->enable);
return 0;
}
_