This code does not work as stated in the comment.
$(CONFIG_MODVERSIONS) is always empty because it is expanded before
include/config/auto.conf is included. Hence, 'make modules' with
CONFIG_MODVERSIONS=y cannot record the version CRCs.
This has been broken since 2003, commit ("kbuild: Enable modules to be
build using the "make dir/" syntax"). [1]
[1]: https://git.kernel.org/pub/scm/linux/kernel/git/history/history.git/commit/…
Cc: linux-stable <stable(a)vger.kernel.org> # v2.5.71+
Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
---
Makefile | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/Makefile b/Makefile
index 2df903429d31..b856f84e28c9 100644
--- a/Makefile
+++ b/Makefile
@@ -619,12 +619,8 @@ KBUILD_MODULES :=
KBUILD_BUILTIN := 1
# If we have only "make modules", don't compile built-in objects.
-# When we're building modules with modversions, we need to consider
-# the built-in objects during the descend as well, in order to
-# make sure the checksums are up to date before we record them.
-
ifeq ($(MAKECMDGOALS),modules)
- KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1)
+ KBUILD_BUILTIN :=
endif
# If we have "make <whatever> modules", compile modules
@@ -1337,6 +1333,13 @@ ifdef CONFIG_MODULES
all: modules
+# When we're building modules with modversions, we need to consider
+# the built-in objects during the descend as well, in order to
+# make sure the checksums are up to date before we record them.
+ifdef CONFIG_MODVERSIONS
+ KBUILD_BUILTIN := 1
+endif
+
# Build modules
#
# A module can be listed more than once in obj-m resulting in
--
2.25.1
Since v4.19 commit b0dedc49a2da ("mm/vmscan.c: iterate only over charged
shrinkers during memcg shrink_slab()") a memcg aware shrinker is only
called when the per-memcg per-node shrinker_map indicates that the
shrinker may have objects to release to the memcg and node.
shmem_unused_huge_count and shmem_unused_huge_scan support the per-tmpfs
shrinker which advertises per memcg and numa awareness. The shmem
shrinker releases memory by splitting hugepages that extend beyond
i_size.
Shmem does not currently set bits in shrinker_map. So, starting with
b0dedc49a2da, memcg reclaim avoids calling the shmem shrinker under
pressure. This leads to undeserved memcg OOM kills.
Example that reliably sees memcg OOM kill in unpatched kernel:
FS=/tmp/fs
CONTAINER=/cgroup/memory/tmpfs_shrinker
mkdir -p $FS
mount -t tmpfs -o huge=always nodev $FS
# Create 1000 MB container, which shouldn't suffer OOM.
mkdir $CONTAINER
echo 1000M > $CONTAINER/memory.limit_in_bytes
echo $BASHPID >> $CONTAINER/cgroup.procs
# Create 4000 files. Ideally each file uses 4k data page + a little
# metadata. Assume 8k total per-file, 32MB (4000*8k) should easily
# fit within container's 1000 MB. But if data pages use 2MB
# hugepages (due to aggressive huge=always) then files consume 8GB,
# which hits memcg 1000 MB limit.
for i in {1..4000}; do
echo . > $FS/$i
done
v5.4 commit 87eaceb3faa5 ("mm: thp: make deferred split shrinker memcg
aware") maintains the per-node per-memcg shrinker bitmap for THP
shrinker. But there's no such logic in shmem. Make shmem set the
per-memcg per-node shrinker bits when it modifies inodes to have
shrinkable pages.
Fixes: b0dedc49a2da ("mm/vmscan.c: iterate only over charged shrinkers during memcg shrink_slab()")
Cc: <stable(a)vger.kernel.org> # 4.19+
Signed-off-by: Greg Thelen <gthelen(a)google.com>
---
mm/shmem.c | 61 +++++++++++++++++++++++++++++++-----------------------
1 file changed, 35 insertions(+), 26 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index bd8840082c94..e11090f78cb5 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1002,6 +1002,33 @@ static int shmem_getattr(const struct path *path, struct kstat *stat,
return 0;
}
+/*
+ * Expose inode and optional page to shrinker as having a possibly splittable
+ * hugepage that reaches beyond i_size.
+ */
+static void shmem_shrinker_add(struct shmem_sb_info *sbinfo,
+ struct inode *inode, struct page *page)
+{
+ struct shmem_inode_info *info = SHMEM_I(inode);
+
+ spin_lock(&sbinfo->shrinklist_lock);
+ /*
+ * _careful to defend against unlocked access to ->shrink_list in
+ * shmem_unused_huge_shrink()
+ */
+ if (list_empty_careful(&info->shrinklist)) {
+ list_add_tail(&info->shrinklist, &sbinfo->shrinklist);
+ sbinfo->shrinklist_len++;
+ }
+ spin_unlock(&sbinfo->shrinklist_lock);
+
+#ifdef CONFIG_MEMCG
+ if (page && PageTransHuge(page))
+ memcg_set_shrinker_bit(page->mem_cgroup, page_to_nid(page),
+ inode->i_sb->s_shrink.id);
+#endif
+}
+
static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -1048,17 +1075,13 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
* to shrink under memory pressure.
*/
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
- spin_lock(&sbinfo->shrinklist_lock);
- /*
- * _careful to defend against unlocked access to
- * ->shrink_list in shmem_unused_huge_shrink()
- */
- if (list_empty_careful(&info->shrinklist)) {
- list_add_tail(&info->shrinklist,
- &sbinfo->shrinklist);
- sbinfo->shrinklist_len++;
- }
- spin_unlock(&sbinfo->shrinklist_lock);
+ struct page *page;
+
+ page = find_get_page(inode->i_mapping,
+ (newsize & HPAGE_PMD_MASK) >> PAGE_SHIFT);
+ shmem_shrinker_add(sbinfo, inode, page);
+ if (page)
+ put_page(page);
}
}
}
@@ -1889,21 +1912,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
if (PageTransHuge(page) &&
DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
hindex + HPAGE_PMD_NR - 1) {
- /*
- * Part of the huge page is beyond i_size: subject
- * to shrink under memory pressure.
- */
- spin_lock(&sbinfo->shrinklist_lock);
- /*
- * _careful to defend against unlocked access to
- * ->shrink_list in shmem_unused_huge_shrink()
- */
- if (list_empty_careful(&info->shrinklist)) {
- list_add_tail(&info->shrinklist,
- &sbinfo->shrinklist);
- sbinfo->shrinklist_len++;
- }
- spin_unlock(&sbinfo->shrinklist_lock);
+ shmem_shrinker_add(sbinfo, inode, page);
}
/*
--
2.27.0.rc0.183.gde8f92d652-goog
The patch titled
Subject: kernel/relay.c: handle alloc_percpu returning NULL in relay_open
has been removed from the -mm tree. Its filename was
relay-handle-alloc_percpu-returning-null-in-relay_open.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Daniel Axtens <dja(a)axtens.net>
Subject: kernel/relay.c: handle alloc_percpu returning NULL in relay_open
alloc_percpu() may return NULL, which means chan->buf may be set to NULL.
In that case, when we do *per_cpu_ptr(chan->buf, ...), we dereference an
invalid pointer:
BUG: Unable to handle kernel data access at 0x7dae0000
Faulting instruction address: 0xc0000000003f3fec
...
NIP [c0000000003f3fec] relay_open+0x29c/0x600
LR [c0000000003f3fc0] relay_open+0x270/0x600
Call Trace:
[c000000054353a70] [c0000000003f3fb4] relay_open+0x264/0x600 (unreliable)
[c000000054353b00] [c000000000451764] __blk_trace_setup+0x254/0x600
[c000000054353bb0] [c000000000451b78] blk_trace_setup+0x68/0xa0
[c000000054353c10] [c0000000010da77c] sg_ioctl+0x7bc/0x2e80
[c000000054353cd0] [c000000000758cbc] do_vfs_ioctl+0x13c/0x1300
[c000000054353d90] [c000000000759f14] ksys_ioctl+0x94/0x130
[c000000054353de0] [c000000000759ff8] sys_ioctl+0x48/0xb0
[c000000054353e20] [c00000000000bcd0] system_call+0x5c/0x68
Check if alloc_percpu returns NULL.
This was found by syzkaller both on x86 and powerpc, and the reproducer it
found on powerpc is capable of hitting the issue as an unprivileged user.
Link: http://lkml.kernel.org/r/20191219121256.26480-1-dja@axtens.net
Fixes: 017c59c042d0 ("relay: Use per CPU constructs for the relay channel buffer pointers")
Signed-off-by: Daniel Axtens <dja(a)axtens.net>
Reviewed-by: Michael Ellerman <mpe(a)ellerman.id.au>
Reviewed-by: Andrew Donnellan <ajd(a)linux.ibm.com>
Acked-by: David Rientjes <rientjes(a)google.com>
Reported-by: syzbot+1e925b4b836afe85a1c6(a)syzkaller-ppc64.appspotmail.com
Reported-by: syzbot+587b2421926808309d21(a)syzkaller-ppc64.appspotmail.com
Reported-by: syzbot+58320b7171734bf79d26(a)syzkaller.appspotmail.com
Reported-by: syzbot+d6074fb08bdb2e010520(a)syzkaller.appspotmail.com
Cc: Akash Goel <akash.goel(a)intel.com>
Cc: Andrew Donnellan <ajd(a)linux.ibm.com>
Cc: Guenter Roeck <linux(a)roeck-us.net>
Cc: Salvatore Bonaccorso <carnil(a)debian.org>
Cc: <stable(a)vger.kernel.org> [4.10+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/relay.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/kernel/relay.c~relay-handle-alloc_percpu-returning-null-in-relay_open
+++ a/kernel/relay.c
@@ -581,6 +581,11 @@ struct rchan *relay_open(const char *bas
return NULL;
chan->buf = alloc_percpu(struct rchan_buf *);
+ if (!chan->buf) {
+ kfree(chan);
+ return NULL;
+ }
+
chan->version = RELAYFS_CHANNEL_VERSION;
chan->n_subbufs = n_subbufs;
chan->subbuf_size = subbuf_size;
_
Patches currently in -mm which might be from dja(a)axtens.net are
Luis reports that, when reverse debugging with GDB, single-step does not
function as expected on arm64:
| I've noticed, under very specific conditions, that a PTRACE_SINGLESTEP
| request by GDB won't execute the underlying instruction. As a consequence,
| the PC doesn't move, but we return a SIGTRAP just like we would for a
| regular successful PTRACE_SINGLESTEP request.
The underlying problem is that when the CPU register state is restored
as part of a reverse step, the SPSR.SS bit is cleared and so the hardware
single-step state can transition to the "active-pending" state, causing
an unexpected step exception to be taken immediately if a step operation
is attempted.
In hindsight, we probably shouldn't have exposed SPSR.SS in the pstate
accessible by the GPR regset, but it's a bit late for that now. Instead,
simply prevent userspace from configuring the bit to a value which is
inconsistent with the TIF_SINGLESTEP state for the task being traced.
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/1eed6d69-d53d-9657-1fc9-c089be07f98c@linaro.org
Reported-by: Luis Machado <luis.machado(a)linaro.org>
Tested-by: Luis Machado <luis.machado(a)linaro.org>
Signed-off-by: Will Deacon <will(a)kernel.org>
---
arch/arm64/include/asm/debug-monitors.h | 2 ++
arch/arm64/kernel/debug-monitors.c | 20 ++++++++++++++++----
arch/arm64/kernel/ptrace.c | 4 ++--
arch/arm64/kernel/signal.c | 6 +++++-
4 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index e5ceea213e39..0b298f48f5bf 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -109,6 +109,8 @@ void disable_debug_monitors(enum dbg_active_el el);
void user_rewind_single_step(struct task_struct *task);
void user_fastforward_single_step(struct task_struct *task);
+void user_regs_reset_single_step(struct user_pt_regs *regs,
+ struct task_struct *task);
void kernel_enable_single_step(struct pt_regs *regs);
void kernel_disable_single_step(void);
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 15e80c876d46..732e7ecaa692 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -141,17 +141,20 @@ postcore_initcall(debug_monitors_init);
/*
* Single step API and exception handling.
*/
-static void set_regs_spsr_ss(struct pt_regs *regs)
+static void set_user_regs_spsr_ss(struct user_pt_regs *regs)
{
regs->pstate |= DBG_SPSR_SS;
}
-NOKPROBE_SYMBOL(set_regs_spsr_ss);
+NOKPROBE_SYMBOL(set_user_regs_spsr_ss);
-static void clear_regs_spsr_ss(struct pt_regs *regs)
+static void clear_user_regs_spsr_ss(struct user_pt_regs *regs)
{
regs->pstate &= ~DBG_SPSR_SS;
}
-NOKPROBE_SYMBOL(clear_regs_spsr_ss);
+NOKPROBE_SYMBOL(clear_user_regs_spsr_ss);
+
+#define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs)
+#define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs)
static DEFINE_SPINLOCK(debug_hook_lock);
static LIST_HEAD(user_step_hook);
@@ -402,6 +405,15 @@ void user_fastforward_single_step(struct task_struct *task)
clear_regs_spsr_ss(task_pt_regs(task));
}
+void user_regs_reset_single_step(struct user_pt_regs *regs,
+ struct task_struct *task)
+{
+ if (test_tsk_thread_flag(task, TIF_SINGLESTEP))
+ set_user_regs_spsr_ss(regs);
+ else
+ clear_user_regs_spsr_ss(regs);
+}
+
/* Kernel API */
void kernel_enable_single_step(struct pt_regs *regs)
{
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 585dd7f5c826..e871ab3ab29b 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1934,8 +1934,8 @@ static int valid_native_regs(struct user_pt_regs *regs)
*/
int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task)
{
- if (!test_tsk_thread_flag(task, TIF_SINGLESTEP))
- regs->pstate &= ~DBG_SPSR_SS;
+ /* https://lore.kernel.org/lkml/20191118131525.GA4180@willie-the-truck */
+ user_regs_reset_single_step(regs, task);
if (is_compat_thread(task_thread_info(task)))
return valid_compat_regs(regs);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 801d56cdf701..c57a077f66cf 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -505,8 +505,12 @@ static int restore_sigframe(struct pt_regs *regs,
forget_syscall(regs);
err |= !valid_user_regs(&regs->user_regs, current);
- if (err == 0)
+
+ if (err == 0) {
+ /* Make it look like we stepped the sigreturn system call */
+ user_fastforward_single_step(current);
err = parse_user_sigframe(&user, sf);
+ }
if (err == 0 && system_supports_fpsimd()) {
if (!user.fpsimd)
--
2.27.0.rc2.251.g90737beb825-goog